{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 34278,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 2.9173230643561467e-05,
      "grad_norm": 54.045951829468514,
      "learning_rate": 9.718172983479106e-09,
      "loss": 3.307,
      "step": 1
    },
    {
      "epoch": 5.8346461287122933e-05,
      "grad_norm": 59.45590508211042,
      "learning_rate": 1.943634596695821e-08,
      "loss": 3.4011,
      "step": 2
    },
    {
      "epoch": 8.751969193068441e-05,
      "grad_norm": 56.232314342880564,
      "learning_rate": 2.915451895043732e-08,
      "loss": 3.3368,
      "step": 3
    },
    {
      "epoch": 0.00011669292257424587,
      "grad_norm": 57.02148351734131,
      "learning_rate": 3.887269193391642e-08,
      "loss": 3.349,
      "step": 4
    },
    {
      "epoch": 0.00014586615321780734,
      "grad_norm": 55.620710576506895,
      "learning_rate": 4.8590864917395535e-08,
      "loss": 3.2952,
      "step": 5
    },
    {
      "epoch": 0.00017503938386136881,
      "grad_norm": 60.152501431945254,
      "learning_rate": 5.830903790087464e-08,
      "loss": 3.4352,
      "step": 6
    },
    {
      "epoch": 0.0002042126145049303,
      "grad_norm": 63.889700682468856,
      "learning_rate": 6.802721088435375e-08,
      "loss": 3.493,
      "step": 7
    },
    {
      "epoch": 0.00023338584514849173,
      "grad_norm": 50.90441079107094,
      "learning_rate": 7.774538386783285e-08,
      "loss": 3.2058,
      "step": 8
    },
    {
      "epoch": 0.00026255907579205323,
      "grad_norm": 56.09070079855593,
      "learning_rate": 8.746355685131196e-08,
      "loss": 3.293,
      "step": 9
    },
    {
      "epoch": 0.0002917323064356147,
      "grad_norm": 63.61971462398412,
      "learning_rate": 9.718172983479107e-08,
      "loss": 3.46,
      "step": 10
    },
    {
      "epoch": 0.00032090553707917613,
      "grad_norm": 60.29565511961232,
      "learning_rate": 1.0689990281827017e-07,
      "loss": 3.4424,
      "step": 11
    },
    {
      "epoch": 0.00035007876772273763,
      "grad_norm": 52.44151203167023,
      "learning_rate": 1.1661807580174928e-07,
      "loss": 3.22,
      "step": 12
    },
    {
      "epoch": 0.0003792519983662991,
      "grad_norm": 58.19796349152196,
      "learning_rate": 1.263362487852284e-07,
      "loss": 3.4233,
      "step": 13
    },
    {
      "epoch": 0.0004084252290098606,
      "grad_norm": 61.45277974737471,
      "learning_rate": 1.360544217687075e-07,
      "loss": 3.4018,
      "step": 14
    },
    {
      "epoch": 0.000437598459653422,
      "grad_norm": 51.96697093650594,
      "learning_rate": 1.457725947521866e-07,
      "loss": 3.206,
      "step": 15
    },
    {
      "epoch": 0.00046677169029698347,
      "grad_norm": 53.54038814993851,
      "learning_rate": 1.554907677356657e-07,
      "loss": 3.2888,
      "step": 16
    },
    {
      "epoch": 0.000495944920940545,
      "grad_norm": 57.73223559145958,
      "learning_rate": 1.6520894071914482e-07,
      "loss": 3.3793,
      "step": 17
    },
    {
      "epoch": 0.0005251181515841065,
      "grad_norm": 60.66841021414202,
      "learning_rate": 1.7492711370262392e-07,
      "loss": 3.3604,
      "step": 18
    },
    {
      "epoch": 0.0005542913822276679,
      "grad_norm": 48.39521870204028,
      "learning_rate": 1.8464528668610302e-07,
      "loss": 3.149,
      "step": 19
    },
    {
      "epoch": 0.0005834646128712294,
      "grad_norm": 55.805932545996086,
      "learning_rate": 1.9436345966958214e-07,
      "loss": 3.3099,
      "step": 20
    },
    {
      "epoch": 0.0006126378435147909,
      "grad_norm": 55.822401672096156,
      "learning_rate": 2.0408163265306121e-07,
      "loss": 3.3476,
      "step": 21
    },
    {
      "epoch": 0.0006418110741583523,
      "grad_norm": 48.00227667036085,
      "learning_rate": 2.1379980563654034e-07,
      "loss": 3.1326,
      "step": 22
    },
    {
      "epoch": 0.0006709843048019138,
      "grad_norm": 46.23671337498092,
      "learning_rate": 2.2351797862001946e-07,
      "loss": 3.0579,
      "step": 23
    },
    {
      "epoch": 0.0007001575354454753,
      "grad_norm": 49.215337803463065,
      "learning_rate": 2.3323615160349856e-07,
      "loss": 3.1714,
      "step": 24
    },
    {
      "epoch": 0.0007293307660890366,
      "grad_norm": 50.0964962852148,
      "learning_rate": 2.429543245869777e-07,
      "loss": 3.2463,
      "step": 25
    },
    {
      "epoch": 0.0007585039967325981,
      "grad_norm": 40.48331009532296,
      "learning_rate": 2.526724975704568e-07,
      "loss": 2.8898,
      "step": 26
    },
    {
      "epoch": 0.0007876772273761596,
      "grad_norm": 43.58796335693116,
      "learning_rate": 2.623906705539359e-07,
      "loss": 3.0225,
      "step": 27
    },
    {
      "epoch": 0.0008168504580197211,
      "grad_norm": 50.27078559700237,
      "learning_rate": 2.72108843537415e-07,
      "loss": 3.2008,
      "step": 28
    },
    {
      "epoch": 0.0008460236886632825,
      "grad_norm": 42.62875679870639,
      "learning_rate": 2.818270165208941e-07,
      "loss": 2.9526,
      "step": 29
    },
    {
      "epoch": 0.000875196919306844,
      "grad_norm": 36.538631056416364,
      "learning_rate": 2.915451895043732e-07,
      "loss": 2.7081,
      "step": 30
    },
    {
      "epoch": 0.0009043701499504055,
      "grad_norm": 37.95410524150656,
      "learning_rate": 3.0126336248785234e-07,
      "loss": 2.7338,
      "step": 31
    },
    {
      "epoch": 0.0009335433805939669,
      "grad_norm": 38.13933215569488,
      "learning_rate": 3.109815354713314e-07,
      "loss": 2.7104,
      "step": 32
    },
    {
      "epoch": 0.0009627166112375284,
      "grad_norm": 35.171458371816755,
      "learning_rate": 3.2069970845481054e-07,
      "loss": 2.6817,
      "step": 33
    },
    {
      "epoch": 0.00099188984188109,
      "grad_norm": 34.74362617495035,
      "learning_rate": 3.3041788143828963e-07,
      "loss": 2.652,
      "step": 34
    },
    {
      "epoch": 0.0010210630725246514,
      "grad_norm": 35.15349620528716,
      "learning_rate": 3.401360544217688e-07,
      "loss": 2.6788,
      "step": 35
    },
    {
      "epoch": 0.001050236303168213,
      "grad_norm": 34.67235666928212,
      "learning_rate": 3.4985422740524783e-07,
      "loss": 2.5883,
      "step": 36
    },
    {
      "epoch": 0.0010794095338117742,
      "grad_norm": 32.38172424222036,
      "learning_rate": 3.5957240038872693e-07,
      "loss": 2.5411,
      "step": 37
    },
    {
      "epoch": 0.0011085827644553357,
      "grad_norm": 33.1882021752588,
      "learning_rate": 3.6929057337220603e-07,
      "loss": 2.573,
      "step": 38
    },
    {
      "epoch": 0.0011377559950988972,
      "grad_norm": 32.09588599180164,
      "learning_rate": 3.790087463556852e-07,
      "loss": 2.5205,
      "step": 39
    },
    {
      "epoch": 0.0011669292257424587,
      "grad_norm": 30.64571408525671,
      "learning_rate": 3.887269193391643e-07,
      "loss": 2.3672,
      "step": 40
    },
    {
      "epoch": 0.0011961024563860202,
      "grad_norm": 27.741409245103288,
      "learning_rate": 3.984450923226434e-07,
      "loss": 2.2191,
      "step": 41
    },
    {
      "epoch": 0.0012252756870295817,
      "grad_norm": 28.002704456507587,
      "learning_rate": 4.0816326530612243e-07,
      "loss": 2.1296,
      "step": 42
    },
    {
      "epoch": 0.0012544489176731432,
      "grad_norm": 27.01102683162873,
      "learning_rate": 4.1788143828960163e-07,
      "loss": 2.0846,
      "step": 43
    },
    {
      "epoch": 0.0012836221483167045,
      "grad_norm": 27.30622428117336,
      "learning_rate": 4.275996112730807e-07,
      "loss": 2.0225,
      "step": 44
    },
    {
      "epoch": 0.001312795378960266,
      "grad_norm": 30.44286494588882,
      "learning_rate": 4.373177842565598e-07,
      "loss": 1.9974,
      "step": 45
    },
    {
      "epoch": 0.0013419686096038275,
      "grad_norm": 41.43761884824672,
      "learning_rate": 4.4703595724003893e-07,
      "loss": 1.9598,
      "step": 46
    },
    {
      "epoch": 0.001371141840247389,
      "grad_norm": 83.41599647174692,
      "learning_rate": 4.5675413022351803e-07,
      "loss": 1.8443,
      "step": 47
    },
    {
      "epoch": 0.0014003150708909505,
      "grad_norm": 109.62322489197544,
      "learning_rate": 4.6647230320699713e-07,
      "loss": 1.8672,
      "step": 48
    },
    {
      "epoch": 0.001429488301534512,
      "grad_norm": 112.88366758582197,
      "learning_rate": 4.7619047619047623e-07,
      "loss": 1.7683,
      "step": 49
    },
    {
      "epoch": 0.0014586615321780733,
      "grad_norm": 96.52880293362055,
      "learning_rate": 4.859086491739554e-07,
      "loss": 1.7578,
      "step": 50
    },
    {
      "epoch": 0.0014878347628216348,
      "grad_norm": 51.58173404640064,
      "learning_rate": 4.956268221574345e-07,
      "loss": 1.6946,
      "step": 51
    },
    {
      "epoch": 0.0015170079934651963,
      "grad_norm": 98.63939811986286,
      "learning_rate": 5.053449951409136e-07,
      "loss": 1.624,
      "step": 52
    },
    {
      "epoch": 0.0015461812241087578,
      "grad_norm": 392.52631989640827,
      "learning_rate": 5.150631681243927e-07,
      "loss": 1.6404,
      "step": 53
    },
    {
      "epoch": 0.0015753544547523193,
      "grad_norm": 36.83694097250143,
      "learning_rate": 5.247813411078718e-07,
      "loss": 1.621,
      "step": 54
    },
    {
      "epoch": 0.0016045276853958808,
      "grad_norm": 37.944539281225644,
      "learning_rate": 5.344995140913509e-07,
      "loss": 1.5761,
      "step": 55
    },
    {
      "epoch": 0.0016337009160394423,
      "grad_norm": 41.05549932657623,
      "learning_rate": 5.4421768707483e-07,
      "loss": 1.6411,
      "step": 56
    },
    {
      "epoch": 0.0016628741466830036,
      "grad_norm": 42.30992951788921,
      "learning_rate": 5.539358600583091e-07,
      "loss": 1.5739,
      "step": 57
    },
    {
      "epoch": 0.001692047377326565,
      "grad_norm": 38.06215707218943,
      "learning_rate": 5.636540330417882e-07,
      "loss": 1.4234,
      "step": 58
    },
    {
      "epoch": 0.0017212206079701266,
      "grad_norm": 29.2367743854925,
      "learning_rate": 5.733722060252673e-07,
      "loss": 1.4642,
      "step": 59
    },
    {
      "epoch": 0.001750393838613688,
      "grad_norm": 25.18278161139425,
      "learning_rate": 5.830903790087464e-07,
      "loss": 1.3755,
      "step": 60
    },
    {
      "epoch": 0.0017795670692572496,
      "grad_norm": 18.831005340291743,
      "learning_rate": 5.928085519922256e-07,
      "loss": 1.2975,
      "step": 61
    },
    {
      "epoch": 0.001808740299900811,
      "grad_norm": 19.21243668324547,
      "learning_rate": 6.025267249757047e-07,
      "loss": 1.2641,
      "step": 62
    },
    {
      "epoch": 0.0018379135305443726,
      "grad_norm": 18.402347207240606,
      "learning_rate": 6.122448979591837e-07,
      "loss": 1.2559,
      "step": 63
    },
    {
      "epoch": 0.0018670867611879339,
      "grad_norm": 10.38479986233958,
      "learning_rate": 6.219630709426628e-07,
      "loss": 1.2621,
      "step": 64
    },
    {
      "epoch": 0.0018962599918314954,
      "grad_norm": 11.240970680762699,
      "learning_rate": 6.316812439261419e-07,
      "loss": 1.2273,
      "step": 65
    },
    {
      "epoch": 0.0019254332224750569,
      "grad_norm": 9.30095359552287,
      "learning_rate": 6.413994169096211e-07,
      "loss": 1.1881,
      "step": 66
    },
    {
      "epoch": 0.0019546064531186184,
      "grad_norm": 8.727234288494008,
      "learning_rate": 6.511175898931002e-07,
      "loss": 1.201,
      "step": 67
    },
    {
      "epoch": 0.00198377968376218,
      "grad_norm": 8.448178343565178,
      "learning_rate": 6.608357628765793e-07,
      "loss": 1.199,
      "step": 68
    },
    {
      "epoch": 0.0020129529144057414,
      "grad_norm": 7.605640693479639,
      "learning_rate": 6.705539358600584e-07,
      "loss": 1.1633,
      "step": 69
    },
    {
      "epoch": 0.002042126145049303,
      "grad_norm": 7.763773063041981,
      "learning_rate": 6.802721088435376e-07,
      "loss": 1.157,
      "step": 70
    },
    {
      "epoch": 0.0020712993756928644,
      "grad_norm": 7.83234540673427,
      "learning_rate": 6.899902818270166e-07,
      "loss": 1.127,
      "step": 71
    },
    {
      "epoch": 0.002100472606336426,
      "grad_norm": 7.751860059488426,
      "learning_rate": 6.997084548104957e-07,
      "loss": 1.1411,
      "step": 72
    },
    {
      "epoch": 0.0021296458369799874,
      "grad_norm": 7.221011120013558,
      "learning_rate": 7.094266277939748e-07,
      "loss": 1.1508,
      "step": 73
    },
    {
      "epoch": 0.0021588190676235484,
      "grad_norm": 7.249742675328467,
      "learning_rate": 7.191448007774539e-07,
      "loss": 1.1322,
      "step": 74
    },
    {
      "epoch": 0.00218799229826711,
      "grad_norm": 7.41932368070449,
      "learning_rate": 7.288629737609331e-07,
      "loss": 1.1169,
      "step": 75
    },
    {
      "epoch": 0.0022171655289106714,
      "grad_norm": 6.84055899908655,
      "learning_rate": 7.385811467444121e-07,
      "loss": 1.1396,
      "step": 76
    },
    {
      "epoch": 0.002246338759554233,
      "grad_norm": 7.371084361682207,
      "learning_rate": 7.482993197278913e-07,
      "loss": 1.1648,
      "step": 77
    },
    {
      "epoch": 0.0022755119901977944,
      "grad_norm": 7.28952170579296,
      "learning_rate": 7.580174927113704e-07,
      "loss": 1.0947,
      "step": 78
    },
    {
      "epoch": 0.002304685220841356,
      "grad_norm": 6.60461471141226,
      "learning_rate": 7.677356656948494e-07,
      "loss": 1.0815,
      "step": 79
    },
    {
      "epoch": 0.0023338584514849174,
      "grad_norm": 6.855555588321384,
      "learning_rate": 7.774538386783286e-07,
      "loss": 1.0779,
      "step": 80
    },
    {
      "epoch": 0.002363031682128479,
      "grad_norm": 7.464498057697863,
      "learning_rate": 7.871720116618077e-07,
      "loss": 1.0906,
      "step": 81
    },
    {
      "epoch": 0.0023922049127720404,
      "grad_norm": 6.971944178717365,
      "learning_rate": 7.968901846452868e-07,
      "loss": 1.0776,
      "step": 82
    },
    {
      "epoch": 0.002421378143415602,
      "grad_norm": 6.441529733700574,
      "learning_rate": 8.066083576287659e-07,
      "loss": 1.0514,
      "step": 83
    },
    {
      "epoch": 0.0024505513740591634,
      "grad_norm": 6.06148020983748,
      "learning_rate": 8.163265306122449e-07,
      "loss": 1.0736,
      "step": 84
    },
    {
      "epoch": 0.002479724604702725,
      "grad_norm": 6.444048939489742,
      "learning_rate": 8.260447035957241e-07,
      "loss": 1.0877,
      "step": 85
    },
    {
      "epoch": 0.0025088978353462865,
      "grad_norm": 6.181458160460038,
      "learning_rate": 8.357628765792033e-07,
      "loss": 1.0835,
      "step": 86
    },
    {
      "epoch": 0.0025380710659898475,
      "grad_norm": 5.922607770428193,
      "learning_rate": 8.454810495626823e-07,
      "loss": 1.0658,
      "step": 87
    },
    {
      "epoch": 0.002567244296633409,
      "grad_norm": 5.542619628627248,
      "learning_rate": 8.551992225461614e-07,
      "loss": 1.0226,
      "step": 88
    },
    {
      "epoch": 0.0025964175272769705,
      "grad_norm": 5.747595143803563,
      "learning_rate": 8.649173955296406e-07,
      "loss": 1.0332,
      "step": 89
    },
    {
      "epoch": 0.002625590757920532,
      "grad_norm": 5.402496013260269,
      "learning_rate": 8.746355685131196e-07,
      "loss": 1.0308,
      "step": 90
    },
    {
      "epoch": 0.0026547639885640935,
      "grad_norm": 5.562058138188112,
      "learning_rate": 8.843537414965988e-07,
      "loss": 1.059,
      "step": 91
    },
    {
      "epoch": 0.002683937219207655,
      "grad_norm": 5.361866485657315,
      "learning_rate": 8.940719144800779e-07,
      "loss": 1.0442,
      "step": 92
    },
    {
      "epoch": 0.0027131104498512165,
      "grad_norm": 5.351336135979188,
      "learning_rate": 9.037900874635569e-07,
      "loss": 1.0374,
      "step": 93
    },
    {
      "epoch": 0.002742283680494778,
      "grad_norm": 4.839716796476288,
      "learning_rate": 9.135082604470361e-07,
      "loss": 1.0648,
      "step": 94
    },
    {
      "epoch": 0.0027714569111383395,
      "grad_norm": 4.894278673171512,
      "learning_rate": 9.23226433430515e-07,
      "loss": 1.059,
      "step": 95
    },
    {
      "epoch": 0.002800630141781901,
      "grad_norm": 4.840493645110819,
      "learning_rate": 9.329446064139943e-07,
      "loss": 1.036,
      "step": 96
    },
    {
      "epoch": 0.0028298033724254625,
      "grad_norm": 4.641389959250609,
      "learning_rate": 9.426627793974734e-07,
      "loss": 1.0267,
      "step": 97
    },
    {
      "epoch": 0.002858976603069024,
      "grad_norm": 4.781491164120611,
      "learning_rate": 9.523809523809525e-07,
      "loss": 1.0354,
      "step": 98
    },
    {
      "epoch": 0.0028881498337125855,
      "grad_norm": 4.583366378736798,
      "learning_rate": 9.620991253644317e-07,
      "loss": 1.0251,
      "step": 99
    },
    {
      "epoch": 0.0029173230643561466,
      "grad_norm": 4.825726016243985,
      "learning_rate": 9.718172983479108e-07,
      "loss": 1.0144,
      "step": 100
    },
    {
      "epoch": 0.002946496294999708,
      "grad_norm": 4.556217342908838,
      "learning_rate": 9.815354713313896e-07,
      "loss": 0.9955,
      "step": 101
    },
    {
      "epoch": 0.0029756695256432696,
      "grad_norm": 4.625039174353454,
      "learning_rate": 9.91253644314869e-07,
      "loss": 0.9907,
      "step": 102
    },
    {
      "epoch": 0.003004842756286831,
      "grad_norm": 4.68840635703037,
      "learning_rate": 1.000971817298348e-06,
      "loss": 1.0229,
      "step": 103
    },
    {
      "epoch": 0.0030340159869303926,
      "grad_norm": 4.743602647176867,
      "learning_rate": 1.0106899902818272e-06,
      "loss": 1.0059,
      "step": 104
    },
    {
      "epoch": 0.003063189217573954,
      "grad_norm": 4.731460723266639,
      "learning_rate": 1.0204081632653063e-06,
      "loss": 1.0157,
      "step": 105
    },
    {
      "epoch": 0.0030923624482175156,
      "grad_norm": 4.747413103995433,
      "learning_rate": 1.0301263362487854e-06,
      "loss": 1.0059,
      "step": 106
    },
    {
      "epoch": 0.003121535678861077,
      "grad_norm": 4.8678737085012616,
      "learning_rate": 1.0398445092322645e-06,
      "loss": 1.0222,
      "step": 107
    },
    {
      "epoch": 0.0031507089095046386,
      "grad_norm": 4.836400548026891,
      "learning_rate": 1.0495626822157436e-06,
      "loss": 1.0162,
      "step": 108
    },
    {
      "epoch": 0.0031798821401482,
      "grad_norm": 4.847008660412388,
      "learning_rate": 1.0592808551992226e-06,
      "loss": 1.0279,
      "step": 109
    },
    {
      "epoch": 0.0032090553707917616,
      "grad_norm": 4.987535364124845,
      "learning_rate": 1.0689990281827017e-06,
      "loss": 1.0224,
      "step": 110
    },
    {
      "epoch": 0.003238228601435323,
      "grad_norm": 4.779277015313041,
      "learning_rate": 1.0787172011661808e-06,
      "loss": 1.0064,
      "step": 111
    },
    {
      "epoch": 0.0032674018320788846,
      "grad_norm": 4.903732341827149,
      "learning_rate": 1.08843537414966e-06,
      "loss": 1.0306,
      "step": 112
    },
    {
      "epoch": 0.003296575062722446,
      "grad_norm": 5.067311367747353,
      "learning_rate": 1.098153547133139e-06,
      "loss": 0.9965,
      "step": 113
    },
    {
      "epoch": 0.003325748293366007,
      "grad_norm": 5.147921224622042,
      "learning_rate": 1.1078717201166181e-06,
      "loss": 0.9921,
      "step": 114
    },
    {
      "epoch": 0.0033549215240095687,
      "grad_norm": 5.143403518771765,
      "learning_rate": 1.1175898931000972e-06,
      "loss": 0.9971,
      "step": 115
    },
    {
      "epoch": 0.00338409475465313,
      "grad_norm": 5.219385919357969,
      "learning_rate": 1.1273080660835763e-06,
      "loss": 1.006,
      "step": 116
    },
    {
      "epoch": 0.0034132679852966917,
      "grad_norm": 5.132465962986135,
      "learning_rate": 1.1370262390670554e-06,
      "loss": 0.9755,
      "step": 117
    },
    {
      "epoch": 0.003442441215940253,
      "grad_norm": 5.195001396279438,
      "learning_rate": 1.1467444120505345e-06,
      "loss": 0.9852,
      "step": 118
    },
    {
      "epoch": 0.0034716144465838147,
      "grad_norm": 5.258841826130356,
      "learning_rate": 1.1564625850340136e-06,
      "loss": 0.9595,
      "step": 119
    },
    {
      "epoch": 0.003500787677227376,
      "grad_norm": 5.511324577831448,
      "learning_rate": 1.1661807580174927e-06,
      "loss": 0.978,
      "step": 120
    },
    {
      "epoch": 0.0035299609078709377,
      "grad_norm": 5.615991123547827,
      "learning_rate": 1.1758989310009718e-06,
      "loss": 1.0011,
      "step": 121
    },
    {
      "epoch": 0.003559134138514499,
      "grad_norm": 5.366948772170658,
      "learning_rate": 1.1856171039844512e-06,
      "loss": 0.9758,
      "step": 122
    },
    {
      "epoch": 0.0035883073691580607,
      "grad_norm": 5.615408935080543,
      "learning_rate": 1.19533527696793e-06,
      "loss": 0.9831,
      "step": 123
    },
    {
      "epoch": 0.003617480599801622,
      "grad_norm": 5.754702397510861,
      "learning_rate": 1.2050534499514093e-06,
      "loss": 1.0104,
      "step": 124
    },
    {
      "epoch": 0.0036466538304451837,
      "grad_norm": 5.842477132807181,
      "learning_rate": 1.2147716229348884e-06,
      "loss": 0.994,
      "step": 125
    },
    {
      "epoch": 0.003675827061088745,
      "grad_norm": 6.011047654181344,
      "learning_rate": 1.2244897959183673e-06,
      "loss": 0.9818,
      "step": 126
    },
    {
      "epoch": 0.0037050002917323062,
      "grad_norm": 6.192699227110393,
      "learning_rate": 1.2342079689018466e-06,
      "loss": 0.9825,
      "step": 127
    },
    {
      "epoch": 0.0037341735223758677,
      "grad_norm": 6.216835153057091,
      "learning_rate": 1.2439261418853255e-06,
      "loss": 0.9925,
      "step": 128
    },
    {
      "epoch": 0.0037633467530194292,
      "grad_norm": 6.194530008226965,
      "learning_rate": 1.2536443148688048e-06,
      "loss": 0.9706,
      "step": 129
    },
    {
      "epoch": 0.0037925199836629907,
      "grad_norm": 6.438872617277369,
      "learning_rate": 1.2633624878522837e-06,
      "loss": 0.9501,
      "step": 130
    },
    {
      "epoch": 0.0038216932143065522,
      "grad_norm": 6.408646071205254,
      "learning_rate": 1.2730806608357628e-06,
      "loss": 0.9656,
      "step": 131
    },
    {
      "epoch": 0.0038508664449501137,
      "grad_norm": 6.8926656915994515,
      "learning_rate": 1.2827988338192421e-06,
      "loss": 0.9717,
      "step": 132
    },
    {
      "epoch": 0.0038800396755936752,
      "grad_norm": 6.786347115572355,
      "learning_rate": 1.2925170068027212e-06,
      "loss": 0.9648,
      "step": 133
    },
    {
      "epoch": 0.003909212906237237,
      "grad_norm": 7.347768619737329,
      "learning_rate": 1.3022351797862003e-06,
      "loss": 0.955,
      "step": 134
    },
    {
      "epoch": 0.003938386136880798,
      "grad_norm": 7.829610945379658,
      "learning_rate": 1.3119533527696792e-06,
      "loss": 0.9522,
      "step": 135
    },
    {
      "epoch": 0.00396755936752436,
      "grad_norm": 7.863112355202574,
      "learning_rate": 1.3216715257531585e-06,
      "loss": 0.9546,
      "step": 136
    },
    {
      "epoch": 0.003996732598167921,
      "grad_norm": 8.05567637619532,
      "learning_rate": 1.3313896987366376e-06,
      "loss": 0.9559,
      "step": 137
    },
    {
      "epoch": 0.004025905828811483,
      "grad_norm": 8.437260079464115,
      "learning_rate": 1.3411078717201167e-06,
      "loss": 0.9535,
      "step": 138
    },
    {
      "epoch": 0.004055079059455044,
      "grad_norm": 8.870074634709532,
      "learning_rate": 1.3508260447035958e-06,
      "loss": 0.9651,
      "step": 139
    },
    {
      "epoch": 0.004084252290098606,
      "grad_norm": 9.374992282612821,
      "learning_rate": 1.3605442176870751e-06,
      "loss": 0.9671,
      "step": 140
    },
    {
      "epoch": 0.004113425520742167,
      "grad_norm": 9.475276853310358,
      "learning_rate": 1.370262390670554e-06,
      "loss": 0.9546,
      "step": 141
    },
    {
      "epoch": 0.004142598751385729,
      "grad_norm": 9.937411320393801,
      "learning_rate": 1.3799805636540331e-06,
      "loss": 0.9264,
      "step": 142
    },
    {
      "epoch": 0.00417177198202929,
      "grad_norm": 9.452404801305743,
      "learning_rate": 1.3896987366375122e-06,
      "loss": 0.9081,
      "step": 143
    },
    {
      "epoch": 0.004200945212672852,
      "grad_norm": 10.388070855265655,
      "learning_rate": 1.3994169096209913e-06,
      "loss": 0.9031,
      "step": 144
    },
    {
      "epoch": 0.004230118443316413,
      "grad_norm": 10.395794080232319,
      "learning_rate": 1.4091350826044706e-06,
      "loss": 0.954,
      "step": 145
    },
    {
      "epoch": 0.004259291673959975,
      "grad_norm": 10.814640721322968,
      "learning_rate": 1.4188532555879495e-06,
      "loss": 0.8961,
      "step": 146
    },
    {
      "epoch": 0.004288464904603536,
      "grad_norm": 10.711966415412752,
      "learning_rate": 1.4285714285714286e-06,
      "loss": 0.9116,
      "step": 147
    },
    {
      "epoch": 0.004317638135247097,
      "grad_norm": 10.875004401781785,
      "learning_rate": 1.4382896015549077e-06,
      "loss": 0.9104,
      "step": 148
    },
    {
      "epoch": 0.004346811365890659,
      "grad_norm": 11.176620547920438,
      "learning_rate": 1.4480077745383868e-06,
      "loss": 0.9196,
      "step": 149
    },
    {
      "epoch": 0.00437598459653422,
      "grad_norm": 11.122481269522007,
      "learning_rate": 1.4577259475218661e-06,
      "loss": 0.9329,
      "step": 150
    },
    {
      "epoch": 0.004405157827177782,
      "grad_norm": 10.9934949208899,
      "learning_rate": 1.4674441205053452e-06,
      "loss": 0.914,
      "step": 151
    },
    {
      "epoch": 0.004434331057821343,
      "grad_norm": 11.621354462329585,
      "learning_rate": 1.4771622934888241e-06,
      "loss": 0.9149,
      "step": 152
    },
    {
      "epoch": 0.004463504288464905,
      "grad_norm": 11.18837643736424,
      "learning_rate": 1.4868804664723032e-06,
      "loss": 0.8847,
      "step": 153
    },
    {
      "epoch": 0.004492677519108466,
      "grad_norm": 11.098190668768323,
      "learning_rate": 1.4965986394557825e-06,
      "loss": 0.9389,
      "step": 154
    },
    {
      "epoch": 0.004521850749752028,
      "grad_norm": 11.596961654297534,
      "learning_rate": 1.5063168124392616e-06,
      "loss": 0.8751,
      "step": 155
    },
    {
      "epoch": 0.004551023980395589,
      "grad_norm": 11.940546503160725,
      "learning_rate": 1.5160349854227407e-06,
      "loss": 0.8757,
      "step": 156
    },
    {
      "epoch": 0.004580197211039151,
      "grad_norm": 11.701906764397489,
      "learning_rate": 1.5257531584062196e-06,
      "loss": 0.8986,
      "step": 157
    },
    {
      "epoch": 0.004609370441682712,
      "grad_norm": 11.7691566034306,
      "learning_rate": 1.5354713313896987e-06,
      "loss": 0.8803,
      "step": 158
    },
    {
      "epoch": 0.004638543672326274,
      "grad_norm": 12.287601839750279,
      "learning_rate": 1.545189504373178e-06,
      "loss": 0.8606,
      "step": 159
    },
    {
      "epoch": 0.004667716902969835,
      "grad_norm": 11.789567332957226,
      "learning_rate": 1.5549076773566571e-06,
      "loss": 0.9111,
      "step": 160
    },
    {
      "epoch": 0.004696890133613396,
      "grad_norm": 11.945030037944933,
      "learning_rate": 1.5646258503401362e-06,
      "loss": 0.9047,
      "step": 161
    },
    {
      "epoch": 0.004726063364256958,
      "grad_norm": 12.339148912685063,
      "learning_rate": 1.5743440233236153e-06,
      "loss": 0.8572,
      "step": 162
    },
    {
      "epoch": 0.004755236594900519,
      "grad_norm": 12.183905694288685,
      "learning_rate": 1.5840621963070942e-06,
      "loss": 0.8671,
      "step": 163
    },
    {
      "epoch": 0.004784409825544081,
      "grad_norm": 12.041744890895613,
      "learning_rate": 1.5937803692905735e-06,
      "loss": 0.8553,
      "step": 164
    },
    {
      "epoch": 0.004813583056187642,
      "grad_norm": 11.930518409419967,
      "learning_rate": 1.6034985422740526e-06,
      "loss": 0.8616,
      "step": 165
    },
    {
      "epoch": 0.004842756286831204,
      "grad_norm": 12.3466248439367,
      "learning_rate": 1.6132167152575317e-06,
      "loss": 0.8784,
      "step": 166
    },
    {
      "epoch": 0.004871929517474765,
      "grad_norm": 12.35820643078063,
      "learning_rate": 1.6229348882410108e-06,
      "loss": 0.8461,
      "step": 167
    },
    {
      "epoch": 0.004901102748118327,
      "grad_norm": 12.234820859625453,
      "learning_rate": 1.6326530612244897e-06,
      "loss": 0.8581,
      "step": 168
    },
    {
      "epoch": 0.004930275978761888,
      "grad_norm": 11.976455501465534,
      "learning_rate": 1.642371234207969e-06,
      "loss": 0.8453,
      "step": 169
    },
    {
      "epoch": 0.00495944920940545,
      "grad_norm": 11.42338346944746,
      "learning_rate": 1.6520894071914481e-06,
      "loss": 0.8628,
      "step": 170
    },
    {
      "epoch": 0.004988622440049011,
      "grad_norm": 12.615894588712042,
      "learning_rate": 1.6618075801749272e-06,
      "loss": 0.855,
      "step": 171
    },
    {
      "epoch": 0.005017795670692573,
      "grad_norm": 12.10212199035121,
      "learning_rate": 1.6715257531584065e-06,
      "loss": 0.8667,
      "step": 172
    },
    {
      "epoch": 0.005046968901336134,
      "grad_norm": 12.553028710083943,
      "learning_rate": 1.6812439261418856e-06,
      "loss": 0.8406,
      "step": 173
    },
    {
      "epoch": 0.005076142131979695,
      "grad_norm": 12.344688693467962,
      "learning_rate": 1.6909620991253645e-06,
      "loss": 0.8336,
      "step": 174
    },
    {
      "epoch": 0.005105315362623257,
      "grad_norm": 12.228384478751527,
      "learning_rate": 1.7006802721088436e-06,
      "loss": 0.8372,
      "step": 175
    },
    {
      "epoch": 0.005134488593266818,
      "grad_norm": 12.03649962216464,
      "learning_rate": 1.7103984450923227e-06,
      "loss": 0.8211,
      "step": 176
    },
    {
      "epoch": 0.00516366182391038,
      "grad_norm": 12.123341825107705,
      "learning_rate": 1.720116618075802e-06,
      "loss": 0.8637,
      "step": 177
    },
    {
      "epoch": 0.005192835054553941,
      "grad_norm": 12.068774423302306,
      "learning_rate": 1.7298347910592811e-06,
      "loss": 0.8329,
      "step": 178
    },
    {
      "epoch": 0.005222008285197503,
      "grad_norm": 11.806571153776764,
      "learning_rate": 1.73955296404276e-06,
      "loss": 0.8477,
      "step": 179
    },
    {
      "epoch": 0.005251181515841064,
      "grad_norm": 12.046593563644189,
      "learning_rate": 1.7492711370262391e-06,
      "loss": 0.84,
      "step": 180
    },
    {
      "epoch": 0.005280354746484626,
      "grad_norm": 11.77316188026381,
      "learning_rate": 1.7589893100097182e-06,
      "loss": 0.8441,
      "step": 181
    },
    {
      "epoch": 0.005309527977128187,
      "grad_norm": 11.85419644496021,
      "learning_rate": 1.7687074829931975e-06,
      "loss": 0.8339,
      "step": 182
    },
    {
      "epoch": 0.005338701207771749,
      "grad_norm": 11.944891604233412,
      "learning_rate": 1.7784256559766766e-06,
      "loss": 0.8214,
      "step": 183
    },
    {
      "epoch": 0.00536787443841531,
      "grad_norm": 12.159880560789963,
      "learning_rate": 1.7881438289601557e-06,
      "loss": 0.7971,
      "step": 184
    },
    {
      "epoch": 0.005397047669058872,
      "grad_norm": 11.91412578384619,
      "learning_rate": 1.7978620019436346e-06,
      "loss": 0.7957,
      "step": 185
    },
    {
      "epoch": 0.005426220899702433,
      "grad_norm": 11.911521353949063,
      "learning_rate": 1.8075801749271137e-06,
      "loss": 0.808,
      "step": 186
    },
    {
      "epoch": 0.005455394130345994,
      "grad_norm": 12.062509293974587,
      "learning_rate": 1.817298347910593e-06,
      "loss": 0.7909,
      "step": 187
    },
    {
      "epoch": 0.005484567360989556,
      "grad_norm": 11.600416772936738,
      "learning_rate": 1.8270165208940721e-06,
      "loss": 0.8154,
      "step": 188
    },
    {
      "epoch": 0.005513740591633117,
      "grad_norm": 12.03027410387505,
      "learning_rate": 1.8367346938775512e-06,
      "loss": 0.7985,
      "step": 189
    },
    {
      "epoch": 0.005542913822276679,
      "grad_norm": 11.931153892179784,
      "learning_rate": 1.84645286686103e-06,
      "loss": 0.7597,
      "step": 190
    },
    {
      "epoch": 0.00557208705292024,
      "grad_norm": 11.454083915717893,
      "learning_rate": 1.8561710398445092e-06,
      "loss": 0.787,
      "step": 191
    },
    {
      "epoch": 0.005601260283563802,
      "grad_norm": 11.909385634464144,
      "learning_rate": 1.8658892128279885e-06,
      "loss": 0.7678,
      "step": 192
    },
    {
      "epoch": 0.005630433514207363,
      "grad_norm": 12.009181246049002,
      "learning_rate": 1.8756073858114676e-06,
      "loss": 0.7739,
      "step": 193
    },
    {
      "epoch": 0.005659606744850925,
      "grad_norm": 11.646051235739243,
      "learning_rate": 1.8853255587949467e-06,
      "loss": 0.7679,
      "step": 194
    },
    {
      "epoch": 0.005688779975494486,
      "grad_norm": 11.64121860283326,
      "learning_rate": 1.895043731778426e-06,
      "loss": 0.7798,
      "step": 195
    },
    {
      "epoch": 0.005717953206138048,
      "grad_norm": 11.567810719508575,
      "learning_rate": 1.904761904761905e-06,
      "loss": 0.7368,
      "step": 196
    },
    {
      "epoch": 0.005747126436781609,
      "grad_norm": 12.010866229070107,
      "learning_rate": 1.914480077745384e-06,
      "loss": 0.7623,
      "step": 197
    },
    {
      "epoch": 0.005776299667425171,
      "grad_norm": 11.883917388929667,
      "learning_rate": 1.9241982507288633e-06,
      "loss": 0.7268,
      "step": 198
    },
    {
      "epoch": 0.005805472898068732,
      "grad_norm": 11.72346628079956,
      "learning_rate": 1.933916423712342e-06,
      "loss": 0.7368,
      "step": 199
    },
    {
      "epoch": 0.005834646128712293,
      "grad_norm": 12.110677939017009,
      "learning_rate": 1.9436345966958215e-06,
      "loss": 0.7469,
      "step": 200
    },
    {
      "epoch": 0.005863819359355855,
      "grad_norm": 11.783926774204087,
      "learning_rate": 1.9533527696793004e-06,
      "loss": 0.7279,
      "step": 201
    },
    {
      "epoch": 0.005892992589999416,
      "grad_norm": 11.60066904064106,
      "learning_rate": 1.9630709426627793e-06,
      "loss": 0.7358,
      "step": 202
    },
    {
      "epoch": 0.005922165820642978,
      "grad_norm": 11.201533202218338,
      "learning_rate": 1.9727891156462586e-06,
      "loss": 0.7407,
      "step": 203
    },
    {
      "epoch": 0.005951339051286539,
      "grad_norm": 11.767916494241318,
      "learning_rate": 1.982507288629738e-06,
      "loss": 0.7033,
      "step": 204
    },
    {
      "epoch": 0.005980512281930101,
      "grad_norm": 11.57038811045555,
      "learning_rate": 1.992225461613217e-06,
      "loss": 0.7102,
      "step": 205
    },
    {
      "epoch": 0.006009685512573662,
      "grad_norm": 12.001815514280002,
      "learning_rate": 2.001943634596696e-06,
      "loss": 0.6806,
      "step": 206
    },
    {
      "epoch": 0.006038858743217224,
      "grad_norm": 11.79524440382437,
      "learning_rate": 2.011661807580175e-06,
      "loss": 0.7051,
      "step": 207
    },
    {
      "epoch": 0.006068031973860785,
      "grad_norm": 12.383127140344047,
      "learning_rate": 2.0213799805636543e-06,
      "loss": 0.6937,
      "step": 208
    },
    {
      "epoch": 0.006097205204504347,
      "grad_norm": 11.84018887052034,
      "learning_rate": 2.031098153547133e-06,
      "loss": 0.6978,
      "step": 209
    },
    {
      "epoch": 0.006126378435147908,
      "grad_norm": 11.447041483789565,
      "learning_rate": 2.0408163265306125e-06,
      "loss": 0.6793,
      "step": 210
    },
    {
      "epoch": 0.00615555166579147,
      "grad_norm": 11.658341946520713,
      "learning_rate": 2.050534499514092e-06,
      "loss": 0.6889,
      "step": 211
    },
    {
      "epoch": 0.006184724896435031,
      "grad_norm": 12.123436638014988,
      "learning_rate": 2.0602526724975707e-06,
      "loss": 0.684,
      "step": 212
    },
    {
      "epoch": 0.006213898127078592,
      "grad_norm": 11.504916523431982,
      "learning_rate": 2.0699708454810496e-06,
      "loss": 0.7034,
      "step": 213
    },
    {
      "epoch": 0.006243071357722154,
      "grad_norm": 11.538239186493822,
      "learning_rate": 2.079689018464529e-06,
      "loss": 0.6744,
      "step": 214
    },
    {
      "epoch": 0.006272244588365715,
      "grad_norm": 11.980938305971051,
      "learning_rate": 2.089407191448008e-06,
      "loss": 0.6989,
      "step": 215
    },
    {
      "epoch": 0.006301417819009277,
      "grad_norm": 11.945898901146133,
      "learning_rate": 2.099125364431487e-06,
      "loss": 0.6497,
      "step": 216
    },
    {
      "epoch": 0.006330591049652838,
      "grad_norm": 11.739472956996947,
      "learning_rate": 2.1088435374149664e-06,
      "loss": 0.6457,
      "step": 217
    },
    {
      "epoch": 0.0063597642802964,
      "grad_norm": 11.384022274679449,
      "learning_rate": 2.1185617103984453e-06,
      "loss": 0.6612,
      "step": 218
    },
    {
      "epoch": 0.006388937510939961,
      "grad_norm": 11.40822270680767,
      "learning_rate": 2.128279883381924e-06,
      "loss": 0.678,
      "step": 219
    },
    {
      "epoch": 0.006418110741583523,
      "grad_norm": 11.553583332185259,
      "learning_rate": 2.1379980563654035e-06,
      "loss": 0.6624,
      "step": 220
    },
    {
      "epoch": 0.006447283972227084,
      "grad_norm": 11.939577591021372,
      "learning_rate": 2.147716229348883e-06,
      "loss": 0.6379,
      "step": 221
    },
    {
      "epoch": 0.006476457202870646,
      "grad_norm": 11.7091863365697,
      "learning_rate": 2.1574344023323617e-06,
      "loss": 0.619,
      "step": 222
    },
    {
      "epoch": 0.006505630433514207,
      "grad_norm": 11.346639699567936,
      "learning_rate": 2.1671525753158406e-06,
      "loss": 0.6678,
      "step": 223
    },
    {
      "epoch": 0.006534803664157769,
      "grad_norm": 11.758854403205179,
      "learning_rate": 2.17687074829932e-06,
      "loss": 0.6105,
      "step": 224
    },
    {
      "epoch": 0.00656397689480133,
      "grad_norm": 11.473014914788202,
      "learning_rate": 2.1865889212827988e-06,
      "loss": 0.6417,
      "step": 225
    },
    {
      "epoch": 0.006593150125444892,
      "grad_norm": 12.027836450653655,
      "learning_rate": 2.196307094266278e-06,
      "loss": 0.6289,
      "step": 226
    },
    {
      "epoch": 0.006622323356088453,
      "grad_norm": 11.884689757159778,
      "learning_rate": 2.2060252672497574e-06,
      "loss": 0.5889,
      "step": 227
    },
    {
      "epoch": 0.006651496586732014,
      "grad_norm": 11.468961426808615,
      "learning_rate": 2.2157434402332363e-06,
      "loss": 0.6089,
      "step": 228
    },
    {
      "epoch": 0.006680669817375576,
      "grad_norm": 11.446624636013864,
      "learning_rate": 2.225461613216715e-06,
      "loss": 0.5994,
      "step": 229
    },
    {
      "epoch": 0.006709843048019137,
      "grad_norm": 11.66395812991183,
      "learning_rate": 2.2351797862001945e-06,
      "loss": 0.6043,
      "step": 230
    },
    {
      "epoch": 0.006739016278662699,
      "grad_norm": 11.332469127007451,
      "learning_rate": 2.244897959183674e-06,
      "loss": 0.5903,
      "step": 231
    },
    {
      "epoch": 0.00676818950930626,
      "grad_norm": 11.471237317246185,
      "learning_rate": 2.2546161321671527e-06,
      "loss": 0.6114,
      "step": 232
    },
    {
      "epoch": 0.006797362739949822,
      "grad_norm": 11.850247614856915,
      "learning_rate": 2.264334305150632e-06,
      "loss": 0.5855,
      "step": 233
    },
    {
      "epoch": 0.006826535970593383,
      "grad_norm": 11.82730517467475,
      "learning_rate": 2.274052478134111e-06,
      "loss": 0.5949,
      "step": 234
    },
    {
      "epoch": 0.006855709201236945,
      "grad_norm": 11.485264437660307,
      "learning_rate": 2.28377065111759e-06,
      "loss": 0.5551,
      "step": 235
    },
    {
      "epoch": 0.006884882431880506,
      "grad_norm": 11.405145187429145,
      "learning_rate": 2.293488824101069e-06,
      "loss": 0.5722,
      "step": 236
    },
    {
      "epoch": 0.006914055662524068,
      "grad_norm": 11.660399592734256,
      "learning_rate": 2.3032069970845484e-06,
      "loss": 0.5748,
      "step": 237
    },
    {
      "epoch": 0.006943228893167629,
      "grad_norm": 11.439655601904152,
      "learning_rate": 2.3129251700680273e-06,
      "loss": 0.5499,
      "step": 238
    },
    {
      "epoch": 0.006972402123811191,
      "grad_norm": 11.708246461632152,
      "learning_rate": 2.3226433430515066e-06,
      "loss": 0.559,
      "step": 239
    },
    {
      "epoch": 0.007001575354454752,
      "grad_norm": 11.53076305043417,
      "learning_rate": 2.3323615160349855e-06,
      "loss": 0.5765,
      "step": 240
    },
    {
      "epoch": 0.007030748585098313,
      "grad_norm": 11.210280964251877,
      "learning_rate": 2.342079689018465e-06,
      "loss": 0.549,
      "step": 241
    },
    {
      "epoch": 0.007059921815741875,
      "grad_norm": 11.496267401715498,
      "learning_rate": 2.3517978620019437e-06,
      "loss": 0.5216,
      "step": 242
    },
    {
      "epoch": 0.007089095046385436,
      "grad_norm": 11.055364756441806,
      "learning_rate": 2.361516034985423e-06,
      "loss": 0.5352,
      "step": 243
    },
    {
      "epoch": 0.007118268277028998,
      "grad_norm": 11.642585506110569,
      "learning_rate": 2.3712342079689023e-06,
      "loss": 0.5391,
      "step": 244
    },
    {
      "epoch": 0.007147441507672559,
      "grad_norm": 11.407627642817618,
      "learning_rate": 2.380952380952381e-06,
      "loss": 0.5056,
      "step": 245
    },
    {
      "epoch": 0.007176614738316121,
      "grad_norm": 11.082367037594938,
      "learning_rate": 2.39067055393586e-06,
      "loss": 0.5396,
      "step": 246
    },
    {
      "epoch": 0.007205787968959682,
      "grad_norm": 11.441394058724788,
      "learning_rate": 2.4003887269193394e-06,
      "loss": 0.5244,
      "step": 247
    },
    {
      "epoch": 0.007234961199603244,
      "grad_norm": 11.296712351311688,
      "learning_rate": 2.4101068999028187e-06,
      "loss": 0.5261,
      "step": 248
    },
    {
      "epoch": 0.007264134430246805,
      "grad_norm": 10.950446712681304,
      "learning_rate": 2.4198250728862976e-06,
      "loss": 0.5049,
      "step": 249
    },
    {
      "epoch": 0.007293307660890367,
      "grad_norm": 11.19605427329169,
      "learning_rate": 2.429543245869777e-06,
      "loss": 0.5067,
      "step": 250
    },
    {
      "epoch": 0.007322480891533928,
      "grad_norm": 11.412942617701502,
      "learning_rate": 2.4392614188532558e-06,
      "loss": 0.4921,
      "step": 251
    },
    {
      "epoch": 0.00735165412217749,
      "grad_norm": 11.100669247591972,
      "learning_rate": 2.4489795918367347e-06,
      "loss": 0.48,
      "step": 252
    },
    {
      "epoch": 0.007380827352821051,
      "grad_norm": 11.16500214937961,
      "learning_rate": 2.458697764820214e-06,
      "loss": 0.4881,
      "step": 253
    },
    {
      "epoch": 0.0074100005834646125,
      "grad_norm": 10.921514466016799,
      "learning_rate": 2.4684159378036933e-06,
      "loss": 0.496,
      "step": 254
    },
    {
      "epoch": 0.007439173814108174,
      "grad_norm": 11.13552901328187,
      "learning_rate": 2.478134110787172e-06,
      "loss": 0.4621,
      "step": 255
    },
    {
      "epoch": 0.0074683470447517355,
      "grad_norm": 10.988723529484387,
      "learning_rate": 2.487852283770651e-06,
      "loss": 0.4794,
      "step": 256
    },
    {
      "epoch": 0.007497520275395297,
      "grad_norm": 10.592447527785755,
      "learning_rate": 2.4975704567541304e-06,
      "loss": 0.4642,
      "step": 257
    },
    {
      "epoch": 0.0075266935060388585,
      "grad_norm": 11.09648035063937,
      "learning_rate": 2.5072886297376097e-06,
      "loss": 0.4454,
      "step": 258
    },
    {
      "epoch": 0.00755586673668242,
      "grad_norm": 10.789756784936234,
      "learning_rate": 2.5170068027210886e-06,
      "loss": 0.484,
      "step": 259
    },
    {
      "epoch": 0.0075850399673259815,
      "grad_norm": 10.505112737452256,
      "learning_rate": 2.5267249757045675e-06,
      "loss": 0.4742,
      "step": 260
    },
    {
      "epoch": 0.007614213197969543,
      "grad_norm": 10.198260105944357,
      "learning_rate": 2.5364431486880468e-06,
      "loss": 0.463,
      "step": 261
    },
    {
      "epoch": 0.0076433864286131045,
      "grad_norm": 10.58117157693541,
      "learning_rate": 2.5461613216715257e-06,
      "loss": 0.4485,
      "step": 262
    },
    {
      "epoch": 0.007672559659256666,
      "grad_norm": 10.057462462246159,
      "learning_rate": 2.5558794946550054e-06,
      "loss": 0.4607,
      "step": 263
    },
    {
      "epoch": 0.0077017328899002275,
      "grad_norm": 10.244038062005783,
      "learning_rate": 2.5655976676384843e-06,
      "loss": 0.4314,
      "step": 264
    },
    {
      "epoch": 0.007730906120543789,
      "grad_norm": 10.243916239162006,
      "learning_rate": 2.575315840621963e-06,
      "loss": 0.4033,
      "step": 265
    },
    {
      "epoch": 0.0077600793511873505,
      "grad_norm": 10.133218763856435,
      "learning_rate": 2.5850340136054425e-06,
      "loss": 0.4201,
      "step": 266
    },
    {
      "epoch": 0.0077892525818309116,
      "grad_norm": 9.992581101051586,
      "learning_rate": 2.5947521865889214e-06,
      "loss": 0.4381,
      "step": 267
    },
    {
      "epoch": 0.007818425812474473,
      "grad_norm": 9.799748051020952,
      "learning_rate": 2.6044703595724007e-06,
      "loss": 0.4254,
      "step": 268
    },
    {
      "epoch": 0.007847599043118035,
      "grad_norm": 10.097500004633288,
      "learning_rate": 2.6141885325558796e-06,
      "loss": 0.3934,
      "step": 269
    },
    {
      "epoch": 0.007876772273761596,
      "grad_norm": 9.710073580467833,
      "learning_rate": 2.6239067055393585e-06,
      "loss": 0.4059,
      "step": 270
    },
    {
      "epoch": 0.007905945504405158,
      "grad_norm": 9.505183836821201,
      "learning_rate": 2.633624878522838e-06,
      "loss": 0.3774,
      "step": 271
    },
    {
      "epoch": 0.00793511873504872,
      "grad_norm": 9.1929482166285,
      "learning_rate": 2.643343051506317e-06,
      "loss": 0.3996,
      "step": 272
    },
    {
      "epoch": 0.007964291965692281,
      "grad_norm": 9.364972993584722,
      "learning_rate": 2.6530612244897964e-06,
      "loss": 0.3946,
      "step": 273
    },
    {
      "epoch": 0.007993465196335842,
      "grad_norm": 8.870779249745748,
      "learning_rate": 2.6627793974732753e-06,
      "loss": 0.3545,
      "step": 274
    },
    {
      "epoch": 0.008022638426979404,
      "grad_norm": 8.735956325839938,
      "learning_rate": 2.6724975704567546e-06,
      "loss": 0.3629,
      "step": 275
    },
    {
      "epoch": 0.008051811657622965,
      "grad_norm": 8.576383992093225,
      "learning_rate": 2.6822157434402335e-06,
      "loss": 0.3931,
      "step": 276
    },
    {
      "epoch": 0.008080984888266527,
      "grad_norm": 8.555201112815267,
      "learning_rate": 2.6919339164237124e-06,
      "loss": 0.3627,
      "step": 277
    },
    {
      "epoch": 0.008110158118910088,
      "grad_norm": 7.984072846166019,
      "learning_rate": 2.7016520894071917e-06,
      "loss": 0.3729,
      "step": 278
    },
    {
      "epoch": 0.00813933134955365,
      "grad_norm": 8.08509797796882,
      "learning_rate": 2.7113702623906706e-06,
      "loss": 0.3879,
      "step": 279
    },
    {
      "epoch": 0.008168504580197211,
      "grad_norm": 7.80702870942227,
      "learning_rate": 2.7210884353741503e-06,
      "loss": 0.3499,
      "step": 280
    },
    {
      "epoch": 0.008197677810840772,
      "grad_norm": 7.574259010158371,
      "learning_rate": 2.730806608357629e-06,
      "loss": 0.3568,
      "step": 281
    },
    {
      "epoch": 0.008226851041484334,
      "grad_norm": 7.222573175239761,
      "learning_rate": 2.740524781341108e-06,
      "loss": 0.3348,
      "step": 282
    },
    {
      "epoch": 0.008256024272127896,
      "grad_norm": 6.879930759003031,
      "learning_rate": 2.7502429543245874e-06,
      "loss": 0.3409,
      "step": 283
    },
    {
      "epoch": 0.008285197502771458,
      "grad_norm": 6.978015023639886,
      "learning_rate": 2.7599611273080663e-06,
      "loss": 0.3651,
      "step": 284
    },
    {
      "epoch": 0.008314370733415018,
      "grad_norm": 6.358843281212613,
      "learning_rate": 2.7696793002915456e-06,
      "loss": 0.3603,
      "step": 285
    },
    {
      "epoch": 0.00834354396405858,
      "grad_norm": 6.320642016108672,
      "learning_rate": 2.7793974732750245e-06,
      "loss": 0.3388,
      "step": 286
    },
    {
      "epoch": 0.008372717194702142,
      "grad_norm": 6.32099186551206,
      "learning_rate": 2.7891156462585034e-06,
      "loss": 0.3689,
      "step": 287
    },
    {
      "epoch": 0.008401890425345704,
      "grad_norm": 5.807201388060067,
      "learning_rate": 2.7988338192419827e-06,
      "loss": 0.3409,
      "step": 288
    },
    {
      "epoch": 0.008431063655989264,
      "grad_norm": 5.647941332066215,
      "learning_rate": 2.8085519922254615e-06,
      "loss": 0.3326,
      "step": 289
    },
    {
      "epoch": 0.008460236886632826,
      "grad_norm": 5.453137115098798,
      "learning_rate": 2.8182701652089413e-06,
      "loss": 0.343,
      "step": 290
    },
    {
      "epoch": 0.008489410117276388,
      "grad_norm": 5.12504239664104,
      "learning_rate": 2.82798833819242e-06,
      "loss": 0.3198,
      "step": 291
    },
    {
      "epoch": 0.00851858334791995,
      "grad_norm": 5.063327780982386,
      "learning_rate": 2.837706511175899e-06,
      "loss": 0.2995,
      "step": 292
    },
    {
      "epoch": 0.00854775657856351,
      "grad_norm": 4.675748098209446,
      "learning_rate": 2.8474246841593784e-06,
      "loss": 0.3412,
      "step": 293
    },
    {
      "epoch": 0.008576929809207072,
      "grad_norm": 4.66246409593304,
      "learning_rate": 2.8571428571428573e-06,
      "loss": 0.2962,
      "step": 294
    },
    {
      "epoch": 0.008606103039850634,
      "grad_norm": 4.919686861392447,
      "learning_rate": 2.8668610301263366e-06,
      "loss": 0.3284,
      "step": 295
    },
    {
      "epoch": 0.008635276270494194,
      "grad_norm": 4.431848908970389,
      "learning_rate": 2.8765792031098155e-06,
      "loss": 0.3167,
      "step": 296
    },
    {
      "epoch": 0.008664449501137756,
      "grad_norm": 4.4158568891954335,
      "learning_rate": 2.8862973760932948e-06,
      "loss": 0.3108,
      "step": 297
    },
    {
      "epoch": 0.008693622731781318,
      "grad_norm": 3.998923406763035,
      "learning_rate": 2.8960155490767737e-06,
      "loss": 0.314,
      "step": 298
    },
    {
      "epoch": 0.00872279596242488,
      "grad_norm": 3.944104628252764,
      "learning_rate": 2.9057337220602525e-06,
      "loss": 0.3043,
      "step": 299
    },
    {
      "epoch": 0.00875196919306844,
      "grad_norm": 3.7899421781554365,
      "learning_rate": 2.9154518950437323e-06,
      "loss": 0.3225,
      "step": 300
    },
    {
      "epoch": 0.008781142423712002,
      "grad_norm": 3.552107196564264,
      "learning_rate": 2.925170068027211e-06,
      "loss": 0.2831,
      "step": 301
    },
    {
      "epoch": 0.008810315654355564,
      "grad_norm": 3.5812651862591265,
      "learning_rate": 2.9348882410106905e-06,
      "loss": 0.3414,
      "step": 302
    },
    {
      "epoch": 0.008839488884999126,
      "grad_norm": 3.5160124869214493,
      "learning_rate": 2.9446064139941694e-06,
      "loss": 0.3229,
      "step": 303
    },
    {
      "epoch": 0.008868662115642686,
      "grad_norm": 3.6089633308193254,
      "learning_rate": 2.9543245869776482e-06,
      "loss": 0.3126,
      "step": 304
    },
    {
      "epoch": 0.008897835346286248,
      "grad_norm": 3.5453234107237632,
      "learning_rate": 2.9640427599611276e-06,
      "loss": 0.3235,
      "step": 305
    },
    {
      "epoch": 0.00892700857692981,
      "grad_norm": 3.5035962731904386,
      "learning_rate": 2.9737609329446064e-06,
      "loss": 0.3105,
      "step": 306
    },
    {
      "epoch": 0.00895618180757337,
      "grad_norm": 3.7501205724538558,
      "learning_rate": 2.983479105928086e-06,
      "loss": 0.3153,
      "step": 307
    },
    {
      "epoch": 0.008985355038216932,
      "grad_norm": 3.394941486669191,
      "learning_rate": 2.993197278911565e-06,
      "loss": 0.3196,
      "step": 308
    },
    {
      "epoch": 0.009014528268860494,
      "grad_norm": 3.241172414342122,
      "learning_rate": 3.002915451895044e-06,
      "loss": 0.2779,
      "step": 309
    },
    {
      "epoch": 0.009043701499504056,
      "grad_norm": 3.1934920458602507,
      "learning_rate": 3.0126336248785233e-06,
      "loss": 0.3087,
      "step": 310
    },
    {
      "epoch": 0.009072874730147616,
      "grad_norm": 3.2268698451322657,
      "learning_rate": 3.022351797862002e-06,
      "loss": 0.2996,
      "step": 311
    },
    {
      "epoch": 0.009102047960791178,
      "grad_norm": 3.1635587205391626,
      "learning_rate": 3.0320699708454815e-06,
      "loss": 0.3178,
      "step": 312
    },
    {
      "epoch": 0.00913122119143474,
      "grad_norm": 3.0093458116750313,
      "learning_rate": 3.0417881438289604e-06,
      "loss": 0.2968,
      "step": 313
    },
    {
      "epoch": 0.009160394422078302,
      "grad_norm": 2.741170367470087,
      "learning_rate": 3.0515063168124392e-06,
      "loss": 0.2976,
      "step": 314
    },
    {
      "epoch": 0.009189567652721862,
      "grad_norm": 2.9455145470142714,
      "learning_rate": 3.0612244897959185e-06,
      "loss": 0.2977,
      "step": 315
    },
    {
      "epoch": 0.009218740883365424,
      "grad_norm": 2.7230192545396723,
      "learning_rate": 3.0709426627793974e-06,
      "loss": 0.3148,
      "step": 316
    },
    {
      "epoch": 0.009247914114008986,
      "grad_norm": 2.6993941016970457,
      "learning_rate": 3.080660835762877e-06,
      "loss": 0.3047,
      "step": 317
    },
    {
      "epoch": 0.009277087344652548,
      "grad_norm": 2.5951026957119185,
      "learning_rate": 3.090379008746356e-06,
      "loss": 0.304,
      "step": 318
    },
    {
      "epoch": 0.009306260575296108,
      "grad_norm": 2.754713376574571,
      "learning_rate": 3.1000971817298354e-06,
      "loss": 0.2758,
      "step": 319
    },
    {
      "epoch": 0.00933543380593967,
      "grad_norm": 2.608129232034051,
      "learning_rate": 3.1098153547133143e-06,
      "loss": 0.3091,
      "step": 320
    },
    {
      "epoch": 0.009364607036583232,
      "grad_norm": 2.3588671050032035,
      "learning_rate": 3.119533527696793e-06,
      "loss": 0.2734,
      "step": 321
    },
    {
      "epoch": 0.009393780267226792,
      "grad_norm": 2.576246544800748,
      "learning_rate": 3.1292517006802725e-06,
      "loss": 0.3143,
      "step": 322
    },
    {
      "epoch": 0.009422953497870354,
      "grad_norm": 2.442115567378784,
      "learning_rate": 3.1389698736637513e-06,
      "loss": 0.3121,
      "step": 323
    },
    {
      "epoch": 0.009452126728513916,
      "grad_norm": 2.5068164870128395,
      "learning_rate": 3.1486880466472307e-06,
      "loss": 0.3202,
      "step": 324
    },
    {
      "epoch": 0.009481299959157478,
      "grad_norm": 2.606783819651253,
      "learning_rate": 3.1584062196307095e-06,
      "loss": 0.2937,
      "step": 325
    },
    {
      "epoch": 0.009510473189801038,
      "grad_norm": 2.1178884108003984,
      "learning_rate": 3.1681243926141884e-06,
      "loss": 0.271,
      "step": 326
    },
    {
      "epoch": 0.0095396464204446,
      "grad_norm": 2.0652110483668404,
      "learning_rate": 3.177842565597668e-06,
      "loss": 0.2843,
      "step": 327
    },
    {
      "epoch": 0.009568819651088162,
      "grad_norm": 2.204974423982823,
      "learning_rate": 3.187560738581147e-06,
      "loss": 0.2592,
      "step": 328
    },
    {
      "epoch": 0.009597992881731724,
      "grad_norm": 2.0655925911611805,
      "learning_rate": 3.1972789115646264e-06,
      "loss": 0.2789,
      "step": 329
    },
    {
      "epoch": 0.009627166112375284,
      "grad_norm": 1.9531825316794622,
      "learning_rate": 3.2069970845481052e-06,
      "loss": 0.2545,
      "step": 330
    },
    {
      "epoch": 0.009656339343018846,
      "grad_norm": 2.1590601507754106,
      "learning_rate": 3.216715257531584e-06,
      "loss": 0.2606,
      "step": 331
    },
    {
      "epoch": 0.009685512573662408,
      "grad_norm": 2.0642596187813056,
      "learning_rate": 3.2264334305150634e-06,
      "loss": 0.2892,
      "step": 332
    },
    {
      "epoch": 0.009714685804305968,
      "grad_norm": 1.945148069761541,
      "learning_rate": 3.2361516034985423e-06,
      "loss": 0.2509,
      "step": 333
    },
    {
      "epoch": 0.00974385903494953,
      "grad_norm": 2.0957514084341544,
      "learning_rate": 3.2458697764820216e-06,
      "loss": 0.2663,
      "step": 334
    },
    {
      "epoch": 0.009773032265593092,
      "grad_norm": 2.094914155968183,
      "learning_rate": 3.2555879494655005e-06,
      "loss": 0.2792,
      "step": 335
    },
    {
      "epoch": 0.009802205496236654,
      "grad_norm": 2.153157228626012,
      "learning_rate": 3.2653061224489794e-06,
      "loss": 0.2866,
      "step": 336
    },
    {
      "epoch": 0.009831378726880214,
      "grad_norm": 1.8843072897963975,
      "learning_rate": 3.275024295432459e-06,
      "loss": 0.2772,
      "step": 337
    },
    {
      "epoch": 0.009860551957523776,
      "grad_norm": 1.9095121160251654,
      "learning_rate": 3.284742468415938e-06,
      "loss": 0.2631,
      "step": 338
    },
    {
      "epoch": 0.009889725188167338,
      "grad_norm": 2.1063348860832543,
      "learning_rate": 3.2944606413994174e-06,
      "loss": 0.2663,
      "step": 339
    },
    {
      "epoch": 0.0099188984188109,
      "grad_norm": 1.640192998600247,
      "learning_rate": 3.3041788143828962e-06,
      "loss": 0.3219,
      "step": 340
    },
    {
      "epoch": 0.00994807164945446,
      "grad_norm": 1.8783143584476323,
      "learning_rate": 3.3138969873663755e-06,
      "loss": 0.2576,
      "step": 341
    },
    {
      "epoch": 0.009977244880098022,
      "grad_norm": 1.8743606251045954,
      "learning_rate": 3.3236151603498544e-06,
      "loss": 0.3015,
      "step": 342
    },
    {
      "epoch": 0.010006418110741584,
      "grad_norm": 2.3756269137859913,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 0.3041,
      "step": 343
    },
    {
      "epoch": 0.010035591341385146,
      "grad_norm": 1.820683881601083,
      "learning_rate": 3.343051506316813e-06,
      "loss": 0.2436,
      "step": 344
    },
    {
      "epoch": 0.010064764572028706,
      "grad_norm": 1.6527189980635084,
      "learning_rate": 3.352769679300292e-06,
      "loss": 0.3129,
      "step": 345
    },
    {
      "epoch": 0.010093937802672268,
      "grad_norm": 2.0328740369563074,
      "learning_rate": 3.3624878522837713e-06,
      "loss": 0.2767,
      "step": 346
    },
    {
      "epoch": 0.01012311103331583,
      "grad_norm": 1.7729144762059441,
      "learning_rate": 3.37220602526725e-06,
      "loss": 0.2849,
      "step": 347
    },
    {
      "epoch": 0.01015228426395939,
      "grad_norm": 1.6319492499213792,
      "learning_rate": 3.381924198250729e-06,
      "loss": 0.3011,
      "step": 348
    },
    {
      "epoch": 0.010181457494602952,
      "grad_norm": 1.5627342952620744,
      "learning_rate": 3.3916423712342083e-06,
      "loss": 0.2881,
      "step": 349
    },
    {
      "epoch": 0.010210630725246514,
      "grad_norm": 2.1436140364775365,
      "learning_rate": 3.4013605442176872e-06,
      "loss": 0.3088,
      "step": 350
    },
    {
      "epoch": 0.010239803955890076,
      "grad_norm": 1.5432322995400232,
      "learning_rate": 3.4110787172011665e-06,
      "loss": 0.2619,
      "step": 351
    },
    {
      "epoch": 0.010268977186533636,
      "grad_norm": 1.4919599215439503,
      "learning_rate": 3.4207968901846454e-06,
      "loss": 0.288,
      "step": 352
    },
    {
      "epoch": 0.010298150417177198,
      "grad_norm": 1.588909354418498,
      "learning_rate": 3.4305150631681243e-06,
      "loss": 0.2656,
      "step": 353
    },
    {
      "epoch": 0.01032732364782076,
      "grad_norm": 1.7238025208697898,
      "learning_rate": 3.440233236151604e-06,
      "loss": 0.2971,
      "step": 354
    },
    {
      "epoch": 0.010356496878464322,
      "grad_norm": 1.3741848014034796,
      "learning_rate": 3.449951409135083e-06,
      "loss": 0.2524,
      "step": 355
    },
    {
      "epoch": 0.010385670109107882,
      "grad_norm": 1.6950266671469552,
      "learning_rate": 3.4596695821185622e-06,
      "loss": 0.264,
      "step": 356
    },
    {
      "epoch": 0.010414843339751444,
      "grad_norm": 1.620413829771043,
      "learning_rate": 3.469387755102041e-06,
      "loss": 0.2825,
      "step": 357
    },
    {
      "epoch": 0.010444016570395006,
      "grad_norm": 1.4843445148445067,
      "learning_rate": 3.47910592808552e-06,
      "loss": 0.2524,
      "step": 358
    },
    {
      "epoch": 0.010473189801038568,
      "grad_norm": 1.4282873977233463,
      "learning_rate": 3.4888241010689993e-06,
      "loss": 0.2446,
      "step": 359
    },
    {
      "epoch": 0.010502363031682128,
      "grad_norm": 1.5458831165492712,
      "learning_rate": 3.4985422740524782e-06,
      "loss": 0.2602,
      "step": 360
    },
    {
      "epoch": 0.01053153626232569,
      "grad_norm": 1.9803172614901905,
      "learning_rate": 3.5082604470359575e-06,
      "loss": 0.3021,
      "step": 361
    },
    {
      "epoch": 0.010560709492969252,
      "grad_norm": 1.7412676481476632,
      "learning_rate": 3.5179786200194364e-06,
      "loss": 0.29,
      "step": 362
    },
    {
      "epoch": 0.010589882723612812,
      "grad_norm": 1.4824319966265418,
      "learning_rate": 3.527696793002916e-06,
      "loss": 0.2645,
      "step": 363
    },
    {
      "epoch": 0.010619055954256374,
      "grad_norm": 1.6505631113298946,
      "learning_rate": 3.537414965986395e-06,
      "loss": 0.3022,
      "step": 364
    },
    {
      "epoch": 0.010648229184899936,
      "grad_norm": 1.5184315725990505,
      "learning_rate": 3.547133138969874e-06,
      "loss": 0.2778,
      "step": 365
    },
    {
      "epoch": 0.010677402415543498,
      "grad_norm": 1.8930868281870492,
      "learning_rate": 3.5568513119533532e-06,
      "loss": 0.2707,
      "step": 366
    },
    {
      "epoch": 0.010706575646187058,
      "grad_norm": 1.462237554708469,
      "learning_rate": 3.566569484936832e-06,
      "loss": 0.2454,
      "step": 367
    },
    {
      "epoch": 0.01073574887683062,
      "grad_norm": 1.6865796691913675,
      "learning_rate": 3.5762876579203114e-06,
      "loss": 0.2669,
      "step": 368
    },
    {
      "epoch": 0.010764922107474182,
      "grad_norm": 1.5496968461346172,
      "learning_rate": 3.5860058309037903e-06,
      "loss": 0.2817,
      "step": 369
    },
    {
      "epoch": 0.010794095338117744,
      "grad_norm": 2.0249897064534523,
      "learning_rate": 3.595724003887269e-06,
      "loss": 0.2597,
      "step": 370
    },
    {
      "epoch": 0.010823268568761304,
      "grad_norm": 1.5958207293933246,
      "learning_rate": 3.6054421768707485e-06,
      "loss": 0.2521,
      "step": 371
    },
    {
      "epoch": 0.010852441799404866,
      "grad_norm": 1.4351172330015676,
      "learning_rate": 3.6151603498542274e-06,
      "loss": 0.2493,
      "step": 372
    },
    {
      "epoch": 0.010881615030048428,
      "grad_norm": 1.3779667132396949,
      "learning_rate": 3.624878522837707e-06,
      "loss": 0.2746,
      "step": 373
    },
    {
      "epoch": 0.010910788260691988,
      "grad_norm": 1.686772218077411,
      "learning_rate": 3.634596695821186e-06,
      "loss": 0.2697,
      "step": 374
    },
    {
      "epoch": 0.01093996149133555,
      "grad_norm": 1.63760834587086,
      "learning_rate": 3.644314868804665e-06,
      "loss": 0.3058,
      "step": 375
    },
    {
      "epoch": 0.010969134721979112,
      "grad_norm": 1.1955134036184547,
      "learning_rate": 3.6540330417881442e-06,
      "loss": 0.2521,
      "step": 376
    },
    {
      "epoch": 0.010998307952622674,
      "grad_norm": 1.2622028647576145,
      "learning_rate": 3.663751214771623e-06,
      "loss": 0.2619,
      "step": 377
    },
    {
      "epoch": 0.011027481183266234,
      "grad_norm": 1.5249266049659616,
      "learning_rate": 3.6734693877551024e-06,
      "loss": 0.2717,
      "step": 378
    },
    {
      "epoch": 0.011056654413909796,
      "grad_norm": 1.6900785662024531,
      "learning_rate": 3.6831875607385813e-06,
      "loss": 0.2573,
      "step": 379
    },
    {
      "epoch": 0.011085827644553358,
      "grad_norm": 1.211142533860906,
      "learning_rate": 3.69290573372206e-06,
      "loss": 0.2607,
      "step": 380
    },
    {
      "epoch": 0.01111500087519692,
      "grad_norm": 1.2637964860665716,
      "learning_rate": 3.70262390670554e-06,
      "loss": 0.2549,
      "step": 381
    },
    {
      "epoch": 0.01114417410584048,
      "grad_norm": 1.5521314677876958,
      "learning_rate": 3.7123420796890184e-06,
      "loss": 0.2704,
      "step": 382
    },
    {
      "epoch": 0.011173347336484042,
      "grad_norm": 1.4219165908310176,
      "learning_rate": 3.722060252672498e-06,
      "loss": 0.2822,
      "step": 383
    },
    {
      "epoch": 0.011202520567127604,
      "grad_norm": 1.3945148177559197,
      "learning_rate": 3.731778425655977e-06,
      "loss": 0.2554,
      "step": 384
    },
    {
      "epoch": 0.011231693797771166,
      "grad_norm": 1.4800200894833206,
      "learning_rate": 3.7414965986394563e-06,
      "loss": 0.2653,
      "step": 385
    },
    {
      "epoch": 0.011260867028414726,
      "grad_norm": 1.7467062544533822,
      "learning_rate": 3.7512147716229352e-06,
      "loss": 0.2694,
      "step": 386
    },
    {
      "epoch": 0.011290040259058288,
      "grad_norm": 1.4124869764924537,
      "learning_rate": 3.760932944606414e-06,
      "loss": 0.2681,
      "step": 387
    },
    {
      "epoch": 0.01131921348970185,
      "grad_norm": 1.4230904605810348,
      "learning_rate": 3.7706511175898934e-06,
      "loss": 0.288,
      "step": 388
    },
    {
      "epoch": 0.01134838672034541,
      "grad_norm": 1.357060034334733,
      "learning_rate": 3.7803692905733723e-06,
      "loss": 0.2743,
      "step": 389
    },
    {
      "epoch": 0.011377559950988972,
      "grad_norm": 1.372100485282738,
      "learning_rate": 3.790087463556852e-06,
      "loss": 0.2562,
      "step": 390
    },
    {
      "epoch": 0.011406733181632534,
      "grad_norm": 1.3167946418471566,
      "learning_rate": 3.799805636540331e-06,
      "loss": 0.2613,
      "step": 391
    },
    {
      "epoch": 0.011435906412276096,
      "grad_norm": 1.2965892630475417,
      "learning_rate": 3.80952380952381e-06,
      "loss": 0.3135,
      "step": 392
    },
    {
      "epoch": 0.011465079642919656,
      "grad_norm": 1.3897749697059423,
      "learning_rate": 3.819241982507289e-06,
      "loss": 0.2629,
      "step": 393
    },
    {
      "epoch": 0.011494252873563218,
      "grad_norm": 1.3108126440033776,
      "learning_rate": 3.828960155490768e-06,
      "loss": 0.2503,
      "step": 394
    },
    {
      "epoch": 0.01152342610420678,
      "grad_norm": 1.2565582473203867,
      "learning_rate": 3.838678328474247e-06,
      "loss": 0.2643,
      "step": 395
    },
    {
      "epoch": 0.011552599334850342,
      "grad_norm": 1.3845977026331957,
      "learning_rate": 3.848396501457727e-06,
      "loss": 0.257,
      "step": 396
    },
    {
      "epoch": 0.011581772565493902,
      "grad_norm": 1.0733200186856116,
      "learning_rate": 3.858114674441205e-06,
      "loss": 0.2543,
      "step": 397
    },
    {
      "epoch": 0.011610945796137464,
      "grad_norm": 1.3431895406111862,
      "learning_rate": 3.867832847424684e-06,
      "loss": 0.2564,
      "step": 398
    },
    {
      "epoch": 0.011640119026781026,
      "grad_norm": 1.5303229422311955,
      "learning_rate": 3.877551020408164e-06,
      "loss": 0.2493,
      "step": 399
    },
    {
      "epoch": 0.011669292257424586,
      "grad_norm": 1.5256524022733309,
      "learning_rate": 3.887269193391643e-06,
      "loss": 0.2632,
      "step": 400
    },
    {
      "epoch": 0.011698465488068148,
      "grad_norm": 1.3465739038902813,
      "learning_rate": 3.8969873663751215e-06,
      "loss": 0.2529,
      "step": 401
    },
    {
      "epoch": 0.01172763871871171,
      "grad_norm": 1.6741775016426725,
      "learning_rate": 3.906705539358601e-06,
      "loss": 0.2718,
      "step": 402
    },
    {
      "epoch": 0.011756811949355272,
      "grad_norm": 1.5926160920030783,
      "learning_rate": 3.91642371234208e-06,
      "loss": 0.2522,
      "step": 403
    },
    {
      "epoch": 0.011785985179998832,
      "grad_norm": 1.2376052927951111,
      "learning_rate": 3.926141885325559e-06,
      "loss": 0.2862,
      "step": 404
    },
    {
      "epoch": 0.011815158410642394,
      "grad_norm": 1.3319212228022448,
      "learning_rate": 3.935860058309039e-06,
      "loss": 0.253,
      "step": 405
    },
    {
      "epoch": 0.011844331641285956,
      "grad_norm": 1.3629418138465732,
      "learning_rate": 3.945578231292517e-06,
      "loss": 0.2689,
      "step": 406
    },
    {
      "epoch": 0.011873504871929518,
      "grad_norm": 1.2912180310714865,
      "learning_rate": 3.9552964042759965e-06,
      "loss": 0.2525,
      "step": 407
    },
    {
      "epoch": 0.011902678102573078,
      "grad_norm": 1.5134083021404499,
      "learning_rate": 3.965014577259476e-06,
      "loss": 0.2796,
      "step": 408
    },
    {
      "epoch": 0.01193185133321664,
      "grad_norm": 1.531948495145115,
      "learning_rate": 3.974732750242954e-06,
      "loss": 0.2531,
      "step": 409
    },
    {
      "epoch": 0.011961024563860202,
      "grad_norm": 1.4702809793293405,
      "learning_rate": 3.984450923226434e-06,
      "loss": 0.3054,
      "step": 410
    },
    {
      "epoch": 0.011990197794503764,
      "grad_norm": 1.3305350002771992,
      "learning_rate": 3.994169096209913e-06,
      "loss": 0.2884,
      "step": 411
    },
    {
      "epoch": 0.012019371025147324,
      "grad_norm": 1.4697924000376428,
      "learning_rate": 4.003887269193392e-06,
      "loss": 0.26,
      "step": 412
    },
    {
      "epoch": 0.012048544255790886,
      "grad_norm": 1.3735374659502786,
      "learning_rate": 4.013605442176871e-06,
      "loss": 0.2664,
      "step": 413
    },
    {
      "epoch": 0.012077717486434448,
      "grad_norm": 1.2954656043771686,
      "learning_rate": 4.02332361516035e-06,
      "loss": 0.2782,
      "step": 414
    },
    {
      "epoch": 0.012106890717078008,
      "grad_norm": 1.1448721497856589,
      "learning_rate": 4.033041788143829e-06,
      "loss": 0.2296,
      "step": 415
    },
    {
      "epoch": 0.01213606394772157,
      "grad_norm": 1.3316732945761685,
      "learning_rate": 4.042759961127309e-06,
      "loss": 0.2468,
      "step": 416
    },
    {
      "epoch": 0.012165237178365132,
      "grad_norm": 1.5767206129208087,
      "learning_rate": 4.052478134110788e-06,
      "loss": 0.2444,
      "step": 417
    },
    {
      "epoch": 0.012194410409008694,
      "grad_norm": 1.4368737411520363,
      "learning_rate": 4.062196307094266e-06,
      "loss": 0.2723,
      "step": 418
    },
    {
      "epoch": 0.012223583639652254,
      "grad_norm": 1.1620650429155768,
      "learning_rate": 4.071914480077746e-06,
      "loss": 0.2678,
      "step": 419
    },
    {
      "epoch": 0.012252756870295816,
      "grad_norm": 1.4174843135919835,
      "learning_rate": 4.081632653061225e-06,
      "loss": 0.2393,
      "step": 420
    },
    {
      "epoch": 0.012281930100939378,
      "grad_norm": 1.2567250097238387,
      "learning_rate": 4.0913508260447035e-06,
      "loss": 0.2574,
      "step": 421
    },
    {
      "epoch": 0.01231110333158294,
      "grad_norm": 1.3670647007335115,
      "learning_rate": 4.101068999028184e-06,
      "loss": 0.2528,
      "step": 422
    },
    {
      "epoch": 0.0123402765622265,
      "grad_norm": 1.5587689474510382,
      "learning_rate": 4.110787172011662e-06,
      "loss": 0.2748,
      "step": 423
    },
    {
      "epoch": 0.012369449792870062,
      "grad_norm": 1.3272715078796335,
      "learning_rate": 4.120505344995141e-06,
      "loss": 0.2565,
      "step": 424
    },
    {
      "epoch": 0.012398623023513624,
      "grad_norm": 1.2378761782370675,
      "learning_rate": 4.130223517978621e-06,
      "loss": 0.2631,
      "step": 425
    },
    {
      "epoch": 0.012427796254157185,
      "grad_norm": 1.1665020326387956,
      "learning_rate": 4.139941690962099e-06,
      "loss": 0.263,
      "step": 426
    },
    {
      "epoch": 0.012456969484800746,
      "grad_norm": 1.3182377098074065,
      "learning_rate": 4.1496598639455785e-06,
      "loss": 0.259,
      "step": 427
    },
    {
      "epoch": 0.012486142715444308,
      "grad_norm": 1.1516319554886278,
      "learning_rate": 4.159378036929058e-06,
      "loss": 0.2442,
      "step": 428
    },
    {
      "epoch": 0.01251531594608787,
      "grad_norm": 1.0877267499954546,
      "learning_rate": 4.169096209912537e-06,
      "loss": 0.2456,
      "step": 429
    },
    {
      "epoch": 0.01254448917673143,
      "grad_norm": 1.4293359511150157,
      "learning_rate": 4.178814382896016e-06,
      "loss": 0.2448,
      "step": 430
    },
    {
      "epoch": 0.012573662407374992,
      "grad_norm": 1.383345105895979,
      "learning_rate": 4.188532555879495e-06,
      "loss": 0.2564,
      "step": 431
    },
    {
      "epoch": 0.012602835638018554,
      "grad_norm": 1.3677681652027938,
      "learning_rate": 4.198250728862974e-06,
      "loss": 0.2483,
      "step": 432
    },
    {
      "epoch": 0.012632008868662116,
      "grad_norm": 1.3290166256189182,
      "learning_rate": 4.2079689018464535e-06,
      "loss": 0.2602,
      "step": 433
    },
    {
      "epoch": 0.012661182099305677,
      "grad_norm": 1.332224555434403,
      "learning_rate": 4.217687074829933e-06,
      "loss": 0.2335,
      "step": 434
    },
    {
      "epoch": 0.012690355329949238,
      "grad_norm": 1.3313100417461197,
      "learning_rate": 4.227405247813411e-06,
      "loss": 0.2392,
      "step": 435
    },
    {
      "epoch": 0.0127195285605928,
      "grad_norm": 1.2006719353729596,
      "learning_rate": 4.237123420796891e-06,
      "loss": 0.2504,
      "step": 436
    },
    {
      "epoch": 0.012748701791236362,
      "grad_norm": 1.0581534964792212,
      "learning_rate": 4.24684159378037e-06,
      "loss": 0.249,
      "step": 437
    },
    {
      "epoch": 0.012777875021879923,
      "grad_norm": 1.1171627090319456,
      "learning_rate": 4.256559766763848e-06,
      "loss": 0.2482,
      "step": 438
    },
    {
      "epoch": 0.012807048252523484,
      "grad_norm": 1.2435943357855226,
      "learning_rate": 4.266277939747328e-06,
      "loss": 0.2691,
      "step": 439
    },
    {
      "epoch": 0.012836221483167046,
      "grad_norm": 1.2748446085082195,
      "learning_rate": 4.275996112730807e-06,
      "loss": 0.2961,
      "step": 440
    },
    {
      "epoch": 0.012865394713810607,
      "grad_norm": 1.4193221476812536,
      "learning_rate": 4.2857142857142855e-06,
      "loss": 0.2467,
      "step": 441
    },
    {
      "epoch": 0.012894567944454169,
      "grad_norm": 1.1594012561569815,
      "learning_rate": 4.295432458697766e-06,
      "loss": 0.2271,
      "step": 442
    },
    {
      "epoch": 0.01292374117509773,
      "grad_norm": 1.4913336551053733,
      "learning_rate": 4.305150631681244e-06,
      "loss": 0.2665,
      "step": 443
    },
    {
      "epoch": 0.012952914405741292,
      "grad_norm": 1.2669816916780523,
      "learning_rate": 4.314868804664723e-06,
      "loss": 0.2363,
      "step": 444
    },
    {
      "epoch": 0.012982087636384853,
      "grad_norm": 1.3234504651952432,
      "learning_rate": 4.324586977648203e-06,
      "loss": 0.2656,
      "step": 445
    },
    {
      "epoch": 0.013011260867028415,
      "grad_norm": 1.6579390405095555,
      "learning_rate": 4.334305150631681e-06,
      "loss": 0.2455,
      "step": 446
    },
    {
      "epoch": 0.013040434097671976,
      "grad_norm": 1.349419654484547,
      "learning_rate": 4.3440233236151605e-06,
      "loss": 0.248,
      "step": 447
    },
    {
      "epoch": 0.013069607328315538,
      "grad_norm": 1.2588927126523761,
      "learning_rate": 4.35374149659864e-06,
      "loss": 0.2478,
      "step": 448
    },
    {
      "epoch": 0.013098780558959099,
      "grad_norm": 1.357180967939837,
      "learning_rate": 4.363459669582119e-06,
      "loss": 0.2391,
      "step": 449
    },
    {
      "epoch": 0.01312795378960266,
      "grad_norm": 1.3862470942315106,
      "learning_rate": 4.3731778425655976e-06,
      "loss": 0.2532,
      "step": 450
    },
    {
      "epoch": 0.013157127020246222,
      "grad_norm": 1.1208491806284107,
      "learning_rate": 4.382896015549078e-06,
      "loss": 0.249,
      "step": 451
    },
    {
      "epoch": 0.013186300250889784,
      "grad_norm": 1.4265465775294321,
      "learning_rate": 4.392614188532556e-06,
      "loss": 0.2598,
      "step": 452
    },
    {
      "epoch": 0.013215473481533345,
      "grad_norm": 1.3011849065304812,
      "learning_rate": 4.4023323615160355e-06,
      "loss": 0.2615,
      "step": 453
    },
    {
      "epoch": 0.013244646712176907,
      "grad_norm": 1.1182457723949686,
      "learning_rate": 4.412050534499515e-06,
      "loss": 0.2468,
      "step": 454
    },
    {
      "epoch": 0.013273819942820468,
      "grad_norm": 1.08008703791803,
      "learning_rate": 4.421768707482993e-06,
      "loss": 0.2381,
      "step": 455
    },
    {
      "epoch": 0.013302993173464029,
      "grad_norm": 1.2908888359703325,
      "learning_rate": 4.431486880466473e-06,
      "loss": 0.2302,
      "step": 456
    },
    {
      "epoch": 0.01333216640410759,
      "grad_norm": 1.3553893078632153,
      "learning_rate": 4.441205053449952e-06,
      "loss": 0.2389,
      "step": 457
    },
    {
      "epoch": 0.013361339634751153,
      "grad_norm": 1.2980071707761118,
      "learning_rate": 4.45092322643343e-06,
      "loss": 0.2318,
      "step": 458
    },
    {
      "epoch": 0.013390512865394714,
      "grad_norm": 1.3181871945419834,
      "learning_rate": 4.4606413994169105e-06,
      "loss": 0.2551,
      "step": 459
    },
    {
      "epoch": 0.013419686096038275,
      "grad_norm": 1.238630790097823,
      "learning_rate": 4.470359572400389e-06,
      "loss": 0.2586,
      "step": 460
    },
    {
      "epoch": 0.013448859326681837,
      "grad_norm": 1.3362103010683621,
      "learning_rate": 4.480077745383868e-06,
      "loss": 0.2602,
      "step": 461
    },
    {
      "epoch": 0.013478032557325399,
      "grad_norm": 1.0962828772234607,
      "learning_rate": 4.489795918367348e-06,
      "loss": 0.243,
      "step": 462
    },
    {
      "epoch": 0.01350720578796896,
      "grad_norm": 1.2585926563412242,
      "learning_rate": 4.499514091350826e-06,
      "loss": 0.2683,
      "step": 463
    },
    {
      "epoch": 0.01353637901861252,
      "grad_norm": 1.4080036055157774,
      "learning_rate": 4.509232264334305e-06,
      "loss": 0.314,
      "step": 464
    },
    {
      "epoch": 0.013565552249256083,
      "grad_norm": 1.2666446602777268,
      "learning_rate": 4.518950437317785e-06,
      "loss": 0.2601,
      "step": 465
    },
    {
      "epoch": 0.013594725479899645,
      "grad_norm": 1.183584288132124,
      "learning_rate": 4.528668610301264e-06,
      "loss": 0.2677,
      "step": 466
    },
    {
      "epoch": 0.013623898710543205,
      "grad_norm": 1.2559272580191174,
      "learning_rate": 4.5383867832847425e-06,
      "loss": 0.2623,
      "step": 467
    },
    {
      "epoch": 0.013653071941186767,
      "grad_norm": 1.2137655364874729,
      "learning_rate": 4.548104956268222e-06,
      "loss": 0.3135,
      "step": 468
    },
    {
      "epoch": 0.013682245171830329,
      "grad_norm": 1.0815132249305057,
      "learning_rate": 4.557823129251701e-06,
      "loss": 0.2281,
      "step": 469
    },
    {
      "epoch": 0.01371141840247389,
      "grad_norm": 1.1782532527157554,
      "learning_rate": 4.56754130223518e-06,
      "loss": 0.2367,
      "step": 470
    },
    {
      "epoch": 0.01374059163311745,
      "grad_norm": 1.4533580084977513,
      "learning_rate": 4.57725947521866e-06,
      "loss": 0.2484,
      "step": 471
    },
    {
      "epoch": 0.013769764863761013,
      "grad_norm": 1.3583985599668706,
      "learning_rate": 4.586977648202138e-06,
      "loss": 0.2664,
      "step": 472
    },
    {
      "epoch": 0.013798938094404575,
      "grad_norm": 1.2708757427546593,
      "learning_rate": 4.5966958211856175e-06,
      "loss": 0.2355,
      "step": 473
    },
    {
      "epoch": 0.013828111325048137,
      "grad_norm": 1.198494775196898,
      "learning_rate": 4.606413994169097e-06,
      "loss": 0.2299,
      "step": 474
    },
    {
      "epoch": 0.013857284555691697,
      "grad_norm": 1.5538009585829942,
      "learning_rate": 4.616132167152575e-06,
      "loss": 0.2457,
      "step": 475
    },
    {
      "epoch": 0.013886457786335259,
      "grad_norm": 1.2367763455447653,
      "learning_rate": 4.6258503401360546e-06,
      "loss": 0.2527,
      "step": 476
    },
    {
      "epoch": 0.01391563101697882,
      "grad_norm": 1.143066225777548,
      "learning_rate": 4.635568513119534e-06,
      "loss": 0.2428,
      "step": 477
    },
    {
      "epoch": 0.013944804247622383,
      "grad_norm": 1.1629123562353916,
      "learning_rate": 4.645286686103013e-06,
      "loss": 0.2441,
      "step": 478
    },
    {
      "epoch": 0.013973977478265943,
      "grad_norm": 1.2925898615315257,
      "learning_rate": 4.6550048590864925e-06,
      "loss": 0.262,
      "step": 479
    },
    {
      "epoch": 0.014003150708909505,
      "grad_norm": 1.300482449456053,
      "learning_rate": 4.664723032069971e-06,
      "loss": 0.24,
      "step": 480
    },
    {
      "epoch": 0.014032323939553067,
      "grad_norm": 1.4025190963822738,
      "learning_rate": 4.67444120505345e-06,
      "loss": 0.2551,
      "step": 481
    },
    {
      "epoch": 0.014061497170196627,
      "grad_norm": 1.335127065360521,
      "learning_rate": 4.68415937803693e-06,
      "loss": 0.235,
      "step": 482
    },
    {
      "epoch": 0.014090670400840189,
      "grad_norm": 1.0738846808453941,
      "learning_rate": 4.693877551020409e-06,
      "loss": 0.2362,
      "step": 483
    },
    {
      "epoch": 0.01411984363148375,
      "grad_norm": 1.2366364177512028,
      "learning_rate": 4.703595724003887e-06,
      "loss": 0.2267,
      "step": 484
    },
    {
      "epoch": 0.014149016862127313,
      "grad_norm": 1.568015498853348,
      "learning_rate": 4.713313896987367e-06,
      "loss": 0.2549,
      "step": 485
    },
    {
      "epoch": 0.014178190092770873,
      "grad_norm": 0.99074606036659,
      "learning_rate": 4.723032069970846e-06,
      "loss": 0.2484,
      "step": 486
    },
    {
      "epoch": 0.014207363323414435,
      "grad_norm": 1.2545910290085318,
      "learning_rate": 4.7327502429543244e-06,
      "loss": 0.2631,
      "step": 487
    },
    {
      "epoch": 0.014236536554057997,
      "grad_norm": 1.3838940278828762,
      "learning_rate": 4.742468415937805e-06,
      "loss": 0.2561,
      "step": 488
    },
    {
      "epoch": 0.014265709784701559,
      "grad_norm": 1.3331219246766512,
      "learning_rate": 4.752186588921283e-06,
      "loss": 0.2545,
      "step": 489
    },
    {
      "epoch": 0.014294883015345119,
      "grad_norm": 1.5306031190582838,
      "learning_rate": 4.761904761904762e-06,
      "loss": 0.2667,
      "step": 490
    },
    {
      "epoch": 0.01432405624598868,
      "grad_norm": 1.1863511505688096,
      "learning_rate": 4.771622934888242e-06,
      "loss": 0.245,
      "step": 491
    },
    {
      "epoch": 0.014353229476632243,
      "grad_norm": 1.2939705851848529,
      "learning_rate": 4.78134110787172e-06,
      "loss": 0.2342,
      "step": 492
    },
    {
      "epoch": 0.014382402707275803,
      "grad_norm": 1.2050040257403767,
      "learning_rate": 4.7910592808551995e-06,
      "loss": 0.2233,
      "step": 493
    },
    {
      "epoch": 0.014411575937919365,
      "grad_norm": 1.2670366063806675,
      "learning_rate": 4.800777453838679e-06,
      "loss": 0.2548,
      "step": 494
    },
    {
      "epoch": 0.014440749168562927,
      "grad_norm": 1.4186837559720642,
      "learning_rate": 4.810495626822158e-06,
      "loss": 0.2634,
      "step": 495
    },
    {
      "epoch": 0.014469922399206489,
      "grad_norm": 1.2092502080757692,
      "learning_rate": 4.820213799805637e-06,
      "loss": 0.245,
      "step": 496
    },
    {
      "epoch": 0.014499095629850049,
      "grad_norm": 1.2523718195794546,
      "learning_rate": 4.829931972789116e-06,
      "loss": 0.2804,
      "step": 497
    },
    {
      "epoch": 0.01452826886049361,
      "grad_norm": 1.2254633096437888,
      "learning_rate": 4.839650145772595e-06,
      "loss": 0.231,
      "step": 498
    },
    {
      "epoch": 0.014557442091137173,
      "grad_norm": 1.2755412540876685,
      "learning_rate": 4.8493683187560745e-06,
      "loss": 0.2798,
      "step": 499
    },
    {
      "epoch": 0.014586615321780735,
      "grad_norm": 1.6972872186718777,
      "learning_rate": 4.859086491739554e-06,
      "loss": 0.2423,
      "step": 500
    },
    {
      "epoch": 0.014615788552424295,
      "grad_norm": 1.49574324230808,
      "learning_rate": 4.868804664723032e-06,
      "loss": 0.2727,
      "step": 501
    },
    {
      "epoch": 0.014644961783067857,
      "grad_norm": 1.4688837322711812,
      "learning_rate": 4.8785228377065116e-06,
      "loss": 0.2799,
      "step": 502
    },
    {
      "epoch": 0.014674135013711419,
      "grad_norm": 1.3040968475741357,
      "learning_rate": 4.888241010689991e-06,
      "loss": 0.2468,
      "step": 503
    },
    {
      "epoch": 0.01470330824435498,
      "grad_norm": 1.399788268983685,
      "learning_rate": 4.897959183673469e-06,
      "loss": 0.2456,
      "step": 504
    },
    {
      "epoch": 0.014732481474998541,
      "grad_norm": 1.198851293120145,
      "learning_rate": 4.9076773566569495e-06,
      "loss": 0.2221,
      "step": 505
    },
    {
      "epoch": 0.014761654705642103,
      "grad_norm": 1.2491295874169632,
      "learning_rate": 4.917395529640428e-06,
      "loss": 0.2491,
      "step": 506
    },
    {
      "epoch": 0.014790827936285665,
      "grad_norm": 1.4679610151184677,
      "learning_rate": 4.927113702623907e-06,
      "loss": 0.2495,
      "step": 507
    },
    {
      "epoch": 0.014820001166929225,
      "grad_norm": 1.5826969791489809,
      "learning_rate": 4.936831875607387e-06,
      "loss": 0.2608,
      "step": 508
    },
    {
      "epoch": 0.014849174397572787,
      "grad_norm": 1.3592760935131265,
      "learning_rate": 4.946550048590865e-06,
      "loss": 0.2283,
      "step": 509
    },
    {
      "epoch": 0.014878347628216349,
      "grad_norm": 1.2970047256431163,
      "learning_rate": 4.956268221574344e-06,
      "loss": 0.2588,
      "step": 510
    },
    {
      "epoch": 0.01490752085885991,
      "grad_norm": 1.2880449054790777,
      "learning_rate": 4.965986394557824e-06,
      "loss": 0.2641,
      "step": 511
    },
    {
      "epoch": 0.014936694089503471,
      "grad_norm": 1.0672005591191107,
      "learning_rate": 4.975704567541302e-06,
      "loss": 0.2479,
      "step": 512
    },
    {
      "epoch": 0.014965867320147033,
      "grad_norm": 1.2408070495317607,
      "learning_rate": 4.9854227405247814e-06,
      "loss": 0.237,
      "step": 513
    },
    {
      "epoch": 0.014995040550790595,
      "grad_norm": 1.4600327611566248,
      "learning_rate": 4.995140913508261e-06,
      "loss": 0.2462,
      "step": 514
    },
    {
      "epoch": 0.015024213781434157,
      "grad_norm": 1.0322934537143222,
      "learning_rate": 5.00485908649174e-06,
      "loss": 0.2282,
      "step": 515
    },
    {
      "epoch": 0.015053387012077717,
      "grad_norm": 1.17271083022763,
      "learning_rate": 5.014577259475219e-06,
      "loss": 0.2335,
      "step": 516
    },
    {
      "epoch": 0.015082560242721279,
      "grad_norm": 1.2339027239877591,
      "learning_rate": 5.024295432458698e-06,
      "loss": 0.2605,
      "step": 517
    },
    {
      "epoch": 0.01511173347336484,
      "grad_norm": 1.343196723311313,
      "learning_rate": 5.034013605442177e-06,
      "loss": 0.2563,
      "step": 518
    },
    {
      "epoch": 0.015140906704008401,
      "grad_norm": 1.2042926311020554,
      "learning_rate": 5.0437317784256565e-06,
      "loss": 0.2351,
      "step": 519
    },
    {
      "epoch": 0.015170079934651963,
      "grad_norm": 1.2392118146441158,
      "learning_rate": 5.053449951409135e-06,
      "loss": 0.2681,
      "step": 520
    },
    {
      "epoch": 0.015199253165295525,
      "grad_norm": 1.1642156874413723,
      "learning_rate": 5.063168124392614e-06,
      "loss": 0.2342,
      "step": 521
    },
    {
      "epoch": 0.015228426395939087,
      "grad_norm": 1.0936206726276263,
      "learning_rate": 5.0728862973760935e-06,
      "loss": 0.2354,
      "step": 522
    },
    {
      "epoch": 0.015257599626582647,
      "grad_norm": 1.1784572696574878,
      "learning_rate": 5.082604470359572e-06,
      "loss": 0.2315,
      "step": 523
    },
    {
      "epoch": 0.015286772857226209,
      "grad_norm": 1.4018108993214766,
      "learning_rate": 5.092322643343051e-06,
      "loss": 0.2559,
      "step": 524
    },
    {
      "epoch": 0.015315946087869771,
      "grad_norm": 1.4868977642568852,
      "learning_rate": 5.1020408163265315e-06,
      "loss": 0.2738,
      "step": 525
    },
    {
      "epoch": 0.015345119318513333,
      "grad_norm": 1.0975374744514939,
      "learning_rate": 5.111758989310011e-06,
      "loss": 0.2424,
      "step": 526
    },
    {
      "epoch": 0.015374292549156893,
      "grad_norm": 1.1451405719954026,
      "learning_rate": 5.121477162293489e-06,
      "loss": 0.2563,
      "step": 527
    },
    {
      "epoch": 0.015403465779800455,
      "grad_norm": 1.2648820762237045,
      "learning_rate": 5.1311953352769686e-06,
      "loss": 0.2718,
      "step": 528
    },
    {
      "epoch": 0.015432639010444017,
      "grad_norm": 1.0871343834254525,
      "learning_rate": 5.140913508260448e-06,
      "loss": 0.2541,
      "step": 529
    },
    {
      "epoch": 0.015461812241087579,
      "grad_norm": 1.3389746879539115,
      "learning_rate": 5.150631681243926e-06,
      "loss": 0.2263,
      "step": 530
    },
    {
      "epoch": 0.015490985471731139,
      "grad_norm": 1.3609250813513585,
      "learning_rate": 5.160349854227406e-06,
      "loss": 0.2366,
      "step": 531
    },
    {
      "epoch": 0.015520158702374701,
      "grad_norm": 1.3349704582365165,
      "learning_rate": 5.170068027210885e-06,
      "loss": 0.2535,
      "step": 532
    },
    {
      "epoch": 0.015549331933018263,
      "grad_norm": 1.3591505569965345,
      "learning_rate": 5.179786200194364e-06,
      "loss": 0.2533,
      "step": 533
    },
    {
      "epoch": 0.015578505163661823,
      "grad_norm": 1.0546896668336805,
      "learning_rate": 5.189504373177843e-06,
      "loss": 0.2312,
      "step": 534
    },
    {
      "epoch": 0.015607678394305385,
      "grad_norm": 1.1642247748102443,
      "learning_rate": 5.199222546161322e-06,
      "loss": 0.2533,
      "step": 535
    },
    {
      "epoch": 0.015636851624948947,
      "grad_norm": 1.2734641669395699,
      "learning_rate": 5.208940719144801e-06,
      "loss": 0.2498,
      "step": 536
    },
    {
      "epoch": 0.015666024855592507,
      "grad_norm": 1.0357302408386122,
      "learning_rate": 5.21865889212828e-06,
      "loss": 0.2141,
      "step": 537
    },
    {
      "epoch": 0.01569519808623607,
      "grad_norm": 1.0304599709978406,
      "learning_rate": 5.228377065111759e-06,
      "loss": 0.2341,
      "step": 538
    },
    {
      "epoch": 0.01572437131687963,
      "grad_norm": 1.4047972859558309,
      "learning_rate": 5.2380952380952384e-06,
      "loss": 0.2325,
      "step": 539
    },
    {
      "epoch": 0.01575354454752319,
      "grad_norm": 1.3253297357545974,
      "learning_rate": 5.247813411078717e-06,
      "loss": 0.2495,
      "step": 540
    },
    {
      "epoch": 0.015782717778166755,
      "grad_norm": 1.0493019209181853,
      "learning_rate": 5.257531584062196e-06,
      "loss": 0.2275,
      "step": 541
    },
    {
      "epoch": 0.015811891008810315,
      "grad_norm": 1.3123986483884977,
      "learning_rate": 5.267249757045676e-06,
      "loss": 0.2218,
      "step": 542
    },
    {
      "epoch": 0.01584106423945388,
      "grad_norm": 1.515493491087858,
      "learning_rate": 5.276967930029156e-06,
      "loss": 0.2488,
      "step": 543
    },
    {
      "epoch": 0.01587023747009744,
      "grad_norm": 1.1272225662027693,
      "learning_rate": 5.286686103012634e-06,
      "loss": 0.2349,
      "step": 544
    },
    {
      "epoch": 0.015899410700741,
      "grad_norm": 1.1164426331892674,
      "learning_rate": 5.2964042759961135e-06,
      "loss": 0.261,
      "step": 545
    },
    {
      "epoch": 0.015928583931384563,
      "grad_norm": 1.2763851002221152,
      "learning_rate": 5.306122448979593e-06,
      "loss": 0.2466,
      "step": 546
    },
    {
      "epoch": 0.015957757162028123,
      "grad_norm": 1.2014379229596053,
      "learning_rate": 5.315840621963071e-06,
      "loss": 0.2335,
      "step": 547
    },
    {
      "epoch": 0.015986930392671683,
      "grad_norm": 1.2100206293147397,
      "learning_rate": 5.3255587949465505e-06,
      "loss": 0.2506,
      "step": 548
    },
    {
      "epoch": 0.016016103623315247,
      "grad_norm": 1.3275336886079625,
      "learning_rate": 5.33527696793003e-06,
      "loss": 0.2634,
      "step": 549
    },
    {
      "epoch": 0.016045276853958807,
      "grad_norm": 1.3174878416843403,
      "learning_rate": 5.344995140913509e-06,
      "loss": 0.2299,
      "step": 550
    },
    {
      "epoch": 0.016074450084602367,
      "grad_norm": 0.9008258507259723,
      "learning_rate": 5.354713313896988e-06,
      "loss": 0.2384,
      "step": 551
    },
    {
      "epoch": 0.01610362331524593,
      "grad_norm": 1.3084802157410997,
      "learning_rate": 5.364431486880467e-06,
      "loss": 0.2366,
      "step": 552
    },
    {
      "epoch": 0.01613279654588949,
      "grad_norm": 1.2136094468462453,
      "learning_rate": 5.374149659863946e-06,
      "loss": 0.2624,
      "step": 553
    },
    {
      "epoch": 0.016161969776533055,
      "grad_norm": 1.4392730033686674,
      "learning_rate": 5.383867832847425e-06,
      "loss": 0.2801,
      "step": 554
    },
    {
      "epoch": 0.016191143007176615,
      "grad_norm": 1.2542445458376885,
      "learning_rate": 5.393586005830904e-06,
      "loss": 0.2458,
      "step": 555
    },
    {
      "epoch": 0.016220316237820175,
      "grad_norm": 1.34420603075901,
      "learning_rate": 5.403304178814383e-06,
      "loss": 0.2392,
      "step": 556
    },
    {
      "epoch": 0.01624948946846374,
      "grad_norm": 1.3102475035554522,
      "learning_rate": 5.413022351797862e-06,
      "loss": 0.2572,
      "step": 557
    },
    {
      "epoch": 0.0162786626991073,
      "grad_norm": 1.2479299081465867,
      "learning_rate": 5.422740524781341e-06,
      "loss": 0.2454,
      "step": 558
    },
    {
      "epoch": 0.01630783592975086,
      "grad_norm": 1.4107698889438265,
      "learning_rate": 5.4324586977648204e-06,
      "loss": 0.2442,
      "step": 559
    },
    {
      "epoch": 0.016337009160394423,
      "grad_norm": 1.4361667300580239,
      "learning_rate": 5.442176870748301e-06,
      "loss": 0.2634,
      "step": 560
    },
    {
      "epoch": 0.016366182391037983,
      "grad_norm": 1.072946103830777,
      "learning_rate": 5.451895043731778e-06,
      "loss": 0.2227,
      "step": 561
    },
    {
      "epoch": 0.016395355621681543,
      "grad_norm": 1.1858106557560961,
      "learning_rate": 5.461613216715258e-06,
      "loss": 0.2203,
      "step": 562
    },
    {
      "epoch": 0.016424528852325107,
      "grad_norm": 1.1162929363472966,
      "learning_rate": 5.471331389698738e-06,
      "loss": 0.2395,
      "step": 563
    },
    {
      "epoch": 0.016453702082968667,
      "grad_norm": 1.1884848177255527,
      "learning_rate": 5.481049562682216e-06,
      "loss": 0.2282,
      "step": 564
    },
    {
      "epoch": 0.01648287531361223,
      "grad_norm": 1.1283973242586844,
      "learning_rate": 5.4907677356656954e-06,
      "loss": 0.2603,
      "step": 565
    },
    {
      "epoch": 0.01651204854425579,
      "grad_norm": 1.354333277408854,
      "learning_rate": 5.500485908649175e-06,
      "loss": 0.2459,
      "step": 566
    },
    {
      "epoch": 0.01654122177489935,
      "grad_norm": 1.325314031879747,
      "learning_rate": 5.510204081632653e-06,
      "loss": 0.2178,
      "step": 567
    },
    {
      "epoch": 0.016570395005542915,
      "grad_norm": 1.11324996018437,
      "learning_rate": 5.5199222546161325e-06,
      "loss": 0.2338,
      "step": 568
    },
    {
      "epoch": 0.016599568236186475,
      "grad_norm": 1.3652803222148933,
      "learning_rate": 5.529640427599612e-06,
      "loss": 0.2141,
      "step": 569
    },
    {
      "epoch": 0.016628741466830035,
      "grad_norm": 1.5019331891505299,
      "learning_rate": 5.539358600583091e-06,
      "loss": 0.2889,
      "step": 570
    },
    {
      "epoch": 0.0166579146974736,
      "grad_norm": 1.2501896889679502,
      "learning_rate": 5.54907677356657e-06,
      "loss": 0.2358,
      "step": 571
    },
    {
      "epoch": 0.01668708792811716,
      "grad_norm": 1.295296482077547,
      "learning_rate": 5.558794946550049e-06,
      "loss": 0.25,
      "step": 572
    },
    {
      "epoch": 0.01671626115876072,
      "grad_norm": 1.1903028277520935,
      "learning_rate": 5.568513119533528e-06,
      "loss": 0.2442,
      "step": 573
    },
    {
      "epoch": 0.016745434389404283,
      "grad_norm": 1.308995566767906,
      "learning_rate": 5.578231292517007e-06,
      "loss": 0.2548,
      "step": 574
    },
    {
      "epoch": 0.016774607620047843,
      "grad_norm": 1.3248921047172681,
      "learning_rate": 5.587949465500486e-06,
      "loss": 0.2714,
      "step": 575
    },
    {
      "epoch": 0.016803780850691407,
      "grad_norm": 1.2266592847317053,
      "learning_rate": 5.597667638483965e-06,
      "loss": 0.2275,
      "step": 576
    },
    {
      "epoch": 0.016832954081334967,
      "grad_norm": 1.399214573599055,
      "learning_rate": 5.6073858114674455e-06,
      "loss": 0.2321,
      "step": 577
    },
    {
      "epoch": 0.016862127311978527,
      "grad_norm": 1.22807791517516,
      "learning_rate": 5.617103984450923e-06,
      "loss": 0.2557,
      "step": 578
    },
    {
      "epoch": 0.01689130054262209,
      "grad_norm": 1.020201564208349,
      "learning_rate": 5.626822157434403e-06,
      "loss": 0.2544,
      "step": 579
    },
    {
      "epoch": 0.01692047377326565,
      "grad_norm": 1.15048463646873,
      "learning_rate": 5.6365403304178826e-06,
      "loss": 0.2258,
      "step": 580
    },
    {
      "epoch": 0.01694964700390921,
      "grad_norm": 1.3848293352426153,
      "learning_rate": 5.646258503401361e-06,
      "loss": 0.2637,
      "step": 581
    },
    {
      "epoch": 0.016978820234552775,
      "grad_norm": 1.2734601804541426,
      "learning_rate": 5.65597667638484e-06,
      "loss": 0.2179,
      "step": 582
    },
    {
      "epoch": 0.017007993465196335,
      "grad_norm": 1.05233161257467,
      "learning_rate": 5.66569484936832e-06,
      "loss": 0.2198,
      "step": 583
    },
    {
      "epoch": 0.0170371666958399,
      "grad_norm": 1.2250757368811662,
      "learning_rate": 5.675413022351798e-06,
      "loss": 0.2452,
      "step": 584
    },
    {
      "epoch": 0.01706633992648346,
      "grad_norm": 1.2896924036488904,
      "learning_rate": 5.6851311953352774e-06,
      "loss": 0.2414,
      "step": 585
    },
    {
      "epoch": 0.01709551315712702,
      "grad_norm": 1.0976720248422727,
      "learning_rate": 5.694849368318757e-06,
      "loss": 0.2263,
      "step": 586
    },
    {
      "epoch": 0.017124686387770583,
      "grad_norm": 1.0181006871906253,
      "learning_rate": 5.704567541302236e-06,
      "loss": 0.2573,
      "step": 587
    },
    {
      "epoch": 0.017153859618414143,
      "grad_norm": 1.2622292761764857,
      "learning_rate": 5.7142857142857145e-06,
      "loss": 0.2535,
      "step": 588
    },
    {
      "epoch": 0.017183032849057703,
      "grad_norm": 1.3264411006970873,
      "learning_rate": 5.724003887269194e-06,
      "loss": 0.2532,
      "step": 589
    },
    {
      "epoch": 0.017212206079701267,
      "grad_norm": 1.3876971135647425,
      "learning_rate": 5.733722060252673e-06,
      "loss": 0.2511,
      "step": 590
    },
    {
      "epoch": 0.017241379310344827,
      "grad_norm": 0.9140296006283072,
      "learning_rate": 5.743440233236152e-06,
      "loss": 0.2181,
      "step": 591
    },
    {
      "epoch": 0.017270552540988388,
      "grad_norm": 1.1627511150725285,
      "learning_rate": 5.753158406219631e-06,
      "loss": 0.2426,
      "step": 592
    },
    {
      "epoch": 0.01729972577163195,
      "grad_norm": 0.9866981750071782,
      "learning_rate": 5.76287657920311e-06,
      "loss": 0.2276,
      "step": 593
    },
    {
      "epoch": 0.01732889900227551,
      "grad_norm": 1.007490986657743,
      "learning_rate": 5.7725947521865895e-06,
      "loss": 0.2073,
      "step": 594
    },
    {
      "epoch": 0.017358072232919075,
      "grad_norm": 1.2474666067068907,
      "learning_rate": 5.782312925170068e-06,
      "loss": 0.2378,
      "step": 595
    },
    {
      "epoch": 0.017387245463562635,
      "grad_norm": 1.279196133454035,
      "learning_rate": 5.792031098153547e-06,
      "loss": 0.2607,
      "step": 596
    },
    {
      "epoch": 0.017416418694206195,
      "grad_norm": 1.1945455195443986,
      "learning_rate": 5.8017492711370275e-06,
      "loss": 0.2442,
      "step": 597
    },
    {
      "epoch": 0.01744559192484976,
      "grad_norm": 1.420540164405751,
      "learning_rate": 5.811467444120505e-06,
      "loss": 0.2123,
      "step": 598
    },
    {
      "epoch": 0.01747476515549332,
      "grad_norm": 1.3801792520800733,
      "learning_rate": 5.821185617103985e-06,
      "loss": 0.2866,
      "step": 599
    },
    {
      "epoch": 0.01750393838613688,
      "grad_norm": 1.4450475245321002,
      "learning_rate": 5.8309037900874645e-06,
      "loss": 0.2689,
      "step": 600
    },
    {
      "epoch": 0.017533111616780443,
      "grad_norm": 1.4434020910980732,
      "learning_rate": 5.840621963070943e-06,
      "loss": 0.2461,
      "step": 601
    },
    {
      "epoch": 0.017562284847424003,
      "grad_norm": 1.170915524215105,
      "learning_rate": 5.850340136054422e-06,
      "loss": 0.2304,
      "step": 602
    },
    {
      "epoch": 0.017591458078067564,
      "grad_norm": 1.184787623641522,
      "learning_rate": 5.860058309037902e-06,
      "loss": 0.2216,
      "step": 603
    },
    {
      "epoch": 0.017620631308711127,
      "grad_norm": 1.4226813433172771,
      "learning_rate": 5.869776482021381e-06,
      "loss": 0.266,
      "step": 604
    },
    {
      "epoch": 0.017649804539354687,
      "grad_norm": 1.1701452340689182,
      "learning_rate": 5.879494655004859e-06,
      "loss": 0.2365,
      "step": 605
    },
    {
      "epoch": 0.01767897776999825,
      "grad_norm": 1.1314185813102013,
      "learning_rate": 5.889212827988339e-06,
      "loss": 0.2279,
      "step": 606
    },
    {
      "epoch": 0.01770815100064181,
      "grad_norm": 1.3417308850278244,
      "learning_rate": 5.898931000971818e-06,
      "loss": 0.2297,
      "step": 607
    },
    {
      "epoch": 0.01773732423128537,
      "grad_norm": 1.1774011258844665,
      "learning_rate": 5.9086491739552965e-06,
      "loss": 0.2268,
      "step": 608
    },
    {
      "epoch": 0.017766497461928935,
      "grad_norm": 1.1483123651885505,
      "learning_rate": 5.918367346938776e-06,
      "loss": 0.261,
      "step": 609
    },
    {
      "epoch": 0.017795670692572495,
      "grad_norm": 1.0659224829575464,
      "learning_rate": 5.928085519922255e-06,
      "loss": 0.2557,
      "step": 610
    },
    {
      "epoch": 0.017824843923216056,
      "grad_norm": 1.3453124406569783,
      "learning_rate": 5.937803692905734e-06,
      "loss": 0.2597,
      "step": 611
    },
    {
      "epoch": 0.01785401715385962,
      "grad_norm": 0.9446040810558779,
      "learning_rate": 5.947521865889213e-06,
      "loss": 0.2445,
      "step": 612
    },
    {
      "epoch": 0.01788319038450318,
      "grad_norm": 1.075287696623533,
      "learning_rate": 5.957240038872692e-06,
      "loss": 0.2346,
      "step": 613
    },
    {
      "epoch": 0.01791236361514674,
      "grad_norm": 1.1094662802964779,
      "learning_rate": 5.966958211856172e-06,
      "loss": 0.2217,
      "step": 614
    },
    {
      "epoch": 0.017941536845790303,
      "grad_norm": 1.4239082627795743,
      "learning_rate": 5.97667638483965e-06,
      "loss": 0.2595,
      "step": 615
    },
    {
      "epoch": 0.017970710076433864,
      "grad_norm": 1.4489530560590527,
      "learning_rate": 5.98639455782313e-06,
      "loss": 0.2404,
      "step": 616
    },
    {
      "epoch": 0.017999883307077427,
      "grad_norm": 1.1926829007742819,
      "learning_rate": 5.9961127308066094e-06,
      "loss": 0.2334,
      "step": 617
    },
    {
      "epoch": 0.018029056537720987,
      "grad_norm": 1.2726694122969338,
      "learning_rate": 6.005830903790088e-06,
      "loss": 0.2362,
      "step": 618
    },
    {
      "epoch": 0.018058229768364548,
      "grad_norm": 1.2354341099058872,
      "learning_rate": 6.015549076773567e-06,
      "loss": 0.236,
      "step": 619
    },
    {
      "epoch": 0.01808740299900811,
      "grad_norm": 1.1685072760117496,
      "learning_rate": 6.0252672497570465e-06,
      "loss": 0.2352,
      "step": 620
    },
    {
      "epoch": 0.01811657622965167,
      "grad_norm": 1.334388794918696,
      "learning_rate": 6.034985422740526e-06,
      "loss": 0.2369,
      "step": 621
    },
    {
      "epoch": 0.01814574946029523,
      "grad_norm": 1.2956117401686313,
      "learning_rate": 6.044703595724004e-06,
      "loss": 0.2287,
      "step": 622
    },
    {
      "epoch": 0.018174922690938795,
      "grad_norm": 1.0273512689823068,
      "learning_rate": 6.054421768707484e-06,
      "loss": 0.2498,
      "step": 623
    },
    {
      "epoch": 0.018204095921582356,
      "grad_norm": 1.2532451149411814,
      "learning_rate": 6.064139941690963e-06,
      "loss": 0.2438,
      "step": 624
    },
    {
      "epoch": 0.018233269152225916,
      "grad_norm": 1.2453640640748649,
      "learning_rate": 6.073858114674441e-06,
      "loss": 0.2363,
      "step": 625
    },
    {
      "epoch": 0.01826244238286948,
      "grad_norm": 1.0258445071379283,
      "learning_rate": 6.083576287657921e-06,
      "loss": 0.2254,
      "step": 626
    },
    {
      "epoch": 0.01829161561351304,
      "grad_norm": 1.1176272783245296,
      "learning_rate": 6.0932944606414e-06,
      "loss": 0.2287,
      "step": 627
    },
    {
      "epoch": 0.018320788844156603,
      "grad_norm": 0.9938821905647645,
      "learning_rate": 6.1030126336248785e-06,
      "loss": 0.232,
      "step": 628
    },
    {
      "epoch": 0.018349962074800164,
      "grad_norm": 1.2124939329163622,
      "learning_rate": 6.112730806608358e-06,
      "loss": 0.2254,
      "step": 629
    },
    {
      "epoch": 0.018379135305443724,
      "grad_norm": 1.2653067642151536,
      "learning_rate": 6.122448979591837e-06,
      "loss": 0.2331,
      "step": 630
    },
    {
      "epoch": 0.018408308536087287,
      "grad_norm": 1.2801920844112518,
      "learning_rate": 6.132167152575316e-06,
      "loss": 0.2646,
      "step": 631
    },
    {
      "epoch": 0.018437481766730848,
      "grad_norm": 1.3997481743012654,
      "learning_rate": 6.141885325558795e-06,
      "loss": 0.2271,
      "step": 632
    },
    {
      "epoch": 0.018466654997374408,
      "grad_norm": 1.0788089008051882,
      "learning_rate": 6.151603498542274e-06,
      "loss": 0.231,
      "step": 633
    },
    {
      "epoch": 0.01849582822801797,
      "grad_norm": 1.1463139231059625,
      "learning_rate": 6.161321671525754e-06,
      "loss": 0.2745,
      "step": 634
    },
    {
      "epoch": 0.01852500145866153,
      "grad_norm": 1.5979156001730184,
      "learning_rate": 6.171039844509232e-06,
      "loss": 0.2569,
      "step": 635
    },
    {
      "epoch": 0.018554174689305095,
      "grad_norm": 1.179192085643085,
      "learning_rate": 6.180758017492712e-06,
      "loss": 0.2307,
      "step": 636
    },
    {
      "epoch": 0.018583347919948656,
      "grad_norm": 0.9235492089736143,
      "learning_rate": 6.1904761904761914e-06,
      "loss": 0.2272,
      "step": 637
    },
    {
      "epoch": 0.018612521150592216,
      "grad_norm": 1.3841914000140947,
      "learning_rate": 6.200194363459671e-06,
      "loss": 0.2366,
      "step": 638
    },
    {
      "epoch": 0.01864169438123578,
      "grad_norm": 1.6345443503321042,
      "learning_rate": 6.209912536443149e-06,
      "loss": 0.2582,
      "step": 639
    },
    {
      "epoch": 0.01867086761187934,
      "grad_norm": 0.991943928840273,
      "learning_rate": 6.2196307094266285e-06,
      "loss": 0.2453,
      "step": 640
    },
    {
      "epoch": 0.0187000408425229,
      "grad_norm": 1.0783813604538777,
      "learning_rate": 6.229348882410108e-06,
      "loss": 0.2411,
      "step": 641
    },
    {
      "epoch": 0.018729214073166463,
      "grad_norm": 1.1270768208689745,
      "learning_rate": 6.239067055393586e-06,
      "loss": 0.2298,
      "step": 642
    },
    {
      "epoch": 0.018758387303810024,
      "grad_norm": 1.5482563859839245,
      "learning_rate": 6.248785228377066e-06,
      "loss": 0.2603,
      "step": 643
    },
    {
      "epoch": 0.018787560534453584,
      "grad_norm": 1.1322433743515374,
      "learning_rate": 6.258503401360545e-06,
      "loss": 0.2231,
      "step": 644
    },
    {
      "epoch": 0.018816733765097148,
      "grad_norm": 1.1655810249844738,
      "learning_rate": 6.268221574344023e-06,
      "loss": 0.2245,
      "step": 645
    },
    {
      "epoch": 0.018845906995740708,
      "grad_norm": 1.302144670429167,
      "learning_rate": 6.277939747327503e-06,
      "loss": 0.2509,
      "step": 646
    },
    {
      "epoch": 0.01887508022638427,
      "grad_norm": 1.3650227046508312,
      "learning_rate": 6.287657920310982e-06,
      "loss": 0.2391,
      "step": 647
    },
    {
      "epoch": 0.01890425345702783,
      "grad_norm": 1.2382916532992942,
      "learning_rate": 6.297376093294461e-06,
      "loss": 0.2299,
      "step": 648
    },
    {
      "epoch": 0.018933426687671392,
      "grad_norm": 1.2406123637674724,
      "learning_rate": 6.30709426627794e-06,
      "loss": 0.2179,
      "step": 649
    },
    {
      "epoch": 0.018962599918314955,
      "grad_norm": 1.1771581053389553,
      "learning_rate": 6.316812439261419e-06,
      "loss": 0.228,
      "step": 650
    },
    {
      "epoch": 0.018991773148958516,
      "grad_norm": 1.1978049273503264,
      "learning_rate": 6.326530612244899e-06,
      "loss": 0.2345,
      "step": 651
    },
    {
      "epoch": 0.019020946379602076,
      "grad_norm": 1.0438236362338311,
      "learning_rate": 6.336248785228377e-06,
      "loss": 0.2301,
      "step": 652
    },
    {
      "epoch": 0.01905011961024564,
      "grad_norm": 1.2392735048781178,
      "learning_rate": 6.345966958211857e-06,
      "loss": 0.2241,
      "step": 653
    },
    {
      "epoch": 0.0190792928408892,
      "grad_norm": 1.2713076329828823,
      "learning_rate": 6.355685131195336e-06,
      "loss": 0.2511,
      "step": 654
    },
    {
      "epoch": 0.01910846607153276,
      "grad_norm": 1.0998516143216948,
      "learning_rate": 6.365403304178814e-06,
      "loss": 0.2352,
      "step": 655
    },
    {
      "epoch": 0.019137639302176324,
      "grad_norm": 1.4780548053617104,
      "learning_rate": 6.375121477162294e-06,
      "loss": 0.2953,
      "step": 656
    },
    {
      "epoch": 0.019166812532819884,
      "grad_norm": 1.0594420018595556,
      "learning_rate": 6.384839650145773e-06,
      "loss": 0.2081,
      "step": 657
    },
    {
      "epoch": 0.019195985763463447,
      "grad_norm": 1.0429732466991684,
      "learning_rate": 6.394557823129253e-06,
      "loss": 0.2488,
      "step": 658
    },
    {
      "epoch": 0.019225158994107008,
      "grad_norm": 1.23883413417218,
      "learning_rate": 6.404275996112731e-06,
      "loss": 0.2444,
      "step": 659
    },
    {
      "epoch": 0.019254332224750568,
      "grad_norm": 1.2195983025524706,
      "learning_rate": 6.4139941690962105e-06,
      "loss": 0.2385,
      "step": 660
    },
    {
      "epoch": 0.01928350545539413,
      "grad_norm": 1.4125414694072445,
      "learning_rate": 6.42371234207969e-06,
      "loss": 0.2599,
      "step": 661
    },
    {
      "epoch": 0.01931267868603769,
      "grad_norm": 1.3614194178922787,
      "learning_rate": 6.433430515063168e-06,
      "loss": 0.2275,
      "step": 662
    },
    {
      "epoch": 0.019341851916681252,
      "grad_norm": 1.2375400770445757,
      "learning_rate": 6.443148688046648e-06,
      "loss": 0.2279,
      "step": 663
    },
    {
      "epoch": 0.019371025147324816,
      "grad_norm": 1.541586033440628,
      "learning_rate": 6.452866861030127e-06,
      "loss": 0.2657,
      "step": 664
    },
    {
      "epoch": 0.019400198377968376,
      "grad_norm": 1.2173335817869875,
      "learning_rate": 6.462585034013606e-06,
      "loss": 0.2331,
      "step": 665
    },
    {
      "epoch": 0.019429371608611936,
      "grad_norm": 0.9510361983667885,
      "learning_rate": 6.472303206997085e-06,
      "loss": 0.2238,
      "step": 666
    },
    {
      "epoch": 0.0194585448392555,
      "grad_norm": 1.2748878906489571,
      "learning_rate": 6.482021379980564e-06,
      "loss": 0.2486,
      "step": 667
    },
    {
      "epoch": 0.01948771806989906,
      "grad_norm": 1.4634602527052192,
      "learning_rate": 6.491739552964043e-06,
      "loss": 0.2657,
      "step": 668
    },
    {
      "epoch": 0.019516891300542624,
      "grad_norm": 0.899820820063537,
      "learning_rate": 6.501457725947522e-06,
      "loss": 0.2149,
      "step": 669
    },
    {
      "epoch": 0.019546064531186184,
      "grad_norm": 1.1159452835771118,
      "learning_rate": 6.511175898931001e-06,
      "loss": 0.2526,
      "step": 670
    },
    {
      "epoch": 0.019575237761829744,
      "grad_norm": 1.8587001243768357,
      "learning_rate": 6.520894071914481e-06,
      "loss": 0.2368,
      "step": 671
    },
    {
      "epoch": 0.019604410992473308,
      "grad_norm": 1.1995907976290137,
      "learning_rate": 6.530612244897959e-06,
      "loss": 0.2454,
      "step": 672
    },
    {
      "epoch": 0.019633584223116868,
      "grad_norm": 1.1396371491198773,
      "learning_rate": 6.540330417881439e-06,
      "loss": 0.2436,
      "step": 673
    },
    {
      "epoch": 0.019662757453760428,
      "grad_norm": 1.0339295633577104,
      "learning_rate": 6.550048590864918e-06,
      "loss": 0.2352,
      "step": 674
    },
    {
      "epoch": 0.01969193068440399,
      "grad_norm": 1.3780473198007388,
      "learning_rate": 6.559766763848398e-06,
      "loss": 0.2559,
      "step": 675
    },
    {
      "epoch": 0.019721103915047552,
      "grad_norm": 1.0840698315294541,
      "learning_rate": 6.569484936831876e-06,
      "loss": 0.2264,
      "step": 676
    },
    {
      "epoch": 0.019750277145691116,
      "grad_norm": 1.1117692534296524,
      "learning_rate": 6.579203109815355e-06,
      "loss": 0.2306,
      "step": 677
    },
    {
      "epoch": 0.019779450376334676,
      "grad_norm": 1.2177199856031593,
      "learning_rate": 6.588921282798835e-06,
      "loss": 0.2173,
      "step": 678
    },
    {
      "epoch": 0.019808623606978236,
      "grad_norm": 1.4263377351884565,
      "learning_rate": 6.598639455782313e-06,
      "loss": 0.2632,
      "step": 679
    },
    {
      "epoch": 0.0198377968376218,
      "grad_norm": 1.1117249106111613,
      "learning_rate": 6.6083576287657925e-06,
      "loss": 0.2449,
      "step": 680
    },
    {
      "epoch": 0.01986697006826536,
      "grad_norm": 1.3031984868239914,
      "learning_rate": 6.618075801749272e-06,
      "loss": 0.2645,
      "step": 681
    },
    {
      "epoch": 0.01989614329890892,
      "grad_norm": 1.3056784859426693,
      "learning_rate": 6.627793974732751e-06,
      "loss": 0.2322,
      "step": 682
    },
    {
      "epoch": 0.019925316529552484,
      "grad_norm": 0.9106923138955538,
      "learning_rate": 6.6375121477162296e-06,
      "loss": 0.2255,
      "step": 683
    },
    {
      "epoch": 0.019954489760196044,
      "grad_norm": 1.154730924057303,
      "learning_rate": 6.647230320699709e-06,
      "loss": 0.2543,
      "step": 684
    },
    {
      "epoch": 0.019983662990839604,
      "grad_norm": 1.040696964643687,
      "learning_rate": 6.656948493683188e-06,
      "loss": 0.2235,
      "step": 685
    },
    {
      "epoch": 0.020012836221483168,
      "grad_norm": 1.1673430548551929,
      "learning_rate": 6.666666666666667e-06,
      "loss": 0.2393,
      "step": 686
    },
    {
      "epoch": 0.020042009452126728,
      "grad_norm": 1.0125759625993465,
      "learning_rate": 6.676384839650146e-06,
      "loss": 0.2178,
      "step": 687
    },
    {
      "epoch": 0.02007118268277029,
      "grad_norm": 1.3676924651707596,
      "learning_rate": 6.686103012633626e-06,
      "loss": 0.252,
      "step": 688
    },
    {
      "epoch": 0.020100355913413852,
      "grad_norm": 1.1636943279474745,
      "learning_rate": 6.695821185617104e-06,
      "loss": 0.2342,
      "step": 689
    },
    {
      "epoch": 0.020129529144057412,
      "grad_norm": 1.2277204714048011,
      "learning_rate": 6.705539358600584e-06,
      "loss": 0.2505,
      "step": 690
    },
    {
      "epoch": 0.020158702374700976,
      "grad_norm": 1.2978532674924002,
      "learning_rate": 6.715257531584063e-06,
      "loss": 0.2369,
      "step": 691
    },
    {
      "epoch": 0.020187875605344536,
      "grad_norm": 1.1769595666714598,
      "learning_rate": 6.7249757045675425e-06,
      "loss": 0.2515,
      "step": 692
    },
    {
      "epoch": 0.020217048835988096,
      "grad_norm": 1.1000942989695253,
      "learning_rate": 6.734693877551021e-06,
      "loss": 0.2428,
      "step": 693
    },
    {
      "epoch": 0.02024622206663166,
      "grad_norm": 0.8371113735131498,
      "learning_rate": 6.7444120505345e-06,
      "loss": 0.1993,
      "step": 694
    },
    {
      "epoch": 0.02027539529727522,
      "grad_norm": 1.120861467251313,
      "learning_rate": 6.75413022351798e-06,
      "loss": 0.2342,
      "step": 695
    },
    {
      "epoch": 0.02030456852791878,
      "grad_norm": 1.0814084397294184,
      "learning_rate": 6.763848396501458e-06,
      "loss": 0.2589,
      "step": 696
    },
    {
      "epoch": 0.020333741758562344,
      "grad_norm": 1.1083675959863164,
      "learning_rate": 6.773566569484937e-06,
      "loss": 0.2288,
      "step": 697
    },
    {
      "epoch": 0.020362914989205904,
      "grad_norm": 1.328044711803676,
      "learning_rate": 6.783284742468417e-06,
      "loss": 0.2873,
      "step": 698
    },
    {
      "epoch": 0.020392088219849468,
      "grad_norm": 1.2457702284333594,
      "learning_rate": 6.793002915451895e-06,
      "loss": 0.2384,
      "step": 699
    },
    {
      "epoch": 0.020421261450493028,
      "grad_norm": 0.9138010870235221,
      "learning_rate": 6.8027210884353745e-06,
      "loss": 0.2123,
      "step": 700
    },
    {
      "epoch": 0.020450434681136588,
      "grad_norm": 1.0760978257264822,
      "learning_rate": 6.812439261418854e-06,
      "loss": 0.2563,
      "step": 701
    },
    {
      "epoch": 0.020479607911780152,
      "grad_norm": 1.3198861326415898,
      "learning_rate": 6.822157434402333e-06,
      "loss": 0.2572,
      "step": 702
    },
    {
      "epoch": 0.020508781142423712,
      "grad_norm": 1.4575455604532646,
      "learning_rate": 6.8318756073858115e-06,
      "loss": 0.2379,
      "step": 703
    },
    {
      "epoch": 0.020537954373067272,
      "grad_norm": 1.109333016897937,
      "learning_rate": 6.841593780369291e-06,
      "loss": 0.2112,
      "step": 704
    },
    {
      "epoch": 0.020567127603710836,
      "grad_norm": 1.0302927524542895,
      "learning_rate": 6.85131195335277e-06,
      "loss": 0.2145,
      "step": 705
    },
    {
      "epoch": 0.020596300834354396,
      "grad_norm": 1.2909537063625136,
      "learning_rate": 6.861030126336249e-06,
      "loss": 0.2866,
      "step": 706
    },
    {
      "epoch": 0.020625474064997956,
      "grad_norm": 1.6093127500609739,
      "learning_rate": 6.870748299319728e-06,
      "loss": 0.2486,
      "step": 707
    },
    {
      "epoch": 0.02065464729564152,
      "grad_norm": 1.2878326694626485,
      "learning_rate": 6.880466472303208e-06,
      "loss": 0.2341,
      "step": 708
    },
    {
      "epoch": 0.02068382052628508,
      "grad_norm": 1.1876576872307236,
      "learning_rate": 6.890184645286687e-06,
      "loss": 0.2422,
      "step": 709
    },
    {
      "epoch": 0.020712993756928644,
      "grad_norm": 1.2053349767690207,
      "learning_rate": 6.899902818270166e-06,
      "loss": 0.226,
      "step": 710
    },
    {
      "epoch": 0.020742166987572204,
      "grad_norm": 1.1621295917851582,
      "learning_rate": 6.909620991253645e-06,
      "loss": 0.2573,
      "step": 711
    },
    {
      "epoch": 0.020771340218215764,
      "grad_norm": 1.0779227597831955,
      "learning_rate": 6.9193391642371245e-06,
      "loss": 0.2219,
      "step": 712
    },
    {
      "epoch": 0.020800513448859328,
      "grad_norm": 1.0600744780537699,
      "learning_rate": 6.929057337220603e-06,
      "loss": 0.2435,
      "step": 713
    },
    {
      "epoch": 0.020829686679502888,
      "grad_norm": 1.1870402162790261,
      "learning_rate": 6.938775510204082e-06,
      "loss": 0.2448,
      "step": 714
    },
    {
      "epoch": 0.020858859910146448,
      "grad_norm": 1.15577933444016,
      "learning_rate": 6.948493683187562e-06,
      "loss": 0.241,
      "step": 715
    },
    {
      "epoch": 0.020888033140790012,
      "grad_norm": 1.0385025420205705,
      "learning_rate": 6.95821185617104e-06,
      "loss": 0.2184,
      "step": 716
    },
    {
      "epoch": 0.020917206371433572,
      "grad_norm": 1.1981330447897032,
      "learning_rate": 6.967930029154519e-06,
      "loss": 0.2529,
      "step": 717
    },
    {
      "epoch": 0.020946379602077136,
      "grad_norm": 1.3893074797081237,
      "learning_rate": 6.977648202137999e-06,
      "loss": 0.2406,
      "step": 718
    },
    {
      "epoch": 0.020975552832720696,
      "grad_norm": 1.3478510367178977,
      "learning_rate": 6.987366375121478e-06,
      "loss": 0.2499,
      "step": 719
    },
    {
      "epoch": 0.021004726063364256,
      "grad_norm": 1.2290443600628422,
      "learning_rate": 6.9970845481049564e-06,
      "loss": 0.2249,
      "step": 720
    },
    {
      "epoch": 0.02103389929400782,
      "grad_norm": 1.223682726594295,
      "learning_rate": 7.006802721088436e-06,
      "loss": 0.234,
      "step": 721
    },
    {
      "epoch": 0.02106307252465138,
      "grad_norm": 1.2519725592077438,
      "learning_rate": 7.016520894071915e-06,
      "loss": 0.2158,
      "step": 722
    },
    {
      "epoch": 0.02109224575529494,
      "grad_norm": 1.0305745510312196,
      "learning_rate": 7.0262390670553935e-06,
      "loss": 0.2625,
      "step": 723
    },
    {
      "epoch": 0.021121418985938504,
      "grad_norm": 1.4914540459963992,
      "learning_rate": 7.035957240038873e-06,
      "loss": 0.2494,
      "step": 724
    },
    {
      "epoch": 0.021150592216582064,
      "grad_norm": 1.4949867206265093,
      "learning_rate": 7.045675413022353e-06,
      "loss": 0.2352,
      "step": 725
    },
    {
      "epoch": 0.021179765447225624,
      "grad_norm": 1.3330272613270544,
      "learning_rate": 7.055393586005832e-06,
      "loss": 0.2236,
      "step": 726
    },
    {
      "epoch": 0.021208938677869188,
      "grad_norm": 1.1526352996187208,
      "learning_rate": 7.06511175898931e-06,
      "loss": 0.2502,
      "step": 727
    },
    {
      "epoch": 0.021238111908512748,
      "grad_norm": 1.166306458061035,
      "learning_rate": 7.07482993197279e-06,
      "loss": 0.2195,
      "step": 728
    },
    {
      "epoch": 0.021267285139156312,
      "grad_norm": 1.2374836997108423,
      "learning_rate": 7.084548104956269e-06,
      "loss": 0.2595,
      "step": 729
    },
    {
      "epoch": 0.021296458369799872,
      "grad_norm": 1.0946137599976848,
      "learning_rate": 7.094266277939748e-06,
      "loss": 0.2235,
      "step": 730
    },
    {
      "epoch": 0.021325631600443432,
      "grad_norm": 1.3542403105266712,
      "learning_rate": 7.103984450923227e-06,
      "loss": 0.2517,
      "step": 731
    },
    {
      "epoch": 0.021354804831086996,
      "grad_norm": 1.3603040073749126,
      "learning_rate": 7.1137026239067065e-06,
      "loss": 0.259,
      "step": 732
    },
    {
      "epoch": 0.021383978061730556,
      "grad_norm": 1.148411391960263,
      "learning_rate": 7.123420796890185e-06,
      "loss": 0.2511,
      "step": 733
    },
    {
      "epoch": 0.021413151292374116,
      "grad_norm": 1.3990863614174234,
      "learning_rate": 7.133138969873664e-06,
      "loss": 0.2233,
      "step": 734
    },
    {
      "epoch": 0.02144232452301768,
      "grad_norm": 1.1289883923040358,
      "learning_rate": 7.1428571428571436e-06,
      "loss": 0.2483,
      "step": 735
    },
    {
      "epoch": 0.02147149775366124,
      "grad_norm": 1.592308270330597,
      "learning_rate": 7.152575315840623e-06,
      "loss": 0.2566,
      "step": 736
    },
    {
      "epoch": 0.0215006709843048,
      "grad_norm": 1.1676915593536734,
      "learning_rate": 7.162293488824101e-06,
      "loss": 0.2244,
      "step": 737
    },
    {
      "epoch": 0.021529844214948364,
      "grad_norm": 1.434217925251431,
      "learning_rate": 7.172011661807581e-06,
      "loss": 0.2694,
      "step": 738
    },
    {
      "epoch": 0.021559017445591924,
      "grad_norm": 1.3398064548223516,
      "learning_rate": 7.18172983479106e-06,
      "loss": 0.2445,
      "step": 739
    },
    {
      "epoch": 0.021588190676235488,
      "grad_norm": 1.0631062210348727,
      "learning_rate": 7.191448007774538e-06,
      "loss": 0.2387,
      "step": 740
    },
    {
      "epoch": 0.021617363906879048,
      "grad_norm": 1.2419787284853856,
      "learning_rate": 7.201166180758018e-06,
      "loss": 0.2128,
      "step": 741
    },
    {
      "epoch": 0.02164653713752261,
      "grad_norm": 1.347170823544848,
      "learning_rate": 7.210884353741497e-06,
      "loss": 0.2189,
      "step": 742
    },
    {
      "epoch": 0.021675710368166172,
      "grad_norm": 1.1052609255450854,
      "learning_rate": 7.2206025267249755e-06,
      "loss": 0.2274,
      "step": 743
    },
    {
      "epoch": 0.021704883598809732,
      "grad_norm": 0.9141740311729317,
      "learning_rate": 7.230320699708455e-06,
      "loss": 0.2621,
      "step": 744
    },
    {
      "epoch": 0.021734056829453292,
      "grad_norm": 1.2036279072941747,
      "learning_rate": 7.240038872691935e-06,
      "loss": 0.2294,
      "step": 745
    },
    {
      "epoch": 0.021763230060096856,
      "grad_norm": 1.1373876492741393,
      "learning_rate": 7.249757045675414e-06,
      "loss": 0.248,
      "step": 746
    },
    {
      "epoch": 0.021792403290740416,
      "grad_norm": 1.03097313866654,
      "learning_rate": 7.259475218658893e-06,
      "loss": 0.205,
      "step": 747
    },
    {
      "epoch": 0.021821576521383976,
      "grad_norm": 1.225664083192016,
      "learning_rate": 7.269193391642372e-06,
      "loss": 0.2117,
      "step": 748
    },
    {
      "epoch": 0.02185074975202754,
      "grad_norm": 1.116765043842244,
      "learning_rate": 7.278911564625851e-06,
      "loss": 0.2331,
      "step": 749
    },
    {
      "epoch": 0.0218799229826711,
      "grad_norm": 0.9894490765307579,
      "learning_rate": 7.28862973760933e-06,
      "loss": 0.2103,
      "step": 750
    },
    {
      "epoch": 0.021909096213314664,
      "grad_norm": 0.9377323370832152,
      "learning_rate": 7.298347910592809e-06,
      "loss": 0.2209,
      "step": 751
    },
    {
      "epoch": 0.021938269443958224,
      "grad_norm": 1.1477138262356188,
      "learning_rate": 7.3080660835762885e-06,
      "loss": 0.2073,
      "step": 752
    },
    {
      "epoch": 0.021967442674601784,
      "grad_norm": 1.1800756399150825,
      "learning_rate": 7.317784256559768e-06,
      "loss": 0.2314,
      "step": 753
    },
    {
      "epoch": 0.021996615905245348,
      "grad_norm": 1.1324552297353292,
      "learning_rate": 7.327502429543246e-06,
      "loss": 0.2559,
      "step": 754
    },
    {
      "epoch": 0.022025789135888908,
      "grad_norm": 1.187821230593947,
      "learning_rate": 7.3372206025267255e-06,
      "loss": 0.2116,
      "step": 755
    },
    {
      "epoch": 0.02205496236653247,
      "grad_norm": 1.2814527100250865,
      "learning_rate": 7.346938775510205e-06,
      "loss": 0.2385,
      "step": 756
    },
    {
      "epoch": 0.022084135597176032,
      "grad_norm": 1.3137757179370568,
      "learning_rate": 7.356656948493683e-06,
      "loss": 0.2269,
      "step": 757
    },
    {
      "epoch": 0.022113308827819592,
      "grad_norm": 1.03965993372393,
      "learning_rate": 7.366375121477163e-06,
      "loss": 0.2237,
      "step": 758
    },
    {
      "epoch": 0.022142482058463153,
      "grad_norm": 1.2122225918284888,
      "learning_rate": 7.376093294460642e-06,
      "loss": 0.2602,
      "step": 759
    },
    {
      "epoch": 0.022171655289106716,
      "grad_norm": 1.1906781306110434,
      "learning_rate": 7.38581146744412e-06,
      "loss": 0.2457,
      "step": 760
    },
    {
      "epoch": 0.022200828519750276,
      "grad_norm": 1.0016455739396728,
      "learning_rate": 7.3955296404276e-06,
      "loss": 0.245,
      "step": 761
    },
    {
      "epoch": 0.02223000175039384,
      "grad_norm": 1.0791268810825152,
      "learning_rate": 7.40524781341108e-06,
      "loss": 0.2,
      "step": 762
    },
    {
      "epoch": 0.0222591749810374,
      "grad_norm": 1.2559405314741636,
      "learning_rate": 7.414965986394559e-06,
      "loss": 0.2267,
      "step": 763
    },
    {
      "epoch": 0.02228834821168096,
      "grad_norm": 1.2986118162267253,
      "learning_rate": 7.424684159378037e-06,
      "loss": 0.2468,
      "step": 764
    },
    {
      "epoch": 0.022317521442324524,
      "grad_norm": 0.9914341493989796,
      "learning_rate": 7.434402332361517e-06,
      "loss": 0.2304,
      "step": 765
    },
    {
      "epoch": 0.022346694672968084,
      "grad_norm": 1.2410376941673065,
      "learning_rate": 7.444120505344996e-06,
      "loss": 0.2341,
      "step": 766
    },
    {
      "epoch": 0.022375867903611645,
      "grad_norm": 1.144974483693795,
      "learning_rate": 7.453838678328475e-06,
      "loss": 0.2419,
      "step": 767
    },
    {
      "epoch": 0.022405041134255208,
      "grad_norm": 1.0885957617854234,
      "learning_rate": 7.463556851311954e-06,
      "loss": 0.2379,
      "step": 768
    },
    {
      "epoch": 0.02243421436489877,
      "grad_norm": 1.1391880660459601,
      "learning_rate": 7.473275024295433e-06,
      "loss": 0.2227,
      "step": 769
    },
    {
      "epoch": 0.022463387595542332,
      "grad_norm": 1.159826237177272,
      "learning_rate": 7.482993197278913e-06,
      "loss": 0.2249,
      "step": 770
    },
    {
      "epoch": 0.022492560826185892,
      "grad_norm": 1.1279372315475507,
      "learning_rate": 7.492711370262391e-06,
      "loss": 0.2464,
      "step": 771
    },
    {
      "epoch": 0.022521734056829452,
      "grad_norm": 1.0538247174746573,
      "learning_rate": 7.5024295432458704e-06,
      "loss": 0.2701,
      "step": 772
    },
    {
      "epoch": 0.022550907287473016,
      "grad_norm": 1.0464884319638112,
      "learning_rate": 7.51214771622935e-06,
      "loss": 0.2694,
      "step": 773
    },
    {
      "epoch": 0.022580080518116576,
      "grad_norm": 1.0341781965712196,
      "learning_rate": 7.521865889212828e-06,
      "loss": 0.2483,
      "step": 774
    },
    {
      "epoch": 0.022609253748760137,
      "grad_norm": 1.1521353006383643,
      "learning_rate": 7.5315840621963075e-06,
      "loss": 0.2214,
      "step": 775
    },
    {
      "epoch": 0.0226384269794037,
      "grad_norm": 1.0305586585976119,
      "learning_rate": 7.541302235179787e-06,
      "loss": 0.2137,
      "step": 776
    },
    {
      "epoch": 0.02266760021004726,
      "grad_norm": 1.2017679461793598,
      "learning_rate": 7.551020408163265e-06,
      "loss": 0.2167,
      "step": 777
    },
    {
      "epoch": 0.02269677344069082,
      "grad_norm": 1.147090709009779,
      "learning_rate": 7.560738581146745e-06,
      "loss": 0.2342,
      "step": 778
    },
    {
      "epoch": 0.022725946671334384,
      "grad_norm": 1.0402610823434146,
      "learning_rate": 7.570456754130224e-06,
      "loss": 0.2206,
      "step": 779
    },
    {
      "epoch": 0.022755119901977944,
      "grad_norm": 1.31274832001162,
      "learning_rate": 7.580174927113704e-06,
      "loss": 0.2367,
      "step": 780
    },
    {
      "epoch": 0.022784293132621508,
      "grad_norm": 1.1955641509643553,
      "learning_rate": 7.589893100097182e-06,
      "loss": 0.2567,
      "step": 781
    },
    {
      "epoch": 0.02281346636326507,
      "grad_norm": 1.3969703270390994,
      "learning_rate": 7.599611273080662e-06,
      "loss": 0.2256,
      "step": 782
    },
    {
      "epoch": 0.02284263959390863,
      "grad_norm": 1.0412933297362201,
      "learning_rate": 7.609329446064141e-06,
      "loss": 0.2043,
      "step": 783
    },
    {
      "epoch": 0.022871812824552192,
      "grad_norm": 1.0444517135384463,
      "learning_rate": 7.61904761904762e-06,
      "loss": 0.2195,
      "step": 784
    },
    {
      "epoch": 0.022900986055195752,
      "grad_norm": 1.3301626783376574,
      "learning_rate": 7.628765792031099e-06,
      "loss": 0.2525,
      "step": 785
    },
    {
      "epoch": 0.022930159285839313,
      "grad_norm": 1.2009624863679822,
      "learning_rate": 7.638483965014577e-06,
      "loss": 0.2383,
      "step": 786
    },
    {
      "epoch": 0.022959332516482876,
      "grad_norm": 1.0323992570020093,
      "learning_rate": 7.648202137998057e-06,
      "loss": 0.2256,
      "step": 787
    },
    {
      "epoch": 0.022988505747126436,
      "grad_norm": 1.1705082357772691,
      "learning_rate": 7.657920310981536e-06,
      "loss": 0.2162,
      "step": 788
    },
    {
      "epoch": 0.023017678977769997,
      "grad_norm": 1.016935911820381,
      "learning_rate": 7.667638483965015e-06,
      "loss": 0.2397,
      "step": 789
    },
    {
      "epoch": 0.02304685220841356,
      "grad_norm": 1.2098682858694556,
      "learning_rate": 7.677356656948495e-06,
      "loss": 0.2488,
      "step": 790
    },
    {
      "epoch": 0.02307602543905712,
      "grad_norm": 1.1292603128213778,
      "learning_rate": 7.687074829931972e-06,
      "loss": 0.2336,
      "step": 791
    },
    {
      "epoch": 0.023105198669700684,
      "grad_norm": 1.063301311419607,
      "learning_rate": 7.696793002915453e-06,
      "loss": 0.2286,
      "step": 792
    },
    {
      "epoch": 0.023134371900344244,
      "grad_norm": 1.49425163247432,
      "learning_rate": 7.706511175898933e-06,
      "loss": 0.2195,
      "step": 793
    },
    {
      "epoch": 0.023163545130987805,
      "grad_norm": 1.2549036667959854,
      "learning_rate": 7.71622934888241e-06,
      "loss": 0.204,
      "step": 794
    },
    {
      "epoch": 0.02319271836163137,
      "grad_norm": 1.4739247197768788,
      "learning_rate": 7.72594752186589e-06,
      "loss": 0.2417,
      "step": 795
    },
    {
      "epoch": 0.02322189159227493,
      "grad_norm": 1.3112302205058461,
      "learning_rate": 7.735665694849369e-06,
      "loss": 0.2782,
      "step": 796
    },
    {
      "epoch": 0.02325106482291849,
      "grad_norm": 1.619905754938196,
      "learning_rate": 7.745383867832848e-06,
      "loss": 0.3017,
      "step": 797
    },
    {
      "epoch": 0.023280238053562052,
      "grad_norm": 1.3563231856399103,
      "learning_rate": 7.755102040816327e-06,
      "loss": 0.2599,
      "step": 798
    },
    {
      "epoch": 0.023309411284205613,
      "grad_norm": 1.327772442289883,
      "learning_rate": 7.764820213799807e-06,
      "loss": 0.248,
      "step": 799
    },
    {
      "epoch": 0.023338584514849173,
      "grad_norm": 1.2852257136338878,
      "learning_rate": 7.774538386783286e-06,
      "loss": 0.231,
      "step": 800
    },
    {
      "epoch": 0.023367757745492736,
      "grad_norm": 1.2482077449290176,
      "learning_rate": 7.784256559766764e-06,
      "loss": 0.2229,
      "step": 801
    },
    {
      "epoch": 0.023396930976136297,
      "grad_norm": 1.1450007187952924,
      "learning_rate": 7.793974732750243e-06,
      "loss": 0.272,
      "step": 802
    },
    {
      "epoch": 0.02342610420677986,
      "grad_norm": 1.0787544506279128,
      "learning_rate": 7.803692905733722e-06,
      "loss": 0.224,
      "step": 803
    },
    {
      "epoch": 0.02345527743742342,
      "grad_norm": 1.6160197667204934,
      "learning_rate": 7.813411078717202e-06,
      "loss": 0.2463,
      "step": 804
    },
    {
      "epoch": 0.02348445066806698,
      "grad_norm": 0.8985221469400513,
      "learning_rate": 7.823129251700681e-06,
      "loss": 0.2042,
      "step": 805
    },
    {
      "epoch": 0.023513623898710544,
      "grad_norm": 1.1277547620984618,
      "learning_rate": 7.83284742468416e-06,
      "loss": 0.2648,
      "step": 806
    },
    {
      "epoch": 0.023542797129354105,
      "grad_norm": 1.0706806725213707,
      "learning_rate": 7.84256559766764e-06,
      "loss": 0.233,
      "step": 807
    },
    {
      "epoch": 0.023571970359997665,
      "grad_norm": 1.2250251136180632,
      "learning_rate": 7.852283770651117e-06,
      "loss": 0.2505,
      "step": 808
    },
    {
      "epoch": 0.02360114359064123,
      "grad_norm": 1.0975135269964724,
      "learning_rate": 7.862001943634598e-06,
      "loss": 0.2332,
      "step": 809
    },
    {
      "epoch": 0.02363031682128479,
      "grad_norm": 1.2494564394211676,
      "learning_rate": 7.871720116618077e-06,
      "loss": 0.2133,
      "step": 810
    },
    {
      "epoch": 0.023659490051928352,
      "grad_norm": 1.3489757437988497,
      "learning_rate": 7.881438289601555e-06,
      "loss": 0.2253,
      "step": 811
    },
    {
      "epoch": 0.023688663282571912,
      "grad_norm": 1.0251269096777629,
      "learning_rate": 7.891156462585034e-06,
      "loss": 0.2213,
      "step": 812
    },
    {
      "epoch": 0.023717836513215473,
      "grad_norm": 0.9716459114793602,
      "learning_rate": 7.900874635568514e-06,
      "loss": 0.2236,
      "step": 813
    },
    {
      "epoch": 0.023747009743859036,
      "grad_norm": 1.0508870648670574,
      "learning_rate": 7.910592808551993e-06,
      "loss": 0.2336,
      "step": 814
    },
    {
      "epoch": 0.023776182974502597,
      "grad_norm": 1.0360282373369818,
      "learning_rate": 7.920310981535472e-06,
      "loss": 0.2213,
      "step": 815
    },
    {
      "epoch": 0.023805356205146157,
      "grad_norm": 1.098218013035958,
      "learning_rate": 7.930029154518952e-06,
      "loss": 0.2357,
      "step": 816
    },
    {
      "epoch": 0.02383452943578972,
      "grad_norm": 1.1575540066522236,
      "learning_rate": 7.939747327502431e-06,
      "loss": 0.2421,
      "step": 817
    },
    {
      "epoch": 0.02386370266643328,
      "grad_norm": 1.4602086588436836,
      "learning_rate": 7.949465500485909e-06,
      "loss": 0.2123,
      "step": 818
    },
    {
      "epoch": 0.02389287589707684,
      "grad_norm": 1.054613072449019,
      "learning_rate": 7.959183673469388e-06,
      "loss": 0.2331,
      "step": 819
    },
    {
      "epoch": 0.023922049127720404,
      "grad_norm": 1.380533444087093,
      "learning_rate": 7.968901846452867e-06,
      "loss": 0.2186,
      "step": 820
    },
    {
      "epoch": 0.023951222358363965,
      "grad_norm": 1.0968081127881506,
      "learning_rate": 7.978620019436347e-06,
      "loss": 0.2151,
      "step": 821
    },
    {
      "epoch": 0.02398039558900753,
      "grad_norm": 1.2678370235797236,
      "learning_rate": 7.988338192419826e-06,
      "loss": 0.2447,
      "step": 822
    },
    {
      "epoch": 0.02400956881965109,
      "grad_norm": 1.1840706113496122,
      "learning_rate": 7.998056365403305e-06,
      "loss": 0.2225,
      "step": 823
    },
    {
      "epoch": 0.02403874205029465,
      "grad_norm": 1.2965871034391179,
      "learning_rate": 8.007774538386784e-06,
      "loss": 0.2178,
      "step": 824
    },
    {
      "epoch": 0.024067915280938212,
      "grad_norm": 1.264222685552627,
      "learning_rate": 8.017492711370262e-06,
      "loss": 0.2683,
      "step": 825
    },
    {
      "epoch": 0.024097088511581773,
      "grad_norm": 1.3527595201863405,
      "learning_rate": 8.027210884353741e-06,
      "loss": 0.216,
      "step": 826
    },
    {
      "epoch": 0.024126261742225333,
      "grad_norm": 1.4038635883884818,
      "learning_rate": 8.036929057337222e-06,
      "loss": 0.2665,
      "step": 827
    },
    {
      "epoch": 0.024155434972868896,
      "grad_norm": 0.9586492703955889,
      "learning_rate": 8.0466472303207e-06,
      "loss": 0.2331,
      "step": 828
    },
    {
      "epoch": 0.024184608203512457,
      "grad_norm": 1.052317215766637,
      "learning_rate": 8.05636540330418e-06,
      "loss": 0.2352,
      "step": 829
    },
    {
      "epoch": 0.024213781434156017,
      "grad_norm": 1.0505145744789148,
      "learning_rate": 8.066083576287659e-06,
      "loss": 0.2424,
      "step": 830
    },
    {
      "epoch": 0.02424295466479958,
      "grad_norm": 1.259839897348867,
      "learning_rate": 8.075801749271138e-06,
      "loss": 0.264,
      "step": 831
    },
    {
      "epoch": 0.02427212789544314,
      "grad_norm": 1.165680747204167,
      "learning_rate": 8.085519922254617e-06,
      "loss": 0.2346,
      "step": 832
    },
    {
      "epoch": 0.024301301126086704,
      "grad_norm": 1.269560461730615,
      "learning_rate": 8.095238095238097e-06,
      "loss": 0.2324,
      "step": 833
    },
    {
      "epoch": 0.024330474356730265,
      "grad_norm": 1.0600569819159396,
      "learning_rate": 8.104956268221576e-06,
      "loss": 0.2305,
      "step": 834
    },
    {
      "epoch": 0.024359647587373825,
      "grad_norm": 1.1810103725868992,
      "learning_rate": 8.114674441205053e-06,
      "loss": 0.2636,
      "step": 835
    },
    {
      "epoch": 0.02438882081801739,
      "grad_norm": 0.9233645643119518,
      "learning_rate": 8.124392614188533e-06,
      "loss": 0.2255,
      "step": 836
    },
    {
      "epoch": 0.02441799404866095,
      "grad_norm": 0.839916756444877,
      "learning_rate": 8.134110787172012e-06,
      "loss": 0.1963,
      "step": 837
    },
    {
      "epoch": 0.02444716727930451,
      "grad_norm": 1.1297884673558336,
      "learning_rate": 8.143828960155491e-06,
      "loss": 0.2182,
      "step": 838
    },
    {
      "epoch": 0.024476340509948073,
      "grad_norm": 1.012255127000924,
      "learning_rate": 8.15354713313897e-06,
      "loss": 0.243,
      "step": 839
    },
    {
      "epoch": 0.024505513740591633,
      "grad_norm": 1.1376812095346318,
      "learning_rate": 8.16326530612245e-06,
      "loss": 0.2034,
      "step": 840
    },
    {
      "epoch": 0.024534686971235193,
      "grad_norm": 1.15627653019019,
      "learning_rate": 8.17298347910593e-06,
      "loss": 0.237,
      "step": 841
    },
    {
      "epoch": 0.024563860201878757,
      "grad_norm": 1.3314035681696466,
      "learning_rate": 8.182701652089407e-06,
      "loss": 0.2091,
      "step": 842
    },
    {
      "epoch": 0.024593033432522317,
      "grad_norm": 1.0392278314289032,
      "learning_rate": 8.192419825072886e-06,
      "loss": 0.2321,
      "step": 843
    },
    {
      "epoch": 0.02462220666316588,
      "grad_norm": 1.1571268182840173,
      "learning_rate": 8.202137998056367e-06,
      "loss": 0.242,
      "step": 844
    },
    {
      "epoch": 0.02465137989380944,
      "grad_norm": 1.0872892444485658,
      "learning_rate": 8.211856171039845e-06,
      "loss": 0.2329,
      "step": 845
    },
    {
      "epoch": 0.024680553124453,
      "grad_norm": 1.1112375767980343,
      "learning_rate": 8.221574344023324e-06,
      "loss": 0.2332,
      "step": 846
    },
    {
      "epoch": 0.024709726355096565,
      "grad_norm": 1.028165264665385,
      "learning_rate": 8.231292517006804e-06,
      "loss": 0.2148,
      "step": 847
    },
    {
      "epoch": 0.024738899585740125,
      "grad_norm": 1.2878773067873275,
      "learning_rate": 8.241010689990283e-06,
      "loss": 0.24,
      "step": 848
    },
    {
      "epoch": 0.024768072816383685,
      "grad_norm": 1.2089653481220402,
      "learning_rate": 8.250728862973762e-06,
      "loss": 0.2291,
      "step": 849
    },
    {
      "epoch": 0.02479724604702725,
      "grad_norm": 1.2961702554882062,
      "learning_rate": 8.260447035957241e-06,
      "loss": 0.2293,
      "step": 850
    },
    {
      "epoch": 0.02482641927767081,
      "grad_norm": 1.1287301452949055,
      "learning_rate": 8.27016520894072e-06,
      "loss": 0.2203,
      "step": 851
    },
    {
      "epoch": 0.02485559250831437,
      "grad_norm": 1.0994200408217525,
      "learning_rate": 8.279883381924198e-06,
      "loss": 0.2097,
      "step": 852
    },
    {
      "epoch": 0.024884765738957933,
      "grad_norm": 1.1749016881150973,
      "learning_rate": 8.289601554907678e-06,
      "loss": 0.2324,
      "step": 853
    },
    {
      "epoch": 0.024913938969601493,
      "grad_norm": 1.459717519565719,
      "learning_rate": 8.299319727891157e-06,
      "loss": 0.2252,
      "step": 854
    },
    {
      "epoch": 0.024943112200245057,
      "grad_norm": 1.455160742778526,
      "learning_rate": 8.309037900874636e-06,
      "loss": 0.2333,
      "step": 855
    },
    {
      "epoch": 0.024972285430888617,
      "grad_norm": 1.023204593740571,
      "learning_rate": 8.318756073858116e-06,
      "loss": 0.2271,
      "step": 856
    },
    {
      "epoch": 0.025001458661532177,
      "grad_norm": 1.1995934918409512,
      "learning_rate": 8.328474246841595e-06,
      "loss": 0.2578,
      "step": 857
    },
    {
      "epoch": 0.02503063189217574,
      "grad_norm": 1.1620257301761994,
      "learning_rate": 8.338192419825074e-06,
      "loss": 0.2304,
      "step": 858
    },
    {
      "epoch": 0.0250598051228193,
      "grad_norm": 1.1110701350031862,
      "learning_rate": 8.347910592808552e-06,
      "loss": 0.2199,
      "step": 859
    },
    {
      "epoch": 0.02508897835346286,
      "grad_norm": 1.4282060642842584,
      "learning_rate": 8.357628765792031e-06,
      "loss": 0.226,
      "step": 860
    },
    {
      "epoch": 0.025118151584106425,
      "grad_norm": 1.1058873150035762,
      "learning_rate": 8.36734693877551e-06,
      "loss": 0.2325,
      "step": 861
    },
    {
      "epoch": 0.025147324814749985,
      "grad_norm": 1.0212208946954713,
      "learning_rate": 8.37706511175899e-06,
      "loss": 0.231,
      "step": 862
    },
    {
      "epoch": 0.02517649804539355,
      "grad_norm": 1.0584289725256428,
      "learning_rate": 8.386783284742469e-06,
      "loss": 0.2372,
      "step": 863
    },
    {
      "epoch": 0.02520567127603711,
      "grad_norm": 1.3915339635059458,
      "learning_rate": 8.396501457725948e-06,
      "loss": 0.2155,
      "step": 864
    },
    {
      "epoch": 0.02523484450668067,
      "grad_norm": 0.9602181741267753,
      "learning_rate": 8.406219630709426e-06,
      "loss": 0.2274,
      "step": 865
    },
    {
      "epoch": 0.025264017737324233,
      "grad_norm": 1.0949774891934438,
      "learning_rate": 8.415937803692907e-06,
      "loss": 0.2359,
      "step": 866
    },
    {
      "epoch": 0.025293190967967793,
      "grad_norm": 1.1976347719025633,
      "learning_rate": 8.425655976676386e-06,
      "loss": 0.247,
      "step": 867
    },
    {
      "epoch": 0.025322364198611353,
      "grad_norm": 1.0966583335751576,
      "learning_rate": 8.435374149659866e-06,
      "loss": 0.2243,
      "step": 868
    },
    {
      "epoch": 0.025351537429254917,
      "grad_norm": 0.999186771932867,
      "learning_rate": 8.445092322643343e-06,
      "loss": 0.1996,
      "step": 869
    },
    {
      "epoch": 0.025380710659898477,
      "grad_norm": 1.1549892838997626,
      "learning_rate": 8.454810495626823e-06,
      "loss": 0.2291,
      "step": 870
    },
    {
      "epoch": 0.025409883890542037,
      "grad_norm": 1.2565325042452247,
      "learning_rate": 8.464528668610302e-06,
      "loss": 0.23,
      "step": 871
    },
    {
      "epoch": 0.0254390571211856,
      "grad_norm": 1.264468498651524,
      "learning_rate": 8.474246841593781e-06,
      "loss": 0.2371,
      "step": 872
    },
    {
      "epoch": 0.02546823035182916,
      "grad_norm": 1.1107458572943656,
      "learning_rate": 8.48396501457726e-06,
      "loss": 0.2308,
      "step": 873
    },
    {
      "epoch": 0.025497403582472725,
      "grad_norm": 1.245594829405502,
      "learning_rate": 8.49368318756074e-06,
      "loss": 0.2476,
      "step": 874
    },
    {
      "epoch": 0.025526576813116285,
      "grad_norm": 1.188427098742183,
      "learning_rate": 8.503401360544217e-06,
      "loss": 0.2309,
      "step": 875
    },
    {
      "epoch": 0.025555750043759845,
      "grad_norm": 1.2916364374413594,
      "learning_rate": 8.513119533527697e-06,
      "loss": 0.2275,
      "step": 876
    },
    {
      "epoch": 0.02558492327440341,
      "grad_norm": 1.1123804597760718,
      "learning_rate": 8.522837706511176e-06,
      "loss": 0.2216,
      "step": 877
    },
    {
      "epoch": 0.02561409650504697,
      "grad_norm": 1.0506988222299276,
      "learning_rate": 8.532555879494655e-06,
      "loss": 0.2402,
      "step": 878
    },
    {
      "epoch": 0.02564326973569053,
      "grad_norm": 1.07888229018488,
      "learning_rate": 8.542274052478135e-06,
      "loss": 0.247,
      "step": 879
    },
    {
      "epoch": 0.025672442966334093,
      "grad_norm": 1.114420567762565,
      "learning_rate": 8.551992225461614e-06,
      "loss": 0.2199,
      "step": 880
    },
    {
      "epoch": 0.025701616196977653,
      "grad_norm": 1.0733804086551966,
      "learning_rate": 8.561710398445093e-06,
      "loss": 0.2364,
      "step": 881
    },
    {
      "epoch": 0.025730789427621213,
      "grad_norm": 1.1515411067974415,
      "learning_rate": 8.571428571428571e-06,
      "loss": 0.2129,
      "step": 882
    },
    {
      "epoch": 0.025759962658264777,
      "grad_norm": 1.043433286716918,
      "learning_rate": 8.581146744412052e-06,
      "loss": 0.2355,
      "step": 883
    },
    {
      "epoch": 0.025789135888908337,
      "grad_norm": 1.0336903893330118,
      "learning_rate": 8.590864917395531e-06,
      "loss": 0.2135,
      "step": 884
    },
    {
      "epoch": 0.0258183091195519,
      "grad_norm": 1.256521542254548,
      "learning_rate": 8.60058309037901e-06,
      "loss": 0.2093,
      "step": 885
    },
    {
      "epoch": 0.02584748235019546,
      "grad_norm": 1.3417628029323483,
      "learning_rate": 8.610301263362488e-06,
      "loss": 0.261,
      "step": 886
    },
    {
      "epoch": 0.02587665558083902,
      "grad_norm": 1.0107801209511784,
      "learning_rate": 8.620019436345967e-06,
      "loss": 0.2119,
      "step": 887
    },
    {
      "epoch": 0.025905828811482585,
      "grad_norm": 1.1406597825944313,
      "learning_rate": 8.629737609329447e-06,
      "loss": 0.2322,
      "step": 888
    },
    {
      "epoch": 0.025935002042126145,
      "grad_norm": 1.3920662585308612,
      "learning_rate": 8.639455782312926e-06,
      "loss": 0.215,
      "step": 889
    },
    {
      "epoch": 0.025964175272769705,
      "grad_norm": 1.1511277459389424,
      "learning_rate": 8.649173955296405e-06,
      "loss": 0.2472,
      "step": 890
    },
    {
      "epoch": 0.02599334850341327,
      "grad_norm": 0.9606071792676083,
      "learning_rate": 8.658892128279885e-06,
      "loss": 0.2161,
      "step": 891
    },
    {
      "epoch": 0.02602252173405683,
      "grad_norm": 1.2710760257723615,
      "learning_rate": 8.668610301263362e-06,
      "loss": 0.2287,
      "step": 892
    },
    {
      "epoch": 0.02605169496470039,
      "grad_norm": 1.168320003779378,
      "learning_rate": 8.678328474246842e-06,
      "loss": 0.2431,
      "step": 893
    },
    {
      "epoch": 0.026080868195343953,
      "grad_norm": 0.8512252460881229,
      "learning_rate": 8.688046647230321e-06,
      "loss": 0.1971,
      "step": 894
    },
    {
      "epoch": 0.026110041425987513,
      "grad_norm": 1.0740925586658399,
      "learning_rate": 8.6977648202138e-06,
      "loss": 0.2528,
      "step": 895
    },
    {
      "epoch": 0.026139214656631077,
      "grad_norm": 1.0778145588806725,
      "learning_rate": 8.70748299319728e-06,
      "loss": 0.2134,
      "step": 896
    },
    {
      "epoch": 0.026168387887274637,
      "grad_norm": 1.0246197372812758,
      "learning_rate": 8.717201166180759e-06,
      "loss": 0.2418,
      "step": 897
    },
    {
      "epoch": 0.026197561117918197,
      "grad_norm": 0.9971908131729528,
      "learning_rate": 8.726919339164238e-06,
      "loss": 0.2249,
      "step": 898
    },
    {
      "epoch": 0.02622673434856176,
      "grad_norm": 0.9847569765468884,
      "learning_rate": 8.736637512147716e-06,
      "loss": 0.2177,
      "step": 899
    },
    {
      "epoch": 0.02625590757920532,
      "grad_norm": 1.223519064180211,
      "learning_rate": 8.746355685131195e-06,
      "loss": 0.2563,
      "step": 900
    },
    {
      "epoch": 0.02628508080984888,
      "grad_norm": 1.1518220206967986,
      "learning_rate": 8.756073858114676e-06,
      "loss": 0.2264,
      "step": 901
    },
    {
      "epoch": 0.026314254040492445,
      "grad_norm": 1.189621075076587,
      "learning_rate": 8.765792031098155e-06,
      "loss": 0.2184,
      "step": 902
    },
    {
      "epoch": 0.026343427271136005,
      "grad_norm": 1.2186169070260915,
      "learning_rate": 8.775510204081633e-06,
      "loss": 0.2407,
      "step": 903
    },
    {
      "epoch": 0.02637260050177957,
      "grad_norm": 1.2635856808123254,
      "learning_rate": 8.785228377065112e-06,
      "loss": 0.2262,
      "step": 904
    },
    {
      "epoch": 0.02640177373242313,
      "grad_norm": 0.9812279936248159,
      "learning_rate": 8.794946550048592e-06,
      "loss": 0.2247,
      "step": 905
    },
    {
      "epoch": 0.02643094696306669,
      "grad_norm": 1.1066437925647374,
      "learning_rate": 8.804664723032071e-06,
      "loss": 0.2362,
      "step": 906
    },
    {
      "epoch": 0.026460120193710253,
      "grad_norm": 1.111992497097666,
      "learning_rate": 8.81438289601555e-06,
      "loss": 0.2073,
      "step": 907
    },
    {
      "epoch": 0.026489293424353813,
      "grad_norm": 0.9645432811176635,
      "learning_rate": 8.82410106899903e-06,
      "loss": 0.2296,
      "step": 908
    },
    {
      "epoch": 0.026518466654997373,
      "grad_norm": 0.8492398097468394,
      "learning_rate": 8.833819241982507e-06,
      "loss": 0.2228,
      "step": 909
    },
    {
      "epoch": 0.026547639885640937,
      "grad_norm": 1.0748246979612344,
      "learning_rate": 8.843537414965987e-06,
      "loss": 0.2317,
      "step": 910
    },
    {
      "epoch": 0.026576813116284497,
      "grad_norm": 1.0053226522490737,
      "learning_rate": 8.853255587949466e-06,
      "loss": 0.2302,
      "step": 911
    },
    {
      "epoch": 0.026605986346928057,
      "grad_norm": 0.9971656931450253,
      "learning_rate": 8.862973760932945e-06,
      "loss": 0.2193,
      "step": 912
    },
    {
      "epoch": 0.02663515957757162,
      "grad_norm": 0.9847409980163035,
      "learning_rate": 8.872691933916424e-06,
      "loss": 0.2199,
      "step": 913
    },
    {
      "epoch": 0.02666433280821518,
      "grad_norm": 1.25498568142084,
      "learning_rate": 8.882410106899904e-06,
      "loss": 0.2146,
      "step": 914
    },
    {
      "epoch": 0.026693506038858745,
      "grad_norm": 0.9161040678664293,
      "learning_rate": 8.892128279883383e-06,
      "loss": 0.2379,
      "step": 915
    },
    {
      "epoch": 0.026722679269502305,
      "grad_norm": 1.2235565726421131,
      "learning_rate": 8.90184645286686e-06,
      "loss": 0.25,
      "step": 916
    },
    {
      "epoch": 0.026751852500145865,
      "grad_norm": 1.406004105551934,
      "learning_rate": 8.91156462585034e-06,
      "loss": 0.2239,
      "step": 917
    },
    {
      "epoch": 0.02678102573078943,
      "grad_norm": 1.1342506113992574,
      "learning_rate": 8.921282798833821e-06,
      "loss": 0.2106,
      "step": 918
    },
    {
      "epoch": 0.02681019896143299,
      "grad_norm": 1.1239375255085295,
      "learning_rate": 8.931000971817299e-06,
      "loss": 0.2329,
      "step": 919
    },
    {
      "epoch": 0.02683937219207655,
      "grad_norm": 0.9457265195547269,
      "learning_rate": 8.940719144800778e-06,
      "loss": 0.2118,
      "step": 920
    },
    {
      "epoch": 0.026868545422720113,
      "grad_norm": 1.0706261685912044,
      "learning_rate": 8.950437317784257e-06,
      "loss": 0.2731,
      "step": 921
    },
    {
      "epoch": 0.026897718653363673,
      "grad_norm": 1.110692695974071,
      "learning_rate": 8.960155490767737e-06,
      "loss": 0.2014,
      "step": 922
    },
    {
      "epoch": 0.026926891884007233,
      "grad_norm": 0.8500774921458645,
      "learning_rate": 8.969873663751216e-06,
      "loss": 0.2016,
      "step": 923
    },
    {
      "epoch": 0.026956065114650797,
      "grad_norm": 1.1852674803784724,
      "learning_rate": 8.979591836734695e-06,
      "loss": 0.2098,
      "step": 924
    },
    {
      "epoch": 0.026985238345294357,
      "grad_norm": 1.1139269298523111,
      "learning_rate": 8.989310009718175e-06,
      "loss": 0.2131,
      "step": 925
    },
    {
      "epoch": 0.02701441157593792,
      "grad_norm": 0.8962746684288225,
      "learning_rate": 8.999028182701652e-06,
      "loss": 0.2088,
      "step": 926
    },
    {
      "epoch": 0.02704358480658148,
      "grad_norm": 1.2521461375304326,
      "learning_rate": 9.008746355685131e-06,
      "loss": 0.2539,
      "step": 927
    },
    {
      "epoch": 0.02707275803722504,
      "grad_norm": 1.1281918008149143,
      "learning_rate": 9.01846452866861e-06,
      "loss": 0.2362,
      "step": 928
    },
    {
      "epoch": 0.027101931267868605,
      "grad_norm": 0.9363948903611427,
      "learning_rate": 9.02818270165209e-06,
      "loss": 0.2083,
      "step": 929
    },
    {
      "epoch": 0.027131104498512165,
      "grad_norm": 1.145122754071775,
      "learning_rate": 9.03790087463557e-06,
      "loss": 0.1974,
      "step": 930
    },
    {
      "epoch": 0.027160277729155725,
      "grad_norm": 1.0865477187764203,
      "learning_rate": 9.047619047619049e-06,
      "loss": 0.2341,
      "step": 931
    },
    {
      "epoch": 0.02718945095979929,
      "grad_norm": 1.3141465162510526,
      "learning_rate": 9.057337220602528e-06,
      "loss": 0.2234,
      "step": 932
    },
    {
      "epoch": 0.02721862419044285,
      "grad_norm": 1.1538744272422732,
      "learning_rate": 9.067055393586006e-06,
      "loss": 0.225,
      "step": 933
    },
    {
      "epoch": 0.02724779742108641,
      "grad_norm": 1.0662250990413884,
      "learning_rate": 9.076773566569485e-06,
      "loss": 0.2199,
      "step": 934
    },
    {
      "epoch": 0.027276970651729973,
      "grad_norm": 1.0405211203137403,
      "learning_rate": 9.086491739552964e-06,
      "loss": 0.2134,
      "step": 935
    },
    {
      "epoch": 0.027306143882373533,
      "grad_norm": 1.167944520650825,
      "learning_rate": 9.096209912536444e-06,
      "loss": 0.2542,
      "step": 936
    },
    {
      "epoch": 0.027335317113017097,
      "grad_norm": 1.2235520404347386,
      "learning_rate": 9.105928085519923e-06,
      "loss": 0.2302,
      "step": 937
    },
    {
      "epoch": 0.027364490343660657,
      "grad_norm": 0.8376879036238295,
      "learning_rate": 9.115646258503402e-06,
      "loss": 0.2084,
      "step": 938
    },
    {
      "epoch": 0.027393663574304217,
      "grad_norm": 1.1672300639818123,
      "learning_rate": 9.125364431486881e-06,
      "loss": 0.2129,
      "step": 939
    },
    {
      "epoch": 0.02742283680494778,
      "grad_norm": 1.238442350287253,
      "learning_rate": 9.13508260447036e-06,
      "loss": 0.2273,
      "step": 940
    },
    {
      "epoch": 0.02745201003559134,
      "grad_norm": 1.1226085765871399,
      "learning_rate": 9.14480077745384e-06,
      "loss": 0.2313,
      "step": 941
    },
    {
      "epoch": 0.0274811832662349,
      "grad_norm": 1.1102788007428515,
      "learning_rate": 9.15451895043732e-06,
      "loss": 0.2087,
      "step": 942
    },
    {
      "epoch": 0.027510356496878465,
      "grad_norm": 1.04976885817183,
      "learning_rate": 9.164237123420797e-06,
      "loss": 0.2206,
      "step": 943
    },
    {
      "epoch": 0.027539529727522025,
      "grad_norm": 1.1268358378697063,
      "learning_rate": 9.173955296404276e-06,
      "loss": 0.2234,
      "step": 944
    },
    {
      "epoch": 0.027568702958165586,
      "grad_norm": 1.0288268415220287,
      "learning_rate": 9.183673469387756e-06,
      "loss": 0.2237,
      "step": 945
    },
    {
      "epoch": 0.02759787618880915,
      "grad_norm": 1.1745349868779273,
      "learning_rate": 9.193391642371235e-06,
      "loss": 0.2179,
      "step": 946
    },
    {
      "epoch": 0.02762704941945271,
      "grad_norm": 1.1508004689734275,
      "learning_rate": 9.203109815354714e-06,
      "loss": 0.2146,
      "step": 947
    },
    {
      "epoch": 0.027656222650096273,
      "grad_norm": 0.9210802873402436,
      "learning_rate": 9.212827988338194e-06,
      "loss": 0.2247,
      "step": 948
    },
    {
      "epoch": 0.027685395880739833,
      "grad_norm": 1.1622540536116515,
      "learning_rate": 9.222546161321673e-06,
      "loss": 0.2363,
      "step": 949
    },
    {
      "epoch": 0.027714569111383393,
      "grad_norm": 1.1425629262142145,
      "learning_rate": 9.23226433430515e-06,
      "loss": 0.2479,
      "step": 950
    },
    {
      "epoch": 0.027743742342026957,
      "grad_norm": 0.9106780037226964,
      "learning_rate": 9.24198250728863e-06,
      "loss": 0.2409,
      "step": 951
    },
    {
      "epoch": 0.027772915572670517,
      "grad_norm": 1.0965125281970576,
      "learning_rate": 9.251700680272109e-06,
      "loss": 0.2306,
      "step": 952
    },
    {
      "epoch": 0.027802088803314078,
      "grad_norm": 1.1201151553658704,
      "learning_rate": 9.261418853255588e-06,
      "loss": 0.2198,
      "step": 953
    },
    {
      "epoch": 0.02783126203395764,
      "grad_norm": 1.0211064270830699,
      "learning_rate": 9.271137026239068e-06,
      "loss": 0.2362,
      "step": 954
    },
    {
      "epoch": 0.0278604352646012,
      "grad_norm": 1.1512785422164178,
      "learning_rate": 9.280855199222547e-06,
      "loss": 0.2131,
      "step": 955
    },
    {
      "epoch": 0.027889608495244765,
      "grad_norm": 1.066555236087168,
      "learning_rate": 9.290573372206026e-06,
      "loss": 0.2318,
      "step": 956
    },
    {
      "epoch": 0.027918781725888325,
      "grad_norm": 1.1943198550527352,
      "learning_rate": 9.300291545189504e-06,
      "loss": 0.227,
      "step": 957
    },
    {
      "epoch": 0.027947954956531885,
      "grad_norm": 1.2512433672693648,
      "learning_rate": 9.310009718172985e-06,
      "loss": 0.2331,
      "step": 958
    },
    {
      "epoch": 0.02797712818717545,
      "grad_norm": 1.1449455664610984,
      "learning_rate": 9.319727891156464e-06,
      "loss": 0.2235,
      "step": 959
    },
    {
      "epoch": 0.02800630141781901,
      "grad_norm": 1.3393978089874983,
      "learning_rate": 9.329446064139942e-06,
      "loss": 0.2308,
      "step": 960
    },
    {
      "epoch": 0.02803547464846257,
      "grad_norm": 1.056733030847754,
      "learning_rate": 9.339164237123421e-06,
      "loss": 0.2708,
      "step": 961
    },
    {
      "epoch": 0.028064647879106133,
      "grad_norm": 0.9567251442342112,
      "learning_rate": 9.3488824101069e-06,
      "loss": 0.2195,
      "step": 962
    },
    {
      "epoch": 0.028093821109749693,
      "grad_norm": 1.2786594166323981,
      "learning_rate": 9.35860058309038e-06,
      "loss": 0.2112,
      "step": 963
    },
    {
      "epoch": 0.028122994340393254,
      "grad_norm": 1.077364504649222,
      "learning_rate": 9.36831875607386e-06,
      "loss": 0.208,
      "step": 964
    },
    {
      "epoch": 0.028152167571036817,
      "grad_norm": 1.2229919345979692,
      "learning_rate": 9.378036929057338e-06,
      "loss": 0.2155,
      "step": 965
    },
    {
      "epoch": 0.028181340801680378,
      "grad_norm": 1.066008540075167,
      "learning_rate": 9.387755102040818e-06,
      "loss": 0.2116,
      "step": 966
    },
    {
      "epoch": 0.02821051403232394,
      "grad_norm": 0.9000305593331918,
      "learning_rate": 9.397473275024295e-06,
      "loss": 0.2192,
      "step": 967
    },
    {
      "epoch": 0.0282396872629675,
      "grad_norm": 1.2483103558638065,
      "learning_rate": 9.407191448007775e-06,
      "loss": 0.2446,
      "step": 968
    },
    {
      "epoch": 0.02826886049361106,
      "grad_norm": 1.1654942991154087,
      "learning_rate": 9.416909620991254e-06,
      "loss": 0.2237,
      "step": 969
    },
    {
      "epoch": 0.028298033724254625,
      "grad_norm": 1.191747132746991,
      "learning_rate": 9.426627793974733e-06,
      "loss": 0.2449,
      "step": 970
    },
    {
      "epoch": 0.028327206954898185,
      "grad_norm": 1.1163267821303762,
      "learning_rate": 9.436345966958213e-06,
      "loss": 0.2364,
      "step": 971
    },
    {
      "epoch": 0.028356380185541746,
      "grad_norm": 1.3026286877189537,
      "learning_rate": 9.446064139941692e-06,
      "loss": 0.2179,
      "step": 972
    },
    {
      "epoch": 0.02838555341618531,
      "grad_norm": 1.1532836150899277,
      "learning_rate": 9.455782312925171e-06,
      "loss": 0.2369,
      "step": 973
    },
    {
      "epoch": 0.02841472664682887,
      "grad_norm": 1.2249757364981582,
      "learning_rate": 9.465500485908649e-06,
      "loss": 0.2381,
      "step": 974
    },
    {
      "epoch": 0.02844389987747243,
      "grad_norm": 0.895540398285069,
      "learning_rate": 9.47521865889213e-06,
      "loss": 0.2228,
      "step": 975
    },
    {
      "epoch": 0.028473073108115993,
      "grad_norm": 1.0567271109109968,
      "learning_rate": 9.48493683187561e-06,
      "loss": 0.2292,
      "step": 976
    },
    {
      "epoch": 0.028502246338759554,
      "grad_norm": 1.0687373189904286,
      "learning_rate": 9.494655004859087e-06,
      "loss": 0.2548,
      "step": 977
    },
    {
      "epoch": 0.028531419569403117,
      "grad_norm": 1.305079059525117,
      "learning_rate": 9.504373177842566e-06,
      "loss": 0.273,
      "step": 978
    },
    {
      "epoch": 0.028560592800046677,
      "grad_norm": 1.1352561925162497,
      "learning_rate": 9.514091350826045e-06,
      "loss": 0.2006,
      "step": 979
    },
    {
      "epoch": 0.028589766030690238,
      "grad_norm": 0.9489404568910308,
      "learning_rate": 9.523809523809525e-06,
      "loss": 0.2206,
      "step": 980
    },
    {
      "epoch": 0.0286189392613338,
      "grad_norm": 1.5175181641343878,
      "learning_rate": 9.533527696793004e-06,
      "loss": 0.2104,
      "step": 981
    },
    {
      "epoch": 0.02864811249197736,
      "grad_norm": 1.401465604706397,
      "learning_rate": 9.543245869776483e-06,
      "loss": 0.228,
      "step": 982
    },
    {
      "epoch": 0.02867728572262092,
      "grad_norm": 0.8548407597315596,
      "learning_rate": 9.552964042759963e-06,
      "loss": 0.1962,
      "step": 983
    },
    {
      "epoch": 0.028706458953264485,
      "grad_norm": 1.2278122948693826,
      "learning_rate": 9.56268221574344e-06,
      "loss": 0.2216,
      "step": 984
    },
    {
      "epoch": 0.028735632183908046,
      "grad_norm": 1.0253338593853756,
      "learning_rate": 9.57240038872692e-06,
      "loss": 0.2103,
      "step": 985
    },
    {
      "epoch": 0.028764805414551606,
      "grad_norm": 1.0819021309696246,
      "learning_rate": 9.582118561710399e-06,
      "loss": 0.2151,
      "step": 986
    },
    {
      "epoch": 0.02879397864519517,
      "grad_norm": 1.1621048015455133,
      "learning_rate": 9.591836734693878e-06,
      "loss": 0.2278,
      "step": 987
    },
    {
      "epoch": 0.02882315187583873,
      "grad_norm": 1.148004969755955,
      "learning_rate": 9.601554907677358e-06,
      "loss": 0.2065,
      "step": 988
    },
    {
      "epoch": 0.028852325106482293,
      "grad_norm": 1.4542424991666156,
      "learning_rate": 9.611273080660837e-06,
      "loss": 0.2662,
      "step": 989
    },
    {
      "epoch": 0.028881498337125854,
      "grad_norm": 1.4312639880264453,
      "learning_rate": 9.620991253644316e-06,
      "loss": 0.2584,
      "step": 990
    },
    {
      "epoch": 0.028910671567769414,
      "grad_norm": 1.248581176882113,
      "learning_rate": 9.630709426627794e-06,
      "loss": 0.2002,
      "step": 991
    },
    {
      "epoch": 0.028939844798412977,
      "grad_norm": 1.075820748048547,
      "learning_rate": 9.640427599611275e-06,
      "loss": 0.2338,
      "step": 992
    },
    {
      "epoch": 0.028969018029056538,
      "grad_norm": 1.4514438131506477,
      "learning_rate": 9.650145772594754e-06,
      "loss": 0.2322,
      "step": 993
    },
    {
      "epoch": 0.028998191259700098,
      "grad_norm": 1.139222248383189,
      "learning_rate": 9.659863945578232e-06,
      "loss": 0.2208,
      "step": 994
    },
    {
      "epoch": 0.02902736449034366,
      "grad_norm": 1.2710835376802465,
      "learning_rate": 9.669582118561711e-06,
      "loss": 0.2531,
      "step": 995
    },
    {
      "epoch": 0.02905653772098722,
      "grad_norm": 1.3670607561351638,
      "learning_rate": 9.67930029154519e-06,
      "loss": 0.217,
      "step": 996
    },
    {
      "epoch": 0.029085710951630785,
      "grad_norm": 1.0421126991423406,
      "learning_rate": 9.68901846452867e-06,
      "loss": 0.2368,
      "step": 997
    },
    {
      "epoch": 0.029114884182274346,
      "grad_norm": 1.164074668991034,
      "learning_rate": 9.698736637512149e-06,
      "loss": 0.2484,
      "step": 998
    },
    {
      "epoch": 0.029144057412917906,
      "grad_norm": 1.1465573713760517,
      "learning_rate": 9.708454810495628e-06,
      "loss": 0.2278,
      "step": 999
    },
    {
      "epoch": 0.02917323064356147,
      "grad_norm": 1.0784643564271585,
      "learning_rate": 9.718172983479108e-06,
      "loss": 0.2462,
      "step": 1000
    },
    {
      "epoch": 0.02920240387420503,
      "grad_norm": 1.1130447279632785,
      "learning_rate": 9.727891156462585e-06,
      "loss": 0.2311,
      "step": 1001
    },
    {
      "epoch": 0.02923157710484859,
      "grad_norm": 1.2503403259198171,
      "learning_rate": 9.737609329446065e-06,
      "loss": 0.232,
      "step": 1002
    },
    {
      "epoch": 0.029260750335492153,
      "grad_norm": 1.1556947570749498,
      "learning_rate": 9.747327502429544e-06,
      "loss": 0.2339,
      "step": 1003
    },
    {
      "epoch": 0.029289923566135714,
      "grad_norm": 0.9942713910664488,
      "learning_rate": 9.757045675413023e-06,
      "loss": 0.2439,
      "step": 1004
    },
    {
      "epoch": 0.029319096796779274,
      "grad_norm": 0.979660492071142,
      "learning_rate": 9.766763848396502e-06,
      "loss": 0.2094,
      "step": 1005
    },
    {
      "epoch": 0.029348270027422838,
      "grad_norm": 0.9381926924935178,
      "learning_rate": 9.776482021379982e-06,
      "loss": 0.2114,
      "step": 1006
    },
    {
      "epoch": 0.029377443258066398,
      "grad_norm": 0.9335735269940261,
      "learning_rate": 9.78620019436346e-06,
      "loss": 0.2134,
      "step": 1007
    },
    {
      "epoch": 0.02940661648870996,
      "grad_norm": 1.0880985866604813,
      "learning_rate": 9.795918367346939e-06,
      "loss": 0.2344,
      "step": 1008
    },
    {
      "epoch": 0.02943578971935352,
      "grad_norm": 1.2107379716170956,
      "learning_rate": 9.805636540330418e-06,
      "loss": 0.2284,
      "step": 1009
    },
    {
      "epoch": 0.029464962949997082,
      "grad_norm": 1.1463554003095695,
      "learning_rate": 9.815354713313899e-06,
      "loss": 0.2275,
      "step": 1010
    },
    {
      "epoch": 0.029494136180640645,
      "grad_norm": 0.9356145745746572,
      "learning_rate": 9.825072886297377e-06,
      "loss": 0.2212,
      "step": 1011
    },
    {
      "epoch": 0.029523309411284206,
      "grad_norm": 1.1456220731005373,
      "learning_rate": 9.834791059280856e-06,
      "loss": 0.2009,
      "step": 1012
    },
    {
      "epoch": 0.029552482641927766,
      "grad_norm": 0.9493920852972088,
      "learning_rate": 9.844509232264335e-06,
      "loss": 0.2433,
      "step": 1013
    },
    {
      "epoch": 0.02958165587257133,
      "grad_norm": 1.120088355834498,
      "learning_rate": 9.854227405247815e-06,
      "loss": 0.2425,
      "step": 1014
    },
    {
      "epoch": 0.02961082910321489,
      "grad_norm": 1.0822183880901732,
      "learning_rate": 9.863945578231294e-06,
      "loss": 0.2248,
      "step": 1015
    },
    {
      "epoch": 0.02964000233385845,
      "grad_norm": 0.9589973441307148,
      "learning_rate": 9.873663751214773e-06,
      "loss": 0.2257,
      "step": 1016
    },
    {
      "epoch": 0.029669175564502014,
      "grad_norm": 1.1954242272928033,
      "learning_rate": 9.883381924198252e-06,
      "loss": 0.2179,
      "step": 1017
    },
    {
      "epoch": 0.029698348795145574,
      "grad_norm": 1.0242808476806817,
      "learning_rate": 9.89310009718173e-06,
      "loss": 0.2343,
      "step": 1018
    },
    {
      "epoch": 0.029727522025789137,
      "grad_norm": 0.9177405765805806,
      "learning_rate": 9.90281827016521e-06,
      "loss": 0.2291,
      "step": 1019
    },
    {
      "epoch": 0.029756695256432698,
      "grad_norm": 1.2918547765404111,
      "learning_rate": 9.912536443148689e-06,
      "loss": 0.2086,
      "step": 1020
    },
    {
      "epoch": 0.029785868487076258,
      "grad_norm": 0.9779153452842309,
      "learning_rate": 9.922254616132168e-06,
      "loss": 0.1967,
      "step": 1021
    },
    {
      "epoch": 0.02981504171771982,
      "grad_norm": 1.4523478461643309,
      "learning_rate": 9.931972789115647e-06,
      "loss": 0.2171,
      "step": 1022
    },
    {
      "epoch": 0.02984421494836338,
      "grad_norm": 1.2042093839394294,
      "learning_rate": 9.941690962099127e-06,
      "loss": 0.2229,
      "step": 1023
    },
    {
      "epoch": 0.029873388179006942,
      "grad_norm": 1.373374530674181,
      "learning_rate": 9.951409135082604e-06,
      "loss": 0.2259,
      "step": 1024
    },
    {
      "epoch": 0.029902561409650506,
      "grad_norm": 1.4002754178324355,
      "learning_rate": 9.961127308066084e-06,
      "loss": 0.2517,
      "step": 1025
    },
    {
      "epoch": 0.029931734640294066,
      "grad_norm": 1.1459327387497917,
      "learning_rate": 9.970845481049563e-06,
      "loss": 0.2115,
      "step": 1026
    },
    {
      "epoch": 0.029960907870937626,
      "grad_norm": 1.2113509802193894,
      "learning_rate": 9.980563654033044e-06,
      "loss": 0.2271,
      "step": 1027
    },
    {
      "epoch": 0.02999008110158119,
      "grad_norm": 1.2171569257268688,
      "learning_rate": 9.990281827016522e-06,
      "loss": 0.2373,
      "step": 1028
    },
    {
      "epoch": 0.03001925433222475,
      "grad_norm": 1.2814230734715215,
      "learning_rate": 1e-05,
      "loss": 0.2208,
      "step": 1029
    },
    {
      "epoch": 0.030048427562868314,
      "grad_norm": 1.1038896598078904,
      "learning_rate": 9.999999977680598e-06,
      "loss": 0.2418,
      "step": 1030
    },
    {
      "epoch": 0.030077600793511874,
      "grad_norm": 1.1780794329233468,
      "learning_rate": 9.99999991072239e-06,
      "loss": 0.271,
      "step": 1031
    },
    {
      "epoch": 0.030106774024155434,
      "grad_norm": 1.2161672986842347,
      "learning_rate": 9.999999799125373e-06,
      "loss": 0.2393,
      "step": 1032
    },
    {
      "epoch": 0.030135947254798998,
      "grad_norm": 1.0013279539555224,
      "learning_rate": 9.999999642889553e-06,
      "loss": 0.2331,
      "step": 1033
    },
    {
      "epoch": 0.030165120485442558,
      "grad_norm": 1.1191553516627004,
      "learning_rate": 9.999999442014931e-06,
      "loss": 0.2565,
      "step": 1034
    },
    {
      "epoch": 0.030194293716086118,
      "grad_norm": 1.0903349175568633,
      "learning_rate": 9.999999196501506e-06,
      "loss": 0.2656,
      "step": 1035
    },
    {
      "epoch": 0.03022346694672968,
      "grad_norm": 1.6161898354090392,
      "learning_rate": 9.999998906349283e-06,
      "loss": 0.2493,
      "step": 1036
    },
    {
      "epoch": 0.030252640177373242,
      "grad_norm": 1.0186301722121622,
      "learning_rate": 9.999998571558263e-06,
      "loss": 0.2247,
      "step": 1037
    },
    {
      "epoch": 0.030281813408016802,
      "grad_norm": 0.9351870649492161,
      "learning_rate": 9.999998192128449e-06,
      "loss": 0.2076,
      "step": 1038
    },
    {
      "epoch": 0.030310986638660366,
      "grad_norm": 1.352484917920168,
      "learning_rate": 9.999997768059845e-06,
      "loss": 0.248,
      "step": 1039
    },
    {
      "epoch": 0.030340159869303926,
      "grad_norm": 0.9669604788901461,
      "learning_rate": 9.999997299352456e-06,
      "loss": 0.1975,
      "step": 1040
    },
    {
      "epoch": 0.03036933309994749,
      "grad_norm": 1.2802932840500656,
      "learning_rate": 9.999996786006282e-06,
      "loss": 0.2477,
      "step": 1041
    },
    {
      "epoch": 0.03039850633059105,
      "grad_norm": 1.0765392634378834,
      "learning_rate": 9.999996228021332e-06,
      "loss": 0.1886,
      "step": 1042
    },
    {
      "epoch": 0.03042767956123461,
      "grad_norm": 1.2080753231670838,
      "learning_rate": 9.999995625397607e-06,
      "loss": 0.2101,
      "step": 1043
    },
    {
      "epoch": 0.030456852791878174,
      "grad_norm": 1.0108288252893798,
      "learning_rate": 9.999994978135117e-06,
      "loss": 0.2501,
      "step": 1044
    },
    {
      "epoch": 0.030486026022521734,
      "grad_norm": 1.0778645089294687,
      "learning_rate": 9.999994286233866e-06,
      "loss": 0.219,
      "step": 1045
    },
    {
      "epoch": 0.030515199253165294,
      "grad_norm": 1.46972921740258,
      "learning_rate": 9.999993549693859e-06,
      "loss": 0.2725,
      "step": 1046
    },
    {
      "epoch": 0.030544372483808858,
      "grad_norm": 1.0426141075524984,
      "learning_rate": 9.999992768515101e-06,
      "loss": 0.2262,
      "step": 1047
    },
    {
      "epoch": 0.030573545714452418,
      "grad_norm": 0.8282285003795017,
      "learning_rate": 9.999991942697602e-06,
      "loss": 0.2029,
      "step": 1048
    },
    {
      "epoch": 0.03060271894509598,
      "grad_norm": 1.287499827347934,
      "learning_rate": 9.999991072241371e-06,
      "loss": 0.2347,
      "step": 1049
    },
    {
      "epoch": 0.030631892175739542,
      "grad_norm": 1.0875295125811832,
      "learning_rate": 9.999990157146411e-06,
      "loss": 0.2292,
      "step": 1050
    },
    {
      "epoch": 0.030661065406383102,
      "grad_norm": 1.0651058116013083,
      "learning_rate": 9.999989197412733e-06,
      "loss": 0.2094,
      "step": 1051
    },
    {
      "epoch": 0.030690238637026666,
      "grad_norm": 1.3083532189291245,
      "learning_rate": 9.999988193040345e-06,
      "loss": 0.2448,
      "step": 1052
    },
    {
      "epoch": 0.030719411867670226,
      "grad_norm": 1.174307964114287,
      "learning_rate": 9.999987144029256e-06,
      "loss": 0.2164,
      "step": 1053
    },
    {
      "epoch": 0.030748585098313786,
      "grad_norm": 1.1792434998984536,
      "learning_rate": 9.999986050379476e-06,
      "loss": 0.2175,
      "step": 1054
    },
    {
      "epoch": 0.03077775832895735,
      "grad_norm": 1.105215045612077,
      "learning_rate": 9.999984912091012e-06,
      "loss": 0.2458,
      "step": 1055
    },
    {
      "epoch": 0.03080693155960091,
      "grad_norm": 1.2327368379074821,
      "learning_rate": 9.999983729163879e-06,
      "loss": 0.2386,
      "step": 1056
    },
    {
      "epoch": 0.03083610479024447,
      "grad_norm": 1.0071343378859123,
      "learning_rate": 9.999982501598085e-06,
      "loss": 0.2403,
      "step": 1057
    },
    {
      "epoch": 0.030865278020888034,
      "grad_norm": 0.9581753368319474,
      "learning_rate": 9.999981229393638e-06,
      "loss": 0.2333,
      "step": 1058
    },
    {
      "epoch": 0.030894451251531594,
      "grad_norm": 0.9039464725641874,
      "learning_rate": 9.999979912550554e-06,
      "loss": 0.2296,
      "step": 1059
    },
    {
      "epoch": 0.030923624482175158,
      "grad_norm": 0.9799420306191866,
      "learning_rate": 9.999978551068843e-06,
      "loss": 0.2247,
      "step": 1060
    },
    {
      "epoch": 0.030952797712818718,
      "grad_norm": 1.0631962544979339,
      "learning_rate": 9.999977144948516e-06,
      "loss": 0.2481,
      "step": 1061
    },
    {
      "epoch": 0.030981970943462278,
      "grad_norm": 0.8651539004894622,
      "learning_rate": 9.999975694189588e-06,
      "loss": 0.2337,
      "step": 1062
    },
    {
      "epoch": 0.031011144174105842,
      "grad_norm": 1.1665030369366545,
      "learning_rate": 9.999974198792071e-06,
      "loss": 0.2519,
      "step": 1063
    },
    {
      "epoch": 0.031040317404749402,
      "grad_norm": 1.114903267694991,
      "learning_rate": 9.999972658755976e-06,
      "loss": 0.2231,
      "step": 1064
    },
    {
      "epoch": 0.031069490635392962,
      "grad_norm": 0.9330234882071323,
      "learning_rate": 9.99997107408132e-06,
      "loss": 0.2148,
      "step": 1065
    },
    {
      "epoch": 0.031098663866036526,
      "grad_norm": 0.9682184930048112,
      "learning_rate": 9.999969444768116e-06,
      "loss": 0.2154,
      "step": 1066
    },
    {
      "epoch": 0.031127837096680086,
      "grad_norm": 1.10379167524357,
      "learning_rate": 9.999967770816376e-06,
      "loss": 0.2454,
      "step": 1067
    },
    {
      "epoch": 0.031157010327323646,
      "grad_norm": 0.9999280949417427,
      "learning_rate": 9.99996605222612e-06,
      "loss": 0.225,
      "step": 1068
    },
    {
      "epoch": 0.03118618355796721,
      "grad_norm": 1.0971592229248597,
      "learning_rate": 9.999964288997361e-06,
      "loss": 0.2335,
      "step": 1069
    },
    {
      "epoch": 0.03121535678861077,
      "grad_norm": 0.9988697953983956,
      "learning_rate": 9.999962481130112e-06,
      "loss": 0.2646,
      "step": 1070
    },
    {
      "epoch": 0.031244530019254334,
      "grad_norm": 1.1785567945511561,
      "learning_rate": 9.999960628624394e-06,
      "loss": 0.2036,
      "step": 1071
    },
    {
      "epoch": 0.031273703249897894,
      "grad_norm": 0.8444076295505306,
      "learning_rate": 9.999958731480219e-06,
      "loss": 0.2206,
      "step": 1072
    },
    {
      "epoch": 0.031302876480541454,
      "grad_norm": 1.1765564114291198,
      "learning_rate": 9.999956789697608e-06,
      "loss": 0.2196,
      "step": 1073
    },
    {
      "epoch": 0.031332049711185014,
      "grad_norm": 1.0027789207640034,
      "learning_rate": 9.999954803276575e-06,
      "loss": 0.2435,
      "step": 1074
    },
    {
      "epoch": 0.03136122294182858,
      "grad_norm": 1.1942686350375853,
      "learning_rate": 9.99995277221714e-06,
      "loss": 0.2146,
      "step": 1075
    },
    {
      "epoch": 0.03139039617247214,
      "grad_norm": 1.0105641916721053,
      "learning_rate": 9.99995069651932e-06,
      "loss": 0.2211,
      "step": 1076
    },
    {
      "epoch": 0.0314195694031157,
      "grad_norm": 1.035972104046321,
      "learning_rate": 9.999948576183133e-06,
      "loss": 0.2601,
      "step": 1077
    },
    {
      "epoch": 0.03144874263375926,
      "grad_norm": 1.0463994881138075,
      "learning_rate": 9.999946411208598e-06,
      "loss": 0.231,
      "step": 1078
    },
    {
      "epoch": 0.03147791586440282,
      "grad_norm": 1.0958469859592759,
      "learning_rate": 9.999944201595736e-06,
      "loss": 0.2162,
      "step": 1079
    },
    {
      "epoch": 0.03150708909504638,
      "grad_norm": 0.9170157458484897,
      "learning_rate": 9.999941947344567e-06,
      "loss": 0.2536,
      "step": 1080
    },
    {
      "epoch": 0.03153626232568995,
      "grad_norm": 1.0339277976868972,
      "learning_rate": 9.999939648455108e-06,
      "loss": 0.2283,
      "step": 1081
    },
    {
      "epoch": 0.03156543555633351,
      "grad_norm": 1.010049306504477,
      "learning_rate": 9.999937304927384e-06,
      "loss": 0.2272,
      "step": 1082
    },
    {
      "epoch": 0.03159460878697707,
      "grad_norm": 0.8365637377041215,
      "learning_rate": 9.999934916761411e-06,
      "loss": 0.2367,
      "step": 1083
    },
    {
      "epoch": 0.03162378201762063,
      "grad_norm": 1.0402870618881737,
      "learning_rate": 9.999932483957212e-06,
      "loss": 0.2162,
      "step": 1084
    },
    {
      "epoch": 0.03165295524826419,
      "grad_norm": 1.0734228405774093,
      "learning_rate": 9.999930006514811e-06,
      "loss": 0.2538,
      "step": 1085
    },
    {
      "epoch": 0.03168212847890776,
      "grad_norm": 1.064405888182433,
      "learning_rate": 9.999927484434229e-06,
      "loss": 0.2526,
      "step": 1086
    },
    {
      "epoch": 0.03171130170955132,
      "grad_norm": 1.072147764043234,
      "learning_rate": 9.999924917715486e-06,
      "loss": 0.1984,
      "step": 1087
    },
    {
      "epoch": 0.03174047494019488,
      "grad_norm": 0.9001795646488481,
      "learning_rate": 9.999922306358607e-06,
      "loss": 0.2155,
      "step": 1088
    },
    {
      "epoch": 0.03176964817083844,
      "grad_norm": 1.080438778596477,
      "learning_rate": 9.999919650363617e-06,
      "loss": 0.2185,
      "step": 1089
    },
    {
      "epoch": 0.031798821401482,
      "grad_norm": 0.9943418293671965,
      "learning_rate": 9.999916949730536e-06,
      "loss": 0.2397,
      "step": 1090
    },
    {
      "epoch": 0.03182799463212556,
      "grad_norm": 1.1837387132283383,
      "learning_rate": 9.999914204459393e-06,
      "loss": 0.2258,
      "step": 1091
    },
    {
      "epoch": 0.031857167862769126,
      "grad_norm": 1.3608341986920691,
      "learning_rate": 9.999911414550207e-06,
      "loss": 0.2383,
      "step": 1092
    },
    {
      "epoch": 0.031886341093412686,
      "grad_norm": 1.1532640252010724,
      "learning_rate": 9.999908580003006e-06,
      "loss": 0.2279,
      "step": 1093
    },
    {
      "epoch": 0.031915514324056246,
      "grad_norm": 1.2667090607333933,
      "learning_rate": 9.999905700817816e-06,
      "loss": 0.2466,
      "step": 1094
    },
    {
      "epoch": 0.031944687554699806,
      "grad_norm": 1.1186150208903016,
      "learning_rate": 9.99990277699466e-06,
      "loss": 0.2575,
      "step": 1095
    },
    {
      "epoch": 0.031973860785343367,
      "grad_norm": 1.04566986537825,
      "learning_rate": 9.999899808533566e-06,
      "loss": 0.2322,
      "step": 1096
    },
    {
      "epoch": 0.032003034015986934,
      "grad_norm": 1.1302419209531933,
      "learning_rate": 9.999896795434561e-06,
      "loss": 0.2613,
      "step": 1097
    },
    {
      "epoch": 0.032032207246630494,
      "grad_norm": 1.001348327485578,
      "learning_rate": 9.999893737697668e-06,
      "loss": 0.2061,
      "step": 1098
    },
    {
      "epoch": 0.032061380477274054,
      "grad_norm": 1.0936303751215548,
      "learning_rate": 9.99989063532292e-06,
      "loss": 0.2413,
      "step": 1099
    },
    {
      "epoch": 0.032090553707917614,
      "grad_norm": 1.2966792596343628,
      "learning_rate": 9.999887488310342e-06,
      "loss": 0.2158,
      "step": 1100
    },
    {
      "epoch": 0.032119726938561174,
      "grad_norm": 1.068266295553114,
      "learning_rate": 9.999884296659961e-06,
      "loss": 0.2343,
      "step": 1101
    },
    {
      "epoch": 0.032148900169204735,
      "grad_norm": 1.0216000271168946,
      "learning_rate": 9.999881060371808e-06,
      "loss": 0.2495,
      "step": 1102
    },
    {
      "epoch": 0.0321780733998483,
      "grad_norm": 1.4157251418123615,
      "learning_rate": 9.999877779445908e-06,
      "loss": 0.2736,
      "step": 1103
    },
    {
      "epoch": 0.03220724663049186,
      "grad_norm": 1.0096560165463653,
      "learning_rate": 9.999874453882294e-06,
      "loss": 0.2362,
      "step": 1104
    },
    {
      "epoch": 0.03223641986113542,
      "grad_norm": 0.8131015711421686,
      "learning_rate": 9.999871083680995e-06,
      "loss": 0.2317,
      "step": 1105
    },
    {
      "epoch": 0.03226559309177898,
      "grad_norm": 1.1036781682956833,
      "learning_rate": 9.99986766884204e-06,
      "loss": 0.2321,
      "step": 1106
    },
    {
      "epoch": 0.03229476632242254,
      "grad_norm": 1.1382838264022068,
      "learning_rate": 9.99986420936546e-06,
      "loss": 0.2198,
      "step": 1107
    },
    {
      "epoch": 0.03232393955306611,
      "grad_norm": 1.0162604892559457,
      "learning_rate": 9.999860705251288e-06,
      "loss": 0.2192,
      "step": 1108
    },
    {
      "epoch": 0.03235311278370967,
      "grad_norm": 0.8933296893054877,
      "learning_rate": 9.99985715649955e-06,
      "loss": 0.2077,
      "step": 1109
    },
    {
      "epoch": 0.03238228601435323,
      "grad_norm": 1.119167707859606,
      "learning_rate": 9.999853563110282e-06,
      "loss": 0.21,
      "step": 1110
    },
    {
      "epoch": 0.03241145924499679,
      "grad_norm": 1.082100130798858,
      "learning_rate": 9.999849925083516e-06,
      "loss": 0.226,
      "step": 1111
    },
    {
      "epoch": 0.03244063247564035,
      "grad_norm": 1.0214419161709158,
      "learning_rate": 9.999846242419282e-06,
      "loss": 0.1914,
      "step": 1112
    },
    {
      "epoch": 0.03246980570628391,
      "grad_norm": 0.9343600517953623,
      "learning_rate": 9.999842515117615e-06,
      "loss": 0.275,
      "step": 1113
    },
    {
      "epoch": 0.03249897893692748,
      "grad_norm": 0.9865952655303679,
      "learning_rate": 9.999838743178547e-06,
      "loss": 0.2129,
      "step": 1114
    },
    {
      "epoch": 0.03252815216757104,
      "grad_norm": 1.319896897819642,
      "learning_rate": 9.999834926602113e-06,
      "loss": 0.2174,
      "step": 1115
    },
    {
      "epoch": 0.0325573253982146,
      "grad_norm": 1.0521765205195404,
      "learning_rate": 9.999831065388345e-06,
      "loss": 0.226,
      "step": 1116
    },
    {
      "epoch": 0.03258649862885816,
      "grad_norm": 0.9654473865450861,
      "learning_rate": 9.999827159537281e-06,
      "loss": 0.2134,
      "step": 1117
    },
    {
      "epoch": 0.03261567185950172,
      "grad_norm": 1.123917566290155,
      "learning_rate": 9.999823209048951e-06,
      "loss": 0.2353,
      "step": 1118
    },
    {
      "epoch": 0.032644845090145286,
      "grad_norm": 0.9176866330298619,
      "learning_rate": 9.999819213923394e-06,
      "loss": 0.2297,
      "step": 1119
    },
    {
      "epoch": 0.032674018320788846,
      "grad_norm": 1.1257387803194792,
      "learning_rate": 9.999815174160646e-06,
      "loss": 0.2051,
      "step": 1120
    },
    {
      "epoch": 0.032703191551432406,
      "grad_norm": 1.0474059598016923,
      "learning_rate": 9.999811089760741e-06,
      "loss": 0.2263,
      "step": 1121
    },
    {
      "epoch": 0.032732364782075966,
      "grad_norm": 0.798373618759498,
      "learning_rate": 9.999806960723715e-06,
      "loss": 0.2278,
      "step": 1122
    },
    {
      "epoch": 0.03276153801271953,
      "grad_norm": 0.8280537246125049,
      "learning_rate": 9.999802787049609e-06,
      "loss": 0.2245,
      "step": 1123
    },
    {
      "epoch": 0.03279071124336309,
      "grad_norm": 0.9711641379405179,
      "learning_rate": 9.999798568738453e-06,
      "loss": 0.2524,
      "step": 1124
    },
    {
      "epoch": 0.032819884474006654,
      "grad_norm": 1.1246028188582697,
      "learning_rate": 9.99979430579029e-06,
      "loss": 0.2262,
      "step": 1125
    },
    {
      "epoch": 0.032849057704650214,
      "grad_norm": 0.953299071600111,
      "learning_rate": 9.99978999820516e-06,
      "loss": 0.1986,
      "step": 1126
    },
    {
      "epoch": 0.032878230935293774,
      "grad_norm": 0.9431805074183164,
      "learning_rate": 9.999785645983095e-06,
      "loss": 0.2109,
      "step": 1127
    },
    {
      "epoch": 0.032907404165937335,
      "grad_norm": 1.1668135926724232,
      "learning_rate": 9.999781249124142e-06,
      "loss": 0.2574,
      "step": 1128
    },
    {
      "epoch": 0.032936577396580895,
      "grad_norm": 0.9957053951434509,
      "learning_rate": 9.99977680762833e-06,
      "loss": 0.2129,
      "step": 1129
    },
    {
      "epoch": 0.03296575062722446,
      "grad_norm": 0.883050594109438,
      "learning_rate": 9.999772321495706e-06,
      "loss": 0.2155,
      "step": 1130
    },
    {
      "epoch": 0.03299492385786802,
      "grad_norm": 0.9997288408798505,
      "learning_rate": 9.999767790726309e-06,
      "loss": 0.2276,
      "step": 1131
    },
    {
      "epoch": 0.03302409708851158,
      "grad_norm": 1.3235571539055981,
      "learning_rate": 9.999763215320179e-06,
      "loss": 0.2235,
      "step": 1132
    },
    {
      "epoch": 0.03305327031915514,
      "grad_norm": 1.086190879888537,
      "learning_rate": 9.999758595277356e-06,
      "loss": 0.2637,
      "step": 1133
    },
    {
      "epoch": 0.0330824435497987,
      "grad_norm": 0.9198443741837063,
      "learning_rate": 9.999753930597882e-06,
      "loss": 0.2125,
      "step": 1134
    },
    {
      "epoch": 0.03311161678044226,
      "grad_norm": 1.046313029577075,
      "learning_rate": 9.999749221281798e-06,
      "loss": 0.2269,
      "step": 1135
    },
    {
      "epoch": 0.03314079001108583,
      "grad_norm": 1.0128006542158847,
      "learning_rate": 9.999744467329147e-06,
      "loss": 0.2413,
      "step": 1136
    },
    {
      "epoch": 0.03316996324172939,
      "grad_norm": 1.0689231153917713,
      "learning_rate": 9.999739668739971e-06,
      "loss": 0.2361,
      "step": 1137
    },
    {
      "epoch": 0.03319913647237295,
      "grad_norm": 1.036064213424324,
      "learning_rate": 9.999734825514312e-06,
      "loss": 0.2076,
      "step": 1138
    },
    {
      "epoch": 0.03322830970301651,
      "grad_norm": 0.9531214242931653,
      "learning_rate": 9.999729937652214e-06,
      "loss": 0.2032,
      "step": 1139
    },
    {
      "epoch": 0.03325748293366007,
      "grad_norm": 1.1916189472859937,
      "learning_rate": 9.999725005153721e-06,
      "loss": 0.2308,
      "step": 1140
    },
    {
      "epoch": 0.03328665616430364,
      "grad_norm": 1.093124559785437,
      "learning_rate": 9.999720028018877e-06,
      "loss": 0.2054,
      "step": 1141
    },
    {
      "epoch": 0.0333158293949472,
      "grad_norm": 1.1483154885704239,
      "learning_rate": 9.999715006247726e-06,
      "loss": 0.254,
      "step": 1142
    },
    {
      "epoch": 0.03334500262559076,
      "grad_norm": 1.2729315953713594,
      "learning_rate": 9.999709939840314e-06,
      "loss": 0.24,
      "step": 1143
    },
    {
      "epoch": 0.03337417585623432,
      "grad_norm": 1.156513619544326,
      "learning_rate": 9.999704828796683e-06,
      "loss": 0.232,
      "step": 1144
    },
    {
      "epoch": 0.03340334908687788,
      "grad_norm": 1.0072178870045805,
      "learning_rate": 9.999699673116882e-06,
      "loss": 0.225,
      "step": 1145
    },
    {
      "epoch": 0.03343252231752144,
      "grad_norm": 1.2746348783347192,
      "learning_rate": 9.999694472800956e-06,
      "loss": 0.2517,
      "step": 1146
    },
    {
      "epoch": 0.033461695548165006,
      "grad_norm": 1.034777373284874,
      "learning_rate": 9.99968922784895e-06,
      "loss": 0.2077,
      "step": 1147
    },
    {
      "epoch": 0.033490868778808566,
      "grad_norm": 0.9290033166511193,
      "learning_rate": 9.999683938260915e-06,
      "loss": 0.2146,
      "step": 1148
    },
    {
      "epoch": 0.033520042009452126,
      "grad_norm": 1.0897810951548232,
      "learning_rate": 9.999678604036893e-06,
      "loss": 0.2305,
      "step": 1149
    },
    {
      "epoch": 0.03354921524009569,
      "grad_norm": 1.0589171142505913,
      "learning_rate": 9.999673225176934e-06,
      "loss": 0.262,
      "step": 1150
    },
    {
      "epoch": 0.03357838847073925,
      "grad_norm": 1.2259342193695393,
      "learning_rate": 9.999667801681087e-06,
      "loss": 0.2618,
      "step": 1151
    },
    {
      "epoch": 0.033607561701382814,
      "grad_norm": 1.296660574782945,
      "learning_rate": 9.999662333549399e-06,
      "loss": 0.2627,
      "step": 1152
    },
    {
      "epoch": 0.033636734932026374,
      "grad_norm": 0.8510483303165537,
      "learning_rate": 9.999656820781917e-06,
      "loss": 0.2147,
      "step": 1153
    },
    {
      "epoch": 0.033665908162669934,
      "grad_norm": 1.023782054526501,
      "learning_rate": 9.999651263378696e-06,
      "loss": 0.2237,
      "step": 1154
    },
    {
      "epoch": 0.033695081393313495,
      "grad_norm": 0.8665232399303242,
      "learning_rate": 9.999645661339779e-06,
      "loss": 0.2135,
      "step": 1155
    },
    {
      "epoch": 0.033724254623957055,
      "grad_norm": 1.061822276647852,
      "learning_rate": 9.999640014665221e-06,
      "loss": 0.2472,
      "step": 1156
    },
    {
      "epoch": 0.033753427854600615,
      "grad_norm": 1.0176166037519079,
      "learning_rate": 9.99963432335507e-06,
      "loss": 0.2443,
      "step": 1157
    },
    {
      "epoch": 0.03378260108524418,
      "grad_norm": 0.8789299563084425,
      "learning_rate": 9.999628587409378e-06,
      "loss": 0.2371,
      "step": 1158
    },
    {
      "epoch": 0.03381177431588774,
      "grad_norm": 0.9076170170894672,
      "learning_rate": 9.999622806828193e-06,
      "loss": 0.2006,
      "step": 1159
    },
    {
      "epoch": 0.0338409475465313,
      "grad_norm": 1.2817894819207372,
      "learning_rate": 9.99961698161157e-06,
      "loss": 0.2241,
      "step": 1160
    },
    {
      "epoch": 0.03387012077717486,
      "grad_norm": 1.1012341450895315,
      "learning_rate": 9.999611111759562e-06,
      "loss": 0.2336,
      "step": 1161
    },
    {
      "epoch": 0.03389929400781842,
      "grad_norm": 0.9114744283139627,
      "learning_rate": 9.999605197272219e-06,
      "loss": 0.2028,
      "step": 1162
    },
    {
      "epoch": 0.03392846723846199,
      "grad_norm": 1.034475584854439,
      "learning_rate": 9.999599238149594e-06,
      "loss": 0.2502,
      "step": 1163
    },
    {
      "epoch": 0.03395764046910555,
      "grad_norm": 1.1624818383366493,
      "learning_rate": 9.999593234391739e-06,
      "loss": 0.2222,
      "step": 1164
    },
    {
      "epoch": 0.03398681369974911,
      "grad_norm": 0.956399900823704,
      "learning_rate": 9.99958718599871e-06,
      "loss": 0.2257,
      "step": 1165
    },
    {
      "epoch": 0.03401598693039267,
      "grad_norm": 0.9627877016735599,
      "learning_rate": 9.999581092970561e-06,
      "loss": 0.2126,
      "step": 1166
    },
    {
      "epoch": 0.03404516016103623,
      "grad_norm": 1.0693354601759546,
      "learning_rate": 9.999574955307345e-06,
      "loss": 0.2338,
      "step": 1167
    },
    {
      "epoch": 0.0340743333916798,
      "grad_norm": 1.0054022949745454,
      "learning_rate": 9.999568773009116e-06,
      "loss": 0.2175,
      "step": 1168
    },
    {
      "epoch": 0.03410350662232336,
      "grad_norm": 0.9618317973515895,
      "learning_rate": 9.999562546075932e-06,
      "loss": 0.2229,
      "step": 1169
    },
    {
      "epoch": 0.03413267985296692,
      "grad_norm": 1.0775137058196893,
      "learning_rate": 9.999556274507847e-06,
      "loss": 0.2223,
      "step": 1170
    },
    {
      "epoch": 0.03416185308361048,
      "grad_norm": 1.2646575418200323,
      "learning_rate": 9.999549958304917e-06,
      "loss": 0.2315,
      "step": 1171
    },
    {
      "epoch": 0.03419102631425404,
      "grad_norm": 1.286686590384167,
      "learning_rate": 9.999543597467199e-06,
      "loss": 0.2253,
      "step": 1172
    },
    {
      "epoch": 0.0342201995448976,
      "grad_norm": 0.968843466313081,
      "learning_rate": 9.999537191994747e-06,
      "loss": 0.2286,
      "step": 1173
    },
    {
      "epoch": 0.034249372775541166,
      "grad_norm": 1.2498129034205643,
      "learning_rate": 9.999530741887622e-06,
      "loss": 0.2256,
      "step": 1174
    },
    {
      "epoch": 0.034278546006184726,
      "grad_norm": 1.22475074679282,
      "learning_rate": 9.99952424714588e-06,
      "loss": 0.2319,
      "step": 1175
    },
    {
      "epoch": 0.03430771923682829,
      "grad_norm": 0.9952594767143536,
      "learning_rate": 9.99951770776958e-06,
      "loss": 0.2047,
      "step": 1176
    },
    {
      "epoch": 0.03433689246747185,
      "grad_norm": 1.0810281894508293,
      "learning_rate": 9.999511123758778e-06,
      "loss": 0.2247,
      "step": 1177
    },
    {
      "epoch": 0.03436606569811541,
      "grad_norm": 1.1338001686539567,
      "learning_rate": 9.999504495113533e-06,
      "loss": 0.2337,
      "step": 1178
    },
    {
      "epoch": 0.034395238928758974,
      "grad_norm": 0.9428062550979496,
      "learning_rate": 9.999497821833908e-06,
      "loss": 0.2058,
      "step": 1179
    },
    {
      "epoch": 0.034424412159402534,
      "grad_norm": 0.8369047360001224,
      "learning_rate": 9.999491103919958e-06,
      "loss": 0.2086,
      "step": 1180
    },
    {
      "epoch": 0.034453585390046094,
      "grad_norm": 1.2377381920897232,
      "learning_rate": 9.999484341371746e-06,
      "loss": 0.2293,
      "step": 1181
    },
    {
      "epoch": 0.034482758620689655,
      "grad_norm": 0.9937717334450213,
      "learning_rate": 9.99947753418933e-06,
      "loss": 0.2228,
      "step": 1182
    },
    {
      "epoch": 0.034511931851333215,
      "grad_norm": 0.9479845663406832,
      "learning_rate": 9.999470682372774e-06,
      "loss": 0.2136,
      "step": 1183
    },
    {
      "epoch": 0.034541105081976775,
      "grad_norm": 1.1796947730576115,
      "learning_rate": 9.999463785922136e-06,
      "loss": 0.2464,
      "step": 1184
    },
    {
      "epoch": 0.03457027831262034,
      "grad_norm": 1.1718954993744068,
      "learning_rate": 9.999456844837478e-06,
      "loss": 0.2327,
      "step": 1185
    },
    {
      "epoch": 0.0345994515432639,
      "grad_norm": 1.3750095905594888,
      "learning_rate": 9.999449859118864e-06,
      "loss": 0.2402,
      "step": 1186
    },
    {
      "epoch": 0.03462862477390746,
      "grad_norm": 1.040067389611131,
      "learning_rate": 9.999442828766354e-06,
      "loss": 0.2332,
      "step": 1187
    },
    {
      "epoch": 0.03465779800455102,
      "grad_norm": 0.9820412034818019,
      "learning_rate": 9.999435753780014e-06,
      "loss": 0.2128,
      "step": 1188
    },
    {
      "epoch": 0.03468697123519458,
      "grad_norm": 1.065750756453323,
      "learning_rate": 9.999428634159904e-06,
      "loss": 0.2201,
      "step": 1189
    },
    {
      "epoch": 0.03471614446583815,
      "grad_norm": 1.0596792182901573,
      "learning_rate": 9.999421469906088e-06,
      "loss": 0.242,
      "step": 1190
    },
    {
      "epoch": 0.03474531769648171,
      "grad_norm": 1.116131218118618,
      "learning_rate": 9.999414261018632e-06,
      "loss": 0.225,
      "step": 1191
    },
    {
      "epoch": 0.03477449092712527,
      "grad_norm": 1.2665545716134226,
      "learning_rate": 9.999407007497597e-06,
      "loss": 0.2271,
      "step": 1192
    },
    {
      "epoch": 0.03480366415776883,
      "grad_norm": 1.1856520967677182,
      "learning_rate": 9.999399709343051e-06,
      "loss": 0.228,
      "step": 1193
    },
    {
      "epoch": 0.03483283738841239,
      "grad_norm": 1.0543449316668356,
      "learning_rate": 9.999392366555056e-06,
      "loss": 0.1993,
      "step": 1194
    },
    {
      "epoch": 0.03486201061905595,
      "grad_norm": 1.09449362058283,
      "learning_rate": 9.999384979133682e-06,
      "loss": 0.2256,
      "step": 1195
    },
    {
      "epoch": 0.03489118384969952,
      "grad_norm": 1.3760256034494558,
      "learning_rate": 9.99937754707899e-06,
      "loss": 0.2213,
      "step": 1196
    },
    {
      "epoch": 0.03492035708034308,
      "grad_norm": 0.9917622178333504,
      "learning_rate": 9.999370070391051e-06,
      "loss": 0.2194,
      "step": 1197
    },
    {
      "epoch": 0.03494953031098664,
      "grad_norm": 1.080240115007742,
      "learning_rate": 9.999362549069928e-06,
      "loss": 0.2129,
      "step": 1198
    },
    {
      "epoch": 0.0349787035416302,
      "grad_norm": 1.2657859156040228,
      "learning_rate": 9.99935498311569e-06,
      "loss": 0.222,
      "step": 1199
    },
    {
      "epoch": 0.03500787677227376,
      "grad_norm": 1.1555732625979267,
      "learning_rate": 9.999347372528405e-06,
      "loss": 0.2269,
      "step": 1200
    },
    {
      "epoch": 0.035037050002917326,
      "grad_norm": 1.0211903221318566,
      "learning_rate": 9.999339717308138e-06,
      "loss": 0.2366,
      "step": 1201
    },
    {
      "epoch": 0.035066223233560886,
      "grad_norm": 1.0785203518884237,
      "learning_rate": 9.99933201745496e-06,
      "loss": 0.2094,
      "step": 1202
    },
    {
      "epoch": 0.03509539646420445,
      "grad_norm": 0.950386052115053,
      "learning_rate": 9.99932427296894e-06,
      "loss": 0.2594,
      "step": 1203
    },
    {
      "epoch": 0.03512456969484801,
      "grad_norm": 1.0639074983819572,
      "learning_rate": 9.999316483850147e-06,
      "loss": 0.2419,
      "step": 1204
    },
    {
      "epoch": 0.03515374292549157,
      "grad_norm": 0.8965983689343393,
      "learning_rate": 9.999308650098649e-06,
      "loss": 0.2275,
      "step": 1205
    },
    {
      "epoch": 0.03518291615613513,
      "grad_norm": 0.9542237939173827,
      "learning_rate": 9.999300771714518e-06,
      "loss": 0.2382,
      "step": 1206
    },
    {
      "epoch": 0.035212089386778694,
      "grad_norm": 1.1040491741166185,
      "learning_rate": 9.999292848697822e-06,
      "loss": 0.2091,
      "step": 1207
    },
    {
      "epoch": 0.035241262617422255,
      "grad_norm": 0.8636923209159175,
      "learning_rate": 9.999284881048632e-06,
      "loss": 0.1985,
      "step": 1208
    },
    {
      "epoch": 0.035270435848065815,
      "grad_norm": 0.9725826608998004,
      "learning_rate": 9.99927686876702e-06,
      "loss": 0.2266,
      "step": 1209
    },
    {
      "epoch": 0.035299609078709375,
      "grad_norm": 1.147718904673886,
      "learning_rate": 9.999268811853058e-06,
      "loss": 0.2204,
      "step": 1210
    },
    {
      "epoch": 0.035328782309352935,
      "grad_norm": 1.029354075593606,
      "learning_rate": 9.99926071030682e-06,
      "loss": 0.1992,
      "step": 1211
    },
    {
      "epoch": 0.0353579555399965,
      "grad_norm": 0.8408064466005166,
      "learning_rate": 9.999252564128373e-06,
      "loss": 0.2166,
      "step": 1212
    },
    {
      "epoch": 0.03538712877064006,
      "grad_norm": 0.9964253539333732,
      "learning_rate": 9.999244373317794e-06,
      "loss": 0.2179,
      "step": 1213
    },
    {
      "epoch": 0.03541630200128362,
      "grad_norm": 0.9281384144091911,
      "learning_rate": 9.999236137875152e-06,
      "loss": 0.2022,
      "step": 1214
    },
    {
      "epoch": 0.03544547523192718,
      "grad_norm": 1.000925384075284,
      "learning_rate": 9.999227857800526e-06,
      "loss": 0.2373,
      "step": 1215
    },
    {
      "epoch": 0.03547464846257074,
      "grad_norm": 1.1731101092695548,
      "learning_rate": 9.999219533093986e-06,
      "loss": 0.1929,
      "step": 1216
    },
    {
      "epoch": 0.0355038216932143,
      "grad_norm": 0.939288664547676,
      "learning_rate": 9.999211163755607e-06,
      "loss": 0.208,
      "step": 1217
    },
    {
      "epoch": 0.03553299492385787,
      "grad_norm": 0.9529581324176537,
      "learning_rate": 9.999202749785465e-06,
      "loss": 0.2301,
      "step": 1218
    },
    {
      "epoch": 0.03556216815450143,
      "grad_norm": 1.0034436056259843,
      "learning_rate": 9.999194291183633e-06,
      "loss": 0.2549,
      "step": 1219
    },
    {
      "epoch": 0.03559134138514499,
      "grad_norm": 0.8140498582760453,
      "learning_rate": 9.99918578795019e-06,
      "loss": 0.2059,
      "step": 1220
    },
    {
      "epoch": 0.03562051461578855,
      "grad_norm": 1.0141425705255867,
      "learning_rate": 9.999177240085207e-06,
      "loss": 0.2157,
      "step": 1221
    },
    {
      "epoch": 0.03564968784643211,
      "grad_norm": 1.1163699248491348,
      "learning_rate": 9.999168647588767e-06,
      "loss": 0.217,
      "step": 1222
    },
    {
      "epoch": 0.03567886107707568,
      "grad_norm": 0.9139091889221808,
      "learning_rate": 9.999160010460938e-06,
      "loss": 0.2209,
      "step": 1223
    },
    {
      "epoch": 0.03570803430771924,
      "grad_norm": 0.9758061128820017,
      "learning_rate": 9.999151328701804e-06,
      "loss": 0.2213,
      "step": 1224
    },
    {
      "epoch": 0.0357372075383628,
      "grad_norm": 1.142823736007986,
      "learning_rate": 9.99914260231144e-06,
      "loss": 0.2406,
      "step": 1225
    },
    {
      "epoch": 0.03576638076900636,
      "grad_norm": 1.0516074160132758,
      "learning_rate": 9.999133831289924e-06,
      "loss": 0.2119,
      "step": 1226
    },
    {
      "epoch": 0.03579555399964992,
      "grad_norm": 1.1786248717376187,
      "learning_rate": 9.999125015637337e-06,
      "loss": 0.2171,
      "step": 1227
    },
    {
      "epoch": 0.03582472723029348,
      "grad_norm": 1.0082450454305523,
      "learning_rate": 9.999116155353751e-06,
      "loss": 0.2248,
      "step": 1228
    },
    {
      "epoch": 0.035853900460937047,
      "grad_norm": 1.0995312497528595,
      "learning_rate": 9.999107250439253e-06,
      "loss": 0.2564,
      "step": 1229
    },
    {
      "epoch": 0.03588307369158061,
      "grad_norm": 0.9229222188137134,
      "learning_rate": 9.999098300893916e-06,
      "loss": 0.2179,
      "step": 1230
    },
    {
      "epoch": 0.03591224692222417,
      "grad_norm": 1.2543935634661596,
      "learning_rate": 9.999089306717827e-06,
      "loss": 0.2322,
      "step": 1231
    },
    {
      "epoch": 0.03594142015286773,
      "grad_norm": 1.0599017323782356,
      "learning_rate": 9.999080267911059e-06,
      "loss": 0.2477,
      "step": 1232
    },
    {
      "epoch": 0.03597059338351129,
      "grad_norm": 1.08198813220789,
      "learning_rate": 9.999071184473694e-06,
      "loss": 0.1955,
      "step": 1233
    },
    {
      "epoch": 0.035999766614154854,
      "grad_norm": 0.9296632008847323,
      "learning_rate": 9.999062056405818e-06,
      "loss": 0.2284,
      "step": 1234
    },
    {
      "epoch": 0.036028939844798415,
      "grad_norm": 1.124573375991063,
      "learning_rate": 9.999052883707508e-06,
      "loss": 0.2238,
      "step": 1235
    },
    {
      "epoch": 0.036058113075441975,
      "grad_norm": 1.1526743479336314,
      "learning_rate": 9.999043666378847e-06,
      "loss": 0.2219,
      "step": 1236
    },
    {
      "epoch": 0.036087286306085535,
      "grad_norm": 1.3432794585841652,
      "learning_rate": 9.999034404419918e-06,
      "loss": 0.2347,
      "step": 1237
    },
    {
      "epoch": 0.036116459536729095,
      "grad_norm": 1.2156880471894373,
      "learning_rate": 9.999025097830803e-06,
      "loss": 0.2454,
      "step": 1238
    },
    {
      "epoch": 0.036145632767372655,
      "grad_norm": 1.1655941652232784,
      "learning_rate": 9.999015746611587e-06,
      "loss": 0.2259,
      "step": 1239
    },
    {
      "epoch": 0.03617480599801622,
      "grad_norm": 0.9271587585084555,
      "learning_rate": 9.999006350762349e-06,
      "loss": 0.22,
      "step": 1240
    },
    {
      "epoch": 0.03620397922865978,
      "grad_norm": 1.335574920498898,
      "learning_rate": 9.998996910283177e-06,
      "loss": 0.2417,
      "step": 1241
    },
    {
      "epoch": 0.03623315245930334,
      "grad_norm": 1.2195165397277774,
      "learning_rate": 9.998987425174154e-06,
      "loss": 0.2118,
      "step": 1242
    },
    {
      "epoch": 0.0362623256899469,
      "grad_norm": 0.9770996619705205,
      "learning_rate": 9.998977895435365e-06,
      "loss": 0.2367,
      "step": 1243
    },
    {
      "epoch": 0.03629149892059046,
      "grad_norm": 1.1461435430787712,
      "learning_rate": 9.998968321066893e-06,
      "loss": 0.2259,
      "step": 1244
    },
    {
      "epoch": 0.03632067215123403,
      "grad_norm": 0.9637216324090665,
      "learning_rate": 9.998958702068825e-06,
      "loss": 0.2116,
      "step": 1245
    },
    {
      "epoch": 0.03634984538187759,
      "grad_norm": 1.3104402836663753,
      "learning_rate": 9.99894903844125e-06,
      "loss": 0.2229,
      "step": 1246
    },
    {
      "epoch": 0.03637901861252115,
      "grad_norm": 1.0646578161147395,
      "learning_rate": 9.99893933018425e-06,
      "loss": 0.2533,
      "step": 1247
    },
    {
      "epoch": 0.03640819184316471,
      "grad_norm": 0.9792975473419264,
      "learning_rate": 9.998929577297912e-06,
      "loss": 0.2534,
      "step": 1248
    },
    {
      "epoch": 0.03643736507380827,
      "grad_norm": 1.3632713777987333,
      "learning_rate": 9.998919779782326e-06,
      "loss": 0.2465,
      "step": 1249
    },
    {
      "epoch": 0.03646653830445183,
      "grad_norm": 1.1058489882816205,
      "learning_rate": 9.998909937637576e-06,
      "loss": 0.2462,
      "step": 1250
    },
    {
      "epoch": 0.0364957115350954,
      "grad_norm": 0.9250320891239557,
      "learning_rate": 9.998900050863751e-06,
      "loss": 0.2228,
      "step": 1251
    },
    {
      "epoch": 0.03652488476573896,
      "grad_norm": 1.0517853930639236,
      "learning_rate": 9.99889011946094e-06,
      "loss": 0.2159,
      "step": 1252
    },
    {
      "epoch": 0.03655405799638252,
      "grad_norm": 1.0641799816582995,
      "learning_rate": 9.998880143429233e-06,
      "loss": 0.2312,
      "step": 1253
    },
    {
      "epoch": 0.03658323122702608,
      "grad_norm": 1.0400544389053685,
      "learning_rate": 9.998870122768716e-06,
      "loss": 0.2234,
      "step": 1254
    },
    {
      "epoch": 0.03661240445766964,
      "grad_norm": 0.7784251761330888,
      "learning_rate": 9.99886005747948e-06,
      "loss": 0.2206,
      "step": 1255
    },
    {
      "epoch": 0.03664157768831321,
      "grad_norm": 0.9387970895902331,
      "learning_rate": 9.998849947561615e-06,
      "loss": 0.2338,
      "step": 1256
    },
    {
      "epoch": 0.03667075091895677,
      "grad_norm": 0.9265651405115524,
      "learning_rate": 9.99883979301521e-06,
      "loss": 0.2256,
      "step": 1257
    },
    {
      "epoch": 0.03669992414960033,
      "grad_norm": 1.0746778660457141,
      "learning_rate": 9.998829593840358e-06,
      "loss": 0.2585,
      "step": 1258
    },
    {
      "epoch": 0.03672909738024389,
      "grad_norm": 0.980393031070998,
      "learning_rate": 9.998819350037148e-06,
      "loss": 0.2649,
      "step": 1259
    },
    {
      "epoch": 0.03675827061088745,
      "grad_norm": 1.091527366559575,
      "learning_rate": 9.998809061605671e-06,
      "loss": 0.2196,
      "step": 1260
    },
    {
      "epoch": 0.036787443841531015,
      "grad_norm": 0.9969539814682798,
      "learning_rate": 9.998798728546022e-06,
      "loss": 0.2436,
      "step": 1261
    },
    {
      "epoch": 0.036816617072174575,
      "grad_norm": 1.0774496370979247,
      "learning_rate": 9.998788350858291e-06,
      "loss": 0.2032,
      "step": 1262
    },
    {
      "epoch": 0.036845790302818135,
      "grad_norm": 1.1692389257363829,
      "learning_rate": 9.99877792854257e-06,
      "loss": 0.2389,
      "step": 1263
    },
    {
      "epoch": 0.036874963533461695,
      "grad_norm": 1.0704672720932178,
      "learning_rate": 9.998767461598954e-06,
      "loss": 0.2021,
      "step": 1264
    },
    {
      "epoch": 0.036904136764105255,
      "grad_norm": 0.8743761740096346,
      "learning_rate": 9.998756950027535e-06,
      "loss": 0.2237,
      "step": 1265
    },
    {
      "epoch": 0.036933309994748816,
      "grad_norm": 0.8363473801995409,
      "learning_rate": 9.998746393828406e-06,
      "loss": 0.2225,
      "step": 1266
    },
    {
      "epoch": 0.03696248322539238,
      "grad_norm": 1.1059349135328747,
      "learning_rate": 9.998735793001663e-06,
      "loss": 0.2146,
      "step": 1267
    },
    {
      "epoch": 0.03699165645603594,
      "grad_norm": 1.4869298313402535,
      "learning_rate": 9.998725147547401e-06,
      "loss": 0.2393,
      "step": 1268
    },
    {
      "epoch": 0.0370208296866795,
      "grad_norm": 0.7916156050456058,
      "learning_rate": 9.998714457465715e-06,
      "loss": 0.193,
      "step": 1269
    },
    {
      "epoch": 0.03705000291732306,
      "grad_norm": 1.0332321823689818,
      "learning_rate": 9.998703722756698e-06,
      "loss": 0.2131,
      "step": 1270
    },
    {
      "epoch": 0.037079176147966623,
      "grad_norm": 1.0944133136162206,
      "learning_rate": 9.998692943420448e-06,
      "loss": 0.204,
      "step": 1271
    },
    {
      "epoch": 0.03710834937861019,
      "grad_norm": 1.0342856859310066,
      "learning_rate": 9.99868211945706e-06,
      "loss": 0.259,
      "step": 1272
    },
    {
      "epoch": 0.03713752260925375,
      "grad_norm": 0.8740729653604431,
      "learning_rate": 9.998671250866631e-06,
      "loss": 0.2106,
      "step": 1273
    },
    {
      "epoch": 0.03716669583989731,
      "grad_norm": 1.1146756165041172,
      "learning_rate": 9.998660337649261e-06,
      "loss": 0.2356,
      "step": 1274
    },
    {
      "epoch": 0.03719586907054087,
      "grad_norm": 1.0625759772365957,
      "learning_rate": 9.998649379805044e-06,
      "loss": 0.2227,
      "step": 1275
    },
    {
      "epoch": 0.03722504230118443,
      "grad_norm": 0.8965303127918206,
      "learning_rate": 9.998638377334076e-06,
      "loss": 0.21,
      "step": 1276
    },
    {
      "epoch": 0.03725421553182799,
      "grad_norm": 0.9839152452283733,
      "learning_rate": 9.99862733023646e-06,
      "loss": 0.2004,
      "step": 1277
    },
    {
      "epoch": 0.03728338876247156,
      "grad_norm": 0.9071206977228208,
      "learning_rate": 9.998616238512292e-06,
      "loss": 0.2256,
      "step": 1278
    },
    {
      "epoch": 0.03731256199311512,
      "grad_norm": 1.1487114913959353,
      "learning_rate": 9.998605102161672e-06,
      "loss": 0.198,
      "step": 1279
    },
    {
      "epoch": 0.03734173522375868,
      "grad_norm": 1.0496812943687999,
      "learning_rate": 9.998593921184699e-06,
      "loss": 0.2025,
      "step": 1280
    },
    {
      "epoch": 0.03737090845440224,
      "grad_norm": 1.0971957882567478,
      "learning_rate": 9.998582695581471e-06,
      "loss": 0.2136,
      "step": 1281
    },
    {
      "epoch": 0.0374000816850458,
      "grad_norm": 1.1588128358759724,
      "learning_rate": 9.99857142535209e-06,
      "loss": 0.2007,
      "step": 1282
    },
    {
      "epoch": 0.03742925491568937,
      "grad_norm": 0.9426336682930098,
      "learning_rate": 9.998560110496658e-06,
      "loss": 0.2297,
      "step": 1283
    },
    {
      "epoch": 0.03745842814633293,
      "grad_norm": 1.10538456148938,
      "learning_rate": 9.998548751015275e-06,
      "loss": 0.2436,
      "step": 1284
    },
    {
      "epoch": 0.03748760137697649,
      "grad_norm": 0.9765259385595629,
      "learning_rate": 9.998537346908041e-06,
      "loss": 0.2116,
      "step": 1285
    },
    {
      "epoch": 0.03751677460762005,
      "grad_norm": 1.0595574926908682,
      "learning_rate": 9.99852589817506e-06,
      "loss": 0.2166,
      "step": 1286
    },
    {
      "epoch": 0.03754594783826361,
      "grad_norm": 1.169670012152128,
      "learning_rate": 9.99851440481643e-06,
      "loss": 0.2492,
      "step": 1287
    },
    {
      "epoch": 0.03757512106890717,
      "grad_norm": 0.952212186128128,
      "learning_rate": 9.99850286683226e-06,
      "loss": 0.2302,
      "step": 1288
    },
    {
      "epoch": 0.037604294299550735,
      "grad_norm": 1.1727912344580984,
      "learning_rate": 9.998491284222647e-06,
      "loss": 0.2276,
      "step": 1289
    },
    {
      "epoch": 0.037633467530194295,
      "grad_norm": 1.0989007437200973,
      "learning_rate": 9.998479656987699e-06,
      "loss": 0.2535,
      "step": 1290
    },
    {
      "epoch": 0.037662640760837855,
      "grad_norm": 1.0529017059038746,
      "learning_rate": 9.998467985127518e-06,
      "loss": 0.2388,
      "step": 1291
    },
    {
      "epoch": 0.037691813991481415,
      "grad_norm": 1.097426098611742,
      "learning_rate": 9.998456268642207e-06,
      "loss": 0.2541,
      "step": 1292
    },
    {
      "epoch": 0.037720987222124976,
      "grad_norm": 0.9185947687485129,
      "learning_rate": 9.998444507531872e-06,
      "loss": 0.225,
      "step": 1293
    },
    {
      "epoch": 0.03775016045276854,
      "grad_norm": 0.9874842534819169,
      "learning_rate": 9.998432701796617e-06,
      "loss": 0.2301,
      "step": 1294
    },
    {
      "epoch": 0.0377793336834121,
      "grad_norm": 0.987295071667692,
      "learning_rate": 9.99842085143655e-06,
      "loss": 0.2116,
      "step": 1295
    },
    {
      "epoch": 0.03780850691405566,
      "grad_norm": 0.9891448350948405,
      "learning_rate": 9.998408956451773e-06,
      "loss": 0.22,
      "step": 1296
    },
    {
      "epoch": 0.03783768014469922,
      "grad_norm": 0.8364712310069523,
      "learning_rate": 9.998397016842394e-06,
      "loss": 0.1882,
      "step": 1297
    },
    {
      "epoch": 0.037866853375342784,
      "grad_norm": 1.0465819091625597,
      "learning_rate": 9.99838503260852e-06,
      "loss": 0.2097,
      "step": 1298
    },
    {
      "epoch": 0.037896026605986344,
      "grad_norm": 1.0831275110704366,
      "learning_rate": 9.998373003750259e-06,
      "loss": 0.2135,
      "step": 1299
    },
    {
      "epoch": 0.03792519983662991,
      "grad_norm": 1.0699745458763916,
      "learning_rate": 9.998360930267715e-06,
      "loss": 0.2468,
      "step": 1300
    },
    {
      "epoch": 0.03795437306727347,
      "grad_norm": 0.9599499742993474,
      "learning_rate": 9.998348812160999e-06,
      "loss": 0.2259,
      "step": 1301
    },
    {
      "epoch": 0.03798354629791703,
      "grad_norm": 1.0159198034611567,
      "learning_rate": 9.998336649430217e-06,
      "loss": 0.219,
      "step": 1302
    },
    {
      "epoch": 0.03801271952856059,
      "grad_norm": 0.9105327449863514,
      "learning_rate": 9.99832444207548e-06,
      "loss": 0.1939,
      "step": 1303
    },
    {
      "epoch": 0.03804189275920415,
      "grad_norm": 1.0172706973557566,
      "learning_rate": 9.998312190096896e-06,
      "loss": 0.212,
      "step": 1304
    },
    {
      "epoch": 0.03807106598984772,
      "grad_norm": 1.1388665063274868,
      "learning_rate": 9.998299893494572e-06,
      "loss": 0.1974,
      "step": 1305
    },
    {
      "epoch": 0.03810023922049128,
      "grad_norm": 0.9868309224658413,
      "learning_rate": 9.99828755226862e-06,
      "loss": 0.2216,
      "step": 1306
    },
    {
      "epoch": 0.03812941245113484,
      "grad_norm": 1.2572752732793357,
      "learning_rate": 9.998275166419152e-06,
      "loss": 0.2226,
      "step": 1307
    },
    {
      "epoch": 0.0381585856817784,
      "grad_norm": 1.1406110116712587,
      "learning_rate": 9.998262735946274e-06,
      "loss": 0.2108,
      "step": 1308
    },
    {
      "epoch": 0.03818775891242196,
      "grad_norm": 0.8878135344973613,
      "learning_rate": 9.9982502608501e-06,
      "loss": 0.2302,
      "step": 1309
    },
    {
      "epoch": 0.03821693214306552,
      "grad_norm": 1.0201696690286826,
      "learning_rate": 9.998237741130742e-06,
      "loss": 0.2345,
      "step": 1310
    },
    {
      "epoch": 0.03824610537370909,
      "grad_norm": 0.9213405910768891,
      "learning_rate": 9.998225176788309e-06,
      "loss": 0.2268,
      "step": 1311
    },
    {
      "epoch": 0.03827527860435265,
      "grad_norm": 0.9604706367061303,
      "learning_rate": 9.998212567822917e-06,
      "loss": 0.1968,
      "step": 1312
    },
    {
      "epoch": 0.03830445183499621,
      "grad_norm": 0.8607403891983637,
      "learning_rate": 9.998199914234674e-06,
      "loss": 0.2221,
      "step": 1313
    },
    {
      "epoch": 0.03833362506563977,
      "grad_norm": 0.9066436101604387,
      "learning_rate": 9.998187216023696e-06,
      "loss": 0.228,
      "step": 1314
    },
    {
      "epoch": 0.03836279829628333,
      "grad_norm": 0.9785700401350489,
      "learning_rate": 9.998174473190098e-06,
      "loss": 0.2111,
      "step": 1315
    },
    {
      "epoch": 0.038391971526926895,
      "grad_norm": 0.9121512263543413,
      "learning_rate": 9.99816168573399e-06,
      "loss": 0.2275,
      "step": 1316
    },
    {
      "epoch": 0.038421144757570455,
      "grad_norm": 1.0388189498393994,
      "learning_rate": 9.998148853655486e-06,
      "loss": 0.2348,
      "step": 1317
    },
    {
      "epoch": 0.038450317988214015,
      "grad_norm": 1.266719867745567,
      "learning_rate": 9.998135976954704e-06,
      "loss": 0.2389,
      "step": 1318
    },
    {
      "epoch": 0.038479491218857576,
      "grad_norm": 1.1566629911987631,
      "learning_rate": 9.998123055631756e-06,
      "loss": 0.2198,
      "step": 1319
    },
    {
      "epoch": 0.038508664449501136,
      "grad_norm": 1.2143406841243476,
      "learning_rate": 9.99811008968676e-06,
      "loss": 0.2266,
      "step": 1320
    },
    {
      "epoch": 0.038537837680144696,
      "grad_norm": 0.9275245921485578,
      "learning_rate": 9.998097079119828e-06,
      "loss": 0.2109,
      "step": 1321
    },
    {
      "epoch": 0.03856701091078826,
      "grad_norm": 1.2690957652371693,
      "learning_rate": 9.998084023931081e-06,
      "loss": 0.2031,
      "step": 1322
    },
    {
      "epoch": 0.03859618414143182,
      "grad_norm": 1.2652695614831517,
      "learning_rate": 9.998070924120631e-06,
      "loss": 0.2205,
      "step": 1323
    },
    {
      "epoch": 0.03862535737207538,
      "grad_norm": 0.9942675570568783,
      "learning_rate": 9.998057779688597e-06,
      "loss": 0.2145,
      "step": 1324
    },
    {
      "epoch": 0.038654530602718944,
      "grad_norm": 1.1329949453374601,
      "learning_rate": 9.998044590635099e-06,
      "loss": 0.2325,
      "step": 1325
    },
    {
      "epoch": 0.038683703833362504,
      "grad_norm": 0.8833575532196354,
      "learning_rate": 9.99803135696025e-06,
      "loss": 0.2047,
      "step": 1326
    },
    {
      "epoch": 0.03871287706400607,
      "grad_norm": 0.9744575379511667,
      "learning_rate": 9.998018078664169e-06,
      "loss": 0.2213,
      "step": 1327
    },
    {
      "epoch": 0.03874205029464963,
      "grad_norm": 0.9418066454427708,
      "learning_rate": 9.998004755746977e-06,
      "loss": 0.235,
      "step": 1328
    },
    {
      "epoch": 0.03877122352529319,
      "grad_norm": 1.0147343297476814,
      "learning_rate": 9.997991388208791e-06,
      "loss": 0.233,
      "step": 1329
    },
    {
      "epoch": 0.03880039675593675,
      "grad_norm": 1.1558484528664879,
      "learning_rate": 9.997977976049731e-06,
      "loss": 0.2271,
      "step": 1330
    },
    {
      "epoch": 0.03882956998658031,
      "grad_norm": 1.201303925343188,
      "learning_rate": 9.997964519269918e-06,
      "loss": 0.2345,
      "step": 1331
    },
    {
      "epoch": 0.03885874321722387,
      "grad_norm": 0.9019540794269065,
      "learning_rate": 9.99795101786947e-06,
      "loss": 0.2185,
      "step": 1332
    },
    {
      "epoch": 0.03888791644786744,
      "grad_norm": 1.283035114543337,
      "learning_rate": 9.997937471848508e-06,
      "loss": 0.1996,
      "step": 1333
    },
    {
      "epoch": 0.038917089678511,
      "grad_norm": 0.9430874796476467,
      "learning_rate": 9.997923881207155e-06,
      "loss": 0.2305,
      "step": 1334
    },
    {
      "epoch": 0.03894626290915456,
      "grad_norm": 0.9882398860434454,
      "learning_rate": 9.99791024594553e-06,
      "loss": 0.2537,
      "step": 1335
    },
    {
      "epoch": 0.03897543613979812,
      "grad_norm": 1.1549123435876236,
      "learning_rate": 9.997896566063754e-06,
      "loss": 0.2189,
      "step": 1336
    },
    {
      "epoch": 0.03900460937044168,
      "grad_norm": 1.0921869118312562,
      "learning_rate": 9.997882841561952e-06,
      "loss": 0.2072,
      "step": 1337
    },
    {
      "epoch": 0.03903378260108525,
      "grad_norm": 1.0278245070875547,
      "learning_rate": 9.997869072440245e-06,
      "loss": 0.2191,
      "step": 1338
    },
    {
      "epoch": 0.03906295583172881,
      "grad_norm": 1.1846947002143793,
      "learning_rate": 9.997855258698756e-06,
      "loss": 0.2164,
      "step": 1339
    },
    {
      "epoch": 0.03909212906237237,
      "grad_norm": 0.9517210256139034,
      "learning_rate": 9.997841400337608e-06,
      "loss": 0.2211,
      "step": 1340
    },
    {
      "epoch": 0.03912130229301593,
      "grad_norm": 1.1482527040068051,
      "learning_rate": 9.997827497356925e-06,
      "loss": 0.1905,
      "step": 1341
    },
    {
      "epoch": 0.03915047552365949,
      "grad_norm": 1.0407439080463416,
      "learning_rate": 9.997813549756831e-06,
      "loss": 0.2213,
      "step": 1342
    },
    {
      "epoch": 0.03917964875430305,
      "grad_norm": 1.1499738624169245,
      "learning_rate": 9.99779955753745e-06,
      "loss": 0.2798,
      "step": 1343
    },
    {
      "epoch": 0.039208821984946615,
      "grad_norm": 1.2375675402663109,
      "learning_rate": 9.99778552069891e-06,
      "loss": 0.2103,
      "step": 1344
    },
    {
      "epoch": 0.039237995215590175,
      "grad_norm": 0.8669936389379261,
      "learning_rate": 9.997771439241332e-06,
      "loss": 0.2392,
      "step": 1345
    },
    {
      "epoch": 0.039267168446233736,
      "grad_norm": 1.052290039102351,
      "learning_rate": 9.997757313164843e-06,
      "loss": 0.2091,
      "step": 1346
    },
    {
      "epoch": 0.039296341676877296,
      "grad_norm": 0.9268609287273716,
      "learning_rate": 9.997743142469571e-06,
      "loss": 0.2134,
      "step": 1347
    },
    {
      "epoch": 0.039325514907520856,
      "grad_norm": 0.9549665618673209,
      "learning_rate": 9.997728927155643e-06,
      "loss": 0.2041,
      "step": 1348
    },
    {
      "epoch": 0.03935468813816442,
      "grad_norm": 0.9331634674606833,
      "learning_rate": 9.997714667223181e-06,
      "loss": 0.2016,
      "step": 1349
    },
    {
      "epoch": 0.03938386136880798,
      "grad_norm": 0.9818661097744485,
      "learning_rate": 9.997700362672317e-06,
      "loss": 0.2192,
      "step": 1350
    },
    {
      "epoch": 0.039413034599451544,
      "grad_norm": 0.8404467533545338,
      "learning_rate": 9.997686013503178e-06,
      "loss": 0.2017,
      "step": 1351
    },
    {
      "epoch": 0.039442207830095104,
      "grad_norm": 0.9627892193536619,
      "learning_rate": 9.997671619715889e-06,
      "loss": 0.2244,
      "step": 1352
    },
    {
      "epoch": 0.039471381060738664,
      "grad_norm": 0.9226865047277064,
      "learning_rate": 9.997657181310584e-06,
      "loss": 0.2002,
      "step": 1353
    },
    {
      "epoch": 0.03950055429138223,
      "grad_norm": 1.1284562198697294,
      "learning_rate": 9.997642698287386e-06,
      "loss": 0.2183,
      "step": 1354
    },
    {
      "epoch": 0.03952972752202579,
      "grad_norm": 1.0154493713003871,
      "learning_rate": 9.997628170646428e-06,
      "loss": 0.2203,
      "step": 1355
    },
    {
      "epoch": 0.03955890075266935,
      "grad_norm": 1.028409596537489,
      "learning_rate": 9.997613598387838e-06,
      "loss": 0.2226,
      "step": 1356
    },
    {
      "epoch": 0.03958807398331291,
      "grad_norm": 1.0342809832679385,
      "learning_rate": 9.997598981511749e-06,
      "loss": 0.2271,
      "step": 1357
    },
    {
      "epoch": 0.03961724721395647,
      "grad_norm": 0.8296214691141625,
      "learning_rate": 9.997584320018287e-06,
      "loss": 0.1995,
      "step": 1358
    },
    {
      "epoch": 0.03964642044460003,
      "grad_norm": 0.9319299517444279,
      "learning_rate": 9.997569613907587e-06,
      "loss": 0.2081,
      "step": 1359
    },
    {
      "epoch": 0.0396755936752436,
      "grad_norm": 0.8792592970600249,
      "learning_rate": 9.997554863179777e-06,
      "loss": 0.2124,
      "step": 1360
    },
    {
      "epoch": 0.03970476690588716,
      "grad_norm": 0.8704137211657997,
      "learning_rate": 9.997540067834991e-06,
      "loss": 0.224,
      "step": 1361
    },
    {
      "epoch": 0.03973394013653072,
      "grad_norm": 0.9876591870234631,
      "learning_rate": 9.997525227873361e-06,
      "loss": 0.2022,
      "step": 1362
    },
    {
      "epoch": 0.03976311336717428,
      "grad_norm": 1.1291734542403797,
      "learning_rate": 9.997510343295018e-06,
      "loss": 0.2039,
      "step": 1363
    },
    {
      "epoch": 0.03979228659781784,
      "grad_norm": 1.1268418016229587,
      "learning_rate": 9.997495414100095e-06,
      "loss": 0.23,
      "step": 1364
    },
    {
      "epoch": 0.03982145982846141,
      "grad_norm": 1.148988880184851,
      "learning_rate": 9.997480440288726e-06,
      "loss": 0.2298,
      "step": 1365
    },
    {
      "epoch": 0.03985063305910497,
      "grad_norm": 1.1247163410569034,
      "learning_rate": 9.997465421861046e-06,
      "loss": 0.2555,
      "step": 1366
    },
    {
      "epoch": 0.03987980628974853,
      "grad_norm": 0.9965754859389144,
      "learning_rate": 9.997450358817185e-06,
      "loss": 0.1948,
      "step": 1367
    },
    {
      "epoch": 0.03990897952039209,
      "grad_norm": 1.0375175847296314,
      "learning_rate": 9.997435251157284e-06,
      "loss": 0.2693,
      "step": 1368
    },
    {
      "epoch": 0.03993815275103565,
      "grad_norm": 1.023653601087876,
      "learning_rate": 9.99742009888147e-06,
      "loss": 0.2025,
      "step": 1369
    },
    {
      "epoch": 0.03996732598167921,
      "grad_norm": 0.9448813207728343,
      "learning_rate": 9.997404901989884e-06,
      "loss": 0.1867,
      "step": 1370
    },
    {
      "epoch": 0.039996499212322775,
      "grad_norm": 0.938275403909158,
      "learning_rate": 9.997389660482662e-06,
      "loss": 0.2416,
      "step": 1371
    },
    {
      "epoch": 0.040025672442966335,
      "grad_norm": 0.9685428975847087,
      "learning_rate": 9.997374374359935e-06,
      "loss": 0.2569,
      "step": 1372
    },
    {
      "epoch": 0.040054845673609896,
      "grad_norm": 1.0739192210213488,
      "learning_rate": 9.997359043621844e-06,
      "loss": 0.2268,
      "step": 1373
    },
    {
      "epoch": 0.040084018904253456,
      "grad_norm": 0.9738521939663268,
      "learning_rate": 9.997343668268525e-06,
      "loss": 0.218,
      "step": 1374
    },
    {
      "epoch": 0.040113192134897016,
      "grad_norm": 0.9592428474829809,
      "learning_rate": 9.997328248300114e-06,
      "loss": 0.2071,
      "step": 1375
    },
    {
      "epoch": 0.04014236536554058,
      "grad_norm": 0.8635790952625635,
      "learning_rate": 9.997312783716751e-06,
      "loss": 0.2207,
      "step": 1376
    },
    {
      "epoch": 0.04017153859618414,
      "grad_norm": 0.9335635120510127,
      "learning_rate": 9.997297274518569e-06,
      "loss": 0.2169,
      "step": 1377
    },
    {
      "epoch": 0.040200711826827704,
      "grad_norm": 1.1756898645285696,
      "learning_rate": 9.997281720705713e-06,
      "loss": 0.228,
      "step": 1378
    },
    {
      "epoch": 0.040229885057471264,
      "grad_norm": 1.0855032514936684,
      "learning_rate": 9.997266122278317e-06,
      "loss": 0.2033,
      "step": 1379
    },
    {
      "epoch": 0.040259058288114824,
      "grad_norm": 0.8673801952091827,
      "learning_rate": 9.997250479236522e-06,
      "loss": 0.2193,
      "step": 1380
    },
    {
      "epoch": 0.040288231518758384,
      "grad_norm": 1.044271829891321,
      "learning_rate": 9.99723479158047e-06,
      "loss": 0.232,
      "step": 1381
    },
    {
      "epoch": 0.04031740474940195,
      "grad_norm": 1.1137257096053048,
      "learning_rate": 9.997219059310296e-06,
      "loss": 0.2152,
      "step": 1382
    },
    {
      "epoch": 0.04034657798004551,
      "grad_norm": 0.8227482080417956,
      "learning_rate": 9.997203282426144e-06,
      "loss": 0.2103,
      "step": 1383
    },
    {
      "epoch": 0.04037575121068907,
      "grad_norm": 0.9498515007487641,
      "learning_rate": 9.997187460928155e-06,
      "loss": 0.2167,
      "step": 1384
    },
    {
      "epoch": 0.04040492444133263,
      "grad_norm": 1.1769274227042676,
      "learning_rate": 9.997171594816467e-06,
      "loss": 0.2538,
      "step": 1385
    },
    {
      "epoch": 0.04043409767197619,
      "grad_norm": 0.9315945779231783,
      "learning_rate": 9.997155684091225e-06,
      "loss": 0.2333,
      "step": 1386
    },
    {
      "epoch": 0.04046327090261976,
      "grad_norm": 0.9803684457551111,
      "learning_rate": 9.997139728752571e-06,
      "loss": 0.1961,
      "step": 1387
    },
    {
      "epoch": 0.04049244413326332,
      "grad_norm": 0.9885088854204722,
      "learning_rate": 9.997123728800647e-06,
      "loss": 0.2194,
      "step": 1388
    },
    {
      "epoch": 0.04052161736390688,
      "grad_norm": 1.0034427516436237,
      "learning_rate": 9.997107684235592e-06,
      "loss": 0.2226,
      "step": 1389
    },
    {
      "epoch": 0.04055079059455044,
      "grad_norm": 0.9975148044713197,
      "learning_rate": 9.997091595057556e-06,
      "loss": 0.2282,
      "step": 1390
    },
    {
      "epoch": 0.040579963825194,
      "grad_norm": 1.2398854129625423,
      "learning_rate": 9.997075461266677e-06,
      "loss": 0.1854,
      "step": 1391
    },
    {
      "epoch": 0.04060913705583756,
      "grad_norm": 0.9221307377309209,
      "learning_rate": 9.997059282863103e-06,
      "loss": 0.2222,
      "step": 1392
    },
    {
      "epoch": 0.04063831028648113,
      "grad_norm": 1.4777014478559738,
      "learning_rate": 9.997043059846974e-06,
      "loss": 0.2392,
      "step": 1393
    },
    {
      "epoch": 0.04066748351712469,
      "grad_norm": 0.9999336775425626,
      "learning_rate": 9.997026792218439e-06,
      "loss": 0.2217,
      "step": 1394
    },
    {
      "epoch": 0.04069665674776825,
      "grad_norm": 1.0057735011149043,
      "learning_rate": 9.99701047997764e-06,
      "loss": 0.2011,
      "step": 1395
    },
    {
      "epoch": 0.04072582997841181,
      "grad_norm": 1.0771243992386983,
      "learning_rate": 9.996994123124727e-06,
      "loss": 0.2025,
      "step": 1396
    },
    {
      "epoch": 0.04075500320905537,
      "grad_norm": 1.1764991368919115,
      "learning_rate": 9.996977721659841e-06,
      "loss": 0.2158,
      "step": 1397
    },
    {
      "epoch": 0.040784176439698935,
      "grad_norm": 1.2576445137719265,
      "learning_rate": 9.996961275583133e-06,
      "loss": 0.2273,
      "step": 1398
    },
    {
      "epoch": 0.040813349670342496,
      "grad_norm": 0.9886776760583021,
      "learning_rate": 9.996944784894747e-06,
      "loss": 0.2327,
      "step": 1399
    },
    {
      "epoch": 0.040842522900986056,
      "grad_norm": 0.9695993546130057,
      "learning_rate": 9.99692824959483e-06,
      "loss": 0.2108,
      "step": 1400
    },
    {
      "epoch": 0.040871696131629616,
      "grad_norm": 0.9952565365059857,
      "learning_rate": 9.99691166968353e-06,
      "loss": 0.1976,
      "step": 1401
    },
    {
      "epoch": 0.040900869362273176,
      "grad_norm": 0.9257956453901235,
      "learning_rate": 9.996895045160997e-06,
      "loss": 0.2132,
      "step": 1402
    },
    {
      "epoch": 0.040930042592916736,
      "grad_norm": 0.9610088002531586,
      "learning_rate": 9.996878376027377e-06,
      "loss": 0.2241,
      "step": 1403
    },
    {
      "epoch": 0.040959215823560303,
      "grad_norm": 1.019045509669626,
      "learning_rate": 9.99686166228282e-06,
      "loss": 0.2383,
      "step": 1404
    },
    {
      "epoch": 0.040988389054203864,
      "grad_norm": 0.9650270584136241,
      "learning_rate": 9.996844903927475e-06,
      "loss": 0.198,
      "step": 1405
    },
    {
      "epoch": 0.041017562284847424,
      "grad_norm": 1.0796070829693196,
      "learning_rate": 9.996828100961491e-06,
      "loss": 0.2085,
      "step": 1406
    },
    {
      "epoch": 0.041046735515490984,
      "grad_norm": 0.9430226186570874,
      "learning_rate": 9.99681125338502e-06,
      "loss": 0.2449,
      "step": 1407
    },
    {
      "epoch": 0.041075908746134544,
      "grad_norm": 1.1829258383954429,
      "learning_rate": 9.99679436119821e-06,
      "loss": 0.2313,
      "step": 1408
    },
    {
      "epoch": 0.04110508197677811,
      "grad_norm": 0.9457366101358423,
      "learning_rate": 9.996777424401212e-06,
      "loss": 0.2551,
      "step": 1409
    },
    {
      "epoch": 0.04113425520742167,
      "grad_norm": 1.1009213065778383,
      "learning_rate": 9.996760442994177e-06,
      "loss": 0.2309,
      "step": 1410
    },
    {
      "epoch": 0.04116342843806523,
      "grad_norm": 0.9704531907078432,
      "learning_rate": 9.996743416977262e-06,
      "loss": 0.2367,
      "step": 1411
    },
    {
      "epoch": 0.04119260166870879,
      "grad_norm": 1.0429666547715157,
      "learning_rate": 9.99672634635061e-06,
      "loss": 0.2295,
      "step": 1412
    },
    {
      "epoch": 0.04122177489935235,
      "grad_norm": 1.0747472073388955,
      "learning_rate": 9.996709231114381e-06,
      "loss": 0.2381,
      "step": 1413
    },
    {
      "epoch": 0.04125094812999591,
      "grad_norm": 1.1662343939472601,
      "learning_rate": 9.996692071268724e-06,
      "loss": 0.2277,
      "step": 1414
    },
    {
      "epoch": 0.04128012136063948,
      "grad_norm": 0.9602644067615418,
      "learning_rate": 9.996674866813792e-06,
      "loss": 0.2253,
      "step": 1415
    },
    {
      "epoch": 0.04130929459128304,
      "grad_norm": 1.3243000135828542,
      "learning_rate": 9.99665761774974e-06,
      "loss": 0.2473,
      "step": 1416
    },
    {
      "epoch": 0.0413384678219266,
      "grad_norm": 0.9815509736248158,
      "learning_rate": 9.996640324076721e-06,
      "loss": 0.2018,
      "step": 1417
    },
    {
      "epoch": 0.04136764105257016,
      "grad_norm": 1.0714111788074172,
      "learning_rate": 9.996622985794891e-06,
      "loss": 0.1946,
      "step": 1418
    },
    {
      "epoch": 0.04139681428321372,
      "grad_norm": 0.9998604307350838,
      "learning_rate": 9.996605602904403e-06,
      "loss": 0.2087,
      "step": 1419
    },
    {
      "epoch": 0.04142598751385729,
      "grad_norm": 1.072961976478066,
      "learning_rate": 9.996588175405415e-06,
      "loss": 0.2381,
      "step": 1420
    },
    {
      "epoch": 0.04145516074450085,
      "grad_norm": 0.8373350153525165,
      "learning_rate": 9.996570703298078e-06,
      "loss": 0.1966,
      "step": 1421
    },
    {
      "epoch": 0.04148433397514441,
      "grad_norm": 1.089742446453986,
      "learning_rate": 9.996553186582552e-06,
      "loss": 0.2478,
      "step": 1422
    },
    {
      "epoch": 0.04151350720578797,
      "grad_norm": 0.9748766263828204,
      "learning_rate": 9.996535625258992e-06,
      "loss": 0.2123,
      "step": 1423
    },
    {
      "epoch": 0.04154268043643153,
      "grad_norm": 1.2175897141540069,
      "learning_rate": 9.996518019327555e-06,
      "loss": 0.2195,
      "step": 1424
    },
    {
      "epoch": 0.04157185366707509,
      "grad_norm": 1.0743819665833776,
      "learning_rate": 9.996500368788396e-06,
      "loss": 0.2273,
      "step": 1425
    },
    {
      "epoch": 0.041601026897718656,
      "grad_norm": 1.03651420740801,
      "learning_rate": 9.996482673641675e-06,
      "loss": 0.2063,
      "step": 1426
    },
    {
      "epoch": 0.041630200128362216,
      "grad_norm": 1.3119232498834812,
      "learning_rate": 9.996464933887551e-06,
      "loss": 0.2363,
      "step": 1427
    },
    {
      "epoch": 0.041659373359005776,
      "grad_norm": 0.9325800581765833,
      "learning_rate": 9.996447149526179e-06,
      "loss": 0.2264,
      "step": 1428
    },
    {
      "epoch": 0.041688546589649336,
      "grad_norm": 0.9127887289285253,
      "learning_rate": 9.99642932055772e-06,
      "loss": 0.2323,
      "step": 1429
    },
    {
      "epoch": 0.041717719820292896,
      "grad_norm": 1.1171056920038365,
      "learning_rate": 9.996411446982335e-06,
      "loss": 0.217,
      "step": 1430
    },
    {
      "epoch": 0.041746893050936464,
      "grad_norm": 0.9171130115726579,
      "learning_rate": 9.99639352880018e-06,
      "loss": 0.2261,
      "step": 1431
    },
    {
      "epoch": 0.041776066281580024,
      "grad_norm": 0.8525848364637603,
      "learning_rate": 9.996375566011415e-06,
      "loss": 0.2296,
      "step": 1432
    },
    {
      "epoch": 0.041805239512223584,
      "grad_norm": 0.8924284655904076,
      "learning_rate": 9.996357558616201e-06,
      "loss": 0.2172,
      "step": 1433
    },
    {
      "epoch": 0.041834412742867144,
      "grad_norm": 0.7701805957891904,
      "learning_rate": 9.996339506614703e-06,
      "loss": 0.2171,
      "step": 1434
    },
    {
      "epoch": 0.041863585973510704,
      "grad_norm": 0.9346432211708176,
      "learning_rate": 9.996321410007076e-06,
      "loss": 0.2163,
      "step": 1435
    },
    {
      "epoch": 0.04189275920415427,
      "grad_norm": 1.2609256382114076,
      "learning_rate": 9.996303268793484e-06,
      "loss": 0.2144,
      "step": 1436
    },
    {
      "epoch": 0.04192193243479783,
      "grad_norm": 0.9450348609382418,
      "learning_rate": 9.99628508297409e-06,
      "loss": 0.2139,
      "step": 1437
    },
    {
      "epoch": 0.04195110566544139,
      "grad_norm": 1.0011505709567554,
      "learning_rate": 9.996266852549056e-06,
      "loss": 0.2052,
      "step": 1438
    },
    {
      "epoch": 0.04198027889608495,
      "grad_norm": 0.9925306010267743,
      "learning_rate": 9.996248577518543e-06,
      "loss": 0.2141,
      "step": 1439
    },
    {
      "epoch": 0.04200945212672851,
      "grad_norm": 1.0884507101869654,
      "learning_rate": 9.996230257882716e-06,
      "loss": 0.2218,
      "step": 1440
    },
    {
      "epoch": 0.04203862535737207,
      "grad_norm": 1.141353351760358,
      "learning_rate": 9.99621189364174e-06,
      "loss": 0.2378,
      "step": 1441
    },
    {
      "epoch": 0.04206779858801564,
      "grad_norm": 1.0686080529219866,
      "learning_rate": 9.996193484795774e-06,
      "loss": 0.2053,
      "step": 1442
    },
    {
      "epoch": 0.0420969718186592,
      "grad_norm": 0.9193753146267514,
      "learning_rate": 9.996175031344985e-06,
      "loss": 0.2205,
      "step": 1443
    },
    {
      "epoch": 0.04212614504930276,
      "grad_norm": 1.2292913522378575,
      "learning_rate": 9.99615653328954e-06,
      "loss": 0.1881,
      "step": 1444
    },
    {
      "epoch": 0.04215531827994632,
      "grad_norm": 0.9432107283405086,
      "learning_rate": 9.996137990629601e-06,
      "loss": 0.2326,
      "step": 1445
    },
    {
      "epoch": 0.04218449151058988,
      "grad_norm": 1.0168940745265806,
      "learning_rate": 9.996119403365336e-06,
      "loss": 0.232,
      "step": 1446
    },
    {
      "epoch": 0.04221366474123345,
      "grad_norm": 0.766663241875437,
      "learning_rate": 9.996100771496908e-06,
      "loss": 0.2081,
      "step": 1447
    },
    {
      "epoch": 0.04224283797187701,
      "grad_norm": 0.9448176605055629,
      "learning_rate": 9.996082095024486e-06,
      "loss": 0.2257,
      "step": 1448
    },
    {
      "epoch": 0.04227201120252057,
      "grad_norm": 0.9189474902407835,
      "learning_rate": 9.996063373948236e-06,
      "loss": 0.2056,
      "step": 1449
    },
    {
      "epoch": 0.04230118443316413,
      "grad_norm": 0.9430389926796413,
      "learning_rate": 9.996044608268323e-06,
      "loss": 0.2285,
      "step": 1450
    },
    {
      "epoch": 0.04233035766380769,
      "grad_norm": 0.8736377488994879,
      "learning_rate": 9.996025797984917e-06,
      "loss": 0.2005,
      "step": 1451
    },
    {
      "epoch": 0.04235953089445125,
      "grad_norm": 0.9798793350912578,
      "learning_rate": 9.996006943098186e-06,
      "loss": 0.2003,
      "step": 1452
    },
    {
      "epoch": 0.042388704125094816,
      "grad_norm": 0.9939519675283078,
      "learning_rate": 9.995988043608298e-06,
      "loss": 0.2413,
      "step": 1453
    },
    {
      "epoch": 0.042417877355738376,
      "grad_norm": 1.1457999732943973,
      "learning_rate": 9.995969099515422e-06,
      "loss": 0.2392,
      "step": 1454
    },
    {
      "epoch": 0.042447050586381936,
      "grad_norm": 0.9996397635655915,
      "learning_rate": 9.995950110819725e-06,
      "loss": 0.2293,
      "step": 1455
    },
    {
      "epoch": 0.042476223817025496,
      "grad_norm": 1.0414568624002896,
      "learning_rate": 9.995931077521377e-06,
      "loss": 0.2315,
      "step": 1456
    },
    {
      "epoch": 0.042505397047669057,
      "grad_norm": 1.02445309232173,
      "learning_rate": 9.995911999620551e-06,
      "loss": 0.2548,
      "step": 1457
    },
    {
      "epoch": 0.042534570278312624,
      "grad_norm": 0.9462250634681262,
      "learning_rate": 9.995892877117415e-06,
      "loss": 0.2174,
      "step": 1458
    },
    {
      "epoch": 0.042563743508956184,
      "grad_norm": 1.3235852975520785,
      "learning_rate": 9.995873710012139e-06,
      "loss": 0.2447,
      "step": 1459
    },
    {
      "epoch": 0.042592916739599744,
      "grad_norm": 1.1324259520052409,
      "learning_rate": 9.995854498304896e-06,
      "loss": 0.2065,
      "step": 1460
    },
    {
      "epoch": 0.042622089970243304,
      "grad_norm": 0.9157819995964086,
      "learning_rate": 9.995835241995857e-06,
      "loss": 0.2081,
      "step": 1461
    },
    {
      "epoch": 0.042651263200886864,
      "grad_norm": 0.8460837172718613,
      "learning_rate": 9.995815941085193e-06,
      "loss": 0.2262,
      "step": 1462
    },
    {
      "epoch": 0.042680436431530425,
      "grad_norm": 0.8605506763271699,
      "learning_rate": 9.995796595573078e-06,
      "loss": 0.1842,
      "step": 1463
    },
    {
      "epoch": 0.04270960966217399,
      "grad_norm": 0.9943616633690506,
      "learning_rate": 9.995777205459682e-06,
      "loss": 0.2444,
      "step": 1464
    },
    {
      "epoch": 0.04273878289281755,
      "grad_norm": 0.8662157243640668,
      "learning_rate": 9.99575777074518e-06,
      "loss": 0.1969,
      "step": 1465
    },
    {
      "epoch": 0.04276795612346111,
      "grad_norm": 0.9849215744267956,
      "learning_rate": 9.995738291429745e-06,
      "loss": 0.1947,
      "step": 1466
    },
    {
      "epoch": 0.04279712935410467,
      "grad_norm": 0.9378503402257837,
      "learning_rate": 9.995718767513551e-06,
      "loss": 0.1978,
      "step": 1467
    },
    {
      "epoch": 0.04282630258474823,
      "grad_norm": 0.926220287544285,
      "learning_rate": 9.995699198996773e-06,
      "loss": 0.2013,
      "step": 1468
    },
    {
      "epoch": 0.0428554758153918,
      "grad_norm": 0.9176908859449656,
      "learning_rate": 9.995679585879585e-06,
      "loss": 0.1947,
      "step": 1469
    },
    {
      "epoch": 0.04288464904603536,
      "grad_norm": 1.0008999967219585,
      "learning_rate": 9.995659928162164e-06,
      "loss": 0.2238,
      "step": 1470
    },
    {
      "epoch": 0.04291382227667892,
      "grad_norm": 1.0602822912657908,
      "learning_rate": 9.995640225844682e-06,
      "loss": 0.2221,
      "step": 1471
    },
    {
      "epoch": 0.04294299550732248,
      "grad_norm": 0.9648436997158979,
      "learning_rate": 9.995620478927315e-06,
      "loss": 0.2049,
      "step": 1472
    },
    {
      "epoch": 0.04297216873796604,
      "grad_norm": 1.0664357527011148,
      "learning_rate": 9.995600687410244e-06,
      "loss": 0.2048,
      "step": 1473
    },
    {
      "epoch": 0.0430013419686096,
      "grad_norm": 0.9574922828372119,
      "learning_rate": 9.99558085129364e-06,
      "loss": 0.2222,
      "step": 1474
    },
    {
      "epoch": 0.04303051519925317,
      "grad_norm": 1.1493992224879264,
      "learning_rate": 9.995560970577685e-06,
      "loss": 0.2342,
      "step": 1475
    },
    {
      "epoch": 0.04305968842989673,
      "grad_norm": 1.2327635581648775,
      "learning_rate": 9.995541045262554e-06,
      "loss": 0.2257,
      "step": 1476
    },
    {
      "epoch": 0.04308886166054029,
      "grad_norm": 0.8601884200626423,
      "learning_rate": 9.995521075348423e-06,
      "loss": 0.1834,
      "step": 1477
    },
    {
      "epoch": 0.04311803489118385,
      "grad_norm": 1.0366460390906858,
      "learning_rate": 9.995501060835474e-06,
      "loss": 0.221,
      "step": 1478
    },
    {
      "epoch": 0.04314720812182741,
      "grad_norm": 1.0994996940409911,
      "learning_rate": 9.995481001723884e-06,
      "loss": 0.2226,
      "step": 1479
    },
    {
      "epoch": 0.043176381352470976,
      "grad_norm": 0.8794707319860001,
      "learning_rate": 9.995460898013831e-06,
      "loss": 0.2102,
      "step": 1480
    },
    {
      "epoch": 0.043205554583114536,
      "grad_norm": 1.088000625757929,
      "learning_rate": 9.995440749705496e-06,
      "loss": 0.2182,
      "step": 1481
    },
    {
      "epoch": 0.043234727813758096,
      "grad_norm": 0.9909439668431169,
      "learning_rate": 9.99542055679906e-06,
      "loss": 0.2199,
      "step": 1482
    },
    {
      "epoch": 0.043263901044401656,
      "grad_norm": 0.9167130629482909,
      "learning_rate": 9.9954003192947e-06,
      "loss": 0.2044,
      "step": 1483
    },
    {
      "epoch": 0.04329307427504522,
      "grad_norm": 0.8730508094905478,
      "learning_rate": 9.9953800371926e-06,
      "loss": 0.2026,
      "step": 1484
    },
    {
      "epoch": 0.04332224750568878,
      "grad_norm": 0.973396668159621,
      "learning_rate": 9.995359710492937e-06,
      "loss": 0.2085,
      "step": 1485
    },
    {
      "epoch": 0.043351420736332344,
      "grad_norm": 0.9718573235784893,
      "learning_rate": 9.995339339195898e-06,
      "loss": 0.2087,
      "step": 1486
    },
    {
      "epoch": 0.043380593966975904,
      "grad_norm": 0.9224864497323803,
      "learning_rate": 9.995318923301659e-06,
      "loss": 0.1913,
      "step": 1487
    },
    {
      "epoch": 0.043409767197619464,
      "grad_norm": 1.0016274912019036,
      "learning_rate": 9.995298462810407e-06,
      "loss": 0.2164,
      "step": 1488
    },
    {
      "epoch": 0.043438940428263025,
      "grad_norm": 1.1775308618689844,
      "learning_rate": 9.995277957722323e-06,
      "loss": 0.2464,
      "step": 1489
    },
    {
      "epoch": 0.043468113658906585,
      "grad_norm": 1.010011497366543,
      "learning_rate": 9.995257408037588e-06,
      "loss": 0.1977,
      "step": 1490
    },
    {
      "epoch": 0.04349728688955015,
      "grad_norm": 0.8892193731855427,
      "learning_rate": 9.995236813756388e-06,
      "loss": 0.1913,
      "step": 1491
    },
    {
      "epoch": 0.04352646012019371,
      "grad_norm": 1.0432195456684967,
      "learning_rate": 9.995216174878908e-06,
      "loss": 0.2269,
      "step": 1492
    },
    {
      "epoch": 0.04355563335083727,
      "grad_norm": 1.1672149910620269,
      "learning_rate": 9.99519549140533e-06,
      "loss": 0.2365,
      "step": 1493
    },
    {
      "epoch": 0.04358480658148083,
      "grad_norm": 0.9795863159696633,
      "learning_rate": 9.995174763335837e-06,
      "loss": 0.2178,
      "step": 1494
    },
    {
      "epoch": 0.04361397981212439,
      "grad_norm": 1.2355885439701726,
      "learning_rate": 9.995153990670618e-06,
      "loss": 0.2278,
      "step": 1495
    },
    {
      "epoch": 0.04364315304276795,
      "grad_norm": 1.0361904879259465,
      "learning_rate": 9.995133173409856e-06,
      "loss": 0.2164,
      "step": 1496
    },
    {
      "epoch": 0.04367232627341152,
      "grad_norm": 0.9175487337753576,
      "learning_rate": 9.995112311553736e-06,
      "loss": 0.2017,
      "step": 1497
    },
    {
      "epoch": 0.04370149950405508,
      "grad_norm": 0.9087516508905001,
      "learning_rate": 9.995091405102449e-06,
      "loss": 0.2148,
      "step": 1498
    },
    {
      "epoch": 0.04373067273469864,
      "grad_norm": 1.0065575322472309,
      "learning_rate": 9.995070454056175e-06,
      "loss": 0.2172,
      "step": 1499
    },
    {
      "epoch": 0.0437598459653422,
      "grad_norm": 1.4500252208703486,
      "learning_rate": 9.995049458415108e-06,
      "loss": 0.2393,
      "step": 1500
    },
    {
      "epoch": 0.04378901919598576,
      "grad_norm": 1.3234772355923852,
      "learning_rate": 9.995028418179429e-06,
      "loss": 0.2184,
      "step": 1501
    },
    {
      "epoch": 0.04381819242662933,
      "grad_norm": 0.9495426174390178,
      "learning_rate": 9.99500733334933e-06,
      "loss": 0.187,
      "step": 1502
    },
    {
      "epoch": 0.04384736565727289,
      "grad_norm": 1.2749208543526,
      "learning_rate": 9.994986203924996e-06,
      "loss": 0.2038,
      "step": 1503
    },
    {
      "epoch": 0.04387653888791645,
      "grad_norm": 1.2059401236051581,
      "learning_rate": 9.99496502990662e-06,
      "loss": 0.2012,
      "step": 1504
    },
    {
      "epoch": 0.04390571211856001,
      "grad_norm": 1.3535856574377956,
      "learning_rate": 9.994943811294387e-06,
      "loss": 0.2214,
      "step": 1505
    },
    {
      "epoch": 0.04393488534920357,
      "grad_norm": 1.01008938424475,
      "learning_rate": 9.994922548088488e-06,
      "loss": 0.2145,
      "step": 1506
    },
    {
      "epoch": 0.04396405857984713,
      "grad_norm": 1.106945856950492,
      "learning_rate": 9.994901240289114e-06,
      "loss": 0.2117,
      "step": 1507
    },
    {
      "epoch": 0.043993231810490696,
      "grad_norm": 1.0718073273861746,
      "learning_rate": 9.994879887896453e-06,
      "loss": 0.2093,
      "step": 1508
    },
    {
      "epoch": 0.044022405041134256,
      "grad_norm": 0.8134033627259928,
      "learning_rate": 9.994858490910699e-06,
      "loss": 0.1983,
      "step": 1509
    },
    {
      "epoch": 0.044051578271777816,
      "grad_norm": 1.1558254681157425,
      "learning_rate": 9.994837049332038e-06,
      "loss": 0.2175,
      "step": 1510
    },
    {
      "epoch": 0.04408075150242138,
      "grad_norm": 1.0413670712359624,
      "learning_rate": 9.994815563160665e-06,
      "loss": 0.2267,
      "step": 1511
    },
    {
      "epoch": 0.04410992473306494,
      "grad_norm": 0.8713546158791305,
      "learning_rate": 9.994794032396772e-06,
      "loss": 0.2053,
      "step": 1512
    },
    {
      "epoch": 0.044139097963708504,
      "grad_norm": 0.8762434296611706,
      "learning_rate": 9.99477245704055e-06,
      "loss": 0.2149,
      "step": 1513
    },
    {
      "epoch": 0.044168271194352064,
      "grad_norm": 1.140460284007135,
      "learning_rate": 9.99475083709219e-06,
      "loss": 0.2151,
      "step": 1514
    },
    {
      "epoch": 0.044197444424995624,
      "grad_norm": 0.9512975940821824,
      "learning_rate": 9.994729172551889e-06,
      "loss": 0.2065,
      "step": 1515
    },
    {
      "epoch": 0.044226617655639185,
      "grad_norm": 1.045748755414856,
      "learning_rate": 9.994707463419839e-06,
      "loss": 0.2101,
      "step": 1516
    },
    {
      "epoch": 0.044255790886282745,
      "grad_norm": 1.155441077931251,
      "learning_rate": 9.99468570969623e-06,
      "loss": 0.2313,
      "step": 1517
    },
    {
      "epoch": 0.044284964116926305,
      "grad_norm": 0.8182222302235104,
      "learning_rate": 9.99466391138126e-06,
      "loss": 0.1956,
      "step": 1518
    },
    {
      "epoch": 0.04431413734756987,
      "grad_norm": 0.985013760566948,
      "learning_rate": 9.994642068475127e-06,
      "loss": 0.1939,
      "step": 1519
    },
    {
      "epoch": 0.04434331057821343,
      "grad_norm": 1.1592479237406494,
      "learning_rate": 9.994620180978019e-06,
      "loss": 0.2333,
      "step": 1520
    },
    {
      "epoch": 0.04437248380885699,
      "grad_norm": 0.9415477013451679,
      "learning_rate": 9.994598248890132e-06,
      "loss": 0.2004,
      "step": 1521
    },
    {
      "epoch": 0.04440165703950055,
      "grad_norm": 0.887643002790456,
      "learning_rate": 9.994576272211666e-06,
      "loss": 0.2114,
      "step": 1522
    },
    {
      "epoch": 0.04443083027014411,
      "grad_norm": 1.0270416990258484,
      "learning_rate": 9.994554250942818e-06,
      "loss": 0.2181,
      "step": 1523
    },
    {
      "epoch": 0.04446000350078768,
      "grad_norm": 0.9805689129660959,
      "learning_rate": 9.994532185083778e-06,
      "loss": 0.2171,
      "step": 1524
    },
    {
      "epoch": 0.04448917673143124,
      "grad_norm": 1.0786672431143527,
      "learning_rate": 9.99451007463475e-06,
      "loss": 0.2019,
      "step": 1525
    },
    {
      "epoch": 0.0445183499620748,
      "grad_norm": 0.9372296678013002,
      "learning_rate": 9.994487919595925e-06,
      "loss": 0.2117,
      "step": 1526
    },
    {
      "epoch": 0.04454752319271836,
      "grad_norm": 1.0254043986779158,
      "learning_rate": 9.994465719967507e-06,
      "loss": 0.2313,
      "step": 1527
    },
    {
      "epoch": 0.04457669642336192,
      "grad_norm": 1.1989115949156917,
      "learning_rate": 9.994443475749692e-06,
      "loss": 0.1993,
      "step": 1528
    },
    {
      "epoch": 0.04460586965400549,
      "grad_norm": 0.9809271376858866,
      "learning_rate": 9.994421186942675e-06,
      "loss": 0.2251,
      "step": 1529
    },
    {
      "epoch": 0.04463504288464905,
      "grad_norm": 0.8140246694342183,
      "learning_rate": 9.99439885354666e-06,
      "loss": 0.1776,
      "step": 1530
    },
    {
      "epoch": 0.04466421611529261,
      "grad_norm": 0.9703925915399549,
      "learning_rate": 9.994376475561842e-06,
      "loss": 0.2158,
      "step": 1531
    },
    {
      "epoch": 0.04469338934593617,
      "grad_norm": 1.104517324078042,
      "learning_rate": 9.994354052988424e-06,
      "loss": 0.2174,
      "step": 1532
    },
    {
      "epoch": 0.04472256257657973,
      "grad_norm": 0.8316422371741246,
      "learning_rate": 9.994331585826606e-06,
      "loss": 0.2145,
      "step": 1533
    },
    {
      "epoch": 0.04475173580722329,
      "grad_norm": 0.9830344315374484,
      "learning_rate": 9.994309074076589e-06,
      "loss": 0.2276,
      "step": 1534
    },
    {
      "epoch": 0.044780909037866856,
      "grad_norm": 1.040520479452583,
      "learning_rate": 9.994286517738572e-06,
      "loss": 0.2396,
      "step": 1535
    },
    {
      "epoch": 0.044810082268510416,
      "grad_norm": 0.8665381336119805,
      "learning_rate": 9.994263916812757e-06,
      "loss": 0.2034,
      "step": 1536
    },
    {
      "epoch": 0.04483925549915398,
      "grad_norm": 0.8774297599044266,
      "learning_rate": 9.994241271299344e-06,
      "loss": 0.2083,
      "step": 1537
    },
    {
      "epoch": 0.04486842872979754,
      "grad_norm": 0.8705735421286882,
      "learning_rate": 9.994218581198539e-06,
      "loss": 0.2052,
      "step": 1538
    },
    {
      "epoch": 0.0448976019604411,
      "grad_norm": 1.0544589605513472,
      "learning_rate": 9.994195846510543e-06,
      "loss": 0.2401,
      "step": 1539
    },
    {
      "epoch": 0.044926775191084664,
      "grad_norm": 1.0616515252387178,
      "learning_rate": 9.994173067235557e-06,
      "loss": 0.2041,
      "step": 1540
    },
    {
      "epoch": 0.044955948421728224,
      "grad_norm": 0.7933865528135348,
      "learning_rate": 9.994150243373789e-06,
      "loss": 0.2138,
      "step": 1541
    },
    {
      "epoch": 0.044985121652371785,
      "grad_norm": 1.0397077363518006,
      "learning_rate": 9.994127374925438e-06,
      "loss": 0.2298,
      "step": 1542
    },
    {
      "epoch": 0.045014294883015345,
      "grad_norm": 0.887150825337438,
      "learning_rate": 9.99410446189071e-06,
      "loss": 0.2078,
      "step": 1543
    },
    {
      "epoch": 0.045043468113658905,
      "grad_norm": 0.7227271216130112,
      "learning_rate": 9.99408150426981e-06,
      "loss": 0.1886,
      "step": 1544
    },
    {
      "epoch": 0.045072641344302465,
      "grad_norm": 0.9403018061880659,
      "learning_rate": 9.994058502062942e-06,
      "loss": 0.2202,
      "step": 1545
    },
    {
      "epoch": 0.04510181457494603,
      "grad_norm": 1.16465291239193,
      "learning_rate": 9.994035455270313e-06,
      "loss": 0.2232,
      "step": 1546
    },
    {
      "epoch": 0.04513098780558959,
      "grad_norm": 1.0377796312985823,
      "learning_rate": 9.994012363892124e-06,
      "loss": 0.2132,
      "step": 1547
    },
    {
      "epoch": 0.04516016103623315,
      "grad_norm": 0.9365964797790705,
      "learning_rate": 9.993989227928588e-06,
      "loss": 0.1932,
      "step": 1548
    },
    {
      "epoch": 0.04518933426687671,
      "grad_norm": 0.9807639657732039,
      "learning_rate": 9.993966047379908e-06,
      "loss": 0.1995,
      "step": 1549
    },
    {
      "epoch": 0.04521850749752027,
      "grad_norm": 1.1793814247486445,
      "learning_rate": 9.993942822246292e-06,
      "loss": 0.2334,
      "step": 1550
    },
    {
      "epoch": 0.04524768072816384,
      "grad_norm": 0.9107580647389542,
      "learning_rate": 9.993919552527945e-06,
      "loss": 0.2024,
      "step": 1551
    },
    {
      "epoch": 0.0452768539588074,
      "grad_norm": 0.9549283293210068,
      "learning_rate": 9.993896238225079e-06,
      "loss": 0.1968,
      "step": 1552
    },
    {
      "epoch": 0.04530602718945096,
      "grad_norm": 1.1968822968012605,
      "learning_rate": 9.993872879337896e-06,
      "loss": 0.2188,
      "step": 1553
    },
    {
      "epoch": 0.04533520042009452,
      "grad_norm": 1.0057792745103897,
      "learning_rate": 9.993849475866611e-06,
      "loss": 0.2346,
      "step": 1554
    },
    {
      "epoch": 0.04536437365073808,
      "grad_norm": 1.1397853199149683,
      "learning_rate": 9.993826027811427e-06,
      "loss": 0.2131,
      "step": 1555
    },
    {
      "epoch": 0.04539354688138164,
      "grad_norm": 1.3361540815581716,
      "learning_rate": 9.993802535172558e-06,
      "loss": 0.2385,
      "step": 1556
    },
    {
      "epoch": 0.04542272011202521,
      "grad_norm": 1.2649994201282369,
      "learning_rate": 9.993778997950212e-06,
      "loss": 0.2326,
      "step": 1557
    },
    {
      "epoch": 0.04545189334266877,
      "grad_norm": 1.1605134543612885,
      "learning_rate": 9.9937554161446e-06,
      "loss": 0.2347,
      "step": 1558
    },
    {
      "epoch": 0.04548106657331233,
      "grad_norm": 1.041034454393136,
      "learning_rate": 9.993731789755931e-06,
      "loss": 0.2091,
      "step": 1559
    },
    {
      "epoch": 0.04551023980395589,
      "grad_norm": 1.0357172359899414,
      "learning_rate": 9.993708118784417e-06,
      "loss": 0.218,
      "step": 1560
    },
    {
      "epoch": 0.04553941303459945,
      "grad_norm": 1.065139304455861,
      "learning_rate": 9.993684403230268e-06,
      "loss": 0.208,
      "step": 1561
    },
    {
      "epoch": 0.045568586265243016,
      "grad_norm": 1.2222053867043257,
      "learning_rate": 9.993660643093698e-06,
      "loss": 0.2312,
      "step": 1562
    },
    {
      "epoch": 0.045597759495886576,
      "grad_norm": 0.7402429115781198,
      "learning_rate": 9.993636838374917e-06,
      "loss": 0.1861,
      "step": 1563
    },
    {
      "epoch": 0.04562693272653014,
      "grad_norm": 0.9226390491358516,
      "learning_rate": 9.99361298907414e-06,
      "loss": 0.2405,
      "step": 1564
    },
    {
      "epoch": 0.0456561059571737,
      "grad_norm": 1.1469985183875429,
      "learning_rate": 9.993589095191575e-06,
      "loss": 0.2036,
      "step": 1565
    },
    {
      "epoch": 0.04568527918781726,
      "grad_norm": 0.9693897306501376,
      "learning_rate": 9.993565156727443e-06,
      "loss": 0.1917,
      "step": 1566
    },
    {
      "epoch": 0.04571445241846082,
      "grad_norm": 1.1274672267838741,
      "learning_rate": 9.99354117368195e-06,
      "loss": 0.2022,
      "step": 1567
    },
    {
      "epoch": 0.045743625649104384,
      "grad_norm": 1.240372697669746,
      "learning_rate": 9.993517146055314e-06,
      "loss": 0.2095,
      "step": 1568
    },
    {
      "epoch": 0.045772798879747945,
      "grad_norm": 0.9120602913148715,
      "learning_rate": 9.99349307384775e-06,
      "loss": 0.1988,
      "step": 1569
    },
    {
      "epoch": 0.045801972110391505,
      "grad_norm": 1.1822521899955165,
      "learning_rate": 9.993468957059472e-06,
      "loss": 0.2203,
      "step": 1570
    },
    {
      "epoch": 0.045831145341035065,
      "grad_norm": 0.9593066799616233,
      "learning_rate": 9.993444795690694e-06,
      "loss": 0.1952,
      "step": 1571
    },
    {
      "epoch": 0.045860318571678625,
      "grad_norm": 1.0210061544811042,
      "learning_rate": 9.993420589741634e-06,
      "loss": 0.2136,
      "step": 1572
    },
    {
      "epoch": 0.04588949180232219,
      "grad_norm": 1.1139539809465127,
      "learning_rate": 9.993396339212505e-06,
      "loss": 0.2269,
      "step": 1573
    },
    {
      "epoch": 0.04591866503296575,
      "grad_norm": 1.104926442800261,
      "learning_rate": 9.993372044103528e-06,
      "loss": 0.2181,
      "step": 1574
    },
    {
      "epoch": 0.04594783826360931,
      "grad_norm": 1.0134102391549935,
      "learning_rate": 9.993347704414915e-06,
      "loss": 0.2036,
      "step": 1575
    },
    {
      "epoch": 0.04597701149425287,
      "grad_norm": 0.9529470474929717,
      "learning_rate": 9.993323320146888e-06,
      "loss": 0.1964,
      "step": 1576
    },
    {
      "epoch": 0.04600618472489643,
      "grad_norm": 0.9056768183692107,
      "learning_rate": 9.99329889129966e-06,
      "loss": 0.213,
      "step": 1577
    },
    {
      "epoch": 0.04603535795553999,
      "grad_norm": 1.0069441295048178,
      "learning_rate": 9.993274417873454e-06,
      "loss": 0.2074,
      "step": 1578
    },
    {
      "epoch": 0.04606453118618356,
      "grad_norm": 1.1463857655151746,
      "learning_rate": 9.993249899868484e-06,
      "loss": 0.2324,
      "step": 1579
    },
    {
      "epoch": 0.04609370441682712,
      "grad_norm": 1.0098795470169168,
      "learning_rate": 9.993225337284973e-06,
      "loss": 0.2036,
      "step": 1580
    },
    {
      "epoch": 0.04612287764747068,
      "grad_norm": 0.9880110224986607,
      "learning_rate": 9.993200730123137e-06,
      "loss": 0.2347,
      "step": 1581
    },
    {
      "epoch": 0.04615205087811424,
      "grad_norm": 2.314498525771706,
      "learning_rate": 9.993176078383198e-06,
      "loss": 0.2144,
      "step": 1582
    },
    {
      "epoch": 0.0461812241087578,
      "grad_norm": 0.9938216294739247,
      "learning_rate": 9.993151382065372e-06,
      "loss": 0.2137,
      "step": 1583
    },
    {
      "epoch": 0.04621039733940137,
      "grad_norm": 0.8233384269887488,
      "learning_rate": 9.993126641169884e-06,
      "loss": 0.2071,
      "step": 1584
    },
    {
      "epoch": 0.04623957057004493,
      "grad_norm": 0.9796380743525783,
      "learning_rate": 9.993101855696955e-06,
      "loss": 0.2138,
      "step": 1585
    },
    {
      "epoch": 0.04626874380068849,
      "grad_norm": 0.9481253593913958,
      "learning_rate": 9.993077025646802e-06,
      "loss": 0.2028,
      "step": 1586
    },
    {
      "epoch": 0.04629791703133205,
      "grad_norm": 0.8992629592784864,
      "learning_rate": 9.99305215101965e-06,
      "loss": 0.22,
      "step": 1587
    },
    {
      "epoch": 0.04632709026197561,
      "grad_norm": 0.9038992823317034,
      "learning_rate": 9.993027231815722e-06,
      "loss": 0.2264,
      "step": 1588
    },
    {
      "epoch": 0.04635626349261917,
      "grad_norm": 0.9287985595121647,
      "learning_rate": 9.993002268035237e-06,
      "loss": 0.2084,
      "step": 1589
    },
    {
      "epoch": 0.04638543672326274,
      "grad_norm": 0.9586303799953108,
      "learning_rate": 9.99297725967842e-06,
      "loss": 0.2085,
      "step": 1590
    },
    {
      "epoch": 0.0464146099539063,
      "grad_norm": 1.0202403101467916,
      "learning_rate": 9.992952206745494e-06,
      "loss": 0.2071,
      "step": 1591
    },
    {
      "epoch": 0.04644378318454986,
      "grad_norm": 1.0218848534619405,
      "learning_rate": 9.992927109236684e-06,
      "loss": 0.2122,
      "step": 1592
    },
    {
      "epoch": 0.04647295641519342,
      "grad_norm": 1.029016927298267,
      "learning_rate": 9.99290196715221e-06,
      "loss": 0.2096,
      "step": 1593
    },
    {
      "epoch": 0.04650212964583698,
      "grad_norm": 1.049779304993706,
      "learning_rate": 9.9928767804923e-06,
      "loss": 0.1966,
      "step": 1594
    },
    {
      "epoch": 0.046531302876480544,
      "grad_norm": 0.9050569421058766,
      "learning_rate": 9.99285154925718e-06,
      "loss": 0.2015,
      "step": 1595
    },
    {
      "epoch": 0.046560476107124105,
      "grad_norm": 0.9535046111228952,
      "learning_rate": 9.992826273447072e-06,
      "loss": 0.2117,
      "step": 1596
    },
    {
      "epoch": 0.046589649337767665,
      "grad_norm": 0.9989884600454102,
      "learning_rate": 9.9928009530622e-06,
      "loss": 0.2196,
      "step": 1597
    },
    {
      "epoch": 0.046618822568411225,
      "grad_norm": 1.0894001553216668,
      "learning_rate": 9.992775588102797e-06,
      "loss": 0.1903,
      "step": 1598
    },
    {
      "epoch": 0.046647995799054785,
      "grad_norm": 0.9933183508765261,
      "learning_rate": 9.992750178569084e-06,
      "loss": 0.2124,
      "step": 1599
    },
    {
      "epoch": 0.046677169029698345,
      "grad_norm": 0.7758056013643674,
      "learning_rate": 9.992724724461289e-06,
      "loss": 0.2065,
      "step": 1600
    },
    {
      "epoch": 0.04670634226034191,
      "grad_norm": 0.9613749623422736,
      "learning_rate": 9.992699225779641e-06,
      "loss": 0.2316,
      "step": 1601
    },
    {
      "epoch": 0.04673551549098547,
      "grad_norm": 1.1799737646147839,
      "learning_rate": 9.992673682524366e-06,
      "loss": 0.2316,
      "step": 1602
    },
    {
      "epoch": 0.04676468872162903,
      "grad_norm": 1.2648274575330423,
      "learning_rate": 9.99264809469569e-06,
      "loss": 0.2398,
      "step": 1603
    },
    {
      "epoch": 0.04679386195227259,
      "grad_norm": 1.2029527954061672,
      "learning_rate": 9.992622462293845e-06,
      "loss": 0.2238,
      "step": 1604
    },
    {
      "epoch": 0.04682303518291615,
      "grad_norm": 0.9771608790612691,
      "learning_rate": 9.992596785319057e-06,
      "loss": 0.2002,
      "step": 1605
    },
    {
      "epoch": 0.04685220841355972,
      "grad_norm": 0.9399009127460336,
      "learning_rate": 9.99257106377156e-06,
      "loss": 0.2116,
      "step": 1606
    },
    {
      "epoch": 0.04688138164420328,
      "grad_norm": 0.940691461243414,
      "learning_rate": 9.992545297651578e-06,
      "loss": 0.2136,
      "step": 1607
    },
    {
      "epoch": 0.04691055487484684,
      "grad_norm": 1.1359818303475295,
      "learning_rate": 9.992519486959345e-06,
      "loss": 0.2322,
      "step": 1608
    },
    {
      "epoch": 0.0469397281054904,
      "grad_norm": 1.0748126340976316,
      "learning_rate": 9.992493631695089e-06,
      "loss": 0.2113,
      "step": 1609
    },
    {
      "epoch": 0.04696890133613396,
      "grad_norm": 1.0834980895099093,
      "learning_rate": 9.99246773185904e-06,
      "loss": 0.2652,
      "step": 1610
    },
    {
      "epoch": 0.04699807456677752,
      "grad_norm": 1.0644096924821413,
      "learning_rate": 9.992441787451432e-06,
      "loss": 0.2122,
      "step": 1611
    },
    {
      "epoch": 0.04702724779742109,
      "grad_norm": 1.0092083097213227,
      "learning_rate": 9.992415798472496e-06,
      "loss": 0.2179,
      "step": 1612
    },
    {
      "epoch": 0.04705642102806465,
      "grad_norm": 0.8137581387339601,
      "learning_rate": 9.992389764922464e-06,
      "loss": 0.2069,
      "step": 1613
    },
    {
      "epoch": 0.04708559425870821,
      "grad_norm": 1.3476142279147683,
      "learning_rate": 9.992363686801568e-06,
      "loss": 0.2394,
      "step": 1614
    },
    {
      "epoch": 0.04711476748935177,
      "grad_norm": 1.030814079950729,
      "learning_rate": 9.992337564110038e-06,
      "loss": 0.2268,
      "step": 1615
    },
    {
      "epoch": 0.04714394071999533,
      "grad_norm": 1.0241879382155972,
      "learning_rate": 9.992311396848113e-06,
      "loss": 0.2204,
      "step": 1616
    },
    {
      "epoch": 0.0471731139506389,
      "grad_norm": 0.9256986412470775,
      "learning_rate": 9.992285185016022e-06,
      "loss": 0.2208,
      "step": 1617
    },
    {
      "epoch": 0.04720228718128246,
      "grad_norm": 1.1143884070712264,
      "learning_rate": 9.992258928614002e-06,
      "loss": 0.2413,
      "step": 1618
    },
    {
      "epoch": 0.04723146041192602,
      "grad_norm": 0.8125416427878479,
      "learning_rate": 9.992232627642284e-06,
      "loss": 0.2072,
      "step": 1619
    },
    {
      "epoch": 0.04726063364256958,
      "grad_norm": 0.8811217609983716,
      "learning_rate": 9.992206282101106e-06,
      "loss": 0.2078,
      "step": 1620
    },
    {
      "epoch": 0.04728980687321314,
      "grad_norm": 0.932856647391488,
      "learning_rate": 9.992179891990703e-06,
      "loss": 0.2095,
      "step": 1621
    },
    {
      "epoch": 0.047318980103856705,
      "grad_norm": 0.9214838875133651,
      "learning_rate": 9.992153457311308e-06,
      "loss": 0.2038,
      "step": 1622
    },
    {
      "epoch": 0.047348153334500265,
      "grad_norm": 0.9426686113551024,
      "learning_rate": 9.99212697806316e-06,
      "loss": 0.2249,
      "step": 1623
    },
    {
      "epoch": 0.047377326565143825,
      "grad_norm": 0.9968463502456327,
      "learning_rate": 9.992100454246494e-06,
      "loss": 0.2193,
      "step": 1624
    },
    {
      "epoch": 0.047406499795787385,
      "grad_norm": 0.9871110081566117,
      "learning_rate": 9.992073885861546e-06,
      "loss": 0.2156,
      "step": 1625
    },
    {
      "epoch": 0.047435673026430945,
      "grad_norm": 1.0524692523570862,
      "learning_rate": 9.992047272908554e-06,
      "loss": 0.2117,
      "step": 1626
    },
    {
      "epoch": 0.047464846257074506,
      "grad_norm": 0.893359779581283,
      "learning_rate": 9.992020615387756e-06,
      "loss": 0.1994,
      "step": 1627
    },
    {
      "epoch": 0.04749401948771807,
      "grad_norm": 1.1422946182731655,
      "learning_rate": 9.991993913299392e-06,
      "loss": 0.2082,
      "step": 1628
    },
    {
      "epoch": 0.04752319271836163,
      "grad_norm": 1.0213423646132167,
      "learning_rate": 9.991967166643695e-06,
      "loss": 0.2074,
      "step": 1629
    },
    {
      "epoch": 0.04755236594900519,
      "grad_norm": 0.8688140390904798,
      "learning_rate": 9.991940375420907e-06,
      "loss": 0.2072,
      "step": 1630
    },
    {
      "epoch": 0.04758153917964875,
      "grad_norm": 1.0926007970399714,
      "learning_rate": 9.991913539631267e-06,
      "loss": 0.2259,
      "step": 1631
    },
    {
      "epoch": 0.047610712410292313,
      "grad_norm": 1.5254049005774917,
      "learning_rate": 9.991886659275016e-06,
      "loss": 0.2821,
      "step": 1632
    },
    {
      "epoch": 0.04763988564093588,
      "grad_norm": 1.0016734470443482,
      "learning_rate": 9.991859734352391e-06,
      "loss": 0.1872,
      "step": 1633
    },
    {
      "epoch": 0.04766905887157944,
      "grad_norm": 1.1477664097592586,
      "learning_rate": 9.991832764863635e-06,
      "loss": 0.2349,
      "step": 1634
    },
    {
      "epoch": 0.047698232102223,
      "grad_norm": 1.0263970694145326,
      "learning_rate": 9.991805750808986e-06,
      "loss": 0.2047,
      "step": 1635
    },
    {
      "epoch": 0.04772740533286656,
      "grad_norm": 1.15791772232876,
      "learning_rate": 9.99177869218869e-06,
      "loss": 0.2363,
      "step": 1636
    },
    {
      "epoch": 0.04775657856351012,
      "grad_norm": 1.0025654025994395,
      "learning_rate": 9.991751589002985e-06,
      "loss": 0.2121,
      "step": 1637
    },
    {
      "epoch": 0.04778575179415368,
      "grad_norm": 0.898770828722737,
      "learning_rate": 9.99172444125211e-06,
      "loss": 0.199,
      "step": 1638
    },
    {
      "epoch": 0.04781492502479725,
      "grad_norm": 0.9471821780665727,
      "learning_rate": 9.991697248936313e-06,
      "loss": 0.2112,
      "step": 1639
    },
    {
      "epoch": 0.04784409825544081,
      "grad_norm": 0.7901277486431224,
      "learning_rate": 9.991670012055836e-06,
      "loss": 0.2362,
      "step": 1640
    },
    {
      "epoch": 0.04787327148608437,
      "grad_norm": 0.9431150578435697,
      "learning_rate": 9.991642730610919e-06,
      "loss": 0.2316,
      "step": 1641
    },
    {
      "epoch": 0.04790244471672793,
      "grad_norm": 0.872533087573785,
      "learning_rate": 9.991615404601808e-06,
      "loss": 0.2141,
      "step": 1642
    },
    {
      "epoch": 0.04793161794737149,
      "grad_norm": 0.878117108498137,
      "learning_rate": 9.991588034028746e-06,
      "loss": 0.2118,
      "step": 1643
    },
    {
      "epoch": 0.04796079117801506,
      "grad_norm": 0.8220249744036633,
      "learning_rate": 9.991560618891978e-06,
      "loss": 0.1948,
      "step": 1644
    },
    {
      "epoch": 0.04798996440865862,
      "grad_norm": 0.8555776022492638,
      "learning_rate": 9.991533159191748e-06,
      "loss": 0.1847,
      "step": 1645
    },
    {
      "epoch": 0.04801913763930218,
      "grad_norm": 0.8560024473690883,
      "learning_rate": 9.9915056549283e-06,
      "loss": 0.205,
      "step": 1646
    },
    {
      "epoch": 0.04804831086994574,
      "grad_norm": 1.0626266583178325,
      "learning_rate": 9.991478106101884e-06,
      "loss": 0.2026,
      "step": 1647
    },
    {
      "epoch": 0.0480774841005893,
      "grad_norm": 1.0366821465949032,
      "learning_rate": 9.991450512712742e-06,
      "loss": 0.2175,
      "step": 1648
    },
    {
      "epoch": 0.04810665733123286,
      "grad_norm": 0.9532431541562103,
      "learning_rate": 9.99142287476112e-06,
      "loss": 0.2355,
      "step": 1649
    },
    {
      "epoch": 0.048135830561876425,
      "grad_norm": 1.1071808286644875,
      "learning_rate": 9.991395192247267e-06,
      "loss": 0.2178,
      "step": 1650
    },
    {
      "epoch": 0.048165003792519985,
      "grad_norm": 1.0919039036106617,
      "learning_rate": 9.991367465171428e-06,
      "loss": 0.2171,
      "step": 1651
    },
    {
      "epoch": 0.048194177023163545,
      "grad_norm": 1.0521939051560518,
      "learning_rate": 9.991339693533855e-06,
      "loss": 0.2329,
      "step": 1652
    },
    {
      "epoch": 0.048223350253807105,
      "grad_norm": 1.2599307604008543,
      "learning_rate": 9.99131187733479e-06,
      "loss": 0.2113,
      "step": 1653
    },
    {
      "epoch": 0.048252523484450666,
      "grad_norm": 1.0138597731674888,
      "learning_rate": 9.991284016574485e-06,
      "loss": 0.1956,
      "step": 1654
    },
    {
      "epoch": 0.04828169671509423,
      "grad_norm": 1.0015503180634682,
      "learning_rate": 9.991256111253188e-06,
      "loss": 0.1881,
      "step": 1655
    },
    {
      "epoch": 0.04831086994573779,
      "grad_norm": 1.4548200339884936,
      "learning_rate": 9.991228161371147e-06,
      "loss": 0.2136,
      "step": 1656
    },
    {
      "epoch": 0.04834004317638135,
      "grad_norm": 0.9606732047312863,
      "learning_rate": 9.991200166928613e-06,
      "loss": 0.2435,
      "step": 1657
    },
    {
      "epoch": 0.04836921640702491,
      "grad_norm": 1.1041675199523862,
      "learning_rate": 9.991172127925836e-06,
      "loss": 0.2497,
      "step": 1658
    },
    {
      "epoch": 0.048398389637668474,
      "grad_norm": 1.0056461141323014,
      "learning_rate": 9.991144044363066e-06,
      "loss": 0.2228,
      "step": 1659
    },
    {
      "epoch": 0.048427562868312034,
      "grad_norm": 0.8770300574055199,
      "learning_rate": 9.991115916240553e-06,
      "loss": 0.2213,
      "step": 1660
    },
    {
      "epoch": 0.0484567360989556,
      "grad_norm": 0.9344253606928895,
      "learning_rate": 9.991087743558548e-06,
      "loss": 0.246,
      "step": 1661
    },
    {
      "epoch": 0.04848590932959916,
      "grad_norm": 1.1217884327800407,
      "learning_rate": 9.991059526317304e-06,
      "loss": 0.217,
      "step": 1662
    },
    {
      "epoch": 0.04851508256024272,
      "grad_norm": 0.7930454799108859,
      "learning_rate": 9.991031264517071e-06,
      "loss": 0.1797,
      "step": 1663
    },
    {
      "epoch": 0.04854425579088628,
      "grad_norm": 0.8719649707868175,
      "learning_rate": 9.991002958158102e-06,
      "loss": 0.2095,
      "step": 1664
    },
    {
      "epoch": 0.04857342902152984,
      "grad_norm": 1.0968692164460105,
      "learning_rate": 9.990974607240651e-06,
      "loss": 0.2396,
      "step": 1665
    },
    {
      "epoch": 0.04860260225217341,
      "grad_norm": 1.0112037472179267,
      "learning_rate": 9.990946211764971e-06,
      "loss": 0.2356,
      "step": 1666
    },
    {
      "epoch": 0.04863177548281697,
      "grad_norm": 1.0670663463650947,
      "learning_rate": 9.990917771731313e-06,
      "loss": 0.2219,
      "step": 1667
    },
    {
      "epoch": 0.04866094871346053,
      "grad_norm": 1.0379911288760688,
      "learning_rate": 9.990889287139933e-06,
      "loss": 0.2028,
      "step": 1668
    },
    {
      "epoch": 0.04869012194410409,
      "grad_norm": 0.7639772036524248,
      "learning_rate": 9.990860757991085e-06,
      "loss": 0.1853,
      "step": 1669
    },
    {
      "epoch": 0.04871929517474765,
      "grad_norm": 0.9674274347028614,
      "learning_rate": 9.990832184285025e-06,
      "loss": 0.2193,
      "step": 1670
    },
    {
      "epoch": 0.04874846840539121,
      "grad_norm": 0.9557308291471724,
      "learning_rate": 9.990803566022006e-06,
      "loss": 0.2198,
      "step": 1671
    },
    {
      "epoch": 0.04877764163603478,
      "grad_norm": 0.8525217282539468,
      "learning_rate": 9.990774903202282e-06,
      "loss": 0.1895,
      "step": 1672
    },
    {
      "epoch": 0.04880681486667834,
      "grad_norm": 1.1871635907429425,
      "learning_rate": 9.990746195826113e-06,
      "loss": 0.2313,
      "step": 1673
    },
    {
      "epoch": 0.0488359880973219,
      "grad_norm": 0.9281359347017197,
      "learning_rate": 9.99071744389375e-06,
      "loss": 0.194,
      "step": 1674
    },
    {
      "epoch": 0.04886516132796546,
      "grad_norm": 1.1358069207452808,
      "learning_rate": 9.990688647405457e-06,
      "loss": 0.2299,
      "step": 1675
    },
    {
      "epoch": 0.04889433455860902,
      "grad_norm": 0.9558605429837128,
      "learning_rate": 9.990659806361487e-06,
      "loss": 0.1973,
      "step": 1676
    },
    {
      "epoch": 0.048923507789252585,
      "grad_norm": 0.9014631057553211,
      "learning_rate": 9.990630920762096e-06,
      "loss": 0.2143,
      "step": 1677
    },
    {
      "epoch": 0.048952681019896145,
      "grad_norm": 1.0099362039814408,
      "learning_rate": 9.990601990607544e-06,
      "loss": 0.2007,
      "step": 1678
    },
    {
      "epoch": 0.048981854250539705,
      "grad_norm": 1.0241491990455205,
      "learning_rate": 9.99057301589809e-06,
      "loss": 0.2192,
      "step": 1679
    },
    {
      "epoch": 0.049011027481183266,
      "grad_norm": 0.9551060818761057,
      "learning_rate": 9.99054399663399e-06,
      "loss": 0.2423,
      "step": 1680
    },
    {
      "epoch": 0.049040200711826826,
      "grad_norm": 1.0235115751315123,
      "learning_rate": 9.990514932815505e-06,
      "loss": 0.2223,
      "step": 1681
    },
    {
      "epoch": 0.049069373942470386,
      "grad_norm": 1.0437110676507673,
      "learning_rate": 9.990485824442893e-06,
      "loss": 0.2292,
      "step": 1682
    },
    {
      "epoch": 0.04909854717311395,
      "grad_norm": 0.8227736075106783,
      "learning_rate": 9.990456671516418e-06,
      "loss": 0.2299,
      "step": 1683
    },
    {
      "epoch": 0.04912772040375751,
      "grad_norm": 1.1425436215855296,
      "learning_rate": 9.990427474036333e-06,
      "loss": 0.2075,
      "step": 1684
    },
    {
      "epoch": 0.04915689363440107,
      "grad_norm": 1.1496941580824722,
      "learning_rate": 9.990398232002907e-06,
      "loss": 0.2763,
      "step": 1685
    },
    {
      "epoch": 0.049186066865044634,
      "grad_norm": 0.8975550080507411,
      "learning_rate": 9.990368945416392e-06,
      "loss": 0.2092,
      "step": 1686
    },
    {
      "epoch": 0.049215240095688194,
      "grad_norm": 0.8923290018228252,
      "learning_rate": 9.990339614277058e-06,
      "loss": 0.2019,
      "step": 1687
    },
    {
      "epoch": 0.04924441332633176,
      "grad_norm": 1.0635083174019502,
      "learning_rate": 9.990310238585162e-06,
      "loss": 0.2094,
      "step": 1688
    },
    {
      "epoch": 0.04927358655697532,
      "grad_norm": 1.021382709157859,
      "learning_rate": 9.990280818340968e-06,
      "loss": 0.2252,
      "step": 1689
    },
    {
      "epoch": 0.04930275978761888,
      "grad_norm": 0.9332255670808633,
      "learning_rate": 9.990251353544738e-06,
      "loss": 0.1944,
      "step": 1690
    },
    {
      "epoch": 0.04933193301826244,
      "grad_norm": 1.0526499907531872,
      "learning_rate": 9.990221844196734e-06,
      "loss": 0.1899,
      "step": 1691
    },
    {
      "epoch": 0.049361106248906,
      "grad_norm": 0.9494404648611682,
      "learning_rate": 9.990192290297223e-06,
      "loss": 0.2116,
      "step": 1692
    },
    {
      "epoch": 0.04939027947954956,
      "grad_norm": 1.3654028412022048,
      "learning_rate": 9.990162691846466e-06,
      "loss": 0.2082,
      "step": 1693
    },
    {
      "epoch": 0.04941945271019313,
      "grad_norm": 1.1887938946549368,
      "learning_rate": 9.990133048844726e-06,
      "loss": 0.1969,
      "step": 1694
    },
    {
      "epoch": 0.04944862594083669,
      "grad_norm": 1.1183445820649272,
      "learning_rate": 9.99010336129227e-06,
      "loss": 0.2169,
      "step": 1695
    },
    {
      "epoch": 0.04947779917148025,
      "grad_norm": 1.1069014753156676,
      "learning_rate": 9.990073629189364e-06,
      "loss": 0.1969,
      "step": 1696
    },
    {
      "epoch": 0.04950697240212381,
      "grad_norm": 0.929966883144153,
      "learning_rate": 9.99004385253627e-06,
      "loss": 0.1927,
      "step": 1697
    },
    {
      "epoch": 0.04953614563276737,
      "grad_norm": 0.7625456696110425,
      "learning_rate": 9.990014031333256e-06,
      "loss": 0.1831,
      "step": 1698
    },
    {
      "epoch": 0.04956531886341094,
      "grad_norm": 0.8287507863528123,
      "learning_rate": 9.989984165580588e-06,
      "loss": 0.2018,
      "step": 1699
    },
    {
      "epoch": 0.0495944920940545,
      "grad_norm": 1.0421683718229273,
      "learning_rate": 9.989954255278534e-06,
      "loss": 0.1863,
      "step": 1700
    },
    {
      "epoch": 0.04962366532469806,
      "grad_norm": 1.030293521807953,
      "learning_rate": 9.989924300427356e-06,
      "loss": 0.1967,
      "step": 1701
    },
    {
      "epoch": 0.04965283855534162,
      "grad_norm": 1.0855062412490677,
      "learning_rate": 9.989894301027328e-06,
      "loss": 0.2197,
      "step": 1702
    },
    {
      "epoch": 0.04968201178598518,
      "grad_norm": 1.039058519086821,
      "learning_rate": 9.989864257078715e-06,
      "loss": 0.2157,
      "step": 1703
    },
    {
      "epoch": 0.04971118501662874,
      "grad_norm": 0.9145943238871364,
      "learning_rate": 9.989834168581784e-06,
      "loss": 0.1884,
      "step": 1704
    },
    {
      "epoch": 0.049740358247272305,
      "grad_norm": 0.9387708509635466,
      "learning_rate": 9.989804035536805e-06,
      "loss": 0.2036,
      "step": 1705
    },
    {
      "epoch": 0.049769531477915865,
      "grad_norm": 0.9956052149277963,
      "learning_rate": 9.989773857944048e-06,
      "loss": 0.2192,
      "step": 1706
    },
    {
      "epoch": 0.049798704708559426,
      "grad_norm": 1.0293819252919336,
      "learning_rate": 9.989743635803779e-06,
      "loss": 0.2334,
      "step": 1707
    },
    {
      "epoch": 0.049827877939202986,
      "grad_norm": 0.8222808592083097,
      "learning_rate": 9.989713369116271e-06,
      "loss": 0.2021,
      "step": 1708
    },
    {
      "epoch": 0.049857051169846546,
      "grad_norm": 0.9689052093946374,
      "learning_rate": 9.989683057881794e-06,
      "loss": 0.236,
      "step": 1709
    },
    {
      "epoch": 0.04988622440049011,
      "grad_norm": 0.8636164962595155,
      "learning_rate": 9.989652702100616e-06,
      "loss": 0.2122,
      "step": 1710
    },
    {
      "epoch": 0.04991539763113367,
      "grad_norm": 0.9883710485548339,
      "learning_rate": 9.989622301773011e-06,
      "loss": 0.2155,
      "step": 1711
    },
    {
      "epoch": 0.049944570861777234,
      "grad_norm": 0.8859232847320321,
      "learning_rate": 9.989591856899248e-06,
      "loss": 0.2236,
      "step": 1712
    },
    {
      "epoch": 0.049973744092420794,
      "grad_norm": 0.9338670941696584,
      "learning_rate": 9.989561367479603e-06,
      "loss": 0.2154,
      "step": 1713
    },
    {
      "epoch": 0.050002917323064354,
      "grad_norm": 1.0418470685744265,
      "learning_rate": 9.989530833514342e-06,
      "loss": 0.2294,
      "step": 1714
    },
    {
      "epoch": 0.05003209055370792,
      "grad_norm": 0.9223354819604049,
      "learning_rate": 9.989500255003743e-06,
      "loss": 0.2107,
      "step": 1715
    },
    {
      "epoch": 0.05006126378435148,
      "grad_norm": 0.9554485296783357,
      "learning_rate": 9.989469631948075e-06,
      "loss": 0.2039,
      "step": 1716
    },
    {
      "epoch": 0.05009043701499504,
      "grad_norm": 0.9688904025197402,
      "learning_rate": 9.989438964347614e-06,
      "loss": 0.2272,
      "step": 1717
    },
    {
      "epoch": 0.0501196102456386,
      "grad_norm": 1.0793616107432007,
      "learning_rate": 9.989408252202632e-06,
      "loss": 0.1994,
      "step": 1718
    },
    {
      "epoch": 0.05014878347628216,
      "grad_norm": 1.0896751899895707,
      "learning_rate": 9.989377495513407e-06,
      "loss": 0.2178,
      "step": 1719
    },
    {
      "epoch": 0.05017795670692572,
      "grad_norm": 1.0354239852979008,
      "learning_rate": 9.989346694280208e-06,
      "loss": 0.2494,
      "step": 1720
    },
    {
      "epoch": 0.05020712993756929,
      "grad_norm": 1.08500778617434,
      "learning_rate": 9.989315848503314e-06,
      "loss": 0.194,
      "step": 1721
    },
    {
      "epoch": 0.05023630316821285,
      "grad_norm": 0.851551946042162,
      "learning_rate": 9.989284958182998e-06,
      "loss": 0.2071,
      "step": 1722
    },
    {
      "epoch": 0.05026547639885641,
      "grad_norm": 0.8830058696664645,
      "learning_rate": 9.989254023319539e-06,
      "loss": 0.2268,
      "step": 1723
    },
    {
      "epoch": 0.05029464962949997,
      "grad_norm": 1.062087466055277,
      "learning_rate": 9.98922304391321e-06,
      "loss": 0.2171,
      "step": 1724
    },
    {
      "epoch": 0.05032382286014353,
      "grad_norm": 1.0310098785935518,
      "learning_rate": 9.98919201996429e-06,
      "loss": 0.2071,
      "step": 1725
    },
    {
      "epoch": 0.0503529960907871,
      "grad_norm": 0.9616539437889566,
      "learning_rate": 9.989160951473051e-06,
      "loss": 0.2104,
      "step": 1726
    },
    {
      "epoch": 0.05038216932143066,
      "grad_norm": 0.9808099587682962,
      "learning_rate": 9.989129838439778e-06,
      "loss": 0.1964,
      "step": 1727
    },
    {
      "epoch": 0.05041134255207422,
      "grad_norm": 0.9796749181222015,
      "learning_rate": 9.989098680864741e-06,
      "loss": 0.1986,
      "step": 1728
    },
    {
      "epoch": 0.05044051578271778,
      "grad_norm": 1.1296507753056482,
      "learning_rate": 9.989067478748225e-06,
      "loss": 0.2021,
      "step": 1729
    },
    {
      "epoch": 0.05046968901336134,
      "grad_norm": 1.0043817801041206,
      "learning_rate": 9.989036232090506e-06,
      "loss": 0.2308,
      "step": 1730
    },
    {
      "epoch": 0.0504988622440049,
      "grad_norm": 0.9883080734561521,
      "learning_rate": 9.98900494089186e-06,
      "loss": 0.2,
      "step": 1731
    },
    {
      "epoch": 0.050528035474648465,
      "grad_norm": 1.1953902816615842,
      "learning_rate": 9.98897360515257e-06,
      "loss": 0.214,
      "step": 1732
    },
    {
      "epoch": 0.050557208705292025,
      "grad_norm": 1.0499755971419535,
      "learning_rate": 9.988942224872916e-06,
      "loss": 0.2194,
      "step": 1733
    },
    {
      "epoch": 0.050586381935935586,
      "grad_norm": 1.3567395572332095,
      "learning_rate": 9.988910800053174e-06,
      "loss": 0.2297,
      "step": 1734
    },
    {
      "epoch": 0.050615555166579146,
      "grad_norm": 1.058652552738655,
      "learning_rate": 9.988879330693629e-06,
      "loss": 0.2193,
      "step": 1735
    },
    {
      "epoch": 0.050644728397222706,
      "grad_norm": 1.1152018443812863,
      "learning_rate": 9.98884781679456e-06,
      "loss": 0.2138,
      "step": 1736
    },
    {
      "epoch": 0.05067390162786627,
      "grad_norm": 1.1431676401102813,
      "learning_rate": 9.988816258356249e-06,
      "loss": 0.2326,
      "step": 1737
    },
    {
      "epoch": 0.05070307485850983,
      "grad_norm": 1.0671413599887805,
      "learning_rate": 9.988784655378976e-06,
      "loss": 0.2211,
      "step": 1738
    },
    {
      "epoch": 0.050732248089153394,
      "grad_norm": 0.9707393920933772,
      "learning_rate": 9.988753007863025e-06,
      "loss": 0.1939,
      "step": 1739
    },
    {
      "epoch": 0.050761421319796954,
      "grad_norm": 0.8133631276971836,
      "learning_rate": 9.98872131580868e-06,
      "loss": 0.2146,
      "step": 1740
    },
    {
      "epoch": 0.050790594550440514,
      "grad_norm": 0.8605833063521691,
      "learning_rate": 9.98868957921622e-06,
      "loss": 0.2003,
      "step": 1741
    },
    {
      "epoch": 0.050819767781084074,
      "grad_norm": 0.7330354260552346,
      "learning_rate": 9.98865779808593e-06,
      "loss": 0.1961,
      "step": 1742
    },
    {
      "epoch": 0.05084894101172764,
      "grad_norm": 0.8580404109212706,
      "learning_rate": 9.988625972418096e-06,
      "loss": 0.1902,
      "step": 1743
    },
    {
      "epoch": 0.0508781142423712,
      "grad_norm": 1.1967846988616961,
      "learning_rate": 9.988594102212999e-06,
      "loss": 0.1938,
      "step": 1744
    },
    {
      "epoch": 0.05090728747301476,
      "grad_norm": 1.0104081967587848,
      "learning_rate": 9.988562187470925e-06,
      "loss": 0.2121,
      "step": 1745
    },
    {
      "epoch": 0.05093646070365832,
      "grad_norm": 1.1529529643483754,
      "learning_rate": 9.988530228192158e-06,
      "loss": 0.2084,
      "step": 1746
    },
    {
      "epoch": 0.05096563393430188,
      "grad_norm": 0.9197788799969081,
      "learning_rate": 9.988498224376985e-06,
      "loss": 0.2006,
      "step": 1747
    },
    {
      "epoch": 0.05099480716494545,
      "grad_norm": 1.0174692305896282,
      "learning_rate": 9.988466176025689e-06,
      "loss": 0.1982,
      "step": 1748
    },
    {
      "epoch": 0.05102398039558901,
      "grad_norm": 0.9349769771620468,
      "learning_rate": 9.988434083138561e-06,
      "loss": 0.2276,
      "step": 1749
    },
    {
      "epoch": 0.05105315362623257,
      "grad_norm": 1.0934151233929899,
      "learning_rate": 9.988401945715882e-06,
      "loss": 0.2126,
      "step": 1750
    },
    {
      "epoch": 0.05108232685687613,
      "grad_norm": 1.36762340528711,
      "learning_rate": 9.98836976375794e-06,
      "loss": 0.2178,
      "step": 1751
    },
    {
      "epoch": 0.05111150008751969,
      "grad_norm": 1.0101928167570686,
      "learning_rate": 9.988337537265026e-06,
      "loss": 0.2071,
      "step": 1752
    },
    {
      "epoch": 0.05114067331816325,
      "grad_norm": 1.1283120636047486,
      "learning_rate": 9.988305266237425e-06,
      "loss": 0.2166,
      "step": 1753
    },
    {
      "epoch": 0.05116984654880682,
      "grad_norm": 0.9499565542893285,
      "learning_rate": 9.988272950675423e-06,
      "loss": 0.2103,
      "step": 1754
    },
    {
      "epoch": 0.05119901977945038,
      "grad_norm": 1.0255609302975421,
      "learning_rate": 9.988240590579314e-06,
      "loss": 0.2094,
      "step": 1755
    },
    {
      "epoch": 0.05122819301009394,
      "grad_norm": 1.0197668974193146,
      "learning_rate": 9.988208185949382e-06,
      "loss": 0.2069,
      "step": 1756
    },
    {
      "epoch": 0.0512573662407375,
      "grad_norm": 1.018468946312996,
      "learning_rate": 9.988175736785919e-06,
      "loss": 0.2054,
      "step": 1757
    },
    {
      "epoch": 0.05128653947138106,
      "grad_norm": 0.7720587618144342,
      "learning_rate": 9.988143243089214e-06,
      "loss": 0.1945,
      "step": 1758
    },
    {
      "epoch": 0.051315712702024625,
      "grad_norm": 0.8976148147428786,
      "learning_rate": 9.988110704859557e-06,
      "loss": 0.1977,
      "step": 1759
    },
    {
      "epoch": 0.051344885932668186,
      "grad_norm": 0.8569834394311729,
      "learning_rate": 9.988078122097238e-06,
      "loss": 0.2066,
      "step": 1760
    },
    {
      "epoch": 0.051374059163311746,
      "grad_norm": 0.8870857932805177,
      "learning_rate": 9.988045494802548e-06,
      "loss": 0.2169,
      "step": 1761
    },
    {
      "epoch": 0.051403232393955306,
      "grad_norm": 0.9764283677320921,
      "learning_rate": 9.988012822975778e-06,
      "loss": 0.2199,
      "step": 1762
    },
    {
      "epoch": 0.051432405624598866,
      "grad_norm": 0.8400425016905786,
      "learning_rate": 9.987980106617221e-06,
      "loss": 0.2174,
      "step": 1763
    },
    {
      "epoch": 0.051461578855242426,
      "grad_norm": 0.8448586166941309,
      "learning_rate": 9.987947345727167e-06,
      "loss": 0.1997,
      "step": 1764
    },
    {
      "epoch": 0.051490752085885994,
      "grad_norm": 0.9292200392314232,
      "learning_rate": 9.987914540305911e-06,
      "loss": 0.2065,
      "step": 1765
    },
    {
      "epoch": 0.051519925316529554,
      "grad_norm": 0.9746200610832763,
      "learning_rate": 9.987881690353744e-06,
      "loss": 0.2139,
      "step": 1766
    },
    {
      "epoch": 0.051549098547173114,
      "grad_norm": 1.0032499501603669,
      "learning_rate": 9.987848795870962e-06,
      "loss": 0.1997,
      "step": 1767
    },
    {
      "epoch": 0.051578271777816674,
      "grad_norm": 0.961661839373899,
      "learning_rate": 9.987815856857856e-06,
      "loss": 0.1922,
      "step": 1768
    },
    {
      "epoch": 0.051607445008460234,
      "grad_norm": 0.9737648956227785,
      "learning_rate": 9.98778287331472e-06,
      "loss": 0.1909,
      "step": 1769
    },
    {
      "epoch": 0.0516366182391038,
      "grad_norm": 1.1386668813997758,
      "learning_rate": 9.987749845241849e-06,
      "loss": 0.2159,
      "step": 1770
    },
    {
      "epoch": 0.05166579146974736,
      "grad_norm": 1.1632667233396496,
      "learning_rate": 9.987716772639537e-06,
      "loss": 0.2037,
      "step": 1771
    },
    {
      "epoch": 0.05169496470039092,
      "grad_norm": 1.1498587634493553,
      "learning_rate": 9.987683655508082e-06,
      "loss": 0.2186,
      "step": 1772
    },
    {
      "epoch": 0.05172413793103448,
      "grad_norm": 1.0121456586626543,
      "learning_rate": 9.987650493847778e-06,
      "loss": 0.2564,
      "step": 1773
    },
    {
      "epoch": 0.05175331116167804,
      "grad_norm": 1.0798339386031093,
      "learning_rate": 9.98761728765892e-06,
      "loss": 0.2089,
      "step": 1774
    },
    {
      "epoch": 0.0517824843923216,
      "grad_norm": 1.06653919711271,
      "learning_rate": 9.987584036941806e-06,
      "loss": 0.2035,
      "step": 1775
    },
    {
      "epoch": 0.05181165762296517,
      "grad_norm": 1.3176017160123297,
      "learning_rate": 9.987550741696734e-06,
      "loss": 0.2037,
      "step": 1776
    },
    {
      "epoch": 0.05184083085360873,
      "grad_norm": 1.2445537573465832,
      "learning_rate": 9.987517401923996e-06,
      "loss": 0.2313,
      "step": 1777
    },
    {
      "epoch": 0.05187000408425229,
      "grad_norm": 0.9860102086983821,
      "learning_rate": 9.987484017623896e-06,
      "loss": 0.2273,
      "step": 1778
    },
    {
      "epoch": 0.05189917731489585,
      "grad_norm": 0.9877336386360043,
      "learning_rate": 9.987450588796729e-06,
      "loss": 0.2009,
      "step": 1779
    },
    {
      "epoch": 0.05192835054553941,
      "grad_norm": 0.8539206765930014,
      "learning_rate": 9.987417115442793e-06,
      "loss": 0.2181,
      "step": 1780
    },
    {
      "epoch": 0.05195752377618298,
      "grad_norm": 0.9826417333117898,
      "learning_rate": 9.987383597562388e-06,
      "loss": 0.2471,
      "step": 1781
    },
    {
      "epoch": 0.05198669700682654,
      "grad_norm": 1.0993517355804936,
      "learning_rate": 9.987350035155813e-06,
      "loss": 0.225,
      "step": 1782
    },
    {
      "epoch": 0.0520158702374701,
      "grad_norm": 0.7973722927989587,
      "learning_rate": 9.987316428223367e-06,
      "loss": 0.182,
      "step": 1783
    },
    {
      "epoch": 0.05204504346811366,
      "grad_norm": 0.9459140369300196,
      "learning_rate": 9.98728277676535e-06,
      "loss": 0.1943,
      "step": 1784
    },
    {
      "epoch": 0.05207421669875722,
      "grad_norm": 0.9004056542612794,
      "learning_rate": 9.987249080782065e-06,
      "loss": 0.2117,
      "step": 1785
    },
    {
      "epoch": 0.05210338992940078,
      "grad_norm": 0.8568729774458818,
      "learning_rate": 9.987215340273809e-06,
      "loss": 0.2147,
      "step": 1786
    },
    {
      "epoch": 0.052132563160044346,
      "grad_norm": 0.9424697505205792,
      "learning_rate": 9.987181555240886e-06,
      "loss": 0.2059,
      "step": 1787
    },
    {
      "epoch": 0.052161736390687906,
      "grad_norm": 0.9504645368660839,
      "learning_rate": 9.987147725683595e-06,
      "loss": 0.2197,
      "step": 1788
    },
    {
      "epoch": 0.052190909621331466,
      "grad_norm": 0.8920022479488012,
      "learning_rate": 9.987113851602241e-06,
      "loss": 0.2079,
      "step": 1789
    },
    {
      "epoch": 0.052220082851975026,
      "grad_norm": 0.8335692029373526,
      "learning_rate": 9.987079932997124e-06,
      "loss": 0.2085,
      "step": 1790
    },
    {
      "epoch": 0.052249256082618586,
      "grad_norm": 0.8922263766690413,
      "learning_rate": 9.98704596986855e-06,
      "loss": 0.2016,
      "step": 1791
    },
    {
      "epoch": 0.052278429313262154,
      "grad_norm": 1.1374022594460134,
      "learning_rate": 9.987011962216817e-06,
      "loss": 0.2218,
      "step": 1792
    },
    {
      "epoch": 0.052307602543905714,
      "grad_norm": 1.0103153351072,
      "learning_rate": 9.986977910042236e-06,
      "loss": 0.2133,
      "step": 1793
    },
    {
      "epoch": 0.052336775774549274,
      "grad_norm": 0.8372195469361688,
      "learning_rate": 9.986943813345102e-06,
      "loss": 0.204,
      "step": 1794
    },
    {
      "epoch": 0.052365949005192834,
      "grad_norm": 0.9685977819820961,
      "learning_rate": 9.986909672125726e-06,
      "loss": 0.2102,
      "step": 1795
    },
    {
      "epoch": 0.052395122235836394,
      "grad_norm": 0.8709555196077586,
      "learning_rate": 9.98687548638441e-06,
      "loss": 0.1972,
      "step": 1796
    },
    {
      "epoch": 0.052424295466479955,
      "grad_norm": 0.994100812796675,
      "learning_rate": 9.986841256121462e-06,
      "loss": 0.2141,
      "step": 1797
    },
    {
      "epoch": 0.05245346869712352,
      "grad_norm": 0.8335077470829617,
      "learning_rate": 9.986806981337186e-06,
      "loss": 0.2094,
      "step": 1798
    },
    {
      "epoch": 0.05248264192776708,
      "grad_norm": 1.0429819177319595,
      "learning_rate": 9.986772662031886e-06,
      "loss": 0.2103,
      "step": 1799
    },
    {
      "epoch": 0.05251181515841064,
      "grad_norm": 1.1556976067958373,
      "learning_rate": 9.986738298205872e-06,
      "loss": 0.2137,
      "step": 1800
    },
    {
      "epoch": 0.0525409883890542,
      "grad_norm": 0.863431425020644,
      "learning_rate": 9.986703889859447e-06,
      "loss": 0.1968,
      "step": 1801
    },
    {
      "epoch": 0.05257016161969776,
      "grad_norm": 1.0146475839847757,
      "learning_rate": 9.98666943699292e-06,
      "loss": 0.2019,
      "step": 1802
    },
    {
      "epoch": 0.05259933485034133,
      "grad_norm": 1.018489384083942,
      "learning_rate": 9.9866349396066e-06,
      "loss": 0.2103,
      "step": 1803
    },
    {
      "epoch": 0.05262850808098489,
      "grad_norm": 0.9442883325392579,
      "learning_rate": 9.986600397700792e-06,
      "loss": 0.2222,
      "step": 1804
    },
    {
      "epoch": 0.05265768131162845,
      "grad_norm": 0.8475062797662067,
      "learning_rate": 9.986565811275808e-06,
      "loss": 0.1946,
      "step": 1805
    },
    {
      "epoch": 0.05268685454227201,
      "grad_norm": 0.9145959988323972,
      "learning_rate": 9.986531180331954e-06,
      "loss": 0.2294,
      "step": 1806
    },
    {
      "epoch": 0.05271602777291557,
      "grad_norm": 0.9104170814839893,
      "learning_rate": 9.986496504869539e-06,
      "loss": 0.1987,
      "step": 1807
    },
    {
      "epoch": 0.05274520100355914,
      "grad_norm": 0.9828131742766328,
      "learning_rate": 9.986461784888874e-06,
      "loss": 0.2209,
      "step": 1808
    },
    {
      "epoch": 0.0527743742342027,
      "grad_norm": 1.0438944454902002,
      "learning_rate": 9.98642702039027e-06,
      "loss": 0.2289,
      "step": 1809
    },
    {
      "epoch": 0.05280354746484626,
      "grad_norm": 0.9798356376138588,
      "learning_rate": 9.986392211374036e-06,
      "loss": 0.2171,
      "step": 1810
    },
    {
      "epoch": 0.05283272069548982,
      "grad_norm": 0.9239154507567835,
      "learning_rate": 9.986357357840482e-06,
      "loss": 0.2172,
      "step": 1811
    },
    {
      "epoch": 0.05286189392613338,
      "grad_norm": 0.9252103679074746,
      "learning_rate": 9.986322459789919e-06,
      "loss": 0.1894,
      "step": 1812
    },
    {
      "epoch": 0.05289106715677694,
      "grad_norm": 1.0655456133056938,
      "learning_rate": 9.986287517222659e-06,
      "loss": 0.1848,
      "step": 1813
    },
    {
      "epoch": 0.052920240387420506,
      "grad_norm": 1.0343671450226655,
      "learning_rate": 9.986252530139016e-06,
      "loss": 0.1938,
      "step": 1814
    },
    {
      "epoch": 0.052949413618064066,
      "grad_norm": 1.1083047523984533,
      "learning_rate": 9.9862174985393e-06,
      "loss": 0.2002,
      "step": 1815
    },
    {
      "epoch": 0.052978586848707626,
      "grad_norm": 1.0550743876531532,
      "learning_rate": 9.986182422423825e-06,
      "loss": 0.1892,
      "step": 1816
    },
    {
      "epoch": 0.053007760079351186,
      "grad_norm": 1.0801337910542261,
      "learning_rate": 9.986147301792904e-06,
      "loss": 0.2391,
      "step": 1817
    },
    {
      "epoch": 0.05303693330999475,
      "grad_norm": 1.023483843297668,
      "learning_rate": 9.986112136646849e-06,
      "loss": 0.2415,
      "step": 1818
    },
    {
      "epoch": 0.053066106540638314,
      "grad_norm": 1.0004524211652035,
      "learning_rate": 9.986076926985975e-06,
      "loss": 0.2269,
      "step": 1819
    },
    {
      "epoch": 0.053095279771281874,
      "grad_norm": 1.0348783975430784,
      "learning_rate": 9.986041672810595e-06,
      "loss": 0.2063,
      "step": 1820
    },
    {
      "epoch": 0.053124453001925434,
      "grad_norm": 1.106985517179938,
      "learning_rate": 9.98600637412103e-06,
      "loss": 0.2134,
      "step": 1821
    },
    {
      "epoch": 0.053153626232568994,
      "grad_norm": 0.976414008602237,
      "learning_rate": 9.985971030917586e-06,
      "loss": 0.2133,
      "step": 1822
    },
    {
      "epoch": 0.053182799463212554,
      "grad_norm": 1.011429960505763,
      "learning_rate": 9.985935643200584e-06,
      "loss": 0.2085,
      "step": 1823
    },
    {
      "epoch": 0.053211972693856115,
      "grad_norm": 0.8787937997521821,
      "learning_rate": 9.985900210970339e-06,
      "loss": 0.1981,
      "step": 1824
    },
    {
      "epoch": 0.05324114592449968,
      "grad_norm": 0.9403096401930826,
      "learning_rate": 9.985864734227168e-06,
      "loss": 0.2079,
      "step": 1825
    },
    {
      "epoch": 0.05327031915514324,
      "grad_norm": 0.8821673620918404,
      "learning_rate": 9.985829212971386e-06,
      "loss": 0.2124,
      "step": 1826
    },
    {
      "epoch": 0.0532994923857868,
      "grad_norm": 0.8829264311916839,
      "learning_rate": 9.98579364720331e-06,
      "loss": 0.1871,
      "step": 1827
    },
    {
      "epoch": 0.05332866561643036,
      "grad_norm": 0.9244431599258233,
      "learning_rate": 9.98575803692326e-06,
      "loss": 0.187,
      "step": 1828
    },
    {
      "epoch": 0.05335783884707392,
      "grad_norm": 1.131042655378558,
      "learning_rate": 9.985722382131554e-06,
      "loss": 0.2301,
      "step": 1829
    },
    {
      "epoch": 0.05338701207771749,
      "grad_norm": 1.147087812053146,
      "learning_rate": 9.985686682828506e-06,
      "loss": 0.2075,
      "step": 1830
    },
    {
      "epoch": 0.05341618530836105,
      "grad_norm": 1.2341563445994703,
      "learning_rate": 9.985650939014438e-06,
      "loss": 0.2015,
      "step": 1831
    },
    {
      "epoch": 0.05344535853900461,
      "grad_norm": 0.9085919110664943,
      "learning_rate": 9.98561515068967e-06,
      "loss": 0.2337,
      "step": 1832
    },
    {
      "epoch": 0.05347453176964817,
      "grad_norm": 1.0343138810120596,
      "learning_rate": 9.98557931785452e-06,
      "loss": 0.2218,
      "step": 1833
    },
    {
      "epoch": 0.05350370500029173,
      "grad_norm": 0.9640557208643279,
      "learning_rate": 9.985543440509305e-06,
      "loss": 0.1899,
      "step": 1834
    },
    {
      "epoch": 0.05353287823093529,
      "grad_norm": 0.9833535393271521,
      "learning_rate": 9.985507518654352e-06,
      "loss": 0.1991,
      "step": 1835
    },
    {
      "epoch": 0.05356205146157886,
      "grad_norm": 0.9299675607443383,
      "learning_rate": 9.985471552289976e-06,
      "loss": 0.2165,
      "step": 1836
    },
    {
      "epoch": 0.05359122469222242,
      "grad_norm": 0.9548816897494452,
      "learning_rate": 9.985435541416499e-06,
      "loss": 0.1963,
      "step": 1837
    },
    {
      "epoch": 0.05362039792286598,
      "grad_norm": 1.0268622050061784,
      "learning_rate": 9.985399486034246e-06,
      "loss": 0.2194,
      "step": 1838
    },
    {
      "epoch": 0.05364957115350954,
      "grad_norm": 1.2219135992154067,
      "learning_rate": 9.985363386143537e-06,
      "loss": 0.2047,
      "step": 1839
    },
    {
      "epoch": 0.0536787443841531,
      "grad_norm": 0.8638113387730253,
      "learning_rate": 9.985327241744692e-06,
      "loss": 0.1953,
      "step": 1840
    },
    {
      "epoch": 0.053707917614796666,
      "grad_norm": 0.8408932603755563,
      "learning_rate": 9.985291052838035e-06,
      "loss": 0.2041,
      "step": 1841
    },
    {
      "epoch": 0.053737090845440226,
      "grad_norm": 1.0482465033233304,
      "learning_rate": 9.985254819423891e-06,
      "loss": 0.208,
      "step": 1842
    },
    {
      "epoch": 0.053766264076083786,
      "grad_norm": 0.9725931765566865,
      "learning_rate": 9.985218541502581e-06,
      "loss": 0.2129,
      "step": 1843
    },
    {
      "epoch": 0.053795437306727346,
      "grad_norm": 0.8500853960485782,
      "learning_rate": 9.98518221907443e-06,
      "loss": 0.1917,
      "step": 1844
    },
    {
      "epoch": 0.05382461053737091,
      "grad_norm": 0.9499032113249442,
      "learning_rate": 9.985145852139763e-06,
      "loss": 0.1876,
      "step": 1845
    },
    {
      "epoch": 0.05385378376801447,
      "grad_norm": 1.198222177096465,
      "learning_rate": 9.985109440698903e-06,
      "loss": 0.2062,
      "step": 1846
    },
    {
      "epoch": 0.053882956998658034,
      "grad_norm": 1.1844301846482599,
      "learning_rate": 9.985072984752177e-06,
      "loss": 0.2536,
      "step": 1847
    },
    {
      "epoch": 0.053912130229301594,
      "grad_norm": 0.9866838497921299,
      "learning_rate": 9.985036484299909e-06,
      "loss": 0.21,
      "step": 1848
    },
    {
      "epoch": 0.053941303459945154,
      "grad_norm": 0.8089665687431796,
      "learning_rate": 9.984999939342426e-06,
      "loss": 0.1833,
      "step": 1849
    },
    {
      "epoch": 0.053970476690588715,
      "grad_norm": 1.0039516026030095,
      "learning_rate": 9.984963349880053e-06,
      "loss": 0.2169,
      "step": 1850
    },
    {
      "epoch": 0.053999649921232275,
      "grad_norm": 0.7919274387544472,
      "learning_rate": 9.984926715913115e-06,
      "loss": 0.2059,
      "step": 1851
    },
    {
      "epoch": 0.05402882315187584,
      "grad_norm": 0.9087935115881111,
      "learning_rate": 9.984890037441944e-06,
      "loss": 0.1813,
      "step": 1852
    },
    {
      "epoch": 0.0540579963825194,
      "grad_norm": 0.947217691867987,
      "learning_rate": 9.984853314466865e-06,
      "loss": 0.2081,
      "step": 1853
    },
    {
      "epoch": 0.05408716961316296,
      "grad_norm": 0.9038864990631458,
      "learning_rate": 9.984816546988202e-06,
      "loss": 0.2045,
      "step": 1854
    },
    {
      "epoch": 0.05411634284380652,
      "grad_norm": 1.0652095810031903,
      "learning_rate": 9.984779735006291e-06,
      "loss": 0.2084,
      "step": 1855
    },
    {
      "epoch": 0.05414551607445008,
      "grad_norm": 0.8575839585957018,
      "learning_rate": 9.984742878521456e-06,
      "loss": 0.2194,
      "step": 1856
    },
    {
      "epoch": 0.05417468930509364,
      "grad_norm": 1.177202848254806,
      "learning_rate": 9.984705977534024e-06,
      "loss": 0.233,
      "step": 1857
    },
    {
      "epoch": 0.05420386253573721,
      "grad_norm": 0.9385058659291741,
      "learning_rate": 9.98466903204433e-06,
      "loss": 0.2068,
      "step": 1858
    },
    {
      "epoch": 0.05423303576638077,
      "grad_norm": 0.9191219594294061,
      "learning_rate": 9.984632042052697e-06,
      "loss": 0.1858,
      "step": 1859
    },
    {
      "epoch": 0.05426220899702433,
      "grad_norm": 1.0403206931395317,
      "learning_rate": 9.984595007559463e-06,
      "loss": 0.2072,
      "step": 1860
    },
    {
      "epoch": 0.05429138222766789,
      "grad_norm": 1.03842369920911,
      "learning_rate": 9.984557928564952e-06,
      "loss": 0.1951,
      "step": 1861
    },
    {
      "epoch": 0.05432055545831145,
      "grad_norm": 1.058675781860113,
      "learning_rate": 9.984520805069499e-06,
      "loss": 0.2315,
      "step": 1862
    },
    {
      "epoch": 0.05434972868895502,
      "grad_norm": 0.8732767947654804,
      "learning_rate": 9.984483637073435e-06,
      "loss": 0.2074,
      "step": 1863
    },
    {
      "epoch": 0.05437890191959858,
      "grad_norm": 1.0706967745492497,
      "learning_rate": 9.984446424577089e-06,
      "loss": 0.2123,
      "step": 1864
    },
    {
      "epoch": 0.05440807515024214,
      "grad_norm": 1.030682987315947,
      "learning_rate": 9.984409167580795e-06,
      "loss": 0.2286,
      "step": 1865
    },
    {
      "epoch": 0.0544372483808857,
      "grad_norm": 1.0252946488237773,
      "learning_rate": 9.984371866084888e-06,
      "loss": 0.2053,
      "step": 1866
    },
    {
      "epoch": 0.05446642161152926,
      "grad_norm": 0.9362847529121195,
      "learning_rate": 9.984334520089698e-06,
      "loss": 0.1978,
      "step": 1867
    },
    {
      "epoch": 0.05449559484217282,
      "grad_norm": 1.3522443404635422,
      "learning_rate": 9.984297129595559e-06,
      "loss": 0.2195,
      "step": 1868
    },
    {
      "epoch": 0.054524768072816386,
      "grad_norm": 1.0720617236396515,
      "learning_rate": 9.984259694602805e-06,
      "loss": 0.2041,
      "step": 1869
    },
    {
      "epoch": 0.054553941303459946,
      "grad_norm": 1.0099605376182157,
      "learning_rate": 9.98422221511177e-06,
      "loss": 0.1878,
      "step": 1870
    },
    {
      "epoch": 0.054583114534103506,
      "grad_norm": 0.9798177316702981,
      "learning_rate": 9.984184691122789e-06,
      "loss": 0.208,
      "step": 1871
    },
    {
      "epoch": 0.05461228776474707,
      "grad_norm": 1.0716844630049964,
      "learning_rate": 9.984147122636197e-06,
      "loss": 0.2119,
      "step": 1872
    },
    {
      "epoch": 0.05464146099539063,
      "grad_norm": 0.8567982002026963,
      "learning_rate": 9.98410950965233e-06,
      "loss": 0.1929,
      "step": 1873
    },
    {
      "epoch": 0.054670634226034194,
      "grad_norm": 0.9096358723226816,
      "learning_rate": 9.984071852171522e-06,
      "loss": 0.1963,
      "step": 1874
    },
    {
      "epoch": 0.054699807456677754,
      "grad_norm": 1.0511720503575723,
      "learning_rate": 9.984034150194111e-06,
      "loss": 0.2366,
      "step": 1875
    },
    {
      "epoch": 0.054728980687321314,
      "grad_norm": 0.9256344022278301,
      "learning_rate": 9.983996403720433e-06,
      "loss": 0.1963,
      "step": 1876
    },
    {
      "epoch": 0.054758153917964875,
      "grad_norm": 0.9009449825233125,
      "learning_rate": 9.983958612750823e-06,
      "loss": 0.1908,
      "step": 1877
    },
    {
      "epoch": 0.054787327148608435,
      "grad_norm": 0.8809903046256545,
      "learning_rate": 9.983920777285623e-06,
      "loss": 0.192,
      "step": 1878
    },
    {
      "epoch": 0.054816500379251995,
      "grad_norm": 0.9111034618443862,
      "learning_rate": 9.983882897325168e-06,
      "loss": 0.1809,
      "step": 1879
    },
    {
      "epoch": 0.05484567360989556,
      "grad_norm": 0.9654746262667905,
      "learning_rate": 9.983844972869795e-06,
      "loss": 0.2326,
      "step": 1880
    },
    {
      "epoch": 0.05487484684053912,
      "grad_norm": 1.2852692308548157,
      "learning_rate": 9.983807003919843e-06,
      "loss": 0.2487,
      "step": 1881
    },
    {
      "epoch": 0.05490402007118268,
      "grad_norm": 1.0916802151149374,
      "learning_rate": 9.983768990475653e-06,
      "loss": 0.2199,
      "step": 1882
    },
    {
      "epoch": 0.05493319330182624,
      "grad_norm": 1.436610939999952,
      "learning_rate": 9.983730932537563e-06,
      "loss": 0.2137,
      "step": 1883
    },
    {
      "epoch": 0.0549623665324698,
      "grad_norm": 1.0219366994511527,
      "learning_rate": 9.983692830105914e-06,
      "loss": 0.2131,
      "step": 1884
    },
    {
      "epoch": 0.05499153976311337,
      "grad_norm": 1.1247098446347263,
      "learning_rate": 9.983654683181044e-06,
      "loss": 0.1929,
      "step": 1885
    },
    {
      "epoch": 0.05502071299375693,
      "grad_norm": 1.1384705581143768,
      "learning_rate": 9.983616491763295e-06,
      "loss": 0.2042,
      "step": 1886
    },
    {
      "epoch": 0.05504988622440049,
      "grad_norm": 1.0535108093057595,
      "learning_rate": 9.983578255853005e-06,
      "loss": 0.1956,
      "step": 1887
    },
    {
      "epoch": 0.05507905945504405,
      "grad_norm": 1.0663686940223367,
      "learning_rate": 9.983539975450522e-06,
      "loss": 0.2327,
      "step": 1888
    },
    {
      "epoch": 0.05510823268568761,
      "grad_norm": 1.0476600776117433,
      "learning_rate": 9.983501650556182e-06,
      "loss": 0.2326,
      "step": 1889
    },
    {
      "epoch": 0.05513740591633117,
      "grad_norm": 0.990293055374146,
      "learning_rate": 9.98346328117033e-06,
      "loss": 0.1957,
      "step": 1890
    },
    {
      "epoch": 0.05516657914697474,
      "grad_norm": 0.9188685309373221,
      "learning_rate": 9.983424867293305e-06,
      "loss": 0.2152,
      "step": 1891
    },
    {
      "epoch": 0.0551957523776183,
      "grad_norm": 1.0898344218818747,
      "learning_rate": 9.983386408925454e-06,
      "loss": 0.1892,
      "step": 1892
    },
    {
      "epoch": 0.05522492560826186,
      "grad_norm": 1.0735068947886786,
      "learning_rate": 9.983347906067119e-06,
      "loss": 0.1996,
      "step": 1893
    },
    {
      "epoch": 0.05525409883890542,
      "grad_norm": 1.012606888062534,
      "learning_rate": 9.983309358718642e-06,
      "loss": 0.2004,
      "step": 1894
    },
    {
      "epoch": 0.05528327206954898,
      "grad_norm": 1.1319489368529263,
      "learning_rate": 9.98327076688037e-06,
      "loss": 0.2186,
      "step": 1895
    },
    {
      "epoch": 0.055312445300192546,
      "grad_norm": 1.2018814733508303,
      "learning_rate": 9.983232130552646e-06,
      "loss": 0.1927,
      "step": 1896
    },
    {
      "epoch": 0.055341618530836106,
      "grad_norm": 1.0115475359025725,
      "learning_rate": 9.983193449735817e-06,
      "loss": 0.2108,
      "step": 1897
    },
    {
      "epoch": 0.05537079176147967,
      "grad_norm": 1.0588049836102444,
      "learning_rate": 9.983154724430224e-06,
      "loss": 0.2062,
      "step": 1898
    },
    {
      "epoch": 0.05539996499212323,
      "grad_norm": 0.8387924635726959,
      "learning_rate": 9.983115954636215e-06,
      "loss": 0.1824,
      "step": 1899
    },
    {
      "epoch": 0.05542913822276679,
      "grad_norm": 1.1120552835005346,
      "learning_rate": 9.983077140354138e-06,
      "loss": 0.2061,
      "step": 1900
    },
    {
      "epoch": 0.055458311453410354,
      "grad_norm": 0.8382455374929305,
      "learning_rate": 9.983038281584338e-06,
      "loss": 0.1873,
      "step": 1901
    },
    {
      "epoch": 0.055487484684053914,
      "grad_norm": 1.1235872104607034,
      "learning_rate": 9.98299937832716e-06,
      "loss": 0.195,
      "step": 1902
    },
    {
      "epoch": 0.055516657914697475,
      "grad_norm": 0.8194562048653667,
      "learning_rate": 9.982960430582954e-06,
      "loss": 0.1975,
      "step": 1903
    },
    {
      "epoch": 0.055545831145341035,
      "grad_norm": 0.9193206952649976,
      "learning_rate": 9.982921438352067e-06,
      "loss": 0.2097,
      "step": 1904
    },
    {
      "epoch": 0.055575004375984595,
      "grad_norm": 0.8498663432478195,
      "learning_rate": 9.982882401634846e-06,
      "loss": 0.1953,
      "step": 1905
    },
    {
      "epoch": 0.055604177606628155,
      "grad_norm": 0.861597356877587,
      "learning_rate": 9.98284332043164e-06,
      "loss": 0.1863,
      "step": 1906
    },
    {
      "epoch": 0.05563335083727172,
      "grad_norm": 0.9800120746903566,
      "learning_rate": 9.982804194742801e-06,
      "loss": 0.1998,
      "step": 1907
    },
    {
      "epoch": 0.05566252406791528,
      "grad_norm": 0.7502622720531859,
      "learning_rate": 9.982765024568675e-06,
      "loss": 0.1987,
      "step": 1908
    },
    {
      "epoch": 0.05569169729855884,
      "grad_norm": 1.000226061863958,
      "learning_rate": 9.982725809909611e-06,
      "loss": 0.2327,
      "step": 1909
    },
    {
      "epoch": 0.0557208705292024,
      "grad_norm": 1.0521598794386595,
      "learning_rate": 9.98268655076596e-06,
      "loss": 0.2027,
      "step": 1910
    },
    {
      "epoch": 0.05575004375984596,
      "grad_norm": 0.8616711043293097,
      "learning_rate": 9.982647247138075e-06,
      "loss": 0.2032,
      "step": 1911
    },
    {
      "epoch": 0.05577921699048953,
      "grad_norm": 0.9758956868989882,
      "learning_rate": 9.982607899026302e-06,
      "loss": 0.1951,
      "step": 1912
    },
    {
      "epoch": 0.05580839022113309,
      "grad_norm": 0.7968087778332504,
      "learning_rate": 9.982568506430998e-06,
      "loss": 0.1932,
      "step": 1913
    },
    {
      "epoch": 0.05583756345177665,
      "grad_norm": 1.010309091247346,
      "learning_rate": 9.982529069352509e-06,
      "loss": 0.2081,
      "step": 1914
    },
    {
      "epoch": 0.05586673668242021,
      "grad_norm": 1.0235427591374133,
      "learning_rate": 9.982489587791192e-06,
      "loss": 0.1811,
      "step": 1915
    },
    {
      "epoch": 0.05589590991306377,
      "grad_norm": 0.8834228764818954,
      "learning_rate": 9.982450061747397e-06,
      "loss": 0.1919,
      "step": 1916
    },
    {
      "epoch": 0.05592508314370733,
      "grad_norm": 1.1209481285353378,
      "learning_rate": 9.982410491221477e-06,
      "loss": 0.1993,
      "step": 1917
    },
    {
      "epoch": 0.0559542563743509,
      "grad_norm": 1.0146252281617338,
      "learning_rate": 9.982370876213785e-06,
      "loss": 0.2142,
      "step": 1918
    },
    {
      "epoch": 0.05598342960499446,
      "grad_norm": 1.0691181834273153,
      "learning_rate": 9.982331216724676e-06,
      "loss": 0.1805,
      "step": 1919
    },
    {
      "epoch": 0.05601260283563802,
      "grad_norm": 1.2740121476083013,
      "learning_rate": 9.982291512754503e-06,
      "loss": 0.2071,
      "step": 1920
    },
    {
      "epoch": 0.05604177606628158,
      "grad_norm": 0.9348187850907693,
      "learning_rate": 9.98225176430362e-06,
      "loss": 0.209,
      "step": 1921
    },
    {
      "epoch": 0.05607094929692514,
      "grad_norm": 1.1066368739997507,
      "learning_rate": 9.982211971372384e-06,
      "loss": 0.1875,
      "step": 1922
    },
    {
      "epoch": 0.056100122527568706,
      "grad_norm": 0.9388244848923358,
      "learning_rate": 9.982172133961148e-06,
      "loss": 0.1922,
      "step": 1923
    },
    {
      "epoch": 0.056129295758212266,
      "grad_norm": 1.34356212438608,
      "learning_rate": 9.982132252070271e-06,
      "loss": 0.2184,
      "step": 1924
    },
    {
      "epoch": 0.05615846898885583,
      "grad_norm": 1.015099583720631,
      "learning_rate": 9.982092325700103e-06,
      "loss": 0.2274,
      "step": 1925
    },
    {
      "epoch": 0.05618764221949939,
      "grad_norm": 0.9407140103948982,
      "learning_rate": 9.982052354851007e-06,
      "loss": 0.1928,
      "step": 1926
    },
    {
      "epoch": 0.05621681545014295,
      "grad_norm": 1.2565158444534883,
      "learning_rate": 9.982012339523335e-06,
      "loss": 0.2387,
      "step": 1927
    },
    {
      "epoch": 0.05624598868078651,
      "grad_norm": 0.9817298152237669,
      "learning_rate": 9.981972279717446e-06,
      "loss": 0.2045,
      "step": 1928
    },
    {
      "epoch": 0.056275161911430074,
      "grad_norm": 1.0505208017484677,
      "learning_rate": 9.981932175433697e-06,
      "loss": 0.1913,
      "step": 1929
    },
    {
      "epoch": 0.056304335142073635,
      "grad_norm": 1.011894686119928,
      "learning_rate": 9.981892026672449e-06,
      "loss": 0.2249,
      "step": 1930
    },
    {
      "epoch": 0.056333508372717195,
      "grad_norm": 0.9690974288239764,
      "learning_rate": 9.981851833434058e-06,
      "loss": 0.1974,
      "step": 1931
    },
    {
      "epoch": 0.056362681603360755,
      "grad_norm": 1.0278293284259004,
      "learning_rate": 9.981811595718882e-06,
      "loss": 0.2146,
      "step": 1932
    },
    {
      "epoch": 0.056391854834004315,
      "grad_norm": 0.8300068746950454,
      "learning_rate": 9.981771313527283e-06,
      "loss": 0.1974,
      "step": 1933
    },
    {
      "epoch": 0.05642102806464788,
      "grad_norm": 1.04244037260496,
      "learning_rate": 9.981730986859617e-06,
      "loss": 0.208,
      "step": 1934
    },
    {
      "epoch": 0.05645020129529144,
      "grad_norm": 0.8649873612347396,
      "learning_rate": 9.981690615716246e-06,
      "loss": 0.2176,
      "step": 1935
    },
    {
      "epoch": 0.056479374525935,
      "grad_norm": 1.0223505095805647,
      "learning_rate": 9.98165020009753e-06,
      "loss": 0.1971,
      "step": 1936
    },
    {
      "epoch": 0.05650854775657856,
      "grad_norm": 0.9045741861785711,
      "learning_rate": 9.981609740003833e-06,
      "loss": 0.1864,
      "step": 1937
    },
    {
      "epoch": 0.05653772098722212,
      "grad_norm": 1.0648583047941091,
      "learning_rate": 9.981569235435511e-06,
      "loss": 0.227,
      "step": 1938
    },
    {
      "epoch": 0.05656689421786568,
      "grad_norm": 0.9632099651165861,
      "learning_rate": 9.98152868639293e-06,
      "loss": 0.2101,
      "step": 1939
    },
    {
      "epoch": 0.05659606744850925,
      "grad_norm": 1.0210597468742788,
      "learning_rate": 9.981488092876448e-06,
      "loss": 0.2127,
      "step": 1940
    },
    {
      "epoch": 0.05662524067915281,
      "grad_norm": 1.0617786672123894,
      "learning_rate": 9.981447454886431e-06,
      "loss": 0.2293,
      "step": 1941
    },
    {
      "epoch": 0.05665441390979637,
      "grad_norm": 1.1119559740593121,
      "learning_rate": 9.981406772423238e-06,
      "loss": 0.2131,
      "step": 1942
    },
    {
      "epoch": 0.05668358714043993,
      "grad_norm": 0.9274663281988038,
      "learning_rate": 9.981366045487237e-06,
      "loss": 0.2134,
      "step": 1943
    },
    {
      "epoch": 0.05671276037108349,
      "grad_norm": 0.8160314094716041,
      "learning_rate": 9.981325274078788e-06,
      "loss": 0.1877,
      "step": 1944
    },
    {
      "epoch": 0.05674193360172706,
      "grad_norm": 0.9385816920754576,
      "learning_rate": 9.981284458198256e-06,
      "loss": 0.2127,
      "step": 1945
    },
    {
      "epoch": 0.05677110683237062,
      "grad_norm": 1.0282197362729375,
      "learning_rate": 9.981243597846006e-06,
      "loss": 0.2109,
      "step": 1946
    },
    {
      "epoch": 0.05680028006301418,
      "grad_norm": 0.8829812410880271,
      "learning_rate": 9.981202693022402e-06,
      "loss": 0.1819,
      "step": 1947
    },
    {
      "epoch": 0.05682945329365774,
      "grad_norm": 0.8247331199287998,
      "learning_rate": 9.98116174372781e-06,
      "loss": 0.1824,
      "step": 1948
    },
    {
      "epoch": 0.0568586265243013,
      "grad_norm": 1.0800299153360786,
      "learning_rate": 9.981120749962595e-06,
      "loss": 0.2071,
      "step": 1949
    },
    {
      "epoch": 0.05688779975494486,
      "grad_norm": 1.1559209610798193,
      "learning_rate": 9.981079711727123e-06,
      "loss": 0.2372,
      "step": 1950
    },
    {
      "epoch": 0.05691697298558843,
      "grad_norm": 1.0102993867748309,
      "learning_rate": 9.98103862902176e-06,
      "loss": 0.218,
      "step": 1951
    },
    {
      "epoch": 0.05694614621623199,
      "grad_norm": 1.0342717337758949,
      "learning_rate": 9.980997501846874e-06,
      "loss": 0.2478,
      "step": 1952
    },
    {
      "epoch": 0.05697531944687555,
      "grad_norm": 1.2275850937294275,
      "learning_rate": 9.98095633020283e-06,
      "loss": 0.2119,
      "step": 1953
    },
    {
      "epoch": 0.05700449267751911,
      "grad_norm": 1.0425157057129126,
      "learning_rate": 9.980915114089997e-06,
      "loss": 0.2492,
      "step": 1954
    },
    {
      "epoch": 0.05703366590816267,
      "grad_norm": 0.9505753262803261,
      "learning_rate": 9.980873853508744e-06,
      "loss": 0.1798,
      "step": 1955
    },
    {
      "epoch": 0.057062839138806234,
      "grad_norm": 0.8890408882605577,
      "learning_rate": 9.980832548459438e-06,
      "loss": 0.1912,
      "step": 1956
    },
    {
      "epoch": 0.057092012369449795,
      "grad_norm": 1.1234316354262521,
      "learning_rate": 9.980791198942449e-06,
      "loss": 0.2051,
      "step": 1957
    },
    {
      "epoch": 0.057121185600093355,
      "grad_norm": 0.8539045011047467,
      "learning_rate": 9.980749804958142e-06,
      "loss": 0.196,
      "step": 1958
    },
    {
      "epoch": 0.057150358830736915,
      "grad_norm": 1.0449744737509585,
      "learning_rate": 9.980708366506892e-06,
      "loss": 0.2136,
      "step": 1959
    },
    {
      "epoch": 0.057179532061380475,
      "grad_norm": 0.9131467350350049,
      "learning_rate": 9.980666883589066e-06,
      "loss": 0.2092,
      "step": 1960
    },
    {
      "epoch": 0.057208705292024035,
      "grad_norm": 1.0036185750520146,
      "learning_rate": 9.980625356205036e-06,
      "loss": 0.2002,
      "step": 1961
    },
    {
      "epoch": 0.0572378785226676,
      "grad_norm": 0.7412115243381714,
      "learning_rate": 9.980583784355171e-06,
      "loss": 0.1757,
      "step": 1962
    },
    {
      "epoch": 0.05726705175331116,
      "grad_norm": 1.0872784702101763,
      "learning_rate": 9.980542168039843e-06,
      "loss": 0.2324,
      "step": 1963
    },
    {
      "epoch": 0.05729622498395472,
      "grad_norm": 1.002135450712531,
      "learning_rate": 9.980500507259423e-06,
      "loss": 0.2205,
      "step": 1964
    },
    {
      "epoch": 0.05732539821459828,
      "grad_norm": 0.8774046598244668,
      "learning_rate": 9.980458802014285e-06,
      "loss": 0.1949,
      "step": 1965
    },
    {
      "epoch": 0.05735457144524184,
      "grad_norm": 0.9070400059442462,
      "learning_rate": 9.980417052304798e-06,
      "loss": 0.1959,
      "step": 1966
    },
    {
      "epoch": 0.05738374467588541,
      "grad_norm": 0.9288842557182622,
      "learning_rate": 9.98037525813134e-06,
      "loss": 0.2181,
      "step": 1967
    },
    {
      "epoch": 0.05741291790652897,
      "grad_norm": 0.9941150963868122,
      "learning_rate": 9.980333419494275e-06,
      "loss": 0.1934,
      "step": 1968
    },
    {
      "epoch": 0.05744209113717253,
      "grad_norm": 0.9205959032965471,
      "learning_rate": 9.980291536393985e-06,
      "loss": 0.1963,
      "step": 1969
    },
    {
      "epoch": 0.05747126436781609,
      "grad_norm": 0.9087053739773382,
      "learning_rate": 9.980249608830842e-06,
      "loss": 0.1938,
      "step": 1970
    },
    {
      "epoch": 0.05750043759845965,
      "grad_norm": 1.0001762011083941,
      "learning_rate": 9.980207636805218e-06,
      "loss": 0.2065,
      "step": 1971
    },
    {
      "epoch": 0.05752961082910321,
      "grad_norm": 0.9188547817314544,
      "learning_rate": 9.98016562031749e-06,
      "loss": 0.1981,
      "step": 1972
    },
    {
      "epoch": 0.05755878405974678,
      "grad_norm": 0.9671072125018545,
      "learning_rate": 9.980123559368032e-06,
      "loss": 0.2117,
      "step": 1973
    },
    {
      "epoch": 0.05758795729039034,
      "grad_norm": 0.9290917183858046,
      "learning_rate": 9.980081453957219e-06,
      "loss": 0.2168,
      "step": 1974
    },
    {
      "epoch": 0.0576171305210339,
      "grad_norm": 1.046498723225571,
      "learning_rate": 9.980039304085429e-06,
      "loss": 0.2031,
      "step": 1975
    },
    {
      "epoch": 0.05764630375167746,
      "grad_norm": 0.8340365718130629,
      "learning_rate": 9.979997109753035e-06,
      "loss": 0.1991,
      "step": 1976
    },
    {
      "epoch": 0.05767547698232102,
      "grad_norm": 0.942880536303,
      "learning_rate": 9.979954870960417e-06,
      "loss": 0.2031,
      "step": 1977
    },
    {
      "epoch": 0.05770465021296459,
      "grad_norm": 0.9021832572875198,
      "learning_rate": 9.97991258770795e-06,
      "loss": 0.1885,
      "step": 1978
    },
    {
      "epoch": 0.05773382344360815,
      "grad_norm": 1.0183555583589448,
      "learning_rate": 9.979870259996013e-06,
      "loss": 0.2325,
      "step": 1979
    },
    {
      "epoch": 0.05776299667425171,
      "grad_norm": 0.9307096496027774,
      "learning_rate": 9.979827887824983e-06,
      "loss": 0.2026,
      "step": 1980
    },
    {
      "epoch": 0.05779216990489527,
      "grad_norm": 0.891606564374665,
      "learning_rate": 9.979785471195238e-06,
      "loss": 0.1961,
      "step": 1981
    },
    {
      "epoch": 0.05782134313553883,
      "grad_norm": 0.8284938160218519,
      "learning_rate": 9.979743010107158e-06,
      "loss": 0.2038,
      "step": 1982
    },
    {
      "epoch": 0.05785051636618239,
      "grad_norm": 0.8671698559128383,
      "learning_rate": 9.979700504561118e-06,
      "loss": 0.2196,
      "step": 1983
    },
    {
      "epoch": 0.057879689596825955,
      "grad_norm": 0.795225896992179,
      "learning_rate": 9.979657954557504e-06,
      "loss": 0.2025,
      "step": 1984
    },
    {
      "epoch": 0.057908862827469515,
      "grad_norm": 0.8283274741978116,
      "learning_rate": 9.97961536009669e-06,
      "loss": 0.2174,
      "step": 1985
    },
    {
      "epoch": 0.057938036058113075,
      "grad_norm": 0.9335261797634911,
      "learning_rate": 9.97957272117906e-06,
      "loss": 0.2038,
      "step": 1986
    },
    {
      "epoch": 0.057967209288756635,
      "grad_norm": 0.7981795817685567,
      "learning_rate": 9.979530037804995e-06,
      "loss": 0.1917,
      "step": 1987
    },
    {
      "epoch": 0.057996382519400196,
      "grad_norm": 0.8962893771671759,
      "learning_rate": 9.979487309974874e-06,
      "loss": 0.2258,
      "step": 1988
    },
    {
      "epoch": 0.05802555575004376,
      "grad_norm": 0.8931335398606022,
      "learning_rate": 9.979444537689078e-06,
      "loss": 0.2107,
      "step": 1989
    },
    {
      "epoch": 0.05805472898068732,
      "grad_norm": 0.9052813707407674,
      "learning_rate": 9.979401720947989e-06,
      "loss": 0.2215,
      "step": 1990
    },
    {
      "epoch": 0.05808390221133088,
      "grad_norm": 0.91560999339626,
      "learning_rate": 9.979358859751994e-06,
      "loss": 0.2007,
      "step": 1991
    },
    {
      "epoch": 0.05811307544197444,
      "grad_norm": 0.9598330084334362,
      "learning_rate": 9.979315954101466e-06,
      "loss": 0.2168,
      "step": 1992
    },
    {
      "epoch": 0.058142248672618004,
      "grad_norm": 1.0927238732461215,
      "learning_rate": 9.979273003996798e-06,
      "loss": 0.2151,
      "step": 1993
    },
    {
      "epoch": 0.05817142190326157,
      "grad_norm": 1.4720458308251883,
      "learning_rate": 9.979230009438368e-06,
      "loss": 0.1992,
      "step": 1994
    },
    {
      "epoch": 0.05820059513390513,
      "grad_norm": 1.2242446883341342,
      "learning_rate": 9.979186970426562e-06,
      "loss": 0.2135,
      "step": 1995
    },
    {
      "epoch": 0.05822976836454869,
      "grad_norm": 1.0233646807064443,
      "learning_rate": 9.979143886961762e-06,
      "loss": 0.2088,
      "step": 1996
    },
    {
      "epoch": 0.05825894159519225,
      "grad_norm": 0.8835682418967595,
      "learning_rate": 9.979100759044355e-06,
      "loss": 0.2071,
      "step": 1997
    },
    {
      "epoch": 0.05828811482583581,
      "grad_norm": 1.2947625093001198,
      "learning_rate": 9.979057586674724e-06,
      "loss": 0.2396,
      "step": 1998
    },
    {
      "epoch": 0.05831728805647937,
      "grad_norm": 1.1049690907290772,
      "learning_rate": 9.979014369853257e-06,
      "loss": 0.2177,
      "step": 1999
    },
    {
      "epoch": 0.05834646128712294,
      "grad_norm": 0.9824809590555131,
      "learning_rate": 9.978971108580336e-06,
      "loss": 0.2488,
      "step": 2000
    },
    {
      "epoch": 0.0583756345177665,
      "grad_norm": 0.870881960356909,
      "learning_rate": 9.978927802856351e-06,
      "loss": 0.1933,
      "step": 2001
    },
    {
      "epoch": 0.05840480774841006,
      "grad_norm": 1.0664186432210303,
      "learning_rate": 9.978884452681688e-06,
      "loss": 0.216,
      "step": 2002
    },
    {
      "epoch": 0.05843398097905362,
      "grad_norm": 0.9522652056931812,
      "learning_rate": 9.978841058056731e-06,
      "loss": 0.2212,
      "step": 2003
    },
    {
      "epoch": 0.05846315420969718,
      "grad_norm": 0.9962790246780536,
      "learning_rate": 9.978797618981871e-06,
      "loss": 0.2167,
      "step": 2004
    },
    {
      "epoch": 0.05849232744034075,
      "grad_norm": 0.8910113731013437,
      "learning_rate": 9.978754135457495e-06,
      "loss": 0.2063,
      "step": 2005
    },
    {
      "epoch": 0.05852150067098431,
      "grad_norm": 1.054217756380746,
      "learning_rate": 9.97871060748399e-06,
      "loss": 0.2163,
      "step": 2006
    },
    {
      "epoch": 0.05855067390162787,
      "grad_norm": 0.9946047777949233,
      "learning_rate": 9.978667035061744e-06,
      "loss": 0.1969,
      "step": 2007
    },
    {
      "epoch": 0.05857984713227143,
      "grad_norm": 1.0504870261393238,
      "learning_rate": 9.97862341819115e-06,
      "loss": 0.2329,
      "step": 2008
    },
    {
      "epoch": 0.05860902036291499,
      "grad_norm": 1.0364764713063785,
      "learning_rate": 9.978579756872592e-06,
      "loss": 0.2247,
      "step": 2009
    },
    {
      "epoch": 0.05863819359355855,
      "grad_norm": 1.1422669472235525,
      "learning_rate": 9.978536051106463e-06,
      "loss": 0.2159,
      "step": 2010
    },
    {
      "epoch": 0.058667366824202115,
      "grad_norm": 1.0130676558291927,
      "learning_rate": 9.978492300893153e-06,
      "loss": 0.2107,
      "step": 2011
    },
    {
      "epoch": 0.058696540054845675,
      "grad_norm": 1.0082381433490109,
      "learning_rate": 9.978448506233051e-06,
      "loss": 0.1724,
      "step": 2012
    },
    {
      "epoch": 0.058725713285489235,
      "grad_norm": 1.2784693655345292,
      "learning_rate": 9.978404667126551e-06,
      "loss": 0.2165,
      "step": 2013
    },
    {
      "epoch": 0.058754886516132795,
      "grad_norm": 1.0081354924071733,
      "learning_rate": 9.978360783574042e-06,
      "loss": 0.2168,
      "step": 2014
    },
    {
      "epoch": 0.058784059746776356,
      "grad_norm": 1.1363437190587646,
      "learning_rate": 9.978316855575916e-06,
      "loss": 0.2045,
      "step": 2015
    },
    {
      "epoch": 0.05881323297741992,
      "grad_norm": 1.0779278282124554,
      "learning_rate": 9.978272883132566e-06,
      "loss": 0.1864,
      "step": 2016
    },
    {
      "epoch": 0.05884240620806348,
      "grad_norm": 1.0006301243041265,
      "learning_rate": 9.978228866244383e-06,
      "loss": 0.2105,
      "step": 2017
    },
    {
      "epoch": 0.05887157943870704,
      "grad_norm": 0.8534965953262954,
      "learning_rate": 9.97818480491176e-06,
      "loss": 0.1791,
      "step": 2018
    },
    {
      "epoch": 0.0589007526693506,
      "grad_norm": 1.1380684561062862,
      "learning_rate": 9.978140699135096e-06,
      "loss": 0.1959,
      "step": 2019
    },
    {
      "epoch": 0.058929925899994164,
      "grad_norm": 0.9092644734509622,
      "learning_rate": 9.978096548914778e-06,
      "loss": 0.1901,
      "step": 2020
    },
    {
      "epoch": 0.058959099130637724,
      "grad_norm": 1.029610767674436,
      "learning_rate": 9.9780523542512e-06,
      "loss": 0.232,
      "step": 2021
    },
    {
      "epoch": 0.05898827236128129,
      "grad_norm": 0.9509463445738663,
      "learning_rate": 9.978008115144761e-06,
      "loss": 0.2083,
      "step": 2022
    },
    {
      "epoch": 0.05901744559192485,
      "grad_norm": 1.005289032997692,
      "learning_rate": 9.977963831595854e-06,
      "loss": 0.2071,
      "step": 2023
    },
    {
      "epoch": 0.05904661882256841,
      "grad_norm": 0.8294734518263973,
      "learning_rate": 9.977919503604874e-06,
      "loss": 0.207,
      "step": 2024
    },
    {
      "epoch": 0.05907579205321197,
      "grad_norm": 0.9621392304939402,
      "learning_rate": 9.977875131172217e-06,
      "loss": 0.2089,
      "step": 2025
    },
    {
      "epoch": 0.05910496528385553,
      "grad_norm": 0.8859118314173687,
      "learning_rate": 9.97783071429828e-06,
      "loss": 0.1857,
      "step": 2026
    },
    {
      "epoch": 0.0591341385144991,
      "grad_norm": 0.8735600781913359,
      "learning_rate": 9.977786252983457e-06,
      "loss": 0.2273,
      "step": 2027
    },
    {
      "epoch": 0.05916331174514266,
      "grad_norm": 0.9573591880266328,
      "learning_rate": 9.977741747228148e-06,
      "loss": 0.2202,
      "step": 2028
    },
    {
      "epoch": 0.05919248497578622,
      "grad_norm": 0.7455728674735858,
      "learning_rate": 9.977697197032748e-06,
      "loss": 0.2016,
      "step": 2029
    },
    {
      "epoch": 0.05922165820642978,
      "grad_norm": 0.8173184985776126,
      "learning_rate": 9.977652602397657e-06,
      "loss": 0.1896,
      "step": 2030
    },
    {
      "epoch": 0.05925083143707334,
      "grad_norm": 1.0818039921066034,
      "learning_rate": 9.977607963323271e-06,
      "loss": 0.1841,
      "step": 2031
    },
    {
      "epoch": 0.0592800046677169,
      "grad_norm": 1.0360598980075821,
      "learning_rate": 9.977563279809988e-06,
      "loss": 0.199,
      "step": 2032
    },
    {
      "epoch": 0.05930917789836047,
      "grad_norm": 0.8977184522135336,
      "learning_rate": 9.97751855185821e-06,
      "loss": 0.2106,
      "step": 2033
    },
    {
      "epoch": 0.05933835112900403,
      "grad_norm": 1.03659852654072,
      "learning_rate": 9.977473779468334e-06,
      "loss": 0.2273,
      "step": 2034
    },
    {
      "epoch": 0.05936752435964759,
      "grad_norm": 1.080838841640869,
      "learning_rate": 9.977428962640761e-06,
      "loss": 0.1978,
      "step": 2035
    },
    {
      "epoch": 0.05939669759029115,
      "grad_norm": 0.9819375788267134,
      "learning_rate": 9.977384101375888e-06,
      "loss": 0.2239,
      "step": 2036
    },
    {
      "epoch": 0.05942587082093471,
      "grad_norm": 0.7784810299083599,
      "learning_rate": 9.97733919567412e-06,
      "loss": 0.1818,
      "step": 2037
    },
    {
      "epoch": 0.059455044051578275,
      "grad_norm": 1.072008696160696,
      "learning_rate": 9.977294245535856e-06,
      "loss": 0.2055,
      "step": 2038
    },
    {
      "epoch": 0.059484217282221835,
      "grad_norm": 1.0240489608288772,
      "learning_rate": 9.977249250961499e-06,
      "loss": 0.1994,
      "step": 2039
    },
    {
      "epoch": 0.059513390512865395,
      "grad_norm": 0.8617751983860222,
      "learning_rate": 9.977204211951446e-06,
      "loss": 0.2183,
      "step": 2040
    },
    {
      "epoch": 0.059542563743508956,
      "grad_norm": 0.8679195463876015,
      "learning_rate": 9.977159128506102e-06,
      "loss": 0.2032,
      "step": 2041
    },
    {
      "epoch": 0.059571736974152516,
      "grad_norm": 1.0049132936024472,
      "learning_rate": 9.97711400062587e-06,
      "loss": 0.1828,
      "step": 2042
    },
    {
      "epoch": 0.059600910204796076,
      "grad_norm": 1.000480612387385,
      "learning_rate": 9.977068828311153e-06,
      "loss": 0.1925,
      "step": 2043
    },
    {
      "epoch": 0.05963008343543964,
      "grad_norm": 1.046292525768253,
      "learning_rate": 9.977023611562353e-06,
      "loss": 0.2182,
      "step": 2044
    },
    {
      "epoch": 0.0596592566660832,
      "grad_norm": 1.143336121358048,
      "learning_rate": 9.976978350379874e-06,
      "loss": 0.2134,
      "step": 2045
    },
    {
      "epoch": 0.05968842989672676,
      "grad_norm": 1.1360663591828706,
      "learning_rate": 9.97693304476412e-06,
      "loss": 0.1778,
      "step": 2046
    },
    {
      "epoch": 0.059717603127370324,
      "grad_norm": 0.936331408856632,
      "learning_rate": 9.976887694715499e-06,
      "loss": 0.1715,
      "step": 2047
    },
    {
      "epoch": 0.059746776358013884,
      "grad_norm": 0.8936052799897413,
      "learning_rate": 9.976842300234408e-06,
      "loss": 0.2057,
      "step": 2048
    },
    {
      "epoch": 0.05977594958865745,
      "grad_norm": 1.1175970002868079,
      "learning_rate": 9.976796861321261e-06,
      "loss": 0.2327,
      "step": 2049
    },
    {
      "epoch": 0.05980512281930101,
      "grad_norm": 1.231150848434283,
      "learning_rate": 9.976751377976457e-06,
      "loss": 0.2057,
      "step": 2050
    },
    {
      "epoch": 0.05983429604994457,
      "grad_norm": 1.2699398517881084,
      "learning_rate": 9.976705850200406e-06,
      "loss": 0.2203,
      "step": 2051
    },
    {
      "epoch": 0.05986346928058813,
      "grad_norm": 1.0646876206759452,
      "learning_rate": 9.976660277993512e-06,
      "loss": 0.1984,
      "step": 2052
    },
    {
      "epoch": 0.05989264251123169,
      "grad_norm": 0.8956860862984926,
      "learning_rate": 9.976614661356185e-06,
      "loss": 0.2195,
      "step": 2053
    },
    {
      "epoch": 0.05992181574187525,
      "grad_norm": 0.8997181148171646,
      "learning_rate": 9.976569000288829e-06,
      "loss": 0.1961,
      "step": 2054
    },
    {
      "epoch": 0.05995098897251882,
      "grad_norm": 1.2379573760388698,
      "learning_rate": 9.976523294791853e-06,
      "loss": 0.2122,
      "step": 2055
    },
    {
      "epoch": 0.05998016220316238,
      "grad_norm": 0.9920211167696346,
      "learning_rate": 9.976477544865665e-06,
      "loss": 0.2087,
      "step": 2056
    },
    {
      "epoch": 0.06000933543380594,
      "grad_norm": 1.0675356660624904,
      "learning_rate": 9.976431750510676e-06,
      "loss": 0.1898,
      "step": 2057
    },
    {
      "epoch": 0.0600385086644495,
      "grad_norm": 1.0178051082291153,
      "learning_rate": 9.976385911727288e-06,
      "loss": 0.1789,
      "step": 2058
    },
    {
      "epoch": 0.06006768189509306,
      "grad_norm": 1.125079253882043,
      "learning_rate": 9.976340028515919e-06,
      "loss": 0.2228,
      "step": 2059
    },
    {
      "epoch": 0.06009685512573663,
      "grad_norm": 1.082058563289299,
      "learning_rate": 9.97629410087697e-06,
      "loss": 0.2186,
      "step": 2060
    },
    {
      "epoch": 0.06012602835638019,
      "grad_norm": 1.0186351006469425,
      "learning_rate": 9.976248128810857e-06,
      "loss": 0.2472,
      "step": 2061
    },
    {
      "epoch": 0.06015520158702375,
      "grad_norm": 0.831530272410781,
      "learning_rate": 9.97620211231799e-06,
      "loss": 0.194,
      "step": 2062
    },
    {
      "epoch": 0.06018437481766731,
      "grad_norm": 1.1083999837317409,
      "learning_rate": 9.976156051398777e-06,
      "loss": 0.184,
      "step": 2063
    },
    {
      "epoch": 0.06021354804831087,
      "grad_norm": 1.3425873591207818,
      "learning_rate": 9.97610994605363e-06,
      "loss": 0.2222,
      "step": 2064
    },
    {
      "epoch": 0.06024272127895443,
      "grad_norm": 1.0101926257248675,
      "learning_rate": 9.976063796282963e-06,
      "loss": 0.1892,
      "step": 2065
    },
    {
      "epoch": 0.060271894509597995,
      "grad_norm": 0.9631860566449668,
      "learning_rate": 9.976017602087184e-06,
      "loss": 0.1887,
      "step": 2066
    },
    {
      "epoch": 0.060301067740241555,
      "grad_norm": 1.1055800441615407,
      "learning_rate": 9.97597136346671e-06,
      "loss": 0.2246,
      "step": 2067
    },
    {
      "epoch": 0.060330240970885116,
      "grad_norm": 0.8726134047829306,
      "learning_rate": 9.97592508042195e-06,
      "loss": 0.2035,
      "step": 2068
    },
    {
      "epoch": 0.060359414201528676,
      "grad_norm": 0.934936738990123,
      "learning_rate": 9.97587875295332e-06,
      "loss": 0.2137,
      "step": 2069
    },
    {
      "epoch": 0.060388587432172236,
      "grad_norm": 0.9841407267817264,
      "learning_rate": 9.975832381061232e-06,
      "loss": 0.2112,
      "step": 2070
    },
    {
      "epoch": 0.0604177606628158,
      "grad_norm": 0.9152843445444,
      "learning_rate": 9.9757859647461e-06,
      "loss": 0.2042,
      "step": 2071
    },
    {
      "epoch": 0.06044693389345936,
      "grad_norm": 1.0854987493128536,
      "learning_rate": 9.975739504008338e-06,
      "loss": 0.2077,
      "step": 2072
    },
    {
      "epoch": 0.060476107124102924,
      "grad_norm": 0.8586813028540773,
      "learning_rate": 9.975692998848363e-06,
      "loss": 0.1957,
      "step": 2073
    },
    {
      "epoch": 0.060505280354746484,
      "grad_norm": 0.9470440215588506,
      "learning_rate": 9.975646449266588e-06,
      "loss": 0.216,
      "step": 2074
    },
    {
      "epoch": 0.060534453585390044,
      "grad_norm": 0.9768831241799613,
      "learning_rate": 9.97559985526343e-06,
      "loss": 0.2233,
      "step": 2075
    },
    {
      "epoch": 0.060563626816033604,
      "grad_norm": 0.763257404956762,
      "learning_rate": 9.975553216839302e-06,
      "loss": 0.1833,
      "step": 2076
    },
    {
      "epoch": 0.06059280004667717,
      "grad_norm": 0.8869851474454065,
      "learning_rate": 9.975506533994625e-06,
      "loss": 0.2049,
      "step": 2077
    },
    {
      "epoch": 0.06062197327732073,
      "grad_norm": 1.0869497748315717,
      "learning_rate": 9.975459806729813e-06,
      "loss": 0.2135,
      "step": 2078
    },
    {
      "epoch": 0.06065114650796429,
      "grad_norm": 1.1255167613899464,
      "learning_rate": 9.975413035045283e-06,
      "loss": 0.1918,
      "step": 2079
    },
    {
      "epoch": 0.06068031973860785,
      "grad_norm": 0.9134421328922334,
      "learning_rate": 9.975366218941452e-06,
      "loss": 0.2113,
      "step": 2080
    },
    {
      "epoch": 0.06070949296925141,
      "grad_norm": 0.8104014621563806,
      "learning_rate": 9.975319358418742e-06,
      "loss": 0.2129,
      "step": 2081
    },
    {
      "epoch": 0.06073866619989498,
      "grad_norm": 1.000356659589392,
      "learning_rate": 9.975272453477566e-06,
      "loss": 0.2514,
      "step": 2082
    },
    {
      "epoch": 0.06076783943053854,
      "grad_norm": 0.9025457848077182,
      "learning_rate": 9.975225504118346e-06,
      "loss": 0.2025,
      "step": 2083
    },
    {
      "epoch": 0.0607970126611821,
      "grad_norm": 0.9106535213473448,
      "learning_rate": 9.975178510341502e-06,
      "loss": 0.2402,
      "step": 2084
    },
    {
      "epoch": 0.06082618589182566,
      "grad_norm": 0.9690881799970482,
      "learning_rate": 9.97513147214745e-06,
      "loss": 0.222,
      "step": 2085
    },
    {
      "epoch": 0.06085535912246922,
      "grad_norm": 1.1146932688117845,
      "learning_rate": 9.975084389536612e-06,
      "loss": 0.1889,
      "step": 2086
    },
    {
      "epoch": 0.06088453235311279,
      "grad_norm": 1.213574459301282,
      "learning_rate": 9.975037262509408e-06,
      "loss": 0.1824,
      "step": 2087
    },
    {
      "epoch": 0.06091370558375635,
      "grad_norm": 1.068821129367693,
      "learning_rate": 9.974990091066258e-06,
      "loss": 0.2122,
      "step": 2088
    },
    {
      "epoch": 0.06094287881439991,
      "grad_norm": 0.9554917426061998,
      "learning_rate": 9.974942875207587e-06,
      "loss": 0.2019,
      "step": 2089
    },
    {
      "epoch": 0.06097205204504347,
      "grad_norm": 0.8726338411025051,
      "learning_rate": 9.974895614933814e-06,
      "loss": 0.2087,
      "step": 2090
    },
    {
      "epoch": 0.06100122527568703,
      "grad_norm": 1.0445440885430617,
      "learning_rate": 9.974848310245357e-06,
      "loss": 0.2152,
      "step": 2091
    },
    {
      "epoch": 0.06103039850633059,
      "grad_norm": 0.9555721092593108,
      "learning_rate": 9.974800961142644e-06,
      "loss": 0.2054,
      "step": 2092
    },
    {
      "epoch": 0.061059571736974155,
      "grad_norm": 1.143783034132988,
      "learning_rate": 9.974753567626095e-06,
      "loss": 0.2132,
      "step": 2093
    },
    {
      "epoch": 0.061088744967617715,
      "grad_norm": 0.9620146347135539,
      "learning_rate": 9.974706129696134e-06,
      "loss": 0.2086,
      "step": 2094
    },
    {
      "epoch": 0.061117918198261276,
      "grad_norm": 1.1374169372691725,
      "learning_rate": 9.974658647353183e-06,
      "loss": 0.2216,
      "step": 2095
    },
    {
      "epoch": 0.061147091428904836,
      "grad_norm": 1.1718917859515348,
      "learning_rate": 9.974611120597669e-06,
      "loss": 0.2303,
      "step": 2096
    },
    {
      "epoch": 0.061176264659548396,
      "grad_norm": 0.8625288177060566,
      "learning_rate": 9.974563549430015e-06,
      "loss": 0.2146,
      "step": 2097
    },
    {
      "epoch": 0.06120543789019196,
      "grad_norm": 0.8379967462769726,
      "learning_rate": 9.974515933850643e-06,
      "loss": 0.1687,
      "step": 2098
    },
    {
      "epoch": 0.06123461112083552,
      "grad_norm": 0.776264281700595,
      "learning_rate": 9.97446827385998e-06,
      "loss": 0.1764,
      "step": 2099
    },
    {
      "epoch": 0.061263784351479084,
      "grad_norm": 0.8726575437798988,
      "learning_rate": 9.974420569458453e-06,
      "loss": 0.2087,
      "step": 2100
    },
    {
      "epoch": 0.061292957582122644,
      "grad_norm": 0.8770632912289654,
      "learning_rate": 9.974372820646488e-06,
      "loss": 0.208,
      "step": 2101
    },
    {
      "epoch": 0.061322130812766204,
      "grad_norm": 0.8742580299006035,
      "learning_rate": 9.974325027424508e-06,
      "loss": 0.2045,
      "step": 2102
    },
    {
      "epoch": 0.061351304043409764,
      "grad_norm": 0.8363969625727758,
      "learning_rate": 9.974277189792942e-06,
      "loss": 0.1734,
      "step": 2103
    },
    {
      "epoch": 0.06138047727405333,
      "grad_norm": 0.9791199443738712,
      "learning_rate": 9.974229307752216e-06,
      "loss": 0.2064,
      "step": 2104
    },
    {
      "epoch": 0.06140965050469689,
      "grad_norm": 0.8474516665512822,
      "learning_rate": 9.97418138130276e-06,
      "loss": 0.2158,
      "step": 2105
    },
    {
      "epoch": 0.06143882373534045,
      "grad_norm": 1.1531210677607857,
      "learning_rate": 9.974133410444999e-06,
      "loss": 0.209,
      "step": 2106
    },
    {
      "epoch": 0.06146799696598401,
      "grad_norm": 1.1580693325474198,
      "learning_rate": 9.974085395179363e-06,
      "loss": 0.1918,
      "step": 2107
    },
    {
      "epoch": 0.06149717019662757,
      "grad_norm": 1.1289985282730919,
      "learning_rate": 9.974037335506279e-06,
      "loss": 0.2347,
      "step": 2108
    },
    {
      "epoch": 0.06152634342727114,
      "grad_norm": 1.0283689599134265,
      "learning_rate": 9.973989231426177e-06,
      "loss": 0.1917,
      "step": 2109
    },
    {
      "epoch": 0.0615555166579147,
      "grad_norm": 1.289355969406982,
      "learning_rate": 9.973941082939488e-06,
      "loss": 0.2044,
      "step": 2110
    },
    {
      "epoch": 0.06158468988855826,
      "grad_norm": 1.0637924130768446,
      "learning_rate": 9.97389289004664e-06,
      "loss": 0.2256,
      "step": 2111
    },
    {
      "epoch": 0.06161386311920182,
      "grad_norm": 0.8672140515563873,
      "learning_rate": 9.973844652748063e-06,
      "loss": 0.205,
      "step": 2112
    },
    {
      "epoch": 0.06164303634984538,
      "grad_norm": 1.0234528960065306,
      "learning_rate": 9.973796371044187e-06,
      "loss": 0.1795,
      "step": 2113
    },
    {
      "epoch": 0.06167220958048894,
      "grad_norm": 1.2992995590118168,
      "learning_rate": 9.973748044935446e-06,
      "loss": 0.2143,
      "step": 2114
    },
    {
      "epoch": 0.06170138281113251,
      "grad_norm": 1.0693041903325287,
      "learning_rate": 9.97369967442227e-06,
      "loss": 0.2112,
      "step": 2115
    },
    {
      "epoch": 0.06173055604177607,
      "grad_norm": 0.9951063543454214,
      "learning_rate": 9.973651259505091e-06,
      "loss": 0.2215,
      "step": 2116
    },
    {
      "epoch": 0.06175972927241963,
      "grad_norm": 1.1663975162463882,
      "learning_rate": 9.973602800184339e-06,
      "loss": 0.1981,
      "step": 2117
    },
    {
      "epoch": 0.06178890250306319,
      "grad_norm": 0.975352262670444,
      "learning_rate": 9.973554296460449e-06,
      "loss": 0.1943,
      "step": 2118
    },
    {
      "epoch": 0.06181807573370675,
      "grad_norm": 1.0415545024998738,
      "learning_rate": 9.973505748333853e-06,
      "loss": 0.1886,
      "step": 2119
    },
    {
      "epoch": 0.061847248964350315,
      "grad_norm": 0.801931958725028,
      "learning_rate": 9.973457155804988e-06,
      "loss": 0.1918,
      "step": 2120
    },
    {
      "epoch": 0.061876422194993876,
      "grad_norm": 1.215346746364988,
      "learning_rate": 9.973408518874281e-06,
      "loss": 0.2037,
      "step": 2121
    },
    {
      "epoch": 0.061905595425637436,
      "grad_norm": 1.0733016608293688,
      "learning_rate": 9.973359837542173e-06,
      "loss": 0.2045,
      "step": 2122
    },
    {
      "epoch": 0.061934768656280996,
      "grad_norm": 0.9902456673038482,
      "learning_rate": 9.973311111809094e-06,
      "loss": 0.2541,
      "step": 2123
    },
    {
      "epoch": 0.061963941886924556,
      "grad_norm": 1.0124167059750098,
      "learning_rate": 9.97326234167548e-06,
      "loss": 0.2081,
      "step": 2124
    },
    {
      "epoch": 0.061993115117568116,
      "grad_norm": 1.2004141251112712,
      "learning_rate": 9.97321352714177e-06,
      "loss": 0.2013,
      "step": 2125
    },
    {
      "epoch": 0.062022288348211684,
      "grad_norm": 0.9116694966161774,
      "learning_rate": 9.973164668208394e-06,
      "loss": 0.1998,
      "step": 2126
    },
    {
      "epoch": 0.062051461578855244,
      "grad_norm": 1.074177487403381,
      "learning_rate": 9.973115764875792e-06,
      "loss": 0.2186,
      "step": 2127
    },
    {
      "epoch": 0.062080634809498804,
      "grad_norm": 0.9602229025107512,
      "learning_rate": 9.973066817144398e-06,
      "loss": 0.2039,
      "step": 2128
    },
    {
      "epoch": 0.062109808040142364,
      "grad_norm": 0.8902293676577513,
      "learning_rate": 9.973017825014652e-06,
      "loss": 0.2004,
      "step": 2129
    },
    {
      "epoch": 0.062138981270785924,
      "grad_norm": 0.8580018720372313,
      "learning_rate": 9.972968788486992e-06,
      "loss": 0.1958,
      "step": 2130
    },
    {
      "epoch": 0.06216815450142949,
      "grad_norm": 1.041394416810979,
      "learning_rate": 9.972919707561852e-06,
      "loss": 0.2243,
      "step": 2131
    },
    {
      "epoch": 0.06219732773207305,
      "grad_norm": 1.021644935083276,
      "learning_rate": 9.97287058223967e-06,
      "loss": 0.2111,
      "step": 2132
    },
    {
      "epoch": 0.06222650096271661,
      "grad_norm": 1.0695922251783396,
      "learning_rate": 9.97282141252089e-06,
      "loss": 0.2568,
      "step": 2133
    },
    {
      "epoch": 0.06225567419336017,
      "grad_norm": 0.8600497510893946,
      "learning_rate": 9.972772198405945e-06,
      "loss": 0.1948,
      "step": 2134
    },
    {
      "epoch": 0.06228484742400373,
      "grad_norm": 0.9383435682456496,
      "learning_rate": 9.972722939895279e-06,
      "loss": 0.1983,
      "step": 2135
    },
    {
      "epoch": 0.06231402065464729,
      "grad_norm": 0.9670154811684295,
      "learning_rate": 9.972673636989327e-06,
      "loss": 0.1963,
      "step": 2136
    },
    {
      "epoch": 0.06234319388529086,
      "grad_norm": 1.0911027522815946,
      "learning_rate": 9.972624289688533e-06,
      "loss": 0.1784,
      "step": 2137
    },
    {
      "epoch": 0.06237236711593442,
      "grad_norm": 1.1053076465996292,
      "learning_rate": 9.972574897993338e-06,
      "loss": 0.201,
      "step": 2138
    },
    {
      "epoch": 0.06240154034657798,
      "grad_norm": 1.022009117982567,
      "learning_rate": 9.97252546190418e-06,
      "loss": 0.2232,
      "step": 2139
    },
    {
      "epoch": 0.06243071357722154,
      "grad_norm": 0.8704373189860868,
      "learning_rate": 9.972475981421502e-06,
      "loss": 0.202,
      "step": 2140
    },
    {
      "epoch": 0.0624598868078651,
      "grad_norm": 0.8073386618088111,
      "learning_rate": 9.972426456545745e-06,
      "loss": 0.2024,
      "step": 2141
    },
    {
      "epoch": 0.06248906003850867,
      "grad_norm": 0.9390175056440538,
      "learning_rate": 9.972376887277353e-06,
      "loss": 0.1864,
      "step": 2142
    },
    {
      "epoch": 0.06251823326915222,
      "grad_norm": 0.8741455883610038,
      "learning_rate": 9.972327273616765e-06,
      "loss": 0.1939,
      "step": 2143
    },
    {
      "epoch": 0.06254740649979579,
      "grad_norm": 0.8554803354568955,
      "learning_rate": 9.972277615564428e-06,
      "loss": 0.1739,
      "step": 2144
    },
    {
      "epoch": 0.06257657973043936,
      "grad_norm": 0.928084158501416,
      "learning_rate": 9.972227913120782e-06,
      "loss": 0.2174,
      "step": 2145
    },
    {
      "epoch": 0.06260575296108291,
      "grad_norm": 0.9267381160754137,
      "learning_rate": 9.972178166286273e-06,
      "loss": 0.1803,
      "step": 2146
    },
    {
      "epoch": 0.06263492619172648,
      "grad_norm": 0.8733856471252143,
      "learning_rate": 9.972128375061345e-06,
      "loss": 0.209,
      "step": 2147
    },
    {
      "epoch": 0.06266409942237003,
      "grad_norm": 1.0027675655267019,
      "learning_rate": 9.97207853944644e-06,
      "loss": 0.1997,
      "step": 2148
    },
    {
      "epoch": 0.0626932726530136,
      "grad_norm": 0.8467316584835951,
      "learning_rate": 9.972028659442006e-06,
      "loss": 0.2078,
      "step": 2149
    },
    {
      "epoch": 0.06272244588365716,
      "grad_norm": 0.9590674906366835,
      "learning_rate": 9.971978735048487e-06,
      "loss": 0.1902,
      "step": 2150
    },
    {
      "epoch": 0.06275161911430072,
      "grad_norm": 0.9162306588055258,
      "learning_rate": 9.971928766266328e-06,
      "loss": 0.2028,
      "step": 2151
    },
    {
      "epoch": 0.06278079234494428,
      "grad_norm": 1.0010169971801015,
      "learning_rate": 9.971878753095975e-06,
      "loss": 0.2138,
      "step": 2152
    },
    {
      "epoch": 0.06280996557558784,
      "grad_norm": 0.9096766102413545,
      "learning_rate": 9.971828695537877e-06,
      "loss": 0.2317,
      "step": 2153
    },
    {
      "epoch": 0.0628391388062314,
      "grad_norm": 0.8916366375062106,
      "learning_rate": 9.97177859359248e-06,
      "loss": 0.209,
      "step": 2154
    },
    {
      "epoch": 0.06286831203687496,
      "grad_norm": 0.8601853419671421,
      "learning_rate": 9.97172844726023e-06,
      "loss": 0.1851,
      "step": 2155
    },
    {
      "epoch": 0.06289748526751852,
      "grad_norm": 0.9660578264513769,
      "learning_rate": 9.971678256541573e-06,
      "loss": 0.199,
      "step": 2156
    },
    {
      "epoch": 0.06292665849816209,
      "grad_norm": 1.021043723589428,
      "learning_rate": 9.971628021436962e-06,
      "loss": 0.2355,
      "step": 2157
    },
    {
      "epoch": 0.06295583172880564,
      "grad_norm": 1.044111847940825,
      "learning_rate": 9.971577741946841e-06,
      "loss": 0.2054,
      "step": 2158
    },
    {
      "epoch": 0.06298500495944921,
      "grad_norm": 0.947055773303262,
      "learning_rate": 9.971527418071663e-06,
      "loss": 0.1827,
      "step": 2159
    },
    {
      "epoch": 0.06301417819009276,
      "grad_norm": 0.9863468194876462,
      "learning_rate": 9.971477049811873e-06,
      "loss": 0.24,
      "step": 2160
    },
    {
      "epoch": 0.06304335142073633,
      "grad_norm": 0.8877793611940751,
      "learning_rate": 9.971426637167924e-06,
      "loss": 0.1986,
      "step": 2161
    },
    {
      "epoch": 0.0630725246513799,
      "grad_norm": 0.9084343410271073,
      "learning_rate": 9.971376180140264e-06,
      "loss": 0.1767,
      "step": 2162
    },
    {
      "epoch": 0.06310169788202345,
      "grad_norm": 0.9435400721303311,
      "learning_rate": 9.971325678729344e-06,
      "loss": 0.2152,
      "step": 2163
    },
    {
      "epoch": 0.06313087111266702,
      "grad_norm": 0.8291741826520561,
      "learning_rate": 9.971275132935616e-06,
      "loss": 0.2022,
      "step": 2164
    },
    {
      "epoch": 0.06316004434331057,
      "grad_norm": 1.102884801299515,
      "learning_rate": 9.97122454275953e-06,
      "loss": 0.1944,
      "step": 2165
    },
    {
      "epoch": 0.06318921757395414,
      "grad_norm": 0.7718465396264464,
      "learning_rate": 9.971173908201536e-06,
      "loss": 0.1794,
      "step": 2166
    },
    {
      "epoch": 0.06321839080459771,
      "grad_norm": 0.9722053784672414,
      "learning_rate": 9.971123229262091e-06,
      "loss": 0.2162,
      "step": 2167
    },
    {
      "epoch": 0.06324756403524126,
      "grad_norm": 0.7703595361079513,
      "learning_rate": 9.971072505941643e-06,
      "loss": 0.2148,
      "step": 2168
    },
    {
      "epoch": 0.06327673726588483,
      "grad_norm": 0.8627728017973386,
      "learning_rate": 9.971021738240648e-06,
      "loss": 0.2194,
      "step": 2169
    },
    {
      "epoch": 0.06330591049652838,
      "grad_norm": 0.8168786062791672,
      "learning_rate": 9.970970926159556e-06,
      "loss": 0.1784,
      "step": 2170
    },
    {
      "epoch": 0.06333508372717195,
      "grad_norm": 0.9458401907464605,
      "learning_rate": 9.970920069698822e-06,
      "loss": 0.1989,
      "step": 2171
    },
    {
      "epoch": 0.06336425695781552,
      "grad_norm": 0.8681931135470768,
      "learning_rate": 9.970869168858901e-06,
      "loss": 0.2097,
      "step": 2172
    },
    {
      "epoch": 0.06339343018845907,
      "grad_norm": 1.0313342547076338,
      "learning_rate": 9.970818223640246e-06,
      "loss": 0.2039,
      "step": 2173
    },
    {
      "epoch": 0.06342260341910264,
      "grad_norm": 0.9147472245352678,
      "learning_rate": 9.970767234043315e-06,
      "loss": 0.1973,
      "step": 2174
    },
    {
      "epoch": 0.06345177664974619,
      "grad_norm": 1.0247069348055946,
      "learning_rate": 9.970716200068557e-06,
      "loss": 0.2036,
      "step": 2175
    },
    {
      "epoch": 0.06348094988038976,
      "grad_norm": 0.9377552652308071,
      "learning_rate": 9.970665121716434e-06,
      "loss": 0.2063,
      "step": 2176
    },
    {
      "epoch": 0.06351012311103331,
      "grad_norm": 0.8406212963866995,
      "learning_rate": 9.9706139989874e-06,
      "loss": 0.2351,
      "step": 2177
    },
    {
      "epoch": 0.06353929634167688,
      "grad_norm": 0.910002269981033,
      "learning_rate": 9.970562831881908e-06,
      "loss": 0.221,
      "step": 2178
    },
    {
      "epoch": 0.06356846957232044,
      "grad_norm": 1.0106532591392394,
      "learning_rate": 9.97051162040042e-06,
      "loss": 0.1821,
      "step": 2179
    },
    {
      "epoch": 0.063597642802964,
      "grad_norm": 0.8683149191940535,
      "learning_rate": 9.970460364543388e-06,
      "loss": 0.1846,
      "step": 2180
    },
    {
      "epoch": 0.06362681603360756,
      "grad_norm": 0.8223202382821131,
      "learning_rate": 9.970409064311275e-06,
      "loss": 0.1887,
      "step": 2181
    },
    {
      "epoch": 0.06365598926425112,
      "grad_norm": 1.1540314998941879,
      "learning_rate": 9.970357719704535e-06,
      "loss": 0.1998,
      "step": 2182
    },
    {
      "epoch": 0.06368516249489468,
      "grad_norm": 0.8113968764563977,
      "learning_rate": 9.97030633072363e-06,
      "loss": 0.1957,
      "step": 2183
    },
    {
      "epoch": 0.06371433572553825,
      "grad_norm": 1.0028786750066798,
      "learning_rate": 9.970254897369014e-06,
      "loss": 0.1942,
      "step": 2184
    },
    {
      "epoch": 0.0637435089561818,
      "grad_norm": 0.8008843570967988,
      "learning_rate": 9.970203419641152e-06,
      "loss": 0.2071,
      "step": 2185
    },
    {
      "epoch": 0.06377268218682537,
      "grad_norm": 0.9577374456325305,
      "learning_rate": 9.970151897540496e-06,
      "loss": 0.1916,
      "step": 2186
    },
    {
      "epoch": 0.06380185541746893,
      "grad_norm": 0.7721337864368466,
      "learning_rate": 9.970100331067515e-06,
      "loss": 0.1907,
      "step": 2187
    },
    {
      "epoch": 0.06383102864811249,
      "grad_norm": 0.8214557848609138,
      "learning_rate": 9.97004872022266e-06,
      "loss": 0.189,
      "step": 2188
    },
    {
      "epoch": 0.06386020187875606,
      "grad_norm": 0.8534221733409112,
      "learning_rate": 9.969997065006399e-06,
      "loss": 0.1985,
      "step": 2189
    },
    {
      "epoch": 0.06388937510939961,
      "grad_norm": 0.9731900665243962,
      "learning_rate": 9.96994536541919e-06,
      "loss": 0.2277,
      "step": 2190
    },
    {
      "epoch": 0.06391854834004318,
      "grad_norm": 0.9098656679861912,
      "learning_rate": 9.969893621461495e-06,
      "loss": 0.2049,
      "step": 2191
    },
    {
      "epoch": 0.06394772157068673,
      "grad_norm": 1.156345296203105,
      "learning_rate": 9.969841833133778e-06,
      "loss": 0.1972,
      "step": 2192
    },
    {
      "epoch": 0.0639768948013303,
      "grad_norm": 1.029714304537173,
      "learning_rate": 9.969790000436498e-06,
      "loss": 0.2061,
      "step": 2193
    },
    {
      "epoch": 0.06400606803197387,
      "grad_norm": 0.839849005411028,
      "learning_rate": 9.969738123370118e-06,
      "loss": 0.2277,
      "step": 2194
    },
    {
      "epoch": 0.06403524126261742,
      "grad_norm": 1.1541475573991666,
      "learning_rate": 9.969686201935105e-06,
      "loss": 0.2031,
      "step": 2195
    },
    {
      "epoch": 0.06406441449326099,
      "grad_norm": 1.039905017633847,
      "learning_rate": 9.969634236131918e-06,
      "loss": 0.1992,
      "step": 2196
    },
    {
      "epoch": 0.06409358772390454,
      "grad_norm": 1.0827334882608222,
      "learning_rate": 9.969582225961025e-06,
      "loss": 0.185,
      "step": 2197
    },
    {
      "epoch": 0.06412276095454811,
      "grad_norm": 0.8294662301533692,
      "learning_rate": 9.969530171422886e-06,
      "loss": 0.2021,
      "step": 2198
    },
    {
      "epoch": 0.06415193418519166,
      "grad_norm": 1.16260507700567,
      "learning_rate": 9.969478072517968e-06,
      "loss": 0.1953,
      "step": 2199
    },
    {
      "epoch": 0.06418110741583523,
      "grad_norm": 0.910075438686915,
      "learning_rate": 9.969425929246739e-06,
      "loss": 0.204,
      "step": 2200
    },
    {
      "epoch": 0.0642102806464788,
      "grad_norm": 0.7471934485024296,
      "learning_rate": 9.969373741609659e-06,
      "loss": 0.1982,
      "step": 2201
    },
    {
      "epoch": 0.06423945387712235,
      "grad_norm": 0.8166737270541704,
      "learning_rate": 9.969321509607197e-06,
      "loss": 0.1888,
      "step": 2202
    },
    {
      "epoch": 0.06426862710776592,
      "grad_norm": 1.0086772959630657,
      "learning_rate": 9.969269233239819e-06,
      "loss": 0.1879,
      "step": 2203
    },
    {
      "epoch": 0.06429780033840947,
      "grad_norm": 0.9159443963949536,
      "learning_rate": 9.96921691250799e-06,
      "loss": 0.1971,
      "step": 2204
    },
    {
      "epoch": 0.06432697356905304,
      "grad_norm": 0.884458649367082,
      "learning_rate": 9.969164547412182e-06,
      "loss": 0.1744,
      "step": 2205
    },
    {
      "epoch": 0.0643561467996966,
      "grad_norm": 0.8610354642199655,
      "learning_rate": 9.969112137952856e-06,
      "loss": 0.1923,
      "step": 2206
    },
    {
      "epoch": 0.06438532003034016,
      "grad_norm": 1.1471539639614219,
      "learning_rate": 9.969059684130484e-06,
      "loss": 0.1913,
      "step": 2207
    },
    {
      "epoch": 0.06441449326098372,
      "grad_norm": 1.0115666785152742,
      "learning_rate": 9.969007185945534e-06,
      "loss": 0.2078,
      "step": 2208
    },
    {
      "epoch": 0.06444366649162728,
      "grad_norm": 0.9262368849840441,
      "learning_rate": 9.968954643398474e-06,
      "loss": 0.2172,
      "step": 2209
    },
    {
      "epoch": 0.06447283972227084,
      "grad_norm": 1.2146042940550938,
      "learning_rate": 9.968902056489773e-06,
      "loss": 0.2038,
      "step": 2210
    },
    {
      "epoch": 0.06450201295291441,
      "grad_norm": 1.1360427085963671,
      "learning_rate": 9.9688494252199e-06,
      "loss": 0.2207,
      "step": 2211
    },
    {
      "epoch": 0.06453118618355796,
      "grad_norm": 0.9416998773571001,
      "learning_rate": 9.968796749589328e-06,
      "loss": 0.2076,
      "step": 2212
    },
    {
      "epoch": 0.06456035941420153,
      "grad_norm": 0.9456392384313109,
      "learning_rate": 9.96874402959852e-06,
      "loss": 0.1948,
      "step": 2213
    },
    {
      "epoch": 0.06458953264484509,
      "grad_norm": 1.03189788717096,
      "learning_rate": 9.968691265247954e-06,
      "loss": 0.1727,
      "step": 2214
    },
    {
      "epoch": 0.06461870587548865,
      "grad_norm": 0.8727096691708024,
      "learning_rate": 9.968638456538101e-06,
      "loss": 0.2015,
      "step": 2215
    },
    {
      "epoch": 0.06464787910613222,
      "grad_norm": 1.02655922936091,
      "learning_rate": 9.968585603469427e-06,
      "loss": 0.2137,
      "step": 2216
    },
    {
      "epoch": 0.06467705233677577,
      "grad_norm": 1.028569638988256,
      "learning_rate": 9.968532706042406e-06,
      "loss": 0.2149,
      "step": 2217
    },
    {
      "epoch": 0.06470622556741934,
      "grad_norm": 1.0048528039586941,
      "learning_rate": 9.968479764257513e-06,
      "loss": 0.2076,
      "step": 2218
    },
    {
      "epoch": 0.0647353987980629,
      "grad_norm": 0.8780669246841813,
      "learning_rate": 9.968426778115218e-06,
      "loss": 0.1826,
      "step": 2219
    },
    {
      "epoch": 0.06476457202870646,
      "grad_norm": 0.8531201656836703,
      "learning_rate": 9.968373747615996e-06,
      "loss": 0.178,
      "step": 2220
    },
    {
      "epoch": 0.06479374525935001,
      "grad_norm": 0.8613243761746758,
      "learning_rate": 9.968320672760318e-06,
      "loss": 0.1789,
      "step": 2221
    },
    {
      "epoch": 0.06482291848999358,
      "grad_norm": 0.9149738482474605,
      "learning_rate": 9.968267553548659e-06,
      "loss": 0.19,
      "step": 2222
    },
    {
      "epoch": 0.06485209172063715,
      "grad_norm": 1.0257280791333028,
      "learning_rate": 9.968214389981494e-06,
      "loss": 0.211,
      "step": 2223
    },
    {
      "epoch": 0.0648812649512807,
      "grad_norm": 0.9591929787116885,
      "learning_rate": 9.968161182059297e-06,
      "loss": 0.1982,
      "step": 2224
    },
    {
      "epoch": 0.06491043818192427,
      "grad_norm": 0.8951345868663532,
      "learning_rate": 9.968107929782543e-06,
      "loss": 0.1922,
      "step": 2225
    },
    {
      "epoch": 0.06493961141256782,
      "grad_norm": 0.7471293912834188,
      "learning_rate": 9.968054633151707e-06,
      "loss": 0.1967,
      "step": 2226
    },
    {
      "epoch": 0.06496878464321139,
      "grad_norm": 0.8574026233590367,
      "learning_rate": 9.968001292167264e-06,
      "loss": 0.2329,
      "step": 2227
    },
    {
      "epoch": 0.06499795787385496,
      "grad_norm": 0.873056545143532,
      "learning_rate": 9.967947906829694e-06,
      "loss": 0.227,
      "step": 2228
    },
    {
      "epoch": 0.06502713110449851,
      "grad_norm": 0.9092896561420567,
      "learning_rate": 9.967894477139468e-06,
      "loss": 0.2438,
      "step": 2229
    },
    {
      "epoch": 0.06505630433514208,
      "grad_norm": 0.8392856461878418,
      "learning_rate": 9.967841003097068e-06,
      "loss": 0.1972,
      "step": 2230
    },
    {
      "epoch": 0.06508547756578563,
      "grad_norm": 0.9152020310585482,
      "learning_rate": 9.967787484702968e-06,
      "loss": 0.1983,
      "step": 2231
    },
    {
      "epoch": 0.0651146507964292,
      "grad_norm": 0.7857343524542046,
      "learning_rate": 9.96773392195765e-06,
      "loss": 0.1929,
      "step": 2232
    },
    {
      "epoch": 0.06514382402707276,
      "grad_norm": 0.8205865412173147,
      "learning_rate": 9.967680314861587e-06,
      "loss": 0.211,
      "step": 2233
    },
    {
      "epoch": 0.06517299725771632,
      "grad_norm": 0.8703512415040333,
      "learning_rate": 9.967626663415261e-06,
      "loss": 0.2158,
      "step": 2234
    },
    {
      "epoch": 0.06520217048835988,
      "grad_norm": 0.9683867770588545,
      "learning_rate": 9.96757296761915e-06,
      "loss": 0.2311,
      "step": 2235
    },
    {
      "epoch": 0.06523134371900344,
      "grad_norm": 0.8237442188773494,
      "learning_rate": 9.967519227473733e-06,
      "loss": 0.1998,
      "step": 2236
    },
    {
      "epoch": 0.065260516949647,
      "grad_norm": 0.8338489173292051,
      "learning_rate": 9.96746544297949e-06,
      "loss": 0.1877,
      "step": 2237
    },
    {
      "epoch": 0.06528969018029057,
      "grad_norm": 0.9623474023766088,
      "learning_rate": 9.967411614136902e-06,
      "loss": 0.1998,
      "step": 2238
    },
    {
      "epoch": 0.06531886341093412,
      "grad_norm": 1.1544915738343882,
      "learning_rate": 9.967357740946448e-06,
      "loss": 0.1996,
      "step": 2239
    },
    {
      "epoch": 0.06534803664157769,
      "grad_norm": 0.8763839105754301,
      "learning_rate": 9.967303823408612e-06,
      "loss": 0.1962,
      "step": 2240
    },
    {
      "epoch": 0.06537720987222125,
      "grad_norm": 0.8973607034259703,
      "learning_rate": 9.96724986152387e-06,
      "loss": 0.2032,
      "step": 2241
    },
    {
      "epoch": 0.06540638310286481,
      "grad_norm": 1.0217125120852515,
      "learning_rate": 9.96719585529271e-06,
      "loss": 0.2016,
      "step": 2242
    },
    {
      "epoch": 0.06543555633350838,
      "grad_norm": 0.914861141037201,
      "learning_rate": 9.96714180471561e-06,
      "loss": 0.2156,
      "step": 2243
    },
    {
      "epoch": 0.06546472956415193,
      "grad_norm": 0.852790688174234,
      "learning_rate": 9.967087709793053e-06,
      "loss": 0.1942,
      "step": 2244
    },
    {
      "epoch": 0.0654939027947955,
      "grad_norm": 0.8901363975744998,
      "learning_rate": 9.967033570525525e-06,
      "loss": 0.188,
      "step": 2245
    },
    {
      "epoch": 0.06552307602543905,
      "grad_norm": 0.8225870555184273,
      "learning_rate": 9.966979386913504e-06,
      "loss": 0.2115,
      "step": 2246
    },
    {
      "epoch": 0.06555224925608262,
      "grad_norm": 0.863887118505021,
      "learning_rate": 9.966925158957479e-06,
      "loss": 0.1863,
      "step": 2247
    },
    {
      "epoch": 0.06558142248672617,
      "grad_norm": 0.9544258461725252,
      "learning_rate": 9.966870886657932e-06,
      "loss": 0.214,
      "step": 2248
    },
    {
      "epoch": 0.06561059571736974,
      "grad_norm": 1.417183988957697,
      "learning_rate": 9.966816570015345e-06,
      "loss": 0.2219,
      "step": 2249
    },
    {
      "epoch": 0.06563976894801331,
      "grad_norm": 1.1918637172628517,
      "learning_rate": 9.966762209030208e-06,
      "loss": 0.2249,
      "step": 2250
    },
    {
      "epoch": 0.06566894217865686,
      "grad_norm": 0.8183769491972958,
      "learning_rate": 9.966707803703002e-06,
      "loss": 0.2128,
      "step": 2251
    },
    {
      "epoch": 0.06569811540930043,
      "grad_norm": 1.0000162894697773,
      "learning_rate": 9.966653354034214e-06,
      "loss": 0.2209,
      "step": 2252
    },
    {
      "epoch": 0.06572728863994398,
      "grad_norm": 1.0300333643567683,
      "learning_rate": 9.966598860024332e-06,
      "loss": 0.1842,
      "step": 2253
    },
    {
      "epoch": 0.06575646187058755,
      "grad_norm": 0.9772418886702062,
      "learning_rate": 9.966544321673839e-06,
      "loss": 0.2076,
      "step": 2254
    },
    {
      "epoch": 0.06578563510123112,
      "grad_norm": 1.2678545558541043,
      "learning_rate": 9.966489738983226e-06,
      "loss": 0.1676,
      "step": 2255
    },
    {
      "epoch": 0.06581480833187467,
      "grad_norm": 1.107490942511155,
      "learning_rate": 9.966435111952977e-06,
      "loss": 0.2144,
      "step": 2256
    },
    {
      "epoch": 0.06584398156251824,
      "grad_norm": 0.921844265717796,
      "learning_rate": 9.966380440583581e-06,
      "loss": 0.1835,
      "step": 2257
    },
    {
      "epoch": 0.06587315479316179,
      "grad_norm": 1.3402928936873384,
      "learning_rate": 9.966325724875527e-06,
      "loss": 0.2103,
      "step": 2258
    },
    {
      "epoch": 0.06590232802380536,
      "grad_norm": 0.855362458923824,
      "learning_rate": 9.9662709648293e-06,
      "loss": 0.2007,
      "step": 2259
    },
    {
      "epoch": 0.06593150125444892,
      "grad_norm": 1.0063562289024135,
      "learning_rate": 9.966216160445394e-06,
      "loss": 0.2166,
      "step": 2260
    },
    {
      "epoch": 0.06596067448509248,
      "grad_norm": 1.0831744996916741,
      "learning_rate": 9.966161311724296e-06,
      "loss": 0.2093,
      "step": 2261
    },
    {
      "epoch": 0.06598984771573604,
      "grad_norm": 0.6840487396997437,
      "learning_rate": 9.966106418666494e-06,
      "loss": 0.1893,
      "step": 2262
    },
    {
      "epoch": 0.0660190209463796,
      "grad_norm": 0.9684448740508934,
      "learning_rate": 9.96605148127248e-06,
      "loss": 0.2043,
      "step": 2263
    },
    {
      "epoch": 0.06604819417702316,
      "grad_norm": 1.0038273974017846,
      "learning_rate": 9.965996499542742e-06,
      "loss": 0.1828,
      "step": 2264
    },
    {
      "epoch": 0.06607736740766673,
      "grad_norm": 0.8998160043586104,
      "learning_rate": 9.965941473477775e-06,
      "loss": 0.2128,
      "step": 2265
    },
    {
      "epoch": 0.06610654063831028,
      "grad_norm": 0.8605748093744029,
      "learning_rate": 9.965886403078067e-06,
      "loss": 0.209,
      "step": 2266
    },
    {
      "epoch": 0.06613571386895385,
      "grad_norm": 0.7303764695161269,
      "learning_rate": 9.965831288344112e-06,
      "loss": 0.2088,
      "step": 2267
    },
    {
      "epoch": 0.0661648870995974,
      "grad_norm": 0.88000142850267,
      "learning_rate": 9.9657761292764e-06,
      "loss": 0.2036,
      "step": 2268
    },
    {
      "epoch": 0.06619406033024097,
      "grad_norm": 0.76853823738418,
      "learning_rate": 9.965720925875421e-06,
      "loss": 0.1771,
      "step": 2269
    },
    {
      "epoch": 0.06622323356088453,
      "grad_norm": 0.916048883347126,
      "learning_rate": 9.965665678141673e-06,
      "loss": 0.2045,
      "step": 2270
    },
    {
      "epoch": 0.06625240679152809,
      "grad_norm": 1.0360752776009832,
      "learning_rate": 9.96561038607565e-06,
      "loss": 0.1836,
      "step": 2271
    },
    {
      "epoch": 0.06628158002217166,
      "grad_norm": 0.935275447511629,
      "learning_rate": 9.96555504967784e-06,
      "loss": 0.2047,
      "step": 2272
    },
    {
      "epoch": 0.06631075325281521,
      "grad_norm": 0.8368259005842033,
      "learning_rate": 9.965499668948741e-06,
      "loss": 0.1808,
      "step": 2273
    },
    {
      "epoch": 0.06633992648345878,
      "grad_norm": 1.2344291155833595,
      "learning_rate": 9.965444243888846e-06,
      "loss": 0.2463,
      "step": 2274
    },
    {
      "epoch": 0.06636909971410233,
      "grad_norm": 1.2281465329487147,
      "learning_rate": 9.96538877449865e-06,
      "loss": 0.2175,
      "step": 2275
    },
    {
      "epoch": 0.0663982729447459,
      "grad_norm": 1.0374219247249994,
      "learning_rate": 9.965333260778649e-06,
      "loss": 0.1772,
      "step": 2276
    },
    {
      "epoch": 0.06642744617538947,
      "grad_norm": 0.8921888732790692,
      "learning_rate": 9.965277702729338e-06,
      "loss": 0.2241,
      "step": 2277
    },
    {
      "epoch": 0.06645661940603302,
      "grad_norm": 1.0007032657722952,
      "learning_rate": 9.965222100351211e-06,
      "loss": 0.1979,
      "step": 2278
    },
    {
      "epoch": 0.06648579263667659,
      "grad_norm": 0.8774868372628707,
      "learning_rate": 9.965166453644767e-06,
      "loss": 0.1953,
      "step": 2279
    },
    {
      "epoch": 0.06651496586732014,
      "grad_norm": 0.7244194252569438,
      "learning_rate": 9.965110762610504e-06,
      "loss": 0.1711,
      "step": 2280
    },
    {
      "epoch": 0.06654413909796371,
      "grad_norm": 1.0813616908314334,
      "learning_rate": 9.965055027248915e-06,
      "loss": 0.2493,
      "step": 2281
    },
    {
      "epoch": 0.06657331232860728,
      "grad_norm": 0.9372654404430211,
      "learning_rate": 9.964999247560501e-06,
      "loss": 0.2176,
      "step": 2282
    },
    {
      "epoch": 0.06660248555925083,
      "grad_norm": 0.9265101736038648,
      "learning_rate": 9.96494342354576e-06,
      "loss": 0.193,
      "step": 2283
    },
    {
      "epoch": 0.0666316587898944,
      "grad_norm": 1.1503195481058759,
      "learning_rate": 9.964887555205189e-06,
      "loss": 0.2132,
      "step": 2284
    },
    {
      "epoch": 0.06666083202053795,
      "grad_norm": 0.9757278540844526,
      "learning_rate": 9.964831642539285e-06,
      "loss": 0.202,
      "step": 2285
    },
    {
      "epoch": 0.06669000525118152,
      "grad_norm": 1.2181286560018258,
      "learning_rate": 9.964775685548552e-06,
      "loss": 0.2026,
      "step": 2286
    },
    {
      "epoch": 0.06671917848182508,
      "grad_norm": 0.8442524527932412,
      "learning_rate": 9.964719684233486e-06,
      "loss": 0.2049,
      "step": 2287
    },
    {
      "epoch": 0.06674835171246864,
      "grad_norm": 0.9553116229166884,
      "learning_rate": 9.964663638594587e-06,
      "loss": 0.1791,
      "step": 2288
    },
    {
      "epoch": 0.0667775249431122,
      "grad_norm": 0.8899230068080627,
      "learning_rate": 9.964607548632356e-06,
      "loss": 0.1953,
      "step": 2289
    },
    {
      "epoch": 0.06680669817375576,
      "grad_norm": 1.0511859131465593,
      "learning_rate": 9.964551414347297e-06,
      "loss": 0.2116,
      "step": 2290
    },
    {
      "epoch": 0.06683587140439932,
      "grad_norm": 0.9932528612213981,
      "learning_rate": 9.964495235739907e-06,
      "loss": 0.1906,
      "step": 2291
    },
    {
      "epoch": 0.06686504463504288,
      "grad_norm": 1.0239559240853664,
      "learning_rate": 9.964439012810686e-06,
      "loss": 0.1783,
      "step": 2292
    },
    {
      "epoch": 0.06689421786568645,
      "grad_norm": 1.2101691483406114,
      "learning_rate": 9.96438274556014e-06,
      "loss": 0.2021,
      "step": 2293
    },
    {
      "epoch": 0.06692339109633001,
      "grad_norm": 0.8340825736135985,
      "learning_rate": 9.96432643398877e-06,
      "loss": 0.2169,
      "step": 2294
    },
    {
      "epoch": 0.06695256432697357,
      "grad_norm": 0.9372531126845941,
      "learning_rate": 9.96427007809708e-06,
      "loss": 0.2003,
      "step": 2295
    },
    {
      "epoch": 0.06698173755761713,
      "grad_norm": 0.9408517668032419,
      "learning_rate": 9.964213677885571e-06,
      "loss": 0.1893,
      "step": 2296
    },
    {
      "epoch": 0.06701091078826069,
      "grad_norm": 0.9129308162969344,
      "learning_rate": 9.964157233354745e-06,
      "loss": 0.1915,
      "step": 2297
    },
    {
      "epoch": 0.06704008401890425,
      "grad_norm": 0.8845099707748527,
      "learning_rate": 9.964100744505111e-06,
      "loss": 0.2003,
      "step": 2298
    },
    {
      "epoch": 0.06706925724954782,
      "grad_norm": 1.063371864190776,
      "learning_rate": 9.96404421133717e-06,
      "loss": 0.2201,
      "step": 2299
    },
    {
      "epoch": 0.06709843048019137,
      "grad_norm": 1.2343004147392833,
      "learning_rate": 9.963987633851427e-06,
      "loss": 0.1992,
      "step": 2300
    },
    {
      "epoch": 0.06712760371083494,
      "grad_norm": 0.8096666210595943,
      "learning_rate": 9.963931012048387e-06,
      "loss": 0.1975,
      "step": 2301
    },
    {
      "epoch": 0.0671567769414785,
      "grad_norm": 0.9578026541774669,
      "learning_rate": 9.963874345928557e-06,
      "loss": 0.2318,
      "step": 2302
    },
    {
      "epoch": 0.06718595017212206,
      "grad_norm": 0.9635990484625206,
      "learning_rate": 9.963817635492441e-06,
      "loss": 0.2109,
      "step": 2303
    },
    {
      "epoch": 0.06721512340276563,
      "grad_norm": 0.9000620424505866,
      "learning_rate": 9.963760880740545e-06,
      "loss": 0.1993,
      "step": 2304
    },
    {
      "epoch": 0.06724429663340918,
      "grad_norm": 0.8117717998021512,
      "learning_rate": 9.96370408167338e-06,
      "loss": 0.2174,
      "step": 2305
    },
    {
      "epoch": 0.06727346986405275,
      "grad_norm": 0.8858406507149971,
      "learning_rate": 9.963647238291446e-06,
      "loss": 0.1928,
      "step": 2306
    },
    {
      "epoch": 0.0673026430946963,
      "grad_norm": 0.9669617480035934,
      "learning_rate": 9.963590350595258e-06,
      "loss": 0.1973,
      "step": 2307
    },
    {
      "epoch": 0.06733181632533987,
      "grad_norm": 0.7658286769158015,
      "learning_rate": 9.963533418585318e-06,
      "loss": 0.1798,
      "step": 2308
    },
    {
      "epoch": 0.06736098955598344,
      "grad_norm": 0.9558386659030622,
      "learning_rate": 9.963476442262136e-06,
      "loss": 0.2138,
      "step": 2309
    },
    {
      "epoch": 0.06739016278662699,
      "grad_norm": 0.9344254127484777,
      "learning_rate": 9.963419421626224e-06,
      "loss": 0.1998,
      "step": 2310
    },
    {
      "epoch": 0.06741933601727056,
      "grad_norm": 0.8800064663338417,
      "learning_rate": 9.963362356678086e-06,
      "loss": 0.2017,
      "step": 2311
    },
    {
      "epoch": 0.06744850924791411,
      "grad_norm": 0.7737275162632699,
      "learning_rate": 9.963305247418234e-06,
      "loss": 0.1742,
      "step": 2312
    },
    {
      "epoch": 0.06747768247855768,
      "grad_norm": 1.1441889633269098,
      "learning_rate": 9.963248093847179e-06,
      "loss": 0.1959,
      "step": 2313
    },
    {
      "epoch": 0.06750685570920123,
      "grad_norm": 1.052257520574918,
      "learning_rate": 9.963190895965428e-06,
      "loss": 0.1829,
      "step": 2314
    },
    {
      "epoch": 0.0675360289398448,
      "grad_norm": 1.0701099476644802,
      "learning_rate": 9.963133653773495e-06,
      "loss": 0.1834,
      "step": 2315
    },
    {
      "epoch": 0.06756520217048836,
      "grad_norm": 0.8497336606664448,
      "learning_rate": 9.963076367271889e-06,
      "loss": 0.2044,
      "step": 2316
    },
    {
      "epoch": 0.06759437540113192,
      "grad_norm": 0.9013699327004027,
      "learning_rate": 9.96301903646112e-06,
      "loss": 0.2138,
      "step": 2317
    },
    {
      "epoch": 0.06762354863177548,
      "grad_norm": 0.8988222302331794,
      "learning_rate": 9.962961661341707e-06,
      "loss": 0.2128,
      "step": 2318
    },
    {
      "epoch": 0.06765272186241904,
      "grad_norm": 0.9897416661460551,
      "learning_rate": 9.962904241914151e-06,
      "loss": 0.2091,
      "step": 2319
    },
    {
      "epoch": 0.0676818950930626,
      "grad_norm": 1.0865752192965261,
      "learning_rate": 9.962846778178974e-06,
      "loss": 0.2044,
      "step": 2320
    },
    {
      "epoch": 0.06771106832370617,
      "grad_norm": 0.918354437620551,
      "learning_rate": 9.962789270136687e-06,
      "loss": 0.1885,
      "step": 2321
    },
    {
      "epoch": 0.06774024155434973,
      "grad_norm": 1.2876227223562233,
      "learning_rate": 9.962731717787798e-06,
      "loss": 0.204,
      "step": 2322
    },
    {
      "epoch": 0.06776941478499329,
      "grad_norm": 0.8520317395261258,
      "learning_rate": 9.962674121132827e-06,
      "loss": 0.1953,
      "step": 2323
    },
    {
      "epoch": 0.06779858801563685,
      "grad_norm": 1.011606949292786,
      "learning_rate": 9.962616480172287e-06,
      "loss": 0.2028,
      "step": 2324
    },
    {
      "epoch": 0.06782776124628041,
      "grad_norm": 0.9435219564123694,
      "learning_rate": 9.96255879490669e-06,
      "loss": 0.1701,
      "step": 2325
    },
    {
      "epoch": 0.06785693447692398,
      "grad_norm": 0.9355932918071468,
      "learning_rate": 9.962501065336553e-06,
      "loss": 0.2142,
      "step": 2326
    },
    {
      "epoch": 0.06788610770756753,
      "grad_norm": 1.0504961242919153,
      "learning_rate": 9.962443291462393e-06,
      "loss": 0.1902,
      "step": 2327
    },
    {
      "epoch": 0.0679152809382111,
      "grad_norm": 1.0262234020607701,
      "learning_rate": 9.962385473284723e-06,
      "loss": 0.188,
      "step": 2328
    },
    {
      "epoch": 0.06794445416885465,
      "grad_norm": 0.8601566986948037,
      "learning_rate": 9.962327610804059e-06,
      "loss": 0.1796,
      "step": 2329
    },
    {
      "epoch": 0.06797362739949822,
      "grad_norm": 0.8643775448630018,
      "learning_rate": 9.962269704020919e-06,
      "loss": 0.196,
      "step": 2330
    },
    {
      "epoch": 0.06800280063014179,
      "grad_norm": 1.0711486392766931,
      "learning_rate": 9.962211752935821e-06,
      "loss": 0.1832,
      "step": 2331
    },
    {
      "epoch": 0.06803197386078534,
      "grad_norm": 0.8854287171512436,
      "learning_rate": 9.96215375754928e-06,
      "loss": 0.2064,
      "step": 2332
    },
    {
      "epoch": 0.06806114709142891,
      "grad_norm": 1.0101785669871297,
      "learning_rate": 9.962095717861816e-06,
      "loss": 0.1963,
      "step": 2333
    },
    {
      "epoch": 0.06809032032207246,
      "grad_norm": 1.0012062687223648,
      "learning_rate": 9.962037633873945e-06,
      "loss": 0.2374,
      "step": 2334
    },
    {
      "epoch": 0.06811949355271603,
      "grad_norm": 0.8667464615236197,
      "learning_rate": 9.961979505586185e-06,
      "loss": 0.187,
      "step": 2335
    },
    {
      "epoch": 0.0681486667833596,
      "grad_norm": 0.9361547793247379,
      "learning_rate": 9.961921332999058e-06,
      "loss": 0.2079,
      "step": 2336
    },
    {
      "epoch": 0.06817784001400315,
      "grad_norm": 0.9839156956812861,
      "learning_rate": 9.961863116113083e-06,
      "loss": 0.2029,
      "step": 2337
    },
    {
      "epoch": 0.06820701324464672,
      "grad_norm": 1.0421774610372838,
      "learning_rate": 9.961804854928778e-06,
      "loss": 0.2236,
      "step": 2338
    },
    {
      "epoch": 0.06823618647529027,
      "grad_norm": 1.0811409838186092,
      "learning_rate": 9.961746549446662e-06,
      "loss": 0.2267,
      "step": 2339
    },
    {
      "epoch": 0.06826535970593384,
      "grad_norm": 1.0460234373857449,
      "learning_rate": 9.961688199667259e-06,
      "loss": 0.1794,
      "step": 2340
    },
    {
      "epoch": 0.06829453293657739,
      "grad_norm": 0.9051163139657413,
      "learning_rate": 9.961629805591088e-06,
      "loss": 0.2255,
      "step": 2341
    },
    {
      "epoch": 0.06832370616722096,
      "grad_norm": 1.1277562297268768,
      "learning_rate": 9.96157136721867e-06,
      "loss": 0.1808,
      "step": 2342
    },
    {
      "epoch": 0.06835287939786452,
      "grad_norm": 0.9348417394825488,
      "learning_rate": 9.961512884550529e-06,
      "loss": 0.2151,
      "step": 2343
    },
    {
      "epoch": 0.06838205262850808,
      "grad_norm": 1.0292995123316866,
      "learning_rate": 9.961454357587183e-06,
      "loss": 0.1855,
      "step": 2344
    },
    {
      "epoch": 0.06841122585915164,
      "grad_norm": 0.9322885776989647,
      "learning_rate": 9.961395786329158e-06,
      "loss": 0.2051,
      "step": 2345
    },
    {
      "epoch": 0.0684403990897952,
      "grad_norm": 1.0105816732637924,
      "learning_rate": 9.961337170776974e-06,
      "loss": 0.1959,
      "step": 2346
    },
    {
      "epoch": 0.06846957232043877,
      "grad_norm": 1.0499178862163263,
      "learning_rate": 9.961278510931159e-06,
      "loss": 0.2075,
      "step": 2347
    },
    {
      "epoch": 0.06849874555108233,
      "grad_norm": 0.8912862401866359,
      "learning_rate": 9.961219806792232e-06,
      "loss": 0.1925,
      "step": 2348
    },
    {
      "epoch": 0.06852791878172589,
      "grad_norm": 0.9819560623045472,
      "learning_rate": 9.96116105836072e-06,
      "loss": 0.185,
      "step": 2349
    },
    {
      "epoch": 0.06855709201236945,
      "grad_norm": 0.9962237144968342,
      "learning_rate": 9.961102265637144e-06,
      "loss": 0.2011,
      "step": 2350
    },
    {
      "epoch": 0.068586265243013,
      "grad_norm": 1.0182809788713403,
      "learning_rate": 9.961043428622035e-06,
      "loss": 0.2263,
      "step": 2351
    },
    {
      "epoch": 0.06861543847365657,
      "grad_norm": 0.9143174112065799,
      "learning_rate": 9.960984547315912e-06,
      "loss": 0.2016,
      "step": 2352
    },
    {
      "epoch": 0.06864461170430014,
      "grad_norm": 1.0485378149425884,
      "learning_rate": 9.960925621719303e-06,
      "loss": 0.1839,
      "step": 2353
    },
    {
      "epoch": 0.0686737849349437,
      "grad_norm": 1.0526971123647655,
      "learning_rate": 9.960866651832736e-06,
      "loss": 0.1631,
      "step": 2354
    },
    {
      "epoch": 0.06870295816558726,
      "grad_norm": 0.8948910382362572,
      "learning_rate": 9.960807637656735e-06,
      "loss": 0.1991,
      "step": 2355
    },
    {
      "epoch": 0.06873213139623081,
      "grad_norm": 1.1887194133978207,
      "learning_rate": 9.960748579191828e-06,
      "loss": 0.2164,
      "step": 2356
    },
    {
      "epoch": 0.06876130462687438,
      "grad_norm": 1.1574222769922864,
      "learning_rate": 9.960689476438541e-06,
      "loss": 0.1861,
      "step": 2357
    },
    {
      "epoch": 0.06879047785751795,
      "grad_norm": 0.7514501214475601,
      "learning_rate": 9.960630329397403e-06,
      "loss": 0.1801,
      "step": 2358
    },
    {
      "epoch": 0.0688196510881615,
      "grad_norm": 0.9347542331968323,
      "learning_rate": 9.960571138068942e-06,
      "loss": 0.222,
      "step": 2359
    },
    {
      "epoch": 0.06884882431880507,
      "grad_norm": 0.8621373614996176,
      "learning_rate": 9.960511902453685e-06,
      "loss": 0.2159,
      "step": 2360
    },
    {
      "epoch": 0.06887799754944862,
      "grad_norm": 0.9284847136860013,
      "learning_rate": 9.960452622552163e-06,
      "loss": 0.1902,
      "step": 2361
    },
    {
      "epoch": 0.06890717078009219,
      "grad_norm": 0.9108127425244467,
      "learning_rate": 9.960393298364904e-06,
      "loss": 0.1985,
      "step": 2362
    },
    {
      "epoch": 0.06893634401073574,
      "grad_norm": 0.8798707880760961,
      "learning_rate": 9.960333929892438e-06,
      "loss": 0.1968,
      "step": 2363
    },
    {
      "epoch": 0.06896551724137931,
      "grad_norm": 0.9092462067129579,
      "learning_rate": 9.960274517135294e-06,
      "loss": 0.2275,
      "step": 2364
    },
    {
      "epoch": 0.06899469047202288,
      "grad_norm": 1.0368817334127765,
      "learning_rate": 9.960215060094004e-06,
      "loss": 0.2058,
      "step": 2365
    },
    {
      "epoch": 0.06902386370266643,
      "grad_norm": 0.861514927853887,
      "learning_rate": 9.960155558769097e-06,
      "loss": 0.1853,
      "step": 2366
    },
    {
      "epoch": 0.06905303693331,
      "grad_norm": 0.8636982992003643,
      "learning_rate": 9.960096013161105e-06,
      "loss": 0.223,
      "step": 2367
    },
    {
      "epoch": 0.06908221016395355,
      "grad_norm": 0.860306211958087,
      "learning_rate": 9.960036423270561e-06,
      "loss": 0.2064,
      "step": 2368
    },
    {
      "epoch": 0.06911138339459712,
      "grad_norm": 0.847459846554446,
      "learning_rate": 9.959976789097997e-06,
      "loss": 0.1825,
      "step": 2369
    },
    {
      "epoch": 0.06914055662524068,
      "grad_norm": 0.9282284890303654,
      "learning_rate": 9.959917110643942e-06,
      "loss": 0.1952,
      "step": 2370
    },
    {
      "epoch": 0.06916972985588424,
      "grad_norm": 0.939952568470686,
      "learning_rate": 9.959857387908931e-06,
      "loss": 0.1776,
      "step": 2371
    },
    {
      "epoch": 0.0691989030865278,
      "grad_norm": 1.0191405770354907,
      "learning_rate": 9.959797620893498e-06,
      "loss": 0.2041,
      "step": 2372
    },
    {
      "epoch": 0.06922807631717136,
      "grad_norm": 0.7120147314681421,
      "learning_rate": 9.959737809598177e-06,
      "loss": 0.184,
      "step": 2373
    },
    {
      "epoch": 0.06925724954781493,
      "grad_norm": 0.8907912228070642,
      "learning_rate": 9.959677954023501e-06,
      "loss": 0.2127,
      "step": 2374
    },
    {
      "epoch": 0.06928642277845849,
      "grad_norm": 0.9414504551311939,
      "learning_rate": 9.959618054170003e-06,
      "loss": 0.2173,
      "step": 2375
    },
    {
      "epoch": 0.06931559600910205,
      "grad_norm": 0.7052813775278587,
      "learning_rate": 9.959558110038218e-06,
      "loss": 0.1791,
      "step": 2376
    },
    {
      "epoch": 0.06934476923974561,
      "grad_norm": 0.9652752271670552,
      "learning_rate": 9.959498121628683e-06,
      "loss": 0.2138,
      "step": 2377
    },
    {
      "epoch": 0.06937394247038917,
      "grad_norm": 1.0808645757388777,
      "learning_rate": 9.959438088941935e-06,
      "loss": 0.2052,
      "step": 2378
    },
    {
      "epoch": 0.06940311570103273,
      "grad_norm": 0.9018716780531703,
      "learning_rate": 9.959378011978504e-06,
      "loss": 0.183,
      "step": 2379
    },
    {
      "epoch": 0.0694322889316763,
      "grad_norm": 1.0419770280542815,
      "learning_rate": 9.959317890738932e-06,
      "loss": 0.203,
      "step": 2380
    },
    {
      "epoch": 0.06946146216231985,
      "grad_norm": 0.969712062246678,
      "learning_rate": 9.959257725223753e-06,
      "loss": 0.2298,
      "step": 2381
    },
    {
      "epoch": 0.06949063539296342,
      "grad_norm": 0.8279362437059269,
      "learning_rate": 9.959197515433505e-06,
      "loss": 0.1666,
      "step": 2382
    },
    {
      "epoch": 0.06951980862360697,
      "grad_norm": 0.974389601516134,
      "learning_rate": 9.959137261368725e-06,
      "loss": 0.2003,
      "step": 2383
    },
    {
      "epoch": 0.06954898185425054,
      "grad_norm": 0.9440985070740611,
      "learning_rate": 9.959076963029954e-06,
      "loss": 0.209,
      "step": 2384
    },
    {
      "epoch": 0.0695781550848941,
      "grad_norm": 1.0767766171698354,
      "learning_rate": 9.959016620417725e-06,
      "loss": 0.1983,
      "step": 2385
    },
    {
      "epoch": 0.06960732831553766,
      "grad_norm": 1.0014248062894908,
      "learning_rate": 9.95895623353258e-06,
      "loss": 0.2096,
      "step": 2386
    },
    {
      "epoch": 0.06963650154618123,
      "grad_norm": 0.7556710962392882,
      "learning_rate": 9.958895802375056e-06,
      "loss": 0.2061,
      "step": 2387
    },
    {
      "epoch": 0.06966567477682478,
      "grad_norm": 0.9292753584936179,
      "learning_rate": 9.958835326945698e-06,
      "loss": 0.1853,
      "step": 2388
    },
    {
      "epoch": 0.06969484800746835,
      "grad_norm": 1.1008176939215812,
      "learning_rate": 9.958774807245039e-06,
      "loss": 0.201,
      "step": 2389
    },
    {
      "epoch": 0.0697240212381119,
      "grad_norm": 1.0318162019529176,
      "learning_rate": 9.958714243273624e-06,
      "loss": 0.1892,
      "step": 2390
    },
    {
      "epoch": 0.06975319446875547,
      "grad_norm": 0.9803870456434441,
      "learning_rate": 9.95865363503199e-06,
      "loss": 0.2192,
      "step": 2391
    },
    {
      "epoch": 0.06978236769939904,
      "grad_norm": 1.1702309635721455,
      "learning_rate": 9.958592982520681e-06,
      "loss": 0.2317,
      "step": 2392
    },
    {
      "epoch": 0.06981154093004259,
      "grad_norm": 0.9783216659503141,
      "learning_rate": 9.958532285740238e-06,
      "loss": 0.1876,
      "step": 2393
    },
    {
      "epoch": 0.06984071416068616,
      "grad_norm": 0.7905536273298849,
      "learning_rate": 9.958471544691201e-06,
      "loss": 0.1973,
      "step": 2394
    },
    {
      "epoch": 0.06986988739132971,
      "grad_norm": 1.077268354827615,
      "learning_rate": 9.958410759374116e-06,
      "loss": 0.2185,
      "step": 2395
    },
    {
      "epoch": 0.06989906062197328,
      "grad_norm": 1.126968531852992,
      "learning_rate": 9.958349929789521e-06,
      "loss": 0.1984,
      "step": 2396
    },
    {
      "epoch": 0.06992823385261684,
      "grad_norm": 0.7881223910381542,
      "learning_rate": 9.958289055937963e-06,
      "loss": 0.1918,
      "step": 2397
    },
    {
      "epoch": 0.0699574070832604,
      "grad_norm": 0.9537166975001522,
      "learning_rate": 9.958228137819984e-06,
      "loss": 0.197,
      "step": 2398
    },
    {
      "epoch": 0.06998658031390396,
      "grad_norm": 0.9744682320428244,
      "learning_rate": 9.958167175436128e-06,
      "loss": 0.214,
      "step": 2399
    },
    {
      "epoch": 0.07001575354454752,
      "grad_norm": 1.1523567877347596,
      "learning_rate": 9.95810616878694e-06,
      "loss": 0.1957,
      "step": 2400
    },
    {
      "epoch": 0.07004492677519109,
      "grad_norm": 1.1944507329865188,
      "learning_rate": 9.958045117872961e-06,
      "loss": 0.187,
      "step": 2401
    },
    {
      "epoch": 0.07007410000583465,
      "grad_norm": 0.9685427581118649,
      "learning_rate": 9.95798402269474e-06,
      "loss": 0.1911,
      "step": 2402
    },
    {
      "epoch": 0.0701032732364782,
      "grad_norm": 1.0326345432613742,
      "learning_rate": 9.95792288325282e-06,
      "loss": 0.1912,
      "step": 2403
    },
    {
      "epoch": 0.07013244646712177,
      "grad_norm": 0.9634055727972706,
      "learning_rate": 9.95786169954775e-06,
      "loss": 0.1985,
      "step": 2404
    },
    {
      "epoch": 0.07016161969776533,
      "grad_norm": 0.9193623921254882,
      "learning_rate": 9.957800471580074e-06,
      "loss": 0.2138,
      "step": 2405
    },
    {
      "epoch": 0.0701907929284089,
      "grad_norm": 0.9821151777603562,
      "learning_rate": 9.957739199350339e-06,
      "loss": 0.2002,
      "step": 2406
    },
    {
      "epoch": 0.07021996615905245,
      "grad_norm": 0.9099336925257334,
      "learning_rate": 9.95767788285909e-06,
      "loss": 0.1921,
      "step": 2407
    },
    {
      "epoch": 0.07024913938969601,
      "grad_norm": 0.7324101817432253,
      "learning_rate": 9.957616522106878e-06,
      "loss": 0.1831,
      "step": 2408
    },
    {
      "epoch": 0.07027831262033958,
      "grad_norm": 0.8722205327852851,
      "learning_rate": 9.95755511709425e-06,
      "loss": 0.1838,
      "step": 2409
    },
    {
      "epoch": 0.07030748585098313,
      "grad_norm": 1.0160175849577036,
      "learning_rate": 9.957493667821752e-06,
      "loss": 0.1973,
      "step": 2410
    },
    {
      "epoch": 0.0703366590816267,
      "grad_norm": 1.0796794263996108,
      "learning_rate": 9.957432174289934e-06,
      "loss": 0.228,
      "step": 2411
    },
    {
      "epoch": 0.07036583231227025,
      "grad_norm": 1.1028917050992295,
      "learning_rate": 9.957370636499346e-06,
      "loss": 0.2113,
      "step": 2412
    },
    {
      "epoch": 0.07039500554291382,
      "grad_norm": 1.1035408149593489,
      "learning_rate": 9.957309054450534e-06,
      "loss": 0.2053,
      "step": 2413
    },
    {
      "epoch": 0.07042417877355739,
      "grad_norm": 1.0248996546132965,
      "learning_rate": 9.957247428144052e-06,
      "loss": 0.1859,
      "step": 2414
    },
    {
      "epoch": 0.07045335200420094,
      "grad_norm": 1.0670146838617907,
      "learning_rate": 9.957185757580448e-06,
      "loss": 0.1889,
      "step": 2415
    },
    {
      "epoch": 0.07048252523484451,
      "grad_norm": 0.9198217089388051,
      "learning_rate": 9.957124042760274e-06,
      "loss": 0.1887,
      "step": 2416
    },
    {
      "epoch": 0.07051169846548806,
      "grad_norm": 1.1104964904412704,
      "learning_rate": 9.957062283684078e-06,
      "loss": 0.1753,
      "step": 2417
    },
    {
      "epoch": 0.07054087169613163,
      "grad_norm": 1.0434878111586023,
      "learning_rate": 9.957000480352415e-06,
      "loss": 0.2157,
      "step": 2418
    },
    {
      "epoch": 0.0705700449267752,
      "grad_norm": 0.8205827054933801,
      "learning_rate": 9.956938632765833e-06,
      "loss": 0.1835,
      "step": 2419
    },
    {
      "epoch": 0.07059921815741875,
      "grad_norm": 0.9370711144255537,
      "learning_rate": 9.956876740924888e-06,
      "loss": 0.1918,
      "step": 2420
    },
    {
      "epoch": 0.07062839138806232,
      "grad_norm": 0.9241820231784976,
      "learning_rate": 9.956814804830131e-06,
      "loss": 0.1937,
      "step": 2421
    },
    {
      "epoch": 0.07065756461870587,
      "grad_norm": 0.8320079140917186,
      "learning_rate": 9.956752824482114e-06,
      "loss": 0.1989,
      "step": 2422
    },
    {
      "epoch": 0.07068673784934944,
      "grad_norm": 0.9235242375593017,
      "learning_rate": 9.956690799881391e-06,
      "loss": 0.2022,
      "step": 2423
    },
    {
      "epoch": 0.070715911079993,
      "grad_norm": 0.862395217085308,
      "learning_rate": 9.956628731028516e-06,
      "loss": 0.2068,
      "step": 2424
    },
    {
      "epoch": 0.07074508431063656,
      "grad_norm": 0.8779579436971858,
      "learning_rate": 9.956566617924043e-06,
      "loss": 0.1978,
      "step": 2425
    },
    {
      "epoch": 0.07077425754128013,
      "grad_norm": 0.8552995115610158,
      "learning_rate": 9.956504460568525e-06,
      "loss": 0.1976,
      "step": 2426
    },
    {
      "epoch": 0.07080343077192368,
      "grad_norm": 0.8616896467486487,
      "learning_rate": 9.95644225896252e-06,
      "loss": 0.2032,
      "step": 2427
    },
    {
      "epoch": 0.07083260400256725,
      "grad_norm": 1.1605532484673293,
      "learning_rate": 9.956380013106582e-06,
      "loss": 0.2089,
      "step": 2428
    },
    {
      "epoch": 0.07086177723321081,
      "grad_norm": 1.000970860784796,
      "learning_rate": 9.956317723001265e-06,
      "loss": 0.2094,
      "step": 2429
    },
    {
      "epoch": 0.07089095046385437,
      "grad_norm": 0.9507506852609013,
      "learning_rate": 9.956255388647127e-06,
      "loss": 0.2075,
      "step": 2430
    },
    {
      "epoch": 0.07092012369449793,
      "grad_norm": 1.0275419870209836,
      "learning_rate": 9.956193010044725e-06,
      "loss": 0.2001,
      "step": 2431
    },
    {
      "epoch": 0.07094929692514149,
      "grad_norm": 1.1099346786745798,
      "learning_rate": 9.956130587194615e-06,
      "loss": 0.18,
      "step": 2432
    },
    {
      "epoch": 0.07097847015578505,
      "grad_norm": 0.9167391311402326,
      "learning_rate": 9.956068120097353e-06,
      "loss": 0.2279,
      "step": 2433
    },
    {
      "epoch": 0.0710076433864286,
      "grad_norm": 0.997229453392759,
      "learning_rate": 9.956005608753499e-06,
      "loss": 0.1833,
      "step": 2434
    },
    {
      "epoch": 0.07103681661707217,
      "grad_norm": 0.9326126299386502,
      "learning_rate": 9.95594305316361e-06,
      "loss": 0.2074,
      "step": 2435
    },
    {
      "epoch": 0.07106598984771574,
      "grad_norm": 0.8114360126713511,
      "learning_rate": 9.955880453328243e-06,
      "loss": 0.1873,
      "step": 2436
    },
    {
      "epoch": 0.0710951630783593,
      "grad_norm": 1.1166856325162593,
      "learning_rate": 9.95581780924796e-06,
      "loss": 0.192,
      "step": 2437
    },
    {
      "epoch": 0.07112433630900286,
      "grad_norm": 0.9857179562554458,
      "learning_rate": 9.955755120923319e-06,
      "loss": 0.1774,
      "step": 2438
    },
    {
      "epoch": 0.07115350953964641,
      "grad_norm": 1.0638188423148094,
      "learning_rate": 9.955692388354876e-06,
      "loss": 0.2159,
      "step": 2439
    },
    {
      "epoch": 0.07118268277028998,
      "grad_norm": 1.0349445235061012,
      "learning_rate": 9.955629611543198e-06,
      "loss": 0.218,
      "step": 2440
    },
    {
      "epoch": 0.07121185600093355,
      "grad_norm": 0.8570386046990435,
      "learning_rate": 9.95556679048884e-06,
      "loss": 0.1897,
      "step": 2441
    },
    {
      "epoch": 0.0712410292315771,
      "grad_norm": 0.9321227916859394,
      "learning_rate": 9.955503925192365e-06,
      "loss": 0.222,
      "step": 2442
    },
    {
      "epoch": 0.07127020246222067,
      "grad_norm": 1.1486333156118809,
      "learning_rate": 9.955441015654334e-06,
      "loss": 0.1724,
      "step": 2443
    },
    {
      "epoch": 0.07129937569286422,
      "grad_norm": 0.8610017320801263,
      "learning_rate": 9.955378061875309e-06,
      "loss": 0.1985,
      "step": 2444
    },
    {
      "epoch": 0.07132854892350779,
      "grad_norm": 0.9887204063111942,
      "learning_rate": 9.955315063855851e-06,
      "loss": 0.1992,
      "step": 2445
    },
    {
      "epoch": 0.07135772215415136,
      "grad_norm": 1.0296967608410998,
      "learning_rate": 9.955252021596524e-06,
      "loss": 0.187,
      "step": 2446
    },
    {
      "epoch": 0.07138689538479491,
      "grad_norm": 1.042432065215246,
      "learning_rate": 9.955188935097888e-06,
      "loss": 0.2054,
      "step": 2447
    },
    {
      "epoch": 0.07141606861543848,
      "grad_norm": 1.0335384342747953,
      "learning_rate": 9.95512580436051e-06,
      "loss": 0.1951,
      "step": 2448
    },
    {
      "epoch": 0.07144524184608203,
      "grad_norm": 1.068551528570425,
      "learning_rate": 9.955062629384952e-06,
      "loss": 0.2164,
      "step": 2449
    },
    {
      "epoch": 0.0714744150767256,
      "grad_norm": 0.8822694536965376,
      "learning_rate": 9.954999410171775e-06,
      "loss": 0.1946,
      "step": 2450
    },
    {
      "epoch": 0.07150358830736916,
      "grad_norm": 0.952687521069965,
      "learning_rate": 9.954936146721548e-06,
      "loss": 0.194,
      "step": 2451
    },
    {
      "epoch": 0.07153276153801272,
      "grad_norm": 1.0031368490503632,
      "learning_rate": 9.954872839034836e-06,
      "loss": 0.2031,
      "step": 2452
    },
    {
      "epoch": 0.07156193476865629,
      "grad_norm": 0.9135427669353791,
      "learning_rate": 9.954809487112198e-06,
      "loss": 0.1763,
      "step": 2453
    },
    {
      "epoch": 0.07159110799929984,
      "grad_norm": 0.9643004903890515,
      "learning_rate": 9.954746090954205e-06,
      "loss": 0.1695,
      "step": 2454
    },
    {
      "epoch": 0.0716202812299434,
      "grad_norm": 0.9528310407040534,
      "learning_rate": 9.954682650561423e-06,
      "loss": 0.214,
      "step": 2455
    },
    {
      "epoch": 0.07164945446058696,
      "grad_norm": 0.8547133992363304,
      "learning_rate": 9.954619165934417e-06,
      "loss": 0.227,
      "step": 2456
    },
    {
      "epoch": 0.07167862769123053,
      "grad_norm": 0.9051851367771333,
      "learning_rate": 9.954555637073752e-06,
      "loss": 0.1706,
      "step": 2457
    },
    {
      "epoch": 0.07170780092187409,
      "grad_norm": 0.8861919955521864,
      "learning_rate": 9.95449206398e-06,
      "loss": 0.2051,
      "step": 2458
    },
    {
      "epoch": 0.07173697415251765,
      "grad_norm": 0.9730791963232464,
      "learning_rate": 9.954428446653723e-06,
      "loss": 0.201,
      "step": 2459
    },
    {
      "epoch": 0.07176614738316121,
      "grad_norm": 0.9933523719193613,
      "learning_rate": 9.954364785095493e-06,
      "loss": 0.1991,
      "step": 2460
    },
    {
      "epoch": 0.07179532061380477,
      "grad_norm": 0.8492267930360498,
      "learning_rate": 9.954301079305875e-06,
      "loss": 0.1834,
      "step": 2461
    },
    {
      "epoch": 0.07182449384444833,
      "grad_norm": 1.0898044476044764,
      "learning_rate": 9.95423732928544e-06,
      "loss": 0.1983,
      "step": 2462
    },
    {
      "epoch": 0.0718536670750919,
      "grad_norm": 0.9642681007485292,
      "learning_rate": 9.95417353503476e-06,
      "loss": 0.2028,
      "step": 2463
    },
    {
      "epoch": 0.07188284030573545,
      "grad_norm": 0.8881218865673711,
      "learning_rate": 9.9541096965544e-06,
      "loss": 0.1994,
      "step": 2464
    },
    {
      "epoch": 0.07191201353637902,
      "grad_norm": 0.9375368969560157,
      "learning_rate": 9.954045813844929e-06,
      "loss": 0.1858,
      "step": 2465
    },
    {
      "epoch": 0.07194118676702257,
      "grad_norm": 0.9285210062506181,
      "learning_rate": 9.953981886906921e-06,
      "loss": 0.1921,
      "step": 2466
    },
    {
      "epoch": 0.07197035999766614,
      "grad_norm": 1.004301277208077,
      "learning_rate": 9.953917915740944e-06,
      "loss": 0.2061,
      "step": 2467
    },
    {
      "epoch": 0.07199953322830971,
      "grad_norm": 0.9226092100920326,
      "learning_rate": 9.953853900347572e-06,
      "loss": 0.2316,
      "step": 2468
    },
    {
      "epoch": 0.07202870645895326,
      "grad_norm": 0.8940228359318462,
      "learning_rate": 9.953789840727374e-06,
      "loss": 0.2167,
      "step": 2469
    },
    {
      "epoch": 0.07205787968959683,
      "grad_norm": 0.7997127468207761,
      "learning_rate": 9.953725736880925e-06,
      "loss": 0.2064,
      "step": 2470
    },
    {
      "epoch": 0.07208705292024038,
      "grad_norm": 0.9171188662970678,
      "learning_rate": 9.953661588808795e-06,
      "loss": 0.1954,
      "step": 2471
    },
    {
      "epoch": 0.07211622615088395,
      "grad_norm": 0.8660430614460405,
      "learning_rate": 9.953597396511555e-06,
      "loss": 0.2022,
      "step": 2472
    },
    {
      "epoch": 0.07214539938152752,
      "grad_norm": 0.9180021940704193,
      "learning_rate": 9.95353315998978e-06,
      "loss": 0.1714,
      "step": 2473
    },
    {
      "epoch": 0.07217457261217107,
      "grad_norm": 0.8869049918164137,
      "learning_rate": 9.953468879244045e-06,
      "loss": 0.1886,
      "step": 2474
    },
    {
      "epoch": 0.07220374584281464,
      "grad_norm": 0.8894304734100424,
      "learning_rate": 9.95340455427492e-06,
      "loss": 0.1792,
      "step": 2475
    },
    {
      "epoch": 0.07223291907345819,
      "grad_norm": 0.9035187211235377,
      "learning_rate": 9.953340185082982e-06,
      "loss": 0.2004,
      "step": 2476
    },
    {
      "epoch": 0.07226209230410176,
      "grad_norm": 0.8558932996514017,
      "learning_rate": 9.953275771668807e-06,
      "loss": 0.1821,
      "step": 2477
    },
    {
      "epoch": 0.07229126553474531,
      "grad_norm": 1.0864380103500642,
      "learning_rate": 9.953211314032967e-06,
      "loss": 0.1976,
      "step": 2478
    },
    {
      "epoch": 0.07232043876538888,
      "grad_norm": 0.7988910475774128,
      "learning_rate": 9.95314681217604e-06,
      "loss": 0.1813,
      "step": 2479
    },
    {
      "epoch": 0.07234961199603245,
      "grad_norm": 0.9068000257420371,
      "learning_rate": 9.9530822660986e-06,
      "loss": 0.1949,
      "step": 2480
    },
    {
      "epoch": 0.072378785226676,
      "grad_norm": 0.9470315605128883,
      "learning_rate": 9.953017675801225e-06,
      "loss": 0.1993,
      "step": 2481
    },
    {
      "epoch": 0.07240795845731957,
      "grad_norm": 0.9435008351969455,
      "learning_rate": 9.952953041284488e-06,
      "loss": 0.1859,
      "step": 2482
    },
    {
      "epoch": 0.07243713168796312,
      "grad_norm": 0.9990786184141739,
      "learning_rate": 9.952888362548971e-06,
      "loss": 0.2186,
      "step": 2483
    },
    {
      "epoch": 0.07246630491860669,
      "grad_norm": 0.9145289797257048,
      "learning_rate": 9.952823639595248e-06,
      "loss": 0.208,
      "step": 2484
    },
    {
      "epoch": 0.07249547814925025,
      "grad_norm": 1.0575528341843174,
      "learning_rate": 9.952758872423897e-06,
      "loss": 0.2204,
      "step": 2485
    },
    {
      "epoch": 0.0725246513798938,
      "grad_norm": 0.8410179058919071,
      "learning_rate": 9.952694061035499e-06,
      "loss": 0.1962,
      "step": 2486
    },
    {
      "epoch": 0.07255382461053737,
      "grad_norm": 0.9140375553260064,
      "learning_rate": 9.952629205430631e-06,
      "loss": 0.189,
      "step": 2487
    },
    {
      "epoch": 0.07258299784118093,
      "grad_norm": 1.2227293723778352,
      "learning_rate": 9.95256430560987e-06,
      "loss": 0.2248,
      "step": 2488
    },
    {
      "epoch": 0.0726121710718245,
      "grad_norm": 0.997622801955556,
      "learning_rate": 9.952499361573797e-06,
      "loss": 0.1888,
      "step": 2489
    },
    {
      "epoch": 0.07264134430246806,
      "grad_norm": 0.8271726846289789,
      "learning_rate": 9.952434373322993e-06,
      "loss": 0.1957,
      "step": 2490
    },
    {
      "epoch": 0.07267051753311161,
      "grad_norm": 0.8662622401647231,
      "learning_rate": 9.952369340858037e-06,
      "loss": 0.1893,
      "step": 2491
    },
    {
      "epoch": 0.07269969076375518,
      "grad_norm": 1.0754646388722005,
      "learning_rate": 9.95230426417951e-06,
      "loss": 0.2122,
      "step": 2492
    },
    {
      "epoch": 0.07272886399439873,
      "grad_norm": 1.0034699914219778,
      "learning_rate": 9.952239143287992e-06,
      "loss": 0.2295,
      "step": 2493
    },
    {
      "epoch": 0.0727580372250423,
      "grad_norm": 1.037099578814681,
      "learning_rate": 9.952173978184065e-06,
      "loss": 0.2452,
      "step": 2494
    },
    {
      "epoch": 0.07278721045568587,
      "grad_norm": 0.882954788289804,
      "learning_rate": 9.952108768868311e-06,
      "loss": 0.1975,
      "step": 2495
    },
    {
      "epoch": 0.07281638368632942,
      "grad_norm": 0.8946645911590452,
      "learning_rate": 9.952043515341315e-06,
      "loss": 0.2023,
      "step": 2496
    },
    {
      "epoch": 0.07284555691697299,
      "grad_norm": 0.8918220605361443,
      "learning_rate": 9.951978217603652e-06,
      "loss": 0.1913,
      "step": 2497
    },
    {
      "epoch": 0.07287473014761654,
      "grad_norm": 0.8738879545689714,
      "learning_rate": 9.951912875655913e-06,
      "loss": 0.175,
      "step": 2498
    },
    {
      "epoch": 0.07290390337826011,
      "grad_norm": 0.9694918935764463,
      "learning_rate": 9.951847489498675e-06,
      "loss": 0.2217,
      "step": 2499
    },
    {
      "epoch": 0.07293307660890366,
      "grad_norm": 0.9743991490788483,
      "learning_rate": 9.951782059132528e-06,
      "loss": 0.1693,
      "step": 2500
    },
    {
      "epoch": 0.07296224983954723,
      "grad_norm": 0.9085291604921203,
      "learning_rate": 9.95171658455805e-06,
      "loss": 0.1954,
      "step": 2501
    },
    {
      "epoch": 0.0729914230701908,
      "grad_norm": 0.9292300982103625,
      "learning_rate": 9.951651065775831e-06,
      "loss": 0.2006,
      "step": 2502
    },
    {
      "epoch": 0.07302059630083435,
      "grad_norm": 0.9665078351867818,
      "learning_rate": 9.951585502786452e-06,
      "loss": 0.195,
      "step": 2503
    },
    {
      "epoch": 0.07304976953147792,
      "grad_norm": 0.8681058912661461,
      "learning_rate": 9.9515198955905e-06,
      "loss": 0.1977,
      "step": 2504
    },
    {
      "epoch": 0.07307894276212147,
      "grad_norm": 0.8466550917204185,
      "learning_rate": 9.95145424418856e-06,
      "loss": 0.1948,
      "step": 2505
    },
    {
      "epoch": 0.07310811599276504,
      "grad_norm": 1.2150285220508747,
      "learning_rate": 9.951388548581218e-06,
      "loss": 0.1869,
      "step": 2506
    },
    {
      "epoch": 0.0731372892234086,
      "grad_norm": 0.9931115293779862,
      "learning_rate": 9.951322808769062e-06,
      "loss": 0.189,
      "step": 2507
    },
    {
      "epoch": 0.07316646245405216,
      "grad_norm": 0.8626432422597347,
      "learning_rate": 9.951257024752678e-06,
      "loss": 0.2124,
      "step": 2508
    },
    {
      "epoch": 0.07319563568469573,
      "grad_norm": 0.9069895047602374,
      "learning_rate": 9.951191196532653e-06,
      "loss": 0.2139,
      "step": 2509
    },
    {
      "epoch": 0.07322480891533928,
      "grad_norm": 0.991581796264132,
      "learning_rate": 9.951125324109573e-06,
      "loss": 0.2091,
      "step": 2510
    },
    {
      "epoch": 0.07325398214598285,
      "grad_norm": 0.9564016164853112,
      "learning_rate": 9.951059407484032e-06,
      "loss": 0.2086,
      "step": 2511
    },
    {
      "epoch": 0.07328315537662641,
      "grad_norm": 0.824312750052677,
      "learning_rate": 9.950993446656612e-06,
      "loss": 0.2035,
      "step": 2512
    },
    {
      "epoch": 0.07331232860726997,
      "grad_norm": 1.0688203964707534,
      "learning_rate": 9.950927441627905e-06,
      "loss": 0.1888,
      "step": 2513
    },
    {
      "epoch": 0.07334150183791353,
      "grad_norm": 0.9778269925076594,
      "learning_rate": 9.950861392398499e-06,
      "loss": 0.2116,
      "step": 2514
    },
    {
      "epoch": 0.07337067506855709,
      "grad_norm": 1.1299564714170196,
      "learning_rate": 9.950795298968986e-06,
      "loss": 0.2042,
      "step": 2515
    },
    {
      "epoch": 0.07339984829920065,
      "grad_norm": 0.8718630474472272,
      "learning_rate": 9.950729161339951e-06,
      "loss": 0.196,
      "step": 2516
    },
    {
      "epoch": 0.07342902152984422,
      "grad_norm": 0.8128594163537469,
      "learning_rate": 9.95066297951199e-06,
      "loss": 0.185,
      "step": 2517
    },
    {
      "epoch": 0.07345819476048777,
      "grad_norm": 0.7886401178405563,
      "learning_rate": 9.950596753485693e-06,
      "loss": 0.1924,
      "step": 2518
    },
    {
      "epoch": 0.07348736799113134,
      "grad_norm": 0.7225279092141481,
      "learning_rate": 9.950530483261649e-06,
      "loss": 0.1983,
      "step": 2519
    },
    {
      "epoch": 0.0735165412217749,
      "grad_norm": 0.8504606643688458,
      "learning_rate": 9.95046416884045e-06,
      "loss": 0.1919,
      "step": 2520
    },
    {
      "epoch": 0.07354571445241846,
      "grad_norm": 1.0296881981133896,
      "learning_rate": 9.95039781022269e-06,
      "loss": 0.2238,
      "step": 2521
    },
    {
      "epoch": 0.07357488768306203,
      "grad_norm": 0.8873545789881933,
      "learning_rate": 9.950331407408958e-06,
      "loss": 0.1834,
      "step": 2522
    },
    {
      "epoch": 0.07360406091370558,
      "grad_norm": 0.8140051656943608,
      "learning_rate": 9.95026496039985e-06,
      "loss": 0.1756,
      "step": 2523
    },
    {
      "epoch": 0.07363323414434915,
      "grad_norm": 0.9609860973193264,
      "learning_rate": 9.950198469195959e-06,
      "loss": 0.2111,
      "step": 2524
    },
    {
      "epoch": 0.0736624073749927,
      "grad_norm": 0.9451889863966915,
      "learning_rate": 9.950131933797876e-06,
      "loss": 0.198,
      "step": 2525
    },
    {
      "epoch": 0.07369158060563627,
      "grad_norm": 0.9025713220549293,
      "learning_rate": 9.950065354206198e-06,
      "loss": 0.1906,
      "step": 2526
    },
    {
      "epoch": 0.07372075383627982,
      "grad_norm": 1.1171116402543344,
      "learning_rate": 9.949998730421519e-06,
      "loss": 0.1878,
      "step": 2527
    },
    {
      "epoch": 0.07374992706692339,
      "grad_norm": 0.8216241005939505,
      "learning_rate": 9.949932062444431e-06,
      "loss": 0.2053,
      "step": 2528
    },
    {
      "epoch": 0.07377910029756696,
      "grad_norm": 1.2913869392142565,
      "learning_rate": 9.949865350275532e-06,
      "loss": 0.1963,
      "step": 2529
    },
    {
      "epoch": 0.07380827352821051,
      "grad_norm": 0.8574517560317876,
      "learning_rate": 9.949798593915418e-06,
      "loss": 0.1922,
      "step": 2530
    },
    {
      "epoch": 0.07383744675885408,
      "grad_norm": 1.1689085454686512,
      "learning_rate": 9.949731793364683e-06,
      "loss": 0.1907,
      "step": 2531
    },
    {
      "epoch": 0.07386661998949763,
      "grad_norm": 1.2915789591712317,
      "learning_rate": 9.949664948623923e-06,
      "loss": 0.1945,
      "step": 2532
    },
    {
      "epoch": 0.0738957932201412,
      "grad_norm": 0.7767667508933432,
      "learning_rate": 9.949598059693737e-06,
      "loss": 0.1708,
      "step": 2533
    },
    {
      "epoch": 0.07392496645078477,
      "grad_norm": 1.2243554126197655,
      "learning_rate": 9.94953112657472e-06,
      "loss": 0.2032,
      "step": 2534
    },
    {
      "epoch": 0.07395413968142832,
      "grad_norm": 1.1787710785802916,
      "learning_rate": 9.949464149267473e-06,
      "loss": 0.19,
      "step": 2535
    },
    {
      "epoch": 0.07398331291207189,
      "grad_norm": 0.906760896516415,
      "learning_rate": 9.94939712777259e-06,
      "loss": 0.1989,
      "step": 2536
    },
    {
      "epoch": 0.07401248614271544,
      "grad_norm": 1.031484955743404,
      "learning_rate": 9.949330062090671e-06,
      "loss": 0.1758,
      "step": 2537
    },
    {
      "epoch": 0.074041659373359,
      "grad_norm": 0.9411214101633906,
      "learning_rate": 9.949262952222316e-06,
      "loss": 0.1878,
      "step": 2538
    },
    {
      "epoch": 0.07407083260400257,
      "grad_norm": 0.9884594827492988,
      "learning_rate": 9.94919579816812e-06,
      "loss": 0.1961,
      "step": 2539
    },
    {
      "epoch": 0.07410000583464613,
      "grad_norm": 1.0737036981799195,
      "learning_rate": 9.949128599928687e-06,
      "loss": 0.199,
      "step": 2540
    },
    {
      "epoch": 0.0741291790652897,
      "grad_norm": 0.9696232110495232,
      "learning_rate": 9.949061357504617e-06,
      "loss": 0.1732,
      "step": 2541
    },
    {
      "epoch": 0.07415835229593325,
      "grad_norm": 1.1846628887886361,
      "learning_rate": 9.948994070896508e-06,
      "loss": 0.1945,
      "step": 2542
    },
    {
      "epoch": 0.07418752552657681,
      "grad_norm": 1.4202641562292888,
      "learning_rate": 9.948926740104958e-06,
      "loss": 0.204,
      "step": 2543
    },
    {
      "epoch": 0.07421669875722038,
      "grad_norm": 0.933920277116389,
      "learning_rate": 9.948859365130574e-06,
      "loss": 0.1673,
      "step": 2544
    },
    {
      "epoch": 0.07424587198786393,
      "grad_norm": 0.99870713723646,
      "learning_rate": 9.948791945973955e-06,
      "loss": 0.2584,
      "step": 2545
    },
    {
      "epoch": 0.0742750452185075,
      "grad_norm": 0.944141519756461,
      "learning_rate": 9.948724482635703e-06,
      "loss": 0.1935,
      "step": 2546
    },
    {
      "epoch": 0.07430421844915105,
      "grad_norm": 0.9033999505149384,
      "learning_rate": 9.94865697511642e-06,
      "loss": 0.2236,
      "step": 2547
    },
    {
      "epoch": 0.07433339167979462,
      "grad_norm": 0.8211666350624257,
      "learning_rate": 9.94858942341671e-06,
      "loss": 0.1818,
      "step": 2548
    },
    {
      "epoch": 0.07436256491043818,
      "grad_norm": 0.9274903197590763,
      "learning_rate": 9.948521827537172e-06,
      "loss": 0.1812,
      "step": 2549
    },
    {
      "epoch": 0.07439173814108174,
      "grad_norm": 0.9605098584721782,
      "learning_rate": 9.948454187478414e-06,
      "loss": 0.1965,
      "step": 2550
    },
    {
      "epoch": 0.07442091137172531,
      "grad_norm": 0.9480236670737756,
      "learning_rate": 9.948386503241039e-06,
      "loss": 0.1996,
      "step": 2551
    },
    {
      "epoch": 0.07445008460236886,
      "grad_norm": 0.8485166444346322,
      "learning_rate": 9.94831877482565e-06,
      "loss": 0.1988,
      "step": 2552
    },
    {
      "epoch": 0.07447925783301243,
      "grad_norm": 0.825194544691037,
      "learning_rate": 9.948251002232852e-06,
      "loss": 0.1853,
      "step": 2553
    },
    {
      "epoch": 0.07450843106365598,
      "grad_norm": 0.9736642606664729,
      "learning_rate": 9.948183185463252e-06,
      "loss": 0.213,
      "step": 2554
    },
    {
      "epoch": 0.07453760429429955,
      "grad_norm": 0.8261115363986782,
      "learning_rate": 9.948115324517451e-06,
      "loss": 0.1845,
      "step": 2555
    },
    {
      "epoch": 0.07456677752494312,
      "grad_norm": 1.0089780084904925,
      "learning_rate": 9.948047419396059e-06,
      "loss": 0.1782,
      "step": 2556
    },
    {
      "epoch": 0.07459595075558667,
      "grad_norm": 0.7853899303405794,
      "learning_rate": 9.947979470099682e-06,
      "loss": 0.1911,
      "step": 2557
    },
    {
      "epoch": 0.07462512398623024,
      "grad_norm": 1.0173764561506085,
      "learning_rate": 9.947911476628923e-06,
      "loss": 0.2073,
      "step": 2558
    },
    {
      "epoch": 0.07465429721687379,
      "grad_norm": 0.8505969321849713,
      "learning_rate": 9.947843438984392e-06,
      "loss": 0.2028,
      "step": 2559
    },
    {
      "epoch": 0.07468347044751736,
      "grad_norm": 0.794195364019822,
      "learning_rate": 9.947775357166699e-06,
      "loss": 0.1719,
      "step": 2560
    },
    {
      "epoch": 0.07471264367816093,
      "grad_norm": 0.835272658713182,
      "learning_rate": 9.947707231176444e-06,
      "loss": 0.1784,
      "step": 2561
    },
    {
      "epoch": 0.07474181690880448,
      "grad_norm": 0.9894806609791134,
      "learning_rate": 9.947639061014242e-06,
      "loss": 0.1715,
      "step": 2562
    },
    {
      "epoch": 0.07477099013944805,
      "grad_norm": 0.9869328334441443,
      "learning_rate": 9.9475708466807e-06,
      "loss": 0.1863,
      "step": 2563
    },
    {
      "epoch": 0.0748001633700916,
      "grad_norm": 1.159662679891997,
      "learning_rate": 9.947502588176427e-06,
      "loss": 0.2023,
      "step": 2564
    },
    {
      "epoch": 0.07482933660073517,
      "grad_norm": 0.9242264876067878,
      "learning_rate": 9.947434285502032e-06,
      "loss": 0.1819,
      "step": 2565
    },
    {
      "epoch": 0.07485850983137873,
      "grad_norm": 1.026543496068096,
      "learning_rate": 9.947365938658124e-06,
      "loss": 0.1965,
      "step": 2566
    },
    {
      "epoch": 0.07488768306202229,
      "grad_norm": 0.9841963172592002,
      "learning_rate": 9.947297547645314e-06,
      "loss": 0.1957,
      "step": 2567
    },
    {
      "epoch": 0.07491685629266585,
      "grad_norm": 1.2404952203932327,
      "learning_rate": 9.947229112464213e-06,
      "loss": 0.1963,
      "step": 2568
    },
    {
      "epoch": 0.07494602952330941,
      "grad_norm": 0.9740693352878483,
      "learning_rate": 9.947160633115431e-06,
      "loss": 0.1969,
      "step": 2569
    },
    {
      "epoch": 0.07497520275395297,
      "grad_norm": 0.870458868803175,
      "learning_rate": 9.94709210959958e-06,
      "loss": 0.1857,
      "step": 2570
    },
    {
      "epoch": 0.07500437598459653,
      "grad_norm": 0.9470708798435789,
      "learning_rate": 9.947023541917271e-06,
      "loss": 0.1886,
      "step": 2571
    },
    {
      "epoch": 0.0750335492152401,
      "grad_norm": 1.1836894201983172,
      "learning_rate": 9.946954930069117e-06,
      "loss": 0.2086,
      "step": 2572
    },
    {
      "epoch": 0.07506272244588366,
      "grad_norm": 0.9832880021505518,
      "learning_rate": 9.946886274055731e-06,
      "loss": 0.202,
      "step": 2573
    },
    {
      "epoch": 0.07509189567652721,
      "grad_norm": 0.9515418412970081,
      "learning_rate": 9.946817573877725e-06,
      "loss": 0.208,
      "step": 2574
    },
    {
      "epoch": 0.07512106890717078,
      "grad_norm": 0.9548031685703873,
      "learning_rate": 9.946748829535714e-06,
      "loss": 0.1928,
      "step": 2575
    },
    {
      "epoch": 0.07515024213781434,
      "grad_norm": 0.8728612685822629,
      "learning_rate": 9.946680041030308e-06,
      "loss": 0.2178,
      "step": 2576
    },
    {
      "epoch": 0.0751794153684579,
      "grad_norm": 0.9042425278014952,
      "learning_rate": 9.946611208362123e-06,
      "loss": 0.2207,
      "step": 2577
    },
    {
      "epoch": 0.07520858859910147,
      "grad_norm": 0.8710367260702849,
      "learning_rate": 9.946542331531777e-06,
      "loss": 0.1995,
      "step": 2578
    },
    {
      "epoch": 0.07523776182974502,
      "grad_norm": 0.8886974016626152,
      "learning_rate": 9.946473410539878e-06,
      "loss": 0.2174,
      "step": 2579
    },
    {
      "epoch": 0.07526693506038859,
      "grad_norm": 0.8335754120537804,
      "learning_rate": 9.946404445387048e-06,
      "loss": 0.2076,
      "step": 2580
    },
    {
      "epoch": 0.07529610829103214,
      "grad_norm": 0.8780117152861452,
      "learning_rate": 9.946335436073899e-06,
      "loss": 0.1905,
      "step": 2581
    },
    {
      "epoch": 0.07532528152167571,
      "grad_norm": 0.9089534703220984,
      "learning_rate": 9.946266382601049e-06,
      "loss": 0.1763,
      "step": 2582
    },
    {
      "epoch": 0.07535445475231928,
      "grad_norm": 0.7917110926617236,
      "learning_rate": 9.946197284969112e-06,
      "loss": 0.2208,
      "step": 2583
    },
    {
      "epoch": 0.07538362798296283,
      "grad_norm": 1.1303322563806806,
      "learning_rate": 9.946128143178708e-06,
      "loss": 0.2117,
      "step": 2584
    },
    {
      "epoch": 0.0754128012136064,
      "grad_norm": 1.0074994278245815,
      "learning_rate": 9.946058957230451e-06,
      "loss": 0.1866,
      "step": 2585
    },
    {
      "epoch": 0.07544197444424995,
      "grad_norm": 0.918129918274096,
      "learning_rate": 9.945989727124963e-06,
      "loss": 0.204,
      "step": 2586
    },
    {
      "epoch": 0.07547114767489352,
      "grad_norm": 0.7543863200792966,
      "learning_rate": 9.945920452862856e-06,
      "loss": 0.1771,
      "step": 2587
    },
    {
      "epoch": 0.07550032090553709,
      "grad_norm": 1.3205281719710635,
      "learning_rate": 9.945851134444754e-06,
      "loss": 0.2061,
      "step": 2588
    },
    {
      "epoch": 0.07552949413618064,
      "grad_norm": 1.0235724508465391,
      "learning_rate": 9.945781771871274e-06,
      "loss": 0.1994,
      "step": 2589
    },
    {
      "epoch": 0.0755586673668242,
      "grad_norm": 0.920875422648793,
      "learning_rate": 9.945712365143034e-06,
      "loss": 0.1996,
      "step": 2590
    },
    {
      "epoch": 0.07558784059746776,
      "grad_norm": 0.9255287465514315,
      "learning_rate": 9.945642914260655e-06,
      "loss": 0.2,
      "step": 2591
    },
    {
      "epoch": 0.07561701382811133,
      "grad_norm": 1.0882154267587745,
      "learning_rate": 9.945573419224757e-06,
      "loss": 0.2171,
      "step": 2592
    },
    {
      "epoch": 0.07564618705875488,
      "grad_norm": 1.111913818943444,
      "learning_rate": 9.945503880035958e-06,
      "loss": 0.1969,
      "step": 2593
    },
    {
      "epoch": 0.07567536028939845,
      "grad_norm": 0.8757571257745991,
      "learning_rate": 9.945434296694883e-06,
      "loss": 0.2094,
      "step": 2594
    },
    {
      "epoch": 0.07570453352004201,
      "grad_norm": 0.8715141905293745,
      "learning_rate": 9.94536466920215e-06,
      "loss": 0.1984,
      "step": 2595
    },
    {
      "epoch": 0.07573370675068557,
      "grad_norm": 1.2895697791871283,
      "learning_rate": 9.945294997558384e-06,
      "loss": 0.2181,
      "step": 2596
    },
    {
      "epoch": 0.07576287998132913,
      "grad_norm": 1.0281992159004434,
      "learning_rate": 9.945225281764203e-06,
      "loss": 0.199,
      "step": 2597
    },
    {
      "epoch": 0.07579205321197269,
      "grad_norm": 1.0161962229050718,
      "learning_rate": 9.945155521820232e-06,
      "loss": 0.1731,
      "step": 2598
    },
    {
      "epoch": 0.07582122644261625,
      "grad_norm": 0.9895792606137934,
      "learning_rate": 9.945085717727093e-06,
      "loss": 0.1997,
      "step": 2599
    },
    {
      "epoch": 0.07585039967325982,
      "grad_norm": 0.8103461045109309,
      "learning_rate": 9.945015869485409e-06,
      "loss": 0.1735,
      "step": 2600
    },
    {
      "epoch": 0.07587957290390338,
      "grad_norm": 0.7954580081561768,
      "learning_rate": 9.944945977095803e-06,
      "loss": 0.1902,
      "step": 2601
    },
    {
      "epoch": 0.07590874613454694,
      "grad_norm": 0.8832405590401877,
      "learning_rate": 9.9448760405589e-06,
      "loss": 0.1729,
      "step": 2602
    },
    {
      "epoch": 0.0759379193651905,
      "grad_norm": 0.9456933396832424,
      "learning_rate": 9.944806059875326e-06,
      "loss": 0.2013,
      "step": 2603
    },
    {
      "epoch": 0.07596709259583406,
      "grad_norm": 1.0812812205345452,
      "learning_rate": 9.944736035045702e-06,
      "loss": 0.2016,
      "step": 2604
    },
    {
      "epoch": 0.07599626582647763,
      "grad_norm": 0.8352054121513081,
      "learning_rate": 9.944665966070654e-06,
      "loss": 0.168,
      "step": 2605
    },
    {
      "epoch": 0.07602543905712118,
      "grad_norm": 0.9275361524424524,
      "learning_rate": 9.944595852950812e-06,
      "loss": 0.1932,
      "step": 2606
    },
    {
      "epoch": 0.07605461228776475,
      "grad_norm": 0.9762002102562343,
      "learning_rate": 9.944525695686795e-06,
      "loss": 0.1877,
      "step": 2607
    },
    {
      "epoch": 0.0760837855184083,
      "grad_norm": 0.8302742683084885,
      "learning_rate": 9.944455494279235e-06,
      "loss": 0.1964,
      "step": 2608
    },
    {
      "epoch": 0.07611295874905187,
      "grad_norm": 0.9651412018240907,
      "learning_rate": 9.944385248728757e-06,
      "loss": 0.1935,
      "step": 2609
    },
    {
      "epoch": 0.07614213197969544,
      "grad_norm": 1.0580469462137965,
      "learning_rate": 9.944314959035987e-06,
      "loss": 0.1954,
      "step": 2610
    },
    {
      "epoch": 0.07617130521033899,
      "grad_norm": 0.8436060684882135,
      "learning_rate": 9.944244625201553e-06,
      "loss": 0.1786,
      "step": 2611
    },
    {
      "epoch": 0.07620047844098256,
      "grad_norm": 1.1668726972890329,
      "learning_rate": 9.944174247226084e-06,
      "loss": 0.201,
      "step": 2612
    },
    {
      "epoch": 0.07622965167162611,
      "grad_norm": 0.9050530434600664,
      "learning_rate": 9.944103825110207e-06,
      "loss": 0.2082,
      "step": 2613
    },
    {
      "epoch": 0.07625882490226968,
      "grad_norm": 0.9657438371151073,
      "learning_rate": 9.944033358854553e-06,
      "loss": 0.1855,
      "step": 2614
    },
    {
      "epoch": 0.07628799813291325,
      "grad_norm": 0.8982666967367076,
      "learning_rate": 9.943962848459747e-06,
      "loss": 0.1957,
      "step": 2615
    },
    {
      "epoch": 0.0763171713635568,
      "grad_norm": 0.8917214421128401,
      "learning_rate": 9.943892293926422e-06,
      "loss": 0.2097,
      "step": 2616
    },
    {
      "epoch": 0.07634634459420037,
      "grad_norm": 1.0212730113490176,
      "learning_rate": 9.943821695255208e-06,
      "loss": 0.1911,
      "step": 2617
    },
    {
      "epoch": 0.07637551782484392,
      "grad_norm": 1.2312845391322114,
      "learning_rate": 9.943751052446732e-06,
      "loss": 0.1713,
      "step": 2618
    },
    {
      "epoch": 0.07640469105548749,
      "grad_norm": 0.7339266003982186,
      "learning_rate": 9.943680365501628e-06,
      "loss": 0.1962,
      "step": 2619
    },
    {
      "epoch": 0.07643386428613104,
      "grad_norm": 0.769726512109223,
      "learning_rate": 9.943609634420526e-06,
      "loss": 0.1659,
      "step": 2620
    },
    {
      "epoch": 0.0764630375167746,
      "grad_norm": 1.1079342427199865,
      "learning_rate": 9.943538859204056e-06,
      "loss": 0.2021,
      "step": 2621
    },
    {
      "epoch": 0.07649221074741817,
      "grad_norm": 0.9860008360785368,
      "learning_rate": 9.943468039852852e-06,
      "loss": 0.1891,
      "step": 2622
    },
    {
      "epoch": 0.07652138397806173,
      "grad_norm": 0.7521624716160008,
      "learning_rate": 9.943397176367546e-06,
      "loss": 0.1832,
      "step": 2623
    },
    {
      "epoch": 0.0765505572087053,
      "grad_norm": 1.0201516196014724,
      "learning_rate": 9.94332626874877e-06,
      "loss": 0.1984,
      "step": 2624
    },
    {
      "epoch": 0.07657973043934885,
      "grad_norm": 1.221830997682814,
      "learning_rate": 9.943255316997156e-06,
      "loss": 0.21,
      "step": 2625
    },
    {
      "epoch": 0.07660890366999241,
      "grad_norm": 0.8614480246302127,
      "learning_rate": 9.943184321113339e-06,
      "loss": 0.2104,
      "step": 2626
    },
    {
      "epoch": 0.07663807690063598,
      "grad_norm": 0.8262000373867923,
      "learning_rate": 9.943113281097953e-06,
      "loss": 0.1722,
      "step": 2627
    },
    {
      "epoch": 0.07666725013127954,
      "grad_norm": 0.9084643356559261,
      "learning_rate": 9.943042196951631e-06,
      "loss": 0.1972,
      "step": 2628
    },
    {
      "epoch": 0.0766964233619231,
      "grad_norm": 0.8752944963258539,
      "learning_rate": 9.942971068675009e-06,
      "loss": 0.1875,
      "step": 2629
    },
    {
      "epoch": 0.07672559659256666,
      "grad_norm": 0.795338642633206,
      "learning_rate": 9.942899896268721e-06,
      "loss": 0.1535,
      "step": 2630
    },
    {
      "epoch": 0.07675476982321022,
      "grad_norm": 0.9344221503966518,
      "learning_rate": 9.942828679733402e-06,
      "loss": 0.2219,
      "step": 2631
    },
    {
      "epoch": 0.07678394305385379,
      "grad_norm": 1.4776393321268981,
      "learning_rate": 9.942757419069688e-06,
      "loss": 0.1929,
      "step": 2632
    },
    {
      "epoch": 0.07681311628449734,
      "grad_norm": 1.0484382039943305,
      "learning_rate": 9.942686114278218e-06,
      "loss": 0.1843,
      "step": 2633
    },
    {
      "epoch": 0.07684228951514091,
      "grad_norm": 0.7543481335208506,
      "learning_rate": 9.942614765359625e-06,
      "loss": 0.1758,
      "step": 2634
    },
    {
      "epoch": 0.07687146274578446,
      "grad_norm": 1.1031882754003925,
      "learning_rate": 9.942543372314548e-06,
      "loss": 0.1963,
      "step": 2635
    },
    {
      "epoch": 0.07690063597642803,
      "grad_norm": 1.0430234254755133,
      "learning_rate": 9.942471935143623e-06,
      "loss": 0.2032,
      "step": 2636
    },
    {
      "epoch": 0.0769298092070716,
      "grad_norm": 1.0339827850226333,
      "learning_rate": 9.942400453847487e-06,
      "loss": 0.1958,
      "step": 2637
    },
    {
      "epoch": 0.07695898243771515,
      "grad_norm": 1.1071622241519605,
      "learning_rate": 9.94232892842678e-06,
      "loss": 0.2067,
      "step": 2638
    },
    {
      "epoch": 0.07698815566835872,
      "grad_norm": 1.0958838219887979,
      "learning_rate": 9.942257358882144e-06,
      "loss": 0.1747,
      "step": 2639
    },
    {
      "epoch": 0.07701732889900227,
      "grad_norm": 1.0717453136637745,
      "learning_rate": 9.94218574521421e-06,
      "loss": 0.227,
      "step": 2640
    },
    {
      "epoch": 0.07704650212964584,
      "grad_norm": 0.9185332528220834,
      "learning_rate": 9.942114087423622e-06,
      "loss": 0.2231,
      "step": 2641
    },
    {
      "epoch": 0.07707567536028939,
      "grad_norm": 0.9051003261109557,
      "learning_rate": 9.942042385511022e-06,
      "loss": 0.1943,
      "step": 2642
    },
    {
      "epoch": 0.07710484859093296,
      "grad_norm": 1.3066658504609052,
      "learning_rate": 9.941970639477044e-06,
      "loss": 0.1824,
      "step": 2643
    },
    {
      "epoch": 0.07713402182157653,
      "grad_norm": 0.9224265104180501,
      "learning_rate": 9.941898849322333e-06,
      "loss": 0.2068,
      "step": 2644
    },
    {
      "epoch": 0.07716319505222008,
      "grad_norm": 0.8542627557072558,
      "learning_rate": 9.94182701504753e-06,
      "loss": 0.1943,
      "step": 2645
    },
    {
      "epoch": 0.07719236828286365,
      "grad_norm": 1.1004733462542646,
      "learning_rate": 9.941755136653273e-06,
      "loss": 0.2051,
      "step": 2646
    },
    {
      "epoch": 0.0772215415135072,
      "grad_norm": 0.964762488155317,
      "learning_rate": 9.941683214140207e-06,
      "loss": 0.2048,
      "step": 2647
    },
    {
      "epoch": 0.07725071474415077,
      "grad_norm": 0.8247333942088099,
      "learning_rate": 9.941611247508973e-06,
      "loss": 0.1727,
      "step": 2648
    },
    {
      "epoch": 0.07727988797479433,
      "grad_norm": 1.0219784976162776,
      "learning_rate": 9.941539236760213e-06,
      "loss": 0.2032,
      "step": 2649
    },
    {
      "epoch": 0.07730906120543789,
      "grad_norm": 1.1353265879464371,
      "learning_rate": 9.94146718189457e-06,
      "loss": 0.1955,
      "step": 2650
    },
    {
      "epoch": 0.07733823443608145,
      "grad_norm": 0.7962805265253471,
      "learning_rate": 9.94139508291269e-06,
      "loss": 0.1886,
      "step": 2651
    },
    {
      "epoch": 0.07736740766672501,
      "grad_norm": 0.7955698233236265,
      "learning_rate": 9.941322939815213e-06,
      "loss": 0.1976,
      "step": 2652
    },
    {
      "epoch": 0.07739658089736857,
      "grad_norm": 1.0542124619816051,
      "learning_rate": 9.941250752602783e-06,
      "loss": 0.1962,
      "step": 2653
    },
    {
      "epoch": 0.07742575412801214,
      "grad_norm": 0.8402126269339003,
      "learning_rate": 9.941178521276046e-06,
      "loss": 0.1893,
      "step": 2654
    },
    {
      "epoch": 0.0774549273586557,
      "grad_norm": 0.8447199467017881,
      "learning_rate": 9.941106245835648e-06,
      "loss": 0.1937,
      "step": 2655
    },
    {
      "epoch": 0.07748410058929926,
      "grad_norm": 0.912593285122189,
      "learning_rate": 9.941033926282233e-06,
      "loss": 0.1825,
      "step": 2656
    },
    {
      "epoch": 0.07751327381994282,
      "grad_norm": 0.9718797109379124,
      "learning_rate": 9.940961562616446e-06,
      "loss": 0.1984,
      "step": 2657
    },
    {
      "epoch": 0.07754244705058638,
      "grad_norm": 0.9868174289875705,
      "learning_rate": 9.940889154838934e-06,
      "loss": 0.1878,
      "step": 2658
    },
    {
      "epoch": 0.07757162028122995,
      "grad_norm": 1.0507159006122682,
      "learning_rate": 9.940816702950343e-06,
      "loss": 0.1867,
      "step": 2659
    },
    {
      "epoch": 0.0776007935118735,
      "grad_norm": 1.0525048615759034,
      "learning_rate": 9.940744206951318e-06,
      "loss": 0.1996,
      "step": 2660
    },
    {
      "epoch": 0.07762996674251707,
      "grad_norm": 1.0223398335816087,
      "learning_rate": 9.940671666842512e-06,
      "loss": 0.1994,
      "step": 2661
    },
    {
      "epoch": 0.07765913997316062,
      "grad_norm": 0.9068473842074293,
      "learning_rate": 9.940599082624566e-06,
      "loss": 0.173,
      "step": 2662
    },
    {
      "epoch": 0.07768831320380419,
      "grad_norm": 0.899211220038558,
      "learning_rate": 9.940526454298132e-06,
      "loss": 0.1894,
      "step": 2663
    },
    {
      "epoch": 0.07771748643444774,
      "grad_norm": 0.9777365591898555,
      "learning_rate": 9.940453781863857e-06,
      "loss": 0.1943,
      "step": 2664
    },
    {
      "epoch": 0.07774665966509131,
      "grad_norm": 0.7923847513019054,
      "learning_rate": 9.940381065322388e-06,
      "loss": 0.2013,
      "step": 2665
    },
    {
      "epoch": 0.07777583289573488,
      "grad_norm": 0.8198327533951112,
      "learning_rate": 9.94030830467438e-06,
      "loss": 0.1779,
      "step": 2666
    },
    {
      "epoch": 0.07780500612637843,
      "grad_norm": 1.0282763387788505,
      "learning_rate": 9.940235499920476e-06,
      "loss": 0.1892,
      "step": 2667
    },
    {
      "epoch": 0.077834179357022,
      "grad_norm": 0.955231002477153,
      "learning_rate": 9.940162651061329e-06,
      "loss": 0.1873,
      "step": 2668
    },
    {
      "epoch": 0.07786335258766555,
      "grad_norm": 0.9416850910784917,
      "learning_rate": 9.940089758097591e-06,
      "loss": 0.1858,
      "step": 2669
    },
    {
      "epoch": 0.07789252581830912,
      "grad_norm": 0.9282490361042554,
      "learning_rate": 9.94001682102991e-06,
      "loss": 0.204,
      "step": 2670
    },
    {
      "epoch": 0.07792169904895269,
      "grad_norm": 0.9752447389088829,
      "learning_rate": 9.939943839858936e-06,
      "loss": 0.2057,
      "step": 2671
    },
    {
      "epoch": 0.07795087227959624,
      "grad_norm": 0.9947648135003092,
      "learning_rate": 9.939870814585327e-06,
      "loss": 0.1849,
      "step": 2672
    },
    {
      "epoch": 0.0779800455102398,
      "grad_norm": 0.968443584009168,
      "learning_rate": 9.939797745209727e-06,
      "loss": 0.2005,
      "step": 2673
    },
    {
      "epoch": 0.07800921874088336,
      "grad_norm": 0.9606764833604585,
      "learning_rate": 9.939724631732793e-06,
      "loss": 0.2075,
      "step": 2674
    },
    {
      "epoch": 0.07803839197152693,
      "grad_norm": 1.0200740151040866,
      "learning_rate": 9.939651474155176e-06,
      "loss": 0.2015,
      "step": 2675
    },
    {
      "epoch": 0.0780675652021705,
      "grad_norm": 1.0026526550299906,
      "learning_rate": 9.93957827247753e-06,
      "loss": 0.178,
      "step": 2676
    },
    {
      "epoch": 0.07809673843281405,
      "grad_norm": 0.9075067466521576,
      "learning_rate": 9.93950502670051e-06,
      "loss": 0.1905,
      "step": 2677
    },
    {
      "epoch": 0.07812591166345761,
      "grad_norm": 1.0334341557588698,
      "learning_rate": 9.939431736824767e-06,
      "loss": 0.2049,
      "step": 2678
    },
    {
      "epoch": 0.07815508489410117,
      "grad_norm": 0.9574596778896336,
      "learning_rate": 9.939358402850955e-06,
      "loss": 0.1964,
      "step": 2679
    },
    {
      "epoch": 0.07818425812474473,
      "grad_norm": 1.0897485305686212,
      "learning_rate": 9.939285024779734e-06,
      "loss": 0.2029,
      "step": 2680
    },
    {
      "epoch": 0.0782134313553883,
      "grad_norm": 0.9828221224677702,
      "learning_rate": 9.939211602611754e-06,
      "loss": 0.1822,
      "step": 2681
    },
    {
      "epoch": 0.07824260458603186,
      "grad_norm": 1.1855913890247984,
      "learning_rate": 9.93913813634767e-06,
      "loss": 0.1943,
      "step": 2682
    },
    {
      "epoch": 0.07827177781667542,
      "grad_norm": 1.2297052617062867,
      "learning_rate": 9.939064625988142e-06,
      "loss": 0.2273,
      "step": 2683
    },
    {
      "epoch": 0.07830095104731898,
      "grad_norm": 0.8934870583091481,
      "learning_rate": 9.938991071533823e-06,
      "loss": 0.1936,
      "step": 2684
    },
    {
      "epoch": 0.07833012427796254,
      "grad_norm": 1.244846335122825,
      "learning_rate": 9.938917472985372e-06,
      "loss": 0.1949,
      "step": 2685
    },
    {
      "epoch": 0.0783592975086061,
      "grad_norm": 1.1793669347515032,
      "learning_rate": 9.938843830343443e-06,
      "loss": 0.1697,
      "step": 2686
    },
    {
      "epoch": 0.07838847073924966,
      "grad_norm": 0.8600819325514898,
      "learning_rate": 9.938770143608695e-06,
      "loss": 0.211,
      "step": 2687
    },
    {
      "epoch": 0.07841764396989323,
      "grad_norm": 0.904292666952364,
      "learning_rate": 9.938696412781787e-06,
      "loss": 0.2164,
      "step": 2688
    },
    {
      "epoch": 0.07844681720053678,
      "grad_norm": 1.0879181073662818,
      "learning_rate": 9.938622637863377e-06,
      "loss": 0.2169,
      "step": 2689
    },
    {
      "epoch": 0.07847599043118035,
      "grad_norm": 0.8801298156236977,
      "learning_rate": 9.938548818854124e-06,
      "loss": 0.1878,
      "step": 2690
    },
    {
      "epoch": 0.0785051636618239,
      "grad_norm": 0.8137726067530414,
      "learning_rate": 9.938474955754685e-06,
      "loss": 0.1956,
      "step": 2691
    },
    {
      "epoch": 0.07853433689246747,
      "grad_norm": 1.0321234197281965,
      "learning_rate": 9.93840104856572e-06,
      "loss": 0.1922,
      "step": 2692
    },
    {
      "epoch": 0.07856351012311104,
      "grad_norm": 0.9366913737022776,
      "learning_rate": 9.93832709728789e-06,
      "loss": 0.1887,
      "step": 2693
    },
    {
      "epoch": 0.07859268335375459,
      "grad_norm": 0.7059196165237965,
      "learning_rate": 9.938253101921852e-06,
      "loss": 0.1824,
      "step": 2694
    },
    {
      "epoch": 0.07862185658439816,
      "grad_norm": 1.0892404704290901,
      "learning_rate": 9.938179062468272e-06,
      "loss": 0.1808,
      "step": 2695
    },
    {
      "epoch": 0.07865102981504171,
      "grad_norm": 1.0510282012379226,
      "learning_rate": 9.938104978927807e-06,
      "loss": 0.1961,
      "step": 2696
    },
    {
      "epoch": 0.07868020304568528,
      "grad_norm": 0.9814648931511375,
      "learning_rate": 9.938030851301122e-06,
      "loss": 0.1865,
      "step": 2697
    },
    {
      "epoch": 0.07870937627632885,
      "grad_norm": 0.9219050370141942,
      "learning_rate": 9.937956679588874e-06,
      "loss": 0.1965,
      "step": 2698
    },
    {
      "epoch": 0.0787385495069724,
      "grad_norm": 1.2533633717403911,
      "learning_rate": 9.937882463791727e-06,
      "loss": 0.1815,
      "step": 2699
    },
    {
      "epoch": 0.07876772273761597,
      "grad_norm": 1.1228788934518406,
      "learning_rate": 9.937808203910345e-06,
      "loss": 0.2045,
      "step": 2700
    },
    {
      "epoch": 0.07879689596825952,
      "grad_norm": 0.9634574103341637,
      "learning_rate": 9.93773389994539e-06,
      "loss": 0.1988,
      "step": 2701
    },
    {
      "epoch": 0.07882606919890309,
      "grad_norm": 1.021651442566582,
      "learning_rate": 9.937659551897526e-06,
      "loss": 0.1836,
      "step": 2702
    },
    {
      "epoch": 0.07885524242954665,
      "grad_norm": 1.0105141364475234,
      "learning_rate": 9.937585159767416e-06,
      "loss": 0.1978,
      "step": 2703
    },
    {
      "epoch": 0.07888441566019021,
      "grad_norm": 1.021767068751826,
      "learning_rate": 9.937510723555723e-06,
      "loss": 0.1799,
      "step": 2704
    },
    {
      "epoch": 0.07891358889083377,
      "grad_norm": 0.9066280821815615,
      "learning_rate": 9.937436243263115e-06,
      "loss": 0.1985,
      "step": 2705
    },
    {
      "epoch": 0.07894276212147733,
      "grad_norm": 0.841122543552312,
      "learning_rate": 9.937361718890255e-06,
      "loss": 0.2056,
      "step": 2706
    },
    {
      "epoch": 0.0789719353521209,
      "grad_norm": 0.9077763985090129,
      "learning_rate": 9.937287150437807e-06,
      "loss": 0.2134,
      "step": 2707
    },
    {
      "epoch": 0.07900110858276446,
      "grad_norm": 0.9746503628502282,
      "learning_rate": 9.937212537906438e-06,
      "loss": 0.2175,
      "step": 2708
    },
    {
      "epoch": 0.07903028181340802,
      "grad_norm": 1.0464230380507649,
      "learning_rate": 9.937137881296814e-06,
      "loss": 0.1983,
      "step": 2709
    },
    {
      "epoch": 0.07905945504405158,
      "grad_norm": 1.200070722162346,
      "learning_rate": 9.937063180609602e-06,
      "loss": 0.2105,
      "step": 2710
    },
    {
      "epoch": 0.07908862827469514,
      "grad_norm": 0.8591917355902438,
      "learning_rate": 9.936988435845469e-06,
      "loss": 0.1901,
      "step": 2711
    },
    {
      "epoch": 0.0791178015053387,
      "grad_norm": 0.8876176216163101,
      "learning_rate": 9.93691364700508e-06,
      "loss": 0.1849,
      "step": 2712
    },
    {
      "epoch": 0.07914697473598226,
      "grad_norm": 1.1549833181924853,
      "learning_rate": 9.936838814089107e-06,
      "loss": 0.1925,
      "step": 2713
    },
    {
      "epoch": 0.07917614796662582,
      "grad_norm": 1.0203950851234787,
      "learning_rate": 9.936763937098213e-06,
      "loss": 0.2012,
      "step": 2714
    },
    {
      "epoch": 0.07920532119726939,
      "grad_norm": 1.0481209030947731,
      "learning_rate": 9.93668901603307e-06,
      "loss": 0.2047,
      "step": 2715
    },
    {
      "epoch": 0.07923449442791294,
      "grad_norm": 0.9819989793255824,
      "learning_rate": 9.936614050894346e-06,
      "loss": 0.1842,
      "step": 2716
    },
    {
      "epoch": 0.07926366765855651,
      "grad_norm": 0.9479496180338147,
      "learning_rate": 9.93653904168271e-06,
      "loss": 0.1967,
      "step": 2717
    },
    {
      "epoch": 0.07929284088920006,
      "grad_norm": 1.106088483378671,
      "learning_rate": 9.936463988398834e-06,
      "loss": 0.1801,
      "step": 2718
    },
    {
      "epoch": 0.07932201411984363,
      "grad_norm": 0.8381484977399565,
      "learning_rate": 9.936388891043384e-06,
      "loss": 0.2019,
      "step": 2719
    },
    {
      "epoch": 0.0793511873504872,
      "grad_norm": 0.9140491887527451,
      "learning_rate": 9.936313749617032e-06,
      "loss": 0.1896,
      "step": 2720
    },
    {
      "epoch": 0.07938036058113075,
      "grad_norm": 0.9834658135188353,
      "learning_rate": 9.93623856412045e-06,
      "loss": 0.1963,
      "step": 2721
    },
    {
      "epoch": 0.07940953381177432,
      "grad_norm": 0.870219693261259,
      "learning_rate": 9.93616333455431e-06,
      "loss": 0.2083,
      "step": 2722
    },
    {
      "epoch": 0.07943870704241787,
      "grad_norm": 0.8545790111060516,
      "learning_rate": 9.93608806091928e-06,
      "loss": 0.2006,
      "step": 2723
    },
    {
      "epoch": 0.07946788027306144,
      "grad_norm": 0.8398428387455098,
      "learning_rate": 9.936012743216034e-06,
      "loss": 0.1972,
      "step": 2724
    },
    {
      "epoch": 0.079497053503705,
      "grad_norm": 0.9003407969640789,
      "learning_rate": 9.935937381445247e-06,
      "loss": 0.1978,
      "step": 2725
    },
    {
      "epoch": 0.07952622673434856,
      "grad_norm": 0.918600237288775,
      "learning_rate": 9.935861975607586e-06,
      "loss": 0.2031,
      "step": 2726
    },
    {
      "epoch": 0.07955539996499213,
      "grad_norm": 1.001836761949255,
      "learning_rate": 9.93578652570373e-06,
      "loss": 0.1818,
      "step": 2727
    },
    {
      "epoch": 0.07958457319563568,
      "grad_norm": 0.7416507055637092,
      "learning_rate": 9.935711031734349e-06,
      "loss": 0.19,
      "step": 2728
    },
    {
      "epoch": 0.07961374642627925,
      "grad_norm": 1.1444359194944251,
      "learning_rate": 9.935635493700117e-06,
      "loss": 0.2049,
      "step": 2729
    },
    {
      "epoch": 0.07964291965692281,
      "grad_norm": 0.901635866880554,
      "learning_rate": 9.935559911601713e-06,
      "loss": 0.1933,
      "step": 2730
    },
    {
      "epoch": 0.07967209288756637,
      "grad_norm": 0.9359464537723414,
      "learning_rate": 9.935484285439806e-06,
      "loss": 0.2042,
      "step": 2731
    },
    {
      "epoch": 0.07970126611820993,
      "grad_norm": 0.8740846860070967,
      "learning_rate": 9.935408615215075e-06,
      "loss": 0.1722,
      "step": 2732
    },
    {
      "epoch": 0.07973043934885349,
      "grad_norm": 0.8249563066594702,
      "learning_rate": 9.935332900928192e-06,
      "loss": 0.1991,
      "step": 2733
    },
    {
      "epoch": 0.07975961257949706,
      "grad_norm": 1.0008799259776313,
      "learning_rate": 9.935257142579835e-06,
      "loss": 0.1953,
      "step": 2734
    },
    {
      "epoch": 0.07978878581014061,
      "grad_norm": 0.9306772494535623,
      "learning_rate": 9.93518134017068e-06,
      "loss": 0.2069,
      "step": 2735
    },
    {
      "epoch": 0.07981795904078418,
      "grad_norm": 0.935440707262837,
      "learning_rate": 9.935105493701406e-06,
      "loss": 0.1979,
      "step": 2736
    },
    {
      "epoch": 0.07984713227142774,
      "grad_norm": 0.8310594003377255,
      "learning_rate": 9.935029603172689e-06,
      "loss": 0.1726,
      "step": 2737
    },
    {
      "epoch": 0.0798763055020713,
      "grad_norm": 0.8204925660785971,
      "learning_rate": 9.934953668585205e-06,
      "loss": 0.1868,
      "step": 2738
    },
    {
      "epoch": 0.07990547873271486,
      "grad_norm": 1.1133498944659208,
      "learning_rate": 9.93487768993963e-06,
      "loss": 0.1967,
      "step": 2739
    },
    {
      "epoch": 0.07993465196335842,
      "grad_norm": 0.9255066100765028,
      "learning_rate": 9.93480166723665e-06,
      "loss": 0.2357,
      "step": 2740
    },
    {
      "epoch": 0.07996382519400198,
      "grad_norm": 0.9069965715365438,
      "learning_rate": 9.934725600476935e-06,
      "loss": 0.1891,
      "step": 2741
    },
    {
      "epoch": 0.07999299842464555,
      "grad_norm": 1.0889468089974361,
      "learning_rate": 9.934649489661168e-06,
      "loss": 0.2092,
      "step": 2742
    },
    {
      "epoch": 0.0800221716552891,
      "grad_norm": 1.0411829070438932,
      "learning_rate": 9.934573334790029e-06,
      "loss": 0.1998,
      "step": 2743
    },
    {
      "epoch": 0.08005134488593267,
      "grad_norm": 0.994705550420692,
      "learning_rate": 9.934497135864198e-06,
      "loss": 0.1823,
      "step": 2744
    },
    {
      "epoch": 0.08008051811657622,
      "grad_norm": 0.9356101296702157,
      "learning_rate": 9.934420892884352e-06,
      "loss": 0.182,
      "step": 2745
    },
    {
      "epoch": 0.08010969134721979,
      "grad_norm": 0.9463574646080707,
      "learning_rate": 9.934344605851179e-06,
      "loss": 0.1776,
      "step": 2746
    },
    {
      "epoch": 0.08013886457786336,
      "grad_norm": 1.064276672676239,
      "learning_rate": 9.93426827476535e-06,
      "loss": 0.1967,
      "step": 2747
    },
    {
      "epoch": 0.08016803780850691,
      "grad_norm": 0.9202510035566694,
      "learning_rate": 9.934191899627555e-06,
      "loss": 0.1857,
      "step": 2748
    },
    {
      "epoch": 0.08019721103915048,
      "grad_norm": 0.8217108165975183,
      "learning_rate": 9.934115480438471e-06,
      "loss": 0.166,
      "step": 2749
    },
    {
      "epoch": 0.08022638426979403,
      "grad_norm": 0.8994533678584419,
      "learning_rate": 9.934039017198784e-06,
      "loss": 0.1742,
      "step": 2750
    },
    {
      "epoch": 0.0802555575004376,
      "grad_norm": 0.8821589782245868,
      "learning_rate": 9.933962509909173e-06,
      "loss": 0.2001,
      "step": 2751
    },
    {
      "epoch": 0.08028473073108117,
      "grad_norm": 0.9057562916177336,
      "learning_rate": 9.933885958570323e-06,
      "loss": 0.2066,
      "step": 2752
    },
    {
      "epoch": 0.08031390396172472,
      "grad_norm": 1.01509641287476,
      "learning_rate": 9.933809363182916e-06,
      "loss": 0.1882,
      "step": 2753
    },
    {
      "epoch": 0.08034307719236829,
      "grad_norm": 0.9693622053620701,
      "learning_rate": 9.933732723747638e-06,
      "loss": 0.1919,
      "step": 2754
    },
    {
      "epoch": 0.08037225042301184,
      "grad_norm": 0.9054770900695217,
      "learning_rate": 9.933656040265172e-06,
      "loss": 0.1758,
      "step": 2755
    },
    {
      "epoch": 0.08040142365365541,
      "grad_norm": 1.1345735848261322,
      "learning_rate": 9.9335793127362e-06,
      "loss": 0.2141,
      "step": 2756
    },
    {
      "epoch": 0.08043059688429896,
      "grad_norm": 0.9157774978521533,
      "learning_rate": 9.933502541161413e-06,
      "loss": 0.181,
      "step": 2757
    },
    {
      "epoch": 0.08045977011494253,
      "grad_norm": 0.8205035273923003,
      "learning_rate": 9.933425725541493e-06,
      "loss": 0.198,
      "step": 2758
    },
    {
      "epoch": 0.0804889433455861,
      "grad_norm": 1.1078514936409194,
      "learning_rate": 9.933348865877125e-06,
      "loss": 0.1876,
      "step": 2759
    },
    {
      "epoch": 0.08051811657622965,
      "grad_norm": 0.9466618575867122,
      "learning_rate": 9.933271962168993e-06,
      "loss": 0.2011,
      "step": 2760
    },
    {
      "epoch": 0.08054728980687322,
      "grad_norm": 1.0194453198695188,
      "learning_rate": 9.93319501441779e-06,
      "loss": 0.1894,
      "step": 2761
    },
    {
      "epoch": 0.08057646303751677,
      "grad_norm": 0.8180571092945929,
      "learning_rate": 9.9331180226242e-06,
      "loss": 0.1935,
      "step": 2762
    },
    {
      "epoch": 0.08060563626816034,
      "grad_norm": 1.0405861116911927,
      "learning_rate": 9.933040986788909e-06,
      "loss": 0.2106,
      "step": 2763
    },
    {
      "epoch": 0.0806348094988039,
      "grad_norm": 0.8460896887974851,
      "learning_rate": 9.932963906912603e-06,
      "loss": 0.1856,
      "step": 2764
    },
    {
      "epoch": 0.08066398272944746,
      "grad_norm": 0.7751705907994965,
      "learning_rate": 9.932886782995977e-06,
      "loss": 0.1892,
      "step": 2765
    },
    {
      "epoch": 0.08069315596009102,
      "grad_norm": 1.0849083426806656,
      "learning_rate": 9.932809615039714e-06,
      "loss": 0.1973,
      "step": 2766
    },
    {
      "epoch": 0.08072232919073458,
      "grad_norm": 0.79395450879649,
      "learning_rate": 9.932732403044502e-06,
      "loss": 0.1811,
      "step": 2767
    },
    {
      "epoch": 0.08075150242137814,
      "grad_norm": 0.9203116392343945,
      "learning_rate": 9.932655147011034e-06,
      "loss": 0.1812,
      "step": 2768
    },
    {
      "epoch": 0.08078067565202171,
      "grad_norm": 0.7840840738602628,
      "learning_rate": 9.93257784694e-06,
      "loss": 0.1869,
      "step": 2769
    },
    {
      "epoch": 0.08080984888266526,
      "grad_norm": 0.8152327691890939,
      "learning_rate": 9.932500502832087e-06,
      "loss": 0.1934,
      "step": 2770
    },
    {
      "epoch": 0.08083902211330883,
      "grad_norm": 0.9607678098915426,
      "learning_rate": 9.932423114687988e-06,
      "loss": 0.1858,
      "step": 2771
    },
    {
      "epoch": 0.08086819534395238,
      "grad_norm": 0.7847020955786086,
      "learning_rate": 9.932345682508393e-06,
      "loss": 0.2001,
      "step": 2772
    },
    {
      "epoch": 0.08089736857459595,
      "grad_norm": 1.0975533217950417,
      "learning_rate": 9.93226820629399e-06,
      "loss": 0.1943,
      "step": 2773
    },
    {
      "epoch": 0.08092654180523952,
      "grad_norm": 1.0910647082549227,
      "learning_rate": 9.932190686045478e-06,
      "loss": 0.2256,
      "step": 2774
    },
    {
      "epoch": 0.08095571503588307,
      "grad_norm": 6.994995426750002,
      "learning_rate": 9.932113121763542e-06,
      "loss": 0.2086,
      "step": 2775
    },
    {
      "epoch": 0.08098488826652664,
      "grad_norm": 1.2525824847267382,
      "learning_rate": 9.93203551344888e-06,
      "loss": 0.1986,
      "step": 2776
    },
    {
      "epoch": 0.08101406149717019,
      "grad_norm": 1.1556186623538949,
      "learning_rate": 9.931957861102181e-06,
      "loss": 0.2066,
      "step": 2777
    },
    {
      "epoch": 0.08104323472781376,
      "grad_norm": 1.2286560015579542,
      "learning_rate": 9.93188016472414e-06,
      "loss": 0.2073,
      "step": 2778
    },
    {
      "epoch": 0.08107240795845731,
      "grad_norm": 1.1692159101193345,
      "learning_rate": 9.931802424315448e-06,
      "loss": 0.189,
      "step": 2779
    },
    {
      "epoch": 0.08110158118910088,
      "grad_norm": 1.169928906398308,
      "learning_rate": 9.931724639876806e-06,
      "loss": 0.168,
      "step": 2780
    },
    {
      "epoch": 0.08113075441974445,
      "grad_norm": 1.1798753346118862,
      "learning_rate": 9.931646811408899e-06,
      "loss": 0.2168,
      "step": 2781
    },
    {
      "epoch": 0.081159927650388,
      "grad_norm": 1.139026510056305,
      "learning_rate": 9.931568938912428e-06,
      "loss": 0.2013,
      "step": 2782
    },
    {
      "epoch": 0.08118910088103157,
      "grad_norm": 0.9073070629248077,
      "learning_rate": 9.931491022388087e-06,
      "loss": 0.1781,
      "step": 2783
    },
    {
      "epoch": 0.08121827411167512,
      "grad_norm": 1.0617980509299942,
      "learning_rate": 9.931413061836573e-06,
      "loss": 0.1888,
      "step": 2784
    },
    {
      "epoch": 0.08124744734231869,
      "grad_norm": 1.0467083373748136,
      "learning_rate": 9.931335057258579e-06,
      "loss": 0.1987,
      "step": 2785
    },
    {
      "epoch": 0.08127662057296225,
      "grad_norm": 0.9096295779546549,
      "learning_rate": 9.931257008654801e-06,
      "loss": 0.1807,
      "step": 2786
    },
    {
      "epoch": 0.08130579380360581,
      "grad_norm": 1.0581114770644902,
      "learning_rate": 9.931178916025941e-06,
      "loss": 0.1992,
      "step": 2787
    },
    {
      "epoch": 0.08133496703424938,
      "grad_norm": 0.901276263214751,
      "learning_rate": 9.931100779372691e-06,
      "loss": 0.2196,
      "step": 2788
    },
    {
      "epoch": 0.08136414026489293,
      "grad_norm": 0.8226719310667839,
      "learning_rate": 9.93102259869575e-06,
      "loss": 0.1913,
      "step": 2789
    },
    {
      "epoch": 0.0813933134955365,
      "grad_norm": 0.9334413386187128,
      "learning_rate": 9.930944373995816e-06,
      "loss": 0.1995,
      "step": 2790
    },
    {
      "epoch": 0.08142248672618006,
      "grad_norm": 0.8547959893057678,
      "learning_rate": 9.93086610527359e-06,
      "loss": 0.168,
      "step": 2791
    },
    {
      "epoch": 0.08145165995682362,
      "grad_norm": 0.8424939866021695,
      "learning_rate": 9.930787792529768e-06,
      "loss": 0.1803,
      "step": 2792
    },
    {
      "epoch": 0.08148083318746718,
      "grad_norm": 0.7925941421375563,
      "learning_rate": 9.930709435765049e-06,
      "loss": 0.1807,
      "step": 2793
    },
    {
      "epoch": 0.08151000641811074,
      "grad_norm": 0.8068038095868088,
      "learning_rate": 9.930631034980132e-06,
      "loss": 0.1995,
      "step": 2794
    },
    {
      "epoch": 0.0815391796487543,
      "grad_norm": 1.1927830385184788,
      "learning_rate": 9.93055259017572e-06,
      "loss": 0.2237,
      "step": 2795
    },
    {
      "epoch": 0.08156835287939787,
      "grad_norm": 0.87189253766637,
      "learning_rate": 9.93047410135251e-06,
      "loss": 0.1778,
      "step": 2796
    },
    {
      "epoch": 0.08159752611004142,
      "grad_norm": 0.9239119098014479,
      "learning_rate": 9.930395568511205e-06,
      "loss": 0.1726,
      "step": 2797
    },
    {
      "epoch": 0.08162669934068499,
      "grad_norm": 0.8123218897094974,
      "learning_rate": 9.930316991652506e-06,
      "loss": 0.2261,
      "step": 2798
    },
    {
      "epoch": 0.08165587257132854,
      "grad_norm": 1.0795451179377848,
      "learning_rate": 9.930238370777112e-06,
      "loss": 0.2013,
      "step": 2799
    },
    {
      "epoch": 0.08168504580197211,
      "grad_norm": 0.9989426857442159,
      "learning_rate": 9.93015970588573e-06,
      "loss": 0.1877,
      "step": 2800
    },
    {
      "epoch": 0.08171421903261568,
      "grad_norm": 0.809457448580003,
      "learning_rate": 9.930080996979055e-06,
      "loss": 0.2254,
      "step": 2801
    },
    {
      "epoch": 0.08174339226325923,
      "grad_norm": 0.8504300144113776,
      "learning_rate": 9.930002244057795e-06,
      "loss": 0.2054,
      "step": 2802
    },
    {
      "epoch": 0.0817725654939028,
      "grad_norm": 1.0160328925502884,
      "learning_rate": 9.929923447122654e-06,
      "loss": 0.2227,
      "step": 2803
    },
    {
      "epoch": 0.08180173872454635,
      "grad_norm": 0.859542823941737,
      "learning_rate": 9.92984460617433e-06,
      "loss": 0.1837,
      "step": 2804
    },
    {
      "epoch": 0.08183091195518992,
      "grad_norm": 1.3241622148617618,
      "learning_rate": 9.929765721213533e-06,
      "loss": 0.1782,
      "step": 2805
    },
    {
      "epoch": 0.08186008518583347,
      "grad_norm": 0.9965291614881215,
      "learning_rate": 9.929686792240965e-06,
      "loss": 0.1826,
      "step": 2806
    },
    {
      "epoch": 0.08188925841647704,
      "grad_norm": 1.086457509226983,
      "learning_rate": 9.929607819257327e-06,
      "loss": 0.2149,
      "step": 2807
    },
    {
      "epoch": 0.08191843164712061,
      "grad_norm": 0.909790091638877,
      "learning_rate": 9.929528802263331e-06,
      "loss": 0.214,
      "step": 2808
    },
    {
      "epoch": 0.08194760487776416,
      "grad_norm": 0.9813151341080305,
      "learning_rate": 9.929449741259675e-06,
      "loss": 0.1862,
      "step": 2809
    },
    {
      "epoch": 0.08197677810840773,
      "grad_norm": 1.0301680924957024,
      "learning_rate": 9.92937063624707e-06,
      "loss": 0.1895,
      "step": 2810
    },
    {
      "epoch": 0.08200595133905128,
      "grad_norm": 1.1282745828440568,
      "learning_rate": 9.929291487226221e-06,
      "loss": 0.1759,
      "step": 2811
    },
    {
      "epoch": 0.08203512456969485,
      "grad_norm": 1.0243361260094277,
      "learning_rate": 9.929212294197834e-06,
      "loss": 0.1858,
      "step": 2812
    },
    {
      "epoch": 0.08206429780033841,
      "grad_norm": 0.9394166173833635,
      "learning_rate": 9.929133057162616e-06,
      "loss": 0.2236,
      "step": 2813
    },
    {
      "epoch": 0.08209347103098197,
      "grad_norm": 1.0602639828605804,
      "learning_rate": 9.929053776121276e-06,
      "loss": 0.1997,
      "step": 2814
    },
    {
      "epoch": 0.08212264426162554,
      "grad_norm": 0.9907947906298216,
      "learning_rate": 9.92897445107452e-06,
      "loss": 0.1889,
      "step": 2815
    },
    {
      "epoch": 0.08215181749226909,
      "grad_norm": 1.0825967856642165,
      "learning_rate": 9.928895082023056e-06,
      "loss": 0.1892,
      "step": 2816
    },
    {
      "epoch": 0.08218099072291266,
      "grad_norm": 1.129993170967017,
      "learning_rate": 9.928815668967592e-06,
      "loss": 0.1786,
      "step": 2817
    },
    {
      "epoch": 0.08221016395355622,
      "grad_norm": 0.9319226639332902,
      "learning_rate": 9.928736211908841e-06,
      "loss": 0.1943,
      "step": 2818
    },
    {
      "epoch": 0.08223933718419978,
      "grad_norm": 0.8958238329213462,
      "learning_rate": 9.92865671084751e-06,
      "loss": 0.1892,
      "step": 2819
    },
    {
      "epoch": 0.08226851041484334,
      "grad_norm": 1.1202960962969155,
      "learning_rate": 9.928577165784306e-06,
      "loss": 0.1752,
      "step": 2820
    },
    {
      "epoch": 0.0822976836454869,
      "grad_norm": 0.9182878870187241,
      "learning_rate": 9.928497576719943e-06,
      "loss": 0.218,
      "step": 2821
    },
    {
      "epoch": 0.08232685687613046,
      "grad_norm": 1.152626927527983,
      "learning_rate": 9.92841794365513e-06,
      "loss": 0.1809,
      "step": 2822
    },
    {
      "epoch": 0.08235603010677403,
      "grad_norm": 0.8891267211475501,
      "learning_rate": 9.928338266590578e-06,
      "loss": 0.1851,
      "step": 2823
    },
    {
      "epoch": 0.08238520333741758,
      "grad_norm": 0.9388552579142749,
      "learning_rate": 9.928258545526999e-06,
      "loss": 0.2464,
      "step": 2824
    },
    {
      "epoch": 0.08241437656806115,
      "grad_norm": 1.0039462884984038,
      "learning_rate": 9.928178780465103e-06,
      "loss": 0.1702,
      "step": 2825
    },
    {
      "epoch": 0.0824435497987047,
      "grad_norm": 0.8654622861712875,
      "learning_rate": 9.928098971405604e-06,
      "loss": 0.1692,
      "step": 2826
    },
    {
      "epoch": 0.08247272302934827,
      "grad_norm": 0.9468776200338908,
      "learning_rate": 9.928019118349214e-06,
      "loss": 0.208,
      "step": 2827
    },
    {
      "epoch": 0.08250189625999182,
      "grad_norm": 0.8675639463620226,
      "learning_rate": 9.927939221296645e-06,
      "loss": 0.1893,
      "step": 2828
    },
    {
      "epoch": 0.08253106949063539,
      "grad_norm": 0.9140124809001825,
      "learning_rate": 9.927859280248613e-06,
      "loss": 0.1912,
      "step": 2829
    },
    {
      "epoch": 0.08256024272127896,
      "grad_norm": 0.8808135522217446,
      "learning_rate": 9.927779295205828e-06,
      "loss": 0.1882,
      "step": 2830
    },
    {
      "epoch": 0.08258941595192251,
      "grad_norm": 1.1263294413792957,
      "learning_rate": 9.927699266169006e-06,
      "loss": 0.2129,
      "step": 2831
    },
    {
      "epoch": 0.08261858918256608,
      "grad_norm": 1.1464325707882075,
      "learning_rate": 9.927619193138862e-06,
      "loss": 0.2159,
      "step": 2832
    },
    {
      "epoch": 0.08264776241320963,
      "grad_norm": 0.9193497134407058,
      "learning_rate": 9.927539076116108e-06,
      "loss": 0.1843,
      "step": 2833
    },
    {
      "epoch": 0.0826769356438532,
      "grad_norm": 1.0620327484528727,
      "learning_rate": 9.927458915101463e-06,
      "loss": 0.1875,
      "step": 2834
    },
    {
      "epoch": 0.08270610887449677,
      "grad_norm": 1.297023435604498,
      "learning_rate": 9.92737871009564e-06,
      "loss": 0.1988,
      "step": 2835
    },
    {
      "epoch": 0.08273528210514032,
      "grad_norm": 1.1594631116160867,
      "learning_rate": 9.927298461099358e-06,
      "loss": 0.1945,
      "step": 2836
    },
    {
      "epoch": 0.08276445533578389,
      "grad_norm": 2.71844039216031,
      "learning_rate": 9.92721816811333e-06,
      "loss": 0.2019,
      "step": 2837
    },
    {
      "epoch": 0.08279362856642744,
      "grad_norm": 1.1300278850126155,
      "learning_rate": 9.927137831138275e-06,
      "loss": 0.2028,
      "step": 2838
    },
    {
      "epoch": 0.08282280179707101,
      "grad_norm": 0.9754568136533278,
      "learning_rate": 9.92705745017491e-06,
      "loss": 0.2007,
      "step": 2839
    },
    {
      "epoch": 0.08285197502771458,
      "grad_norm": 0.8622160475156416,
      "learning_rate": 9.926977025223954e-06,
      "loss": 0.2024,
      "step": 2840
    },
    {
      "epoch": 0.08288114825835813,
      "grad_norm": 0.9267529229951084,
      "learning_rate": 9.92689655628612e-06,
      "loss": 0.1839,
      "step": 2841
    },
    {
      "epoch": 0.0829103214890017,
      "grad_norm": 0.9914073092895164,
      "learning_rate": 9.926816043362132e-06,
      "loss": 0.2059,
      "step": 2842
    },
    {
      "epoch": 0.08293949471964525,
      "grad_norm": 0.8820006093688145,
      "learning_rate": 9.926735486452706e-06,
      "loss": 0.2116,
      "step": 2843
    },
    {
      "epoch": 0.08296866795028882,
      "grad_norm": 1.128717413870026,
      "learning_rate": 9.92665488555856e-06,
      "loss": 0.1931,
      "step": 2844
    },
    {
      "epoch": 0.08299784118093238,
      "grad_norm": 1.033345513230471,
      "learning_rate": 9.926574240680417e-06,
      "loss": 0.2084,
      "step": 2845
    },
    {
      "epoch": 0.08302701441157594,
      "grad_norm": 0.7905999006884825,
      "learning_rate": 9.926493551818995e-06,
      "loss": 0.1836,
      "step": 2846
    },
    {
      "epoch": 0.0830561876422195,
      "grad_norm": 0.9106042662231119,
      "learning_rate": 9.926412818975015e-06,
      "loss": 0.1792,
      "step": 2847
    },
    {
      "epoch": 0.08308536087286306,
      "grad_norm": 0.7649859787618821,
      "learning_rate": 9.926332042149196e-06,
      "loss": 0.1835,
      "step": 2848
    },
    {
      "epoch": 0.08311453410350662,
      "grad_norm": 0.9070524418435134,
      "learning_rate": 9.926251221342262e-06,
      "loss": 0.2235,
      "step": 2849
    },
    {
      "epoch": 0.08314370733415018,
      "grad_norm": 0.8590134215437185,
      "learning_rate": 9.926170356554932e-06,
      "loss": 0.1774,
      "step": 2850
    },
    {
      "epoch": 0.08317288056479374,
      "grad_norm": 1.0204603380841695,
      "learning_rate": 9.92608944778793e-06,
      "loss": 0.214,
      "step": 2851
    },
    {
      "epoch": 0.08320205379543731,
      "grad_norm": 0.9968437364023346,
      "learning_rate": 9.926008495041975e-06,
      "loss": 0.2257,
      "step": 2852
    },
    {
      "epoch": 0.08323122702608086,
      "grad_norm": 0.9535351588974474,
      "learning_rate": 9.925927498317794e-06,
      "loss": 0.1844,
      "step": 2853
    },
    {
      "epoch": 0.08326040025672443,
      "grad_norm": 0.8854242104496456,
      "learning_rate": 9.925846457616109e-06,
      "loss": 0.2026,
      "step": 2854
    },
    {
      "epoch": 0.08328957348736798,
      "grad_norm": 1.0089888511502039,
      "learning_rate": 9.925765372937641e-06,
      "loss": 0.192,
      "step": 2855
    },
    {
      "epoch": 0.08331874671801155,
      "grad_norm": 0.917845127766735,
      "learning_rate": 9.925684244283116e-06,
      "loss": 0.2179,
      "step": 2856
    },
    {
      "epoch": 0.08334791994865512,
      "grad_norm": 0.9949885017616943,
      "learning_rate": 9.925603071653258e-06,
      "loss": 0.1836,
      "step": 2857
    },
    {
      "epoch": 0.08337709317929867,
      "grad_norm": 1.0751050383811247,
      "learning_rate": 9.925521855048794e-06,
      "loss": 0.2328,
      "step": 2858
    },
    {
      "epoch": 0.08340626640994224,
      "grad_norm": 0.906733513481403,
      "learning_rate": 9.925440594470444e-06,
      "loss": 0.1967,
      "step": 2859
    },
    {
      "epoch": 0.08343543964058579,
      "grad_norm": 1.142701110240169,
      "learning_rate": 9.925359289918937e-06,
      "loss": 0.2128,
      "step": 2860
    },
    {
      "epoch": 0.08346461287122936,
      "grad_norm": 0.9905148856143702,
      "learning_rate": 9.925277941394998e-06,
      "loss": 0.213,
      "step": 2861
    },
    {
      "epoch": 0.08349378610187293,
      "grad_norm": 1.0512130964236224,
      "learning_rate": 9.925196548899353e-06,
      "loss": 0.1755,
      "step": 2862
    },
    {
      "epoch": 0.08352295933251648,
      "grad_norm": 1.0806773648246382,
      "learning_rate": 9.925115112432728e-06,
      "loss": 0.1709,
      "step": 2863
    },
    {
      "epoch": 0.08355213256316005,
      "grad_norm": 1.1075388767413175,
      "learning_rate": 9.925033631995854e-06,
      "loss": 0.192,
      "step": 2864
    },
    {
      "epoch": 0.0835813057938036,
      "grad_norm": 0.9388043533551024,
      "learning_rate": 9.924952107589452e-06,
      "loss": 0.2066,
      "step": 2865
    },
    {
      "epoch": 0.08361047902444717,
      "grad_norm": 0.9950697119120068,
      "learning_rate": 9.924870539214256e-06,
      "loss": 0.2075,
      "step": 2866
    },
    {
      "epoch": 0.08363965225509074,
      "grad_norm": 1.1421071021035174,
      "learning_rate": 9.924788926870989e-06,
      "loss": 0.2287,
      "step": 2867
    },
    {
      "epoch": 0.08366882548573429,
      "grad_norm": 1.0471766176853352,
      "learning_rate": 9.924707270560383e-06,
      "loss": 0.1956,
      "step": 2868
    },
    {
      "epoch": 0.08369799871637786,
      "grad_norm": 0.91963288783241,
      "learning_rate": 9.924625570283167e-06,
      "loss": 0.2252,
      "step": 2869
    },
    {
      "epoch": 0.08372717194702141,
      "grad_norm": 1.0305444563482429,
      "learning_rate": 9.92454382604007e-06,
      "loss": 0.1757,
      "step": 2870
    },
    {
      "epoch": 0.08375634517766498,
      "grad_norm": 0.9452937508991428,
      "learning_rate": 9.92446203783182e-06,
      "loss": 0.1947,
      "step": 2871
    },
    {
      "epoch": 0.08378551840830854,
      "grad_norm": 0.9541093318719566,
      "learning_rate": 9.924380205659147e-06,
      "loss": 0.196,
      "step": 2872
    },
    {
      "epoch": 0.0838146916389521,
      "grad_norm": 1.112014158471839,
      "learning_rate": 9.924298329522786e-06,
      "loss": 0.1981,
      "step": 2873
    },
    {
      "epoch": 0.08384386486959566,
      "grad_norm": 0.901707974059267,
      "learning_rate": 9.924216409423464e-06,
      "loss": 0.182,
      "step": 2874
    },
    {
      "epoch": 0.08387303810023922,
      "grad_norm": 1.1860376632262863,
      "learning_rate": 9.924134445361913e-06,
      "loss": 0.2023,
      "step": 2875
    },
    {
      "epoch": 0.08390221133088278,
      "grad_norm": 1.0321403578435027,
      "learning_rate": 9.924052437338865e-06,
      "loss": 0.1765,
      "step": 2876
    },
    {
      "epoch": 0.08393138456152634,
      "grad_norm": 0.8207461696475347,
      "learning_rate": 9.923970385355052e-06,
      "loss": 0.1855,
      "step": 2877
    },
    {
      "epoch": 0.0839605577921699,
      "grad_norm": 0.9138012734625626,
      "learning_rate": 9.92388828941121e-06,
      "loss": 0.2035,
      "step": 2878
    },
    {
      "epoch": 0.08398973102281347,
      "grad_norm": 0.9461767070447371,
      "learning_rate": 9.923806149508066e-06,
      "loss": 0.1844,
      "step": 2879
    },
    {
      "epoch": 0.08401890425345702,
      "grad_norm": 0.9880288764358837,
      "learning_rate": 9.923723965646356e-06,
      "loss": 0.1901,
      "step": 2880
    },
    {
      "epoch": 0.08404807748410059,
      "grad_norm": 1.1643190997949617,
      "learning_rate": 9.923641737826815e-06,
      "loss": 0.2148,
      "step": 2881
    },
    {
      "epoch": 0.08407725071474415,
      "grad_norm": 1.1033208593677126,
      "learning_rate": 9.923559466050174e-06,
      "loss": 0.1941,
      "step": 2882
    },
    {
      "epoch": 0.08410642394538771,
      "grad_norm": 0.9737642643596599,
      "learning_rate": 9.923477150317172e-06,
      "loss": 0.1997,
      "step": 2883
    },
    {
      "epoch": 0.08413559717603128,
      "grad_norm": 0.9261757169279775,
      "learning_rate": 9.92339479062854e-06,
      "loss": 0.2009,
      "step": 2884
    },
    {
      "epoch": 0.08416477040667483,
      "grad_norm": 1.258990034617718,
      "learning_rate": 9.923312386985013e-06,
      "loss": 0.1933,
      "step": 2885
    },
    {
      "epoch": 0.0841939436373184,
      "grad_norm": 1.001298937861448,
      "learning_rate": 9.92322993938733e-06,
      "loss": 0.1971,
      "step": 2886
    },
    {
      "epoch": 0.08422311686796195,
      "grad_norm": 0.9213789051844401,
      "learning_rate": 9.923147447836226e-06,
      "loss": 0.1794,
      "step": 2887
    },
    {
      "epoch": 0.08425229009860552,
      "grad_norm": 0.7759502889329065,
      "learning_rate": 9.923064912332436e-06,
      "loss": 0.1844,
      "step": 2888
    },
    {
      "epoch": 0.08428146332924909,
      "grad_norm": 0.9828130081912211,
      "learning_rate": 9.922982332876698e-06,
      "loss": 0.1866,
      "step": 2889
    },
    {
      "epoch": 0.08431063655989264,
      "grad_norm": 0.9141924436531134,
      "learning_rate": 9.922899709469748e-06,
      "loss": 0.1957,
      "step": 2890
    },
    {
      "epoch": 0.08433980979053621,
      "grad_norm": 0.9530853995878734,
      "learning_rate": 9.922817042112326e-06,
      "loss": 0.201,
      "step": 2891
    },
    {
      "epoch": 0.08436898302117976,
      "grad_norm": 0.809485663870376,
      "learning_rate": 9.922734330805169e-06,
      "loss": 0.1921,
      "step": 2892
    },
    {
      "epoch": 0.08439815625182333,
      "grad_norm": 0.8076078712216355,
      "learning_rate": 9.922651575549013e-06,
      "loss": 0.2064,
      "step": 2893
    },
    {
      "epoch": 0.0844273294824669,
      "grad_norm": 0.9071757954275966,
      "learning_rate": 9.9225687763446e-06,
      "loss": 0.2151,
      "step": 2894
    },
    {
      "epoch": 0.08445650271311045,
      "grad_norm": 0.9745045000878537,
      "learning_rate": 9.922485933192667e-06,
      "loss": 0.177,
      "step": 2895
    },
    {
      "epoch": 0.08448567594375402,
      "grad_norm": 0.9922703437221619,
      "learning_rate": 9.922403046093956e-06,
      "loss": 0.1813,
      "step": 2896
    },
    {
      "epoch": 0.08451484917439757,
      "grad_norm": 0.9315399021583822,
      "learning_rate": 9.922320115049205e-06,
      "loss": 0.1787,
      "step": 2897
    },
    {
      "epoch": 0.08454402240504114,
      "grad_norm": 0.8776975219008598,
      "learning_rate": 9.922237140059157e-06,
      "loss": 0.1704,
      "step": 2898
    },
    {
      "epoch": 0.08457319563568469,
      "grad_norm": 1.1553772509854796,
      "learning_rate": 9.922154121124548e-06,
      "loss": 0.28,
      "step": 2899
    },
    {
      "epoch": 0.08460236886632826,
      "grad_norm": 0.8628025604026724,
      "learning_rate": 9.922071058246122e-06,
      "loss": 0.1691,
      "step": 2900
    },
    {
      "epoch": 0.08463154209697182,
      "grad_norm": 0.9123746640465588,
      "learning_rate": 9.921987951424624e-06,
      "loss": 0.1756,
      "step": 2901
    },
    {
      "epoch": 0.08466071532761538,
      "grad_norm": 0.9395113792693988,
      "learning_rate": 9.92190480066079e-06,
      "loss": 0.2124,
      "step": 2902
    },
    {
      "epoch": 0.08468988855825894,
      "grad_norm": 0.8051932965639589,
      "learning_rate": 9.921821605955366e-06,
      "loss": 0.2006,
      "step": 2903
    },
    {
      "epoch": 0.0847190617889025,
      "grad_norm": 0.8792275575625256,
      "learning_rate": 9.921738367309091e-06,
      "loss": 0.188,
      "step": 2904
    },
    {
      "epoch": 0.08474823501954606,
      "grad_norm": 0.9120241887495304,
      "learning_rate": 9.921655084722713e-06,
      "loss": 0.1948,
      "step": 2905
    },
    {
      "epoch": 0.08477740825018963,
      "grad_norm": 0.9919025555721173,
      "learning_rate": 9.921571758196973e-06,
      "loss": 0.2279,
      "step": 2906
    },
    {
      "epoch": 0.08480658148083318,
      "grad_norm": 0.8772008946481051,
      "learning_rate": 9.921488387732617e-06,
      "loss": 0.1788,
      "step": 2907
    },
    {
      "epoch": 0.08483575471147675,
      "grad_norm": 0.8265557172512299,
      "learning_rate": 9.921404973330385e-06,
      "loss": 0.1836,
      "step": 2908
    },
    {
      "epoch": 0.0848649279421203,
      "grad_norm": 0.9060913602449435,
      "learning_rate": 9.921321514991024e-06,
      "loss": 0.1791,
      "step": 2909
    },
    {
      "epoch": 0.08489410117276387,
      "grad_norm": 0.8626848225961726,
      "learning_rate": 9.92123801271528e-06,
      "loss": 0.1884,
      "step": 2910
    },
    {
      "epoch": 0.08492327440340744,
      "grad_norm": 0.8905954737845782,
      "learning_rate": 9.921154466503899e-06,
      "loss": 0.1761,
      "step": 2911
    },
    {
      "epoch": 0.08495244763405099,
      "grad_norm": 0.9143770626772574,
      "learning_rate": 9.921070876357625e-06,
      "loss": 0.1756,
      "step": 2912
    },
    {
      "epoch": 0.08498162086469456,
      "grad_norm": 0.929480628058618,
      "learning_rate": 9.920987242277205e-06,
      "loss": 0.202,
      "step": 2913
    },
    {
      "epoch": 0.08501079409533811,
      "grad_norm": 0.8884063264213005,
      "learning_rate": 9.920903564263385e-06,
      "loss": 0.1967,
      "step": 2914
    },
    {
      "epoch": 0.08503996732598168,
      "grad_norm": 0.9497256865252193,
      "learning_rate": 9.920819842316914e-06,
      "loss": 0.2079,
      "step": 2915
    },
    {
      "epoch": 0.08506914055662525,
      "grad_norm": 0.9076402904311048,
      "learning_rate": 9.920736076438535e-06,
      "loss": 0.1875,
      "step": 2916
    },
    {
      "epoch": 0.0850983137872688,
      "grad_norm": 0.8785093924513132,
      "learning_rate": 9.920652266629002e-06,
      "loss": 0.2004,
      "step": 2917
    },
    {
      "epoch": 0.08512748701791237,
      "grad_norm": 0.9594248321183944,
      "learning_rate": 9.92056841288906e-06,
      "loss": 0.1886,
      "step": 2918
    },
    {
      "epoch": 0.08515666024855592,
      "grad_norm": 0.7301481664940029,
      "learning_rate": 9.920484515219458e-06,
      "loss": 0.2005,
      "step": 2919
    },
    {
      "epoch": 0.08518583347919949,
      "grad_norm": 0.8705722867537141,
      "learning_rate": 9.920400573620943e-06,
      "loss": 0.1737,
      "step": 2920
    },
    {
      "epoch": 0.08521500670984304,
      "grad_norm": 0.9298330380751542,
      "learning_rate": 9.920316588094268e-06,
      "loss": 0.2029,
      "step": 2921
    },
    {
      "epoch": 0.08524417994048661,
      "grad_norm": 0.8401546420998232,
      "learning_rate": 9.92023255864018e-06,
      "loss": 0.1906,
      "step": 2922
    },
    {
      "epoch": 0.08527335317113018,
      "grad_norm": 0.9615083026920218,
      "learning_rate": 9.92014848525943e-06,
      "loss": 0.1863,
      "step": 2923
    },
    {
      "epoch": 0.08530252640177373,
      "grad_norm": 1.0750265050292815,
      "learning_rate": 9.92006436795277e-06,
      "loss": 0.188,
      "step": 2924
    },
    {
      "epoch": 0.0853316996324173,
      "grad_norm": 0.9646879674547842,
      "learning_rate": 9.919980206720949e-06,
      "loss": 0.179,
      "step": 2925
    },
    {
      "epoch": 0.08536087286306085,
      "grad_norm": 0.7327944202303368,
      "learning_rate": 9.919896001564721e-06,
      "loss": 0.1659,
      "step": 2926
    },
    {
      "epoch": 0.08539004609370442,
      "grad_norm": 0.9456678912751119,
      "learning_rate": 9.919811752484834e-06,
      "loss": 0.2574,
      "step": 2927
    },
    {
      "epoch": 0.08541921932434798,
      "grad_norm": 0.983425907080063,
      "learning_rate": 9.919727459482043e-06,
      "loss": 0.2018,
      "step": 2928
    },
    {
      "epoch": 0.08544839255499154,
      "grad_norm": 0.803034174790023,
      "learning_rate": 9.919643122557099e-06,
      "loss": 0.177,
      "step": 2929
    },
    {
      "epoch": 0.0854775657856351,
      "grad_norm": 0.8841768667334998,
      "learning_rate": 9.919558741710757e-06,
      "loss": 0.2047,
      "step": 2930
    },
    {
      "epoch": 0.08550673901627866,
      "grad_norm": 0.8384717905943977,
      "learning_rate": 9.919474316943767e-06,
      "loss": 0.1876,
      "step": 2931
    },
    {
      "epoch": 0.08553591224692222,
      "grad_norm": 0.9552471273658159,
      "learning_rate": 9.919389848256886e-06,
      "loss": 0.1822,
      "step": 2932
    },
    {
      "epoch": 0.08556508547756579,
      "grad_norm": 0.861439992265136,
      "learning_rate": 9.919305335650866e-06,
      "loss": 0.1677,
      "step": 2933
    },
    {
      "epoch": 0.08559425870820934,
      "grad_norm": 0.9117444486814688,
      "learning_rate": 9.919220779126464e-06,
      "loss": 0.1805,
      "step": 2934
    },
    {
      "epoch": 0.08562343193885291,
      "grad_norm": 0.9329925244000081,
      "learning_rate": 9.919136178684432e-06,
      "loss": 0.1942,
      "step": 2935
    },
    {
      "epoch": 0.08565260516949647,
      "grad_norm": 1.0201358946837327,
      "learning_rate": 9.919051534325526e-06,
      "loss": 0.1848,
      "step": 2936
    },
    {
      "epoch": 0.08568177840014003,
      "grad_norm": 1.0060928362882617,
      "learning_rate": 9.918966846050502e-06,
      "loss": 0.1728,
      "step": 2937
    },
    {
      "epoch": 0.0857109516307836,
      "grad_norm": 0.93465579707486,
      "learning_rate": 9.918882113860117e-06,
      "loss": 0.2069,
      "step": 2938
    },
    {
      "epoch": 0.08574012486142715,
      "grad_norm": 0.9294178165199828,
      "learning_rate": 9.918797337755125e-06,
      "loss": 0.2154,
      "step": 2939
    },
    {
      "epoch": 0.08576929809207072,
      "grad_norm": 1.0235219812827783,
      "learning_rate": 9.918712517736288e-06,
      "loss": 0.163,
      "step": 2940
    },
    {
      "epoch": 0.08579847132271427,
      "grad_norm": 0.9363081392345324,
      "learning_rate": 9.918627653804358e-06,
      "loss": 0.1693,
      "step": 2941
    },
    {
      "epoch": 0.08582764455335784,
      "grad_norm": 0.8096250351067642,
      "learning_rate": 9.918542745960094e-06,
      "loss": 0.1792,
      "step": 2942
    },
    {
      "epoch": 0.0858568177840014,
      "grad_norm": 1.030032549066214,
      "learning_rate": 9.918457794204255e-06,
      "loss": 0.2217,
      "step": 2943
    },
    {
      "epoch": 0.08588599101464496,
      "grad_norm": 0.8574877595755447,
      "learning_rate": 9.918372798537599e-06,
      "loss": 0.1828,
      "step": 2944
    },
    {
      "epoch": 0.08591516424528853,
      "grad_norm": 1.491100058222644,
      "learning_rate": 9.918287758960885e-06,
      "loss": 0.2054,
      "step": 2945
    },
    {
      "epoch": 0.08594433747593208,
      "grad_norm": 0.9083850752775197,
      "learning_rate": 9.918202675474872e-06,
      "loss": 0.1918,
      "step": 2946
    },
    {
      "epoch": 0.08597351070657565,
      "grad_norm": 0.9011727890489442,
      "learning_rate": 9.91811754808032e-06,
      "loss": 0.1913,
      "step": 2947
    },
    {
      "epoch": 0.0860026839372192,
      "grad_norm": 0.7906158588954184,
      "learning_rate": 9.918032376777987e-06,
      "loss": 0.1825,
      "step": 2948
    },
    {
      "epoch": 0.08603185716786277,
      "grad_norm": 0.8329374188141475,
      "learning_rate": 9.917947161568635e-06,
      "loss": 0.1932,
      "step": 2949
    },
    {
      "epoch": 0.08606103039850634,
      "grad_norm": 0.9847432416449173,
      "learning_rate": 9.917861902453026e-06,
      "loss": 0.1805,
      "step": 2950
    },
    {
      "epoch": 0.08609020362914989,
      "grad_norm": 0.77088051200454,
      "learning_rate": 9.91777659943192e-06,
      "loss": 0.198,
      "step": 2951
    },
    {
      "epoch": 0.08611937685979346,
      "grad_norm": 0.9664209900370838,
      "learning_rate": 9.917691252506077e-06,
      "loss": 0.2013,
      "step": 2952
    },
    {
      "epoch": 0.08614855009043701,
      "grad_norm": 0.8164306184048502,
      "learning_rate": 9.917605861676263e-06,
      "loss": 0.1877,
      "step": 2953
    },
    {
      "epoch": 0.08617772332108058,
      "grad_norm": 0.9170505125275688,
      "learning_rate": 9.917520426943234e-06,
      "loss": 0.1819,
      "step": 2954
    },
    {
      "epoch": 0.08620689655172414,
      "grad_norm": 0.8540539865375996,
      "learning_rate": 9.91743494830776e-06,
      "loss": 0.1946,
      "step": 2955
    },
    {
      "epoch": 0.0862360697823677,
      "grad_norm": 0.8564212394033919,
      "learning_rate": 9.9173494257706e-06,
      "loss": 0.1847,
      "step": 2956
    },
    {
      "epoch": 0.08626524301301126,
      "grad_norm": 0.9910363759857238,
      "learning_rate": 9.917263859332517e-06,
      "loss": 0.2041,
      "step": 2957
    },
    {
      "epoch": 0.08629441624365482,
      "grad_norm": 0.8074326237234796,
      "learning_rate": 9.917178248994276e-06,
      "loss": 0.1802,
      "step": 2958
    },
    {
      "epoch": 0.08632358947429838,
      "grad_norm": 0.8084312759161405,
      "learning_rate": 9.917092594756644e-06,
      "loss": 0.1797,
      "step": 2959
    },
    {
      "epoch": 0.08635276270494195,
      "grad_norm": 0.902437331471456,
      "learning_rate": 9.91700689662038e-06,
      "loss": 0.2053,
      "step": 2960
    },
    {
      "epoch": 0.0863819359355855,
      "grad_norm": 1.1399668089315442,
      "learning_rate": 9.916921154586255e-06,
      "loss": 0.2152,
      "step": 2961
    },
    {
      "epoch": 0.08641110916622907,
      "grad_norm": 1.0049544203280714,
      "learning_rate": 9.91683536865503e-06,
      "loss": 0.1975,
      "step": 2962
    },
    {
      "epoch": 0.08644028239687263,
      "grad_norm": 0.912389918373014,
      "learning_rate": 9.916749538827472e-06,
      "loss": 0.196,
      "step": 2963
    },
    {
      "epoch": 0.08646945562751619,
      "grad_norm": 0.8774603790351778,
      "learning_rate": 9.916663665104348e-06,
      "loss": 0.1969,
      "step": 2964
    },
    {
      "epoch": 0.08649862885815976,
      "grad_norm": 0.8048369930767437,
      "learning_rate": 9.916577747486425e-06,
      "loss": 0.1683,
      "step": 2965
    },
    {
      "epoch": 0.08652780208880331,
      "grad_norm": 0.9037903315671819,
      "learning_rate": 9.91649178597447e-06,
      "loss": 0.2098,
      "step": 2966
    },
    {
      "epoch": 0.08655697531944688,
      "grad_norm": 0.9839554970042445,
      "learning_rate": 9.91640578056925e-06,
      "loss": 0.1768,
      "step": 2967
    },
    {
      "epoch": 0.08658614855009043,
      "grad_norm": 0.8167726210545194,
      "learning_rate": 9.916319731271532e-06,
      "loss": 0.1728,
      "step": 2968
    },
    {
      "epoch": 0.086615321780734,
      "grad_norm": 0.8284331586776322,
      "learning_rate": 9.916233638082086e-06,
      "loss": 0.1754,
      "step": 2969
    },
    {
      "epoch": 0.08664449501137755,
      "grad_norm": 0.8480392275632824,
      "learning_rate": 9.916147501001679e-06,
      "loss": 0.1781,
      "step": 2970
    },
    {
      "epoch": 0.08667366824202112,
      "grad_norm": 0.8980497097528721,
      "learning_rate": 9.91606132003108e-06,
      "loss": 0.1839,
      "step": 2971
    },
    {
      "epoch": 0.08670284147266469,
      "grad_norm": 0.9619761370527177,
      "learning_rate": 9.91597509517106e-06,
      "loss": 0.1868,
      "step": 2972
    },
    {
      "epoch": 0.08673201470330824,
      "grad_norm": 0.7884627570569046,
      "learning_rate": 9.91588882642239e-06,
      "loss": 0.2301,
      "step": 2973
    },
    {
      "epoch": 0.08676118793395181,
      "grad_norm": 0.7750463676329331,
      "learning_rate": 9.915802513785835e-06,
      "loss": 0.1739,
      "step": 2974
    },
    {
      "epoch": 0.08679036116459536,
      "grad_norm": 0.825933172220525,
      "learning_rate": 9.91571615726217e-06,
      "loss": 0.176,
      "step": 2975
    },
    {
      "epoch": 0.08681953439523893,
      "grad_norm": 0.8739152510151846,
      "learning_rate": 9.915629756852163e-06,
      "loss": 0.209,
      "step": 2976
    },
    {
      "epoch": 0.0868487076258825,
      "grad_norm": 0.9315939547274,
      "learning_rate": 9.915543312556588e-06,
      "loss": 0.2316,
      "step": 2977
    },
    {
      "epoch": 0.08687788085652605,
      "grad_norm": 0.7991220099424584,
      "learning_rate": 9.915456824376217e-06,
      "loss": 0.2085,
      "step": 2978
    },
    {
      "epoch": 0.08690705408716962,
      "grad_norm": 0.9678247234032558,
      "learning_rate": 9.915370292311818e-06,
      "loss": 0.2058,
      "step": 2979
    },
    {
      "epoch": 0.08693622731781317,
      "grad_norm": 0.9505488319158721,
      "learning_rate": 9.91528371636417e-06,
      "loss": 0.1984,
      "step": 2980
    },
    {
      "epoch": 0.08696540054845674,
      "grad_norm": 0.8523923035068051,
      "learning_rate": 9.915197096534039e-06,
      "loss": 0.1738,
      "step": 2981
    },
    {
      "epoch": 0.0869945737791003,
      "grad_norm": 0.9849848988500524,
      "learning_rate": 9.915110432822203e-06,
      "loss": 0.2035,
      "step": 2982
    },
    {
      "epoch": 0.08702374700974386,
      "grad_norm": 0.7955156687986064,
      "learning_rate": 9.915023725229435e-06,
      "loss": 0.1719,
      "step": 2983
    },
    {
      "epoch": 0.08705292024038742,
      "grad_norm": 0.8459914807033535,
      "learning_rate": 9.914936973756509e-06,
      "loss": 0.1807,
      "step": 2984
    },
    {
      "epoch": 0.08708209347103098,
      "grad_norm": 1.01317682427066,
      "learning_rate": 9.914850178404199e-06,
      "loss": 0.1929,
      "step": 2985
    },
    {
      "epoch": 0.08711126670167454,
      "grad_norm": 0.8749675976675539,
      "learning_rate": 9.914763339173279e-06,
      "loss": 0.1938,
      "step": 2986
    },
    {
      "epoch": 0.08714043993231811,
      "grad_norm": 1.0050341942248369,
      "learning_rate": 9.914676456064526e-06,
      "loss": 0.1933,
      "step": 2987
    },
    {
      "epoch": 0.08716961316296166,
      "grad_norm": 0.8836090974873869,
      "learning_rate": 9.914589529078713e-06,
      "loss": 0.1998,
      "step": 2988
    },
    {
      "epoch": 0.08719878639360523,
      "grad_norm": 1.1617059750490621,
      "learning_rate": 9.914502558216618e-06,
      "loss": 0.1878,
      "step": 2989
    },
    {
      "epoch": 0.08722795962424879,
      "grad_norm": 0.9042435047273026,
      "learning_rate": 9.91441554347902e-06,
      "loss": 0.1999,
      "step": 2990
    },
    {
      "epoch": 0.08725713285489235,
      "grad_norm": 0.78858604502361,
      "learning_rate": 9.91432848486669e-06,
      "loss": 0.1955,
      "step": 2991
    },
    {
      "epoch": 0.0872863060855359,
      "grad_norm": 0.913998913718363,
      "learning_rate": 9.91424138238041e-06,
      "loss": 0.2084,
      "step": 2992
    },
    {
      "epoch": 0.08731547931617947,
      "grad_norm": 0.8933043256883373,
      "learning_rate": 9.914154236020957e-06,
      "loss": 0.1769,
      "step": 2993
    },
    {
      "epoch": 0.08734465254682304,
      "grad_norm": 0.8455693392256065,
      "learning_rate": 9.914067045789107e-06,
      "loss": 0.1945,
      "step": 2994
    },
    {
      "epoch": 0.0873738257774666,
      "grad_norm": 1.0031771539640966,
      "learning_rate": 9.913979811685638e-06,
      "loss": 0.1998,
      "step": 2995
    },
    {
      "epoch": 0.08740299900811016,
      "grad_norm": 0.8820102377931416,
      "learning_rate": 9.913892533711331e-06,
      "loss": 0.1791,
      "step": 2996
    },
    {
      "epoch": 0.08743217223875371,
      "grad_norm": 0.9376324370591926,
      "learning_rate": 9.913805211866967e-06,
      "loss": 0.1785,
      "step": 2997
    },
    {
      "epoch": 0.08746134546939728,
      "grad_norm": 1.0575494307685773,
      "learning_rate": 9.913717846153322e-06,
      "loss": 0.1782,
      "step": 2998
    },
    {
      "epoch": 0.08749051870004085,
      "grad_norm": 1.4428421928061463,
      "learning_rate": 9.913630436571176e-06,
      "loss": 0.195,
      "step": 2999
    },
    {
      "epoch": 0.0875196919306844,
      "grad_norm": 1.2558891724714025,
      "learning_rate": 9.91354298312131e-06,
      "loss": 0.225,
      "step": 3000
    },
    {
      "epoch": 0.08754886516132797,
      "grad_norm": 0.9906785696055594,
      "learning_rate": 9.913455485804506e-06,
      "loss": 0.1908,
      "step": 3001
    },
    {
      "epoch": 0.08757803839197152,
      "grad_norm": 0.9814461413562301,
      "learning_rate": 9.913367944621545e-06,
      "loss": 0.182,
      "step": 3002
    },
    {
      "epoch": 0.08760721162261509,
      "grad_norm": 1.3326132631853649,
      "learning_rate": 9.913280359573207e-06,
      "loss": 0.1837,
      "step": 3003
    },
    {
      "epoch": 0.08763638485325866,
      "grad_norm": 0.8927915911853632,
      "learning_rate": 9.913192730660275e-06,
      "loss": 0.1795,
      "step": 3004
    },
    {
      "epoch": 0.08766555808390221,
      "grad_norm": 1.0050729571802486,
      "learning_rate": 9.913105057883532e-06,
      "loss": 0.1945,
      "step": 3005
    },
    {
      "epoch": 0.08769473131454578,
      "grad_norm": 0.9128684143400813,
      "learning_rate": 9.91301734124376e-06,
      "loss": 0.1964,
      "step": 3006
    },
    {
      "epoch": 0.08772390454518933,
      "grad_norm": 0.8438485794169703,
      "learning_rate": 9.91292958074174e-06,
      "loss": 0.1732,
      "step": 3007
    },
    {
      "epoch": 0.0877530777758329,
      "grad_norm": 0.8477518508629068,
      "learning_rate": 9.91284177637826e-06,
      "loss": 0.2083,
      "step": 3008
    },
    {
      "epoch": 0.08778225100647646,
      "grad_norm": 0.7869919198682958,
      "learning_rate": 9.9127539281541e-06,
      "loss": 0.1763,
      "step": 3009
    },
    {
      "epoch": 0.08781142423712002,
      "grad_norm": 1.0437124670764184,
      "learning_rate": 9.912666036070045e-06,
      "loss": 0.1778,
      "step": 3010
    },
    {
      "epoch": 0.08784059746776358,
      "grad_norm": 0.874233790713194,
      "learning_rate": 9.912578100126883e-06,
      "loss": 0.1874,
      "step": 3011
    },
    {
      "epoch": 0.08786977069840714,
      "grad_norm": 0.8595788089308047,
      "learning_rate": 9.912490120325394e-06,
      "loss": 0.2016,
      "step": 3012
    },
    {
      "epoch": 0.0878989439290507,
      "grad_norm": 0.9981321545014091,
      "learning_rate": 9.912402096666367e-06,
      "loss": 0.1904,
      "step": 3013
    },
    {
      "epoch": 0.08792811715969426,
      "grad_norm": 0.9073995176985775,
      "learning_rate": 9.912314029150586e-06,
      "loss": 0.1721,
      "step": 3014
    },
    {
      "epoch": 0.08795729039033783,
      "grad_norm": 0.9292271893163728,
      "learning_rate": 9.912225917778838e-06,
      "loss": 0.1936,
      "step": 3015
    },
    {
      "epoch": 0.08798646362098139,
      "grad_norm": 0.9206644134436318,
      "learning_rate": 9.91213776255191e-06,
      "loss": 0.195,
      "step": 3016
    },
    {
      "epoch": 0.08801563685162495,
      "grad_norm": 0.8990250912326241,
      "learning_rate": 9.912049563470589e-06,
      "loss": 0.2245,
      "step": 3017
    },
    {
      "epoch": 0.08804481008226851,
      "grad_norm": 0.9241519313027056,
      "learning_rate": 9.91196132053566e-06,
      "loss": 0.1705,
      "step": 3018
    },
    {
      "epoch": 0.08807398331291207,
      "grad_norm": 0.9298534231910399,
      "learning_rate": 9.911873033747916e-06,
      "loss": 0.2189,
      "step": 3019
    },
    {
      "epoch": 0.08810315654355563,
      "grad_norm": 0.7907726777360939,
      "learning_rate": 9.911784703108141e-06,
      "loss": 0.1865,
      "step": 3020
    },
    {
      "epoch": 0.0881323297741992,
      "grad_norm": 1.0804125823083952,
      "learning_rate": 9.911696328617126e-06,
      "loss": 0.206,
      "step": 3021
    },
    {
      "epoch": 0.08816150300484275,
      "grad_norm": 0.7719793440825188,
      "learning_rate": 9.911607910275655e-06,
      "loss": 0.1666,
      "step": 3022
    },
    {
      "epoch": 0.08819067623548632,
      "grad_norm": 0.9937527633913799,
      "learning_rate": 9.911519448084526e-06,
      "loss": 0.2015,
      "step": 3023
    },
    {
      "epoch": 0.08821984946612987,
      "grad_norm": 0.9353945419009075,
      "learning_rate": 9.91143094204452e-06,
      "loss": 0.236,
      "step": 3024
    },
    {
      "epoch": 0.08824902269677344,
      "grad_norm": 1.0133951671553068,
      "learning_rate": 9.911342392156432e-06,
      "loss": 0.1743,
      "step": 3025
    },
    {
      "epoch": 0.08827819592741701,
      "grad_norm": 0.989671455261747,
      "learning_rate": 9.911253798421051e-06,
      "loss": 0.2082,
      "step": 3026
    },
    {
      "epoch": 0.08830736915806056,
      "grad_norm": 1.2663908906167805,
      "learning_rate": 9.91116516083917e-06,
      "loss": 0.1768,
      "step": 3027
    },
    {
      "epoch": 0.08833654238870413,
      "grad_norm": 0.8667957135089173,
      "learning_rate": 9.911076479411578e-06,
      "loss": 0.1738,
      "step": 3028
    },
    {
      "epoch": 0.08836571561934768,
      "grad_norm": 1.0614484471945946,
      "learning_rate": 9.910987754139067e-06,
      "loss": 0.2223,
      "step": 3029
    },
    {
      "epoch": 0.08839488884999125,
      "grad_norm": 1.0329687572312753,
      "learning_rate": 9.91089898502243e-06,
      "loss": 0.1865,
      "step": 3030
    },
    {
      "epoch": 0.08842406208063482,
      "grad_norm": 1.067140489676507,
      "learning_rate": 9.910810172062462e-06,
      "loss": 0.1996,
      "step": 3031
    },
    {
      "epoch": 0.08845323531127837,
      "grad_norm": 0.96701487872926,
      "learning_rate": 9.91072131525995e-06,
      "loss": 0.1864,
      "step": 3032
    },
    {
      "epoch": 0.08848240854192194,
      "grad_norm": 0.9739058647898851,
      "learning_rate": 9.910632414615691e-06,
      "loss": 0.2033,
      "step": 3033
    },
    {
      "epoch": 0.08851158177256549,
      "grad_norm": 0.9240948823187121,
      "learning_rate": 9.910543470130478e-06,
      "loss": 0.2031,
      "step": 3034
    },
    {
      "epoch": 0.08854075500320906,
      "grad_norm": 1.0302705675412827,
      "learning_rate": 9.910454481805105e-06,
      "loss": 0.1894,
      "step": 3035
    },
    {
      "epoch": 0.08856992823385261,
      "grad_norm": 1.0976861082356608,
      "learning_rate": 9.910365449640367e-06,
      "loss": 0.2008,
      "step": 3036
    },
    {
      "epoch": 0.08859910146449618,
      "grad_norm": 0.9165697419348509,
      "learning_rate": 9.910276373637058e-06,
      "loss": 0.192,
      "step": 3037
    },
    {
      "epoch": 0.08862827469513974,
      "grad_norm": 1.4118806773504327,
      "learning_rate": 9.910187253795974e-06,
      "loss": 0.2065,
      "step": 3038
    },
    {
      "epoch": 0.0886574479257833,
      "grad_norm": 0.8733323283579463,
      "learning_rate": 9.91009809011791e-06,
      "loss": 0.1819,
      "step": 3039
    },
    {
      "epoch": 0.08868662115642686,
      "grad_norm": 0.732815032517925,
      "learning_rate": 9.910008882603664e-06,
      "loss": 0.1768,
      "step": 3040
    },
    {
      "epoch": 0.08871579438707042,
      "grad_norm": 1.1269245047076029,
      "learning_rate": 9.909919631254028e-06,
      "loss": 0.1973,
      "step": 3041
    },
    {
      "epoch": 0.08874496761771399,
      "grad_norm": 0.9555805411047182,
      "learning_rate": 9.909830336069803e-06,
      "loss": 0.2164,
      "step": 3042
    },
    {
      "epoch": 0.08877414084835755,
      "grad_norm": 0.9834162396195962,
      "learning_rate": 9.909740997051786e-06,
      "loss": 0.1821,
      "step": 3043
    },
    {
      "epoch": 0.0888033140790011,
      "grad_norm": 0.8057651821738971,
      "learning_rate": 9.909651614200773e-06,
      "loss": 0.1889,
      "step": 3044
    },
    {
      "epoch": 0.08883248730964467,
      "grad_norm": 0.9273619873149924,
      "learning_rate": 9.90956218751756e-06,
      "loss": 0.1914,
      "step": 3045
    },
    {
      "epoch": 0.08886166054028823,
      "grad_norm": 0.7597660008902855,
      "learning_rate": 9.90947271700295e-06,
      "loss": 0.1796,
      "step": 3046
    },
    {
      "epoch": 0.08889083377093179,
      "grad_norm": 0.7956731608419013,
      "learning_rate": 9.909383202657739e-06,
      "loss": 0.1871,
      "step": 3047
    },
    {
      "epoch": 0.08892000700157536,
      "grad_norm": 0.7726968702012784,
      "learning_rate": 9.909293644482727e-06,
      "loss": 0.1549,
      "step": 3048
    },
    {
      "epoch": 0.08894918023221891,
      "grad_norm": 0.9075519329697846,
      "learning_rate": 9.909204042478713e-06,
      "loss": 0.1771,
      "step": 3049
    },
    {
      "epoch": 0.08897835346286248,
      "grad_norm": 0.9671623273112833,
      "learning_rate": 9.9091143966465e-06,
      "loss": 0.1916,
      "step": 3050
    },
    {
      "epoch": 0.08900752669350603,
      "grad_norm": 0.9015163634226375,
      "learning_rate": 9.909024706986881e-06,
      "loss": 0.2011,
      "step": 3051
    },
    {
      "epoch": 0.0890366999241496,
      "grad_norm": 0.9184962450164161,
      "learning_rate": 9.908934973500664e-06,
      "loss": 0.1803,
      "step": 3052
    },
    {
      "epoch": 0.08906587315479317,
      "grad_norm": 0.8450153788163894,
      "learning_rate": 9.908845196188647e-06,
      "loss": 0.1614,
      "step": 3053
    },
    {
      "epoch": 0.08909504638543672,
      "grad_norm": 0.9553291965529953,
      "learning_rate": 9.908755375051631e-06,
      "loss": 0.194,
      "step": 3054
    },
    {
      "epoch": 0.08912421961608029,
      "grad_norm": 0.891058644169043,
      "learning_rate": 9.90866551009042e-06,
      "loss": 0.1671,
      "step": 3055
    },
    {
      "epoch": 0.08915339284672384,
      "grad_norm": 1.2104446913879496,
      "learning_rate": 9.908575601305815e-06,
      "loss": 0.1813,
      "step": 3056
    },
    {
      "epoch": 0.08918256607736741,
      "grad_norm": 1.0127375435632076,
      "learning_rate": 9.908485648698618e-06,
      "loss": 0.2193,
      "step": 3057
    },
    {
      "epoch": 0.08921173930801098,
      "grad_norm": 1.0258627847235318,
      "learning_rate": 9.908395652269633e-06,
      "loss": 0.1938,
      "step": 3058
    },
    {
      "epoch": 0.08924091253865453,
      "grad_norm": 1.0079976664411516,
      "learning_rate": 9.908305612019665e-06,
      "loss": 0.2049,
      "step": 3059
    },
    {
      "epoch": 0.0892700857692981,
      "grad_norm": 0.9704927433343631,
      "learning_rate": 9.908215527949514e-06,
      "loss": 0.1988,
      "step": 3060
    },
    {
      "epoch": 0.08929925899994165,
      "grad_norm": 0.8499876970223335,
      "learning_rate": 9.908125400059988e-06,
      "loss": 0.1726,
      "step": 3061
    },
    {
      "epoch": 0.08932843223058522,
      "grad_norm": 0.8942257336748414,
      "learning_rate": 9.908035228351888e-06,
      "loss": 0.1965,
      "step": 3062
    },
    {
      "epoch": 0.08935760546122877,
      "grad_norm": 0.9569510087355682,
      "learning_rate": 9.907945012826022e-06,
      "loss": 0.1926,
      "step": 3063
    },
    {
      "epoch": 0.08938677869187234,
      "grad_norm": 1.0157032076843844,
      "learning_rate": 9.907854753483194e-06,
      "loss": 0.2116,
      "step": 3064
    },
    {
      "epoch": 0.0894159519225159,
      "grad_norm": 1.0428423346131277,
      "learning_rate": 9.907764450324213e-06,
      "loss": 0.192,
      "step": 3065
    },
    {
      "epoch": 0.08944512515315946,
      "grad_norm": 0.9531557202518242,
      "learning_rate": 9.90767410334988e-06,
      "loss": 0.1769,
      "step": 3066
    },
    {
      "epoch": 0.08947429838380302,
      "grad_norm": 0.9251978560430241,
      "learning_rate": 9.907583712561007e-06,
      "loss": 0.1977,
      "step": 3067
    },
    {
      "epoch": 0.08950347161444658,
      "grad_norm": 0.8586274452048444,
      "learning_rate": 9.907493277958395e-06,
      "loss": 0.1731,
      "step": 3068
    },
    {
      "epoch": 0.08953264484509015,
      "grad_norm": 0.9827735005065518,
      "learning_rate": 9.907402799542856e-06,
      "loss": 0.2121,
      "step": 3069
    },
    {
      "epoch": 0.08956181807573371,
      "grad_norm": 0.9753572676253564,
      "learning_rate": 9.907312277315196e-06,
      "loss": 0.18,
      "step": 3070
    },
    {
      "epoch": 0.08959099130637727,
      "grad_norm": 0.8210386693880609,
      "learning_rate": 9.907221711276224e-06,
      "loss": 0.1682,
      "step": 3071
    },
    {
      "epoch": 0.08962016453702083,
      "grad_norm": 0.8229563047627599,
      "learning_rate": 9.907131101426748e-06,
      "loss": 0.1851,
      "step": 3072
    },
    {
      "epoch": 0.08964933776766439,
      "grad_norm": 1.1028219063692366,
      "learning_rate": 9.907040447767575e-06,
      "loss": 0.193,
      "step": 3073
    },
    {
      "epoch": 0.08967851099830795,
      "grad_norm": 0.9994190165962931,
      "learning_rate": 9.906949750299519e-06,
      "loss": 0.1843,
      "step": 3074
    },
    {
      "epoch": 0.08970768422895152,
      "grad_norm": 0.8739944914336276,
      "learning_rate": 9.906859009023386e-06,
      "loss": 0.1813,
      "step": 3075
    },
    {
      "epoch": 0.08973685745959507,
      "grad_norm": 0.9650162467973781,
      "learning_rate": 9.906768223939986e-06,
      "loss": 0.1986,
      "step": 3076
    },
    {
      "epoch": 0.08976603069023864,
      "grad_norm": 1.203808331887953,
      "learning_rate": 9.906677395050132e-06,
      "loss": 0.2227,
      "step": 3077
    },
    {
      "epoch": 0.0897952039208822,
      "grad_norm": 0.9256040756127788,
      "learning_rate": 9.906586522354633e-06,
      "loss": 0.1764,
      "step": 3078
    },
    {
      "epoch": 0.08982437715152576,
      "grad_norm": 0.9056929166129613,
      "learning_rate": 9.9064956058543e-06,
      "loss": 0.1918,
      "step": 3079
    },
    {
      "epoch": 0.08985355038216933,
      "grad_norm": 0.925389887214733,
      "learning_rate": 9.906404645549947e-06,
      "loss": 0.1699,
      "step": 3080
    },
    {
      "epoch": 0.08988272361281288,
      "grad_norm": 1.0533964757737913,
      "learning_rate": 9.906313641442385e-06,
      "loss": 0.1714,
      "step": 3081
    },
    {
      "epoch": 0.08991189684345645,
      "grad_norm": 1.0862831572218263,
      "learning_rate": 9.906222593532424e-06,
      "loss": 0.2107,
      "step": 3082
    },
    {
      "epoch": 0.0899410700741,
      "grad_norm": 0.8008642476942929,
      "learning_rate": 9.906131501820881e-06,
      "loss": 0.1854,
      "step": 3083
    },
    {
      "epoch": 0.08997024330474357,
      "grad_norm": 1.3107928344527915,
      "learning_rate": 9.906040366308565e-06,
      "loss": 0.1836,
      "step": 3084
    },
    {
      "epoch": 0.08999941653538712,
      "grad_norm": 1.1093016703702059,
      "learning_rate": 9.905949186996293e-06,
      "loss": 0.1997,
      "step": 3085
    },
    {
      "epoch": 0.09002858976603069,
      "grad_norm": 1.219609482626167,
      "learning_rate": 9.905857963884878e-06,
      "loss": 0.1899,
      "step": 3086
    },
    {
      "epoch": 0.09005776299667426,
      "grad_norm": 0.8906354180218126,
      "learning_rate": 9.905766696975134e-06,
      "loss": 0.1925,
      "step": 3087
    },
    {
      "epoch": 0.09008693622731781,
      "grad_norm": 0.8998464826192879,
      "learning_rate": 9.905675386267877e-06,
      "loss": 0.2051,
      "step": 3088
    },
    {
      "epoch": 0.09011610945796138,
      "grad_norm": 1.0311402053101608,
      "learning_rate": 9.90558403176392e-06,
      "loss": 0.1956,
      "step": 3089
    },
    {
      "epoch": 0.09014528268860493,
      "grad_norm": 1.046837754144681,
      "learning_rate": 9.90549263346408e-06,
      "loss": 0.2266,
      "step": 3090
    },
    {
      "epoch": 0.0901744559192485,
      "grad_norm": 0.9232934703463918,
      "learning_rate": 9.905401191369172e-06,
      "loss": 0.2041,
      "step": 3091
    },
    {
      "epoch": 0.09020362914989206,
      "grad_norm": 0.929324485804581,
      "learning_rate": 9.905309705480014e-06,
      "loss": 0.2204,
      "step": 3092
    },
    {
      "epoch": 0.09023280238053562,
      "grad_norm": 0.7903364123471041,
      "learning_rate": 9.905218175797421e-06,
      "loss": 0.1999,
      "step": 3093
    },
    {
      "epoch": 0.09026197561117918,
      "grad_norm": 1.137748834396601,
      "learning_rate": 9.905126602322212e-06,
      "loss": 0.1671,
      "step": 3094
    },
    {
      "epoch": 0.09029114884182274,
      "grad_norm": 1.044897327008593,
      "learning_rate": 9.905034985055205e-06,
      "loss": 0.1846,
      "step": 3095
    },
    {
      "epoch": 0.0903203220724663,
      "grad_norm": 0.7780612022090226,
      "learning_rate": 9.904943323997216e-06,
      "loss": 0.1588,
      "step": 3096
    },
    {
      "epoch": 0.09034949530310987,
      "grad_norm": 0.8476472221427317,
      "learning_rate": 9.904851619149063e-06,
      "loss": 0.1766,
      "step": 3097
    },
    {
      "epoch": 0.09037866853375343,
      "grad_norm": 0.922397270879313,
      "learning_rate": 9.904759870511564e-06,
      "loss": 0.165,
      "step": 3098
    },
    {
      "epoch": 0.09040784176439699,
      "grad_norm": 0.9510848219792485,
      "learning_rate": 9.904668078085543e-06,
      "loss": 0.2115,
      "step": 3099
    },
    {
      "epoch": 0.09043701499504055,
      "grad_norm": 0.8949759049240087,
      "learning_rate": 9.904576241871814e-06,
      "loss": 0.1911,
      "step": 3100
    },
    {
      "epoch": 0.09046618822568411,
      "grad_norm": 1.4688418395431837,
      "learning_rate": 9.9044843618712e-06,
      "loss": 0.1785,
      "step": 3101
    },
    {
      "epoch": 0.09049536145632768,
      "grad_norm": 0.7970945369810468,
      "learning_rate": 9.90439243808452e-06,
      "loss": 0.2037,
      "step": 3102
    },
    {
      "epoch": 0.09052453468697123,
      "grad_norm": 0.8455029656333727,
      "learning_rate": 9.904300470512595e-06,
      "loss": 0.1704,
      "step": 3103
    },
    {
      "epoch": 0.0905537079176148,
      "grad_norm": 0.9547802543491765,
      "learning_rate": 9.904208459156247e-06,
      "loss": 0.1919,
      "step": 3104
    },
    {
      "epoch": 0.09058288114825835,
      "grad_norm": 1.006677853859439,
      "learning_rate": 9.904116404016296e-06,
      "loss": 0.2218,
      "step": 3105
    },
    {
      "epoch": 0.09061205437890192,
      "grad_norm": 0.8237642844111117,
      "learning_rate": 9.904024305093564e-06,
      "loss": 0.1964,
      "step": 3106
    },
    {
      "epoch": 0.09064122760954547,
      "grad_norm": 0.8582909828870113,
      "learning_rate": 9.903932162388875e-06,
      "loss": 0.2008,
      "step": 3107
    },
    {
      "epoch": 0.09067040084018904,
      "grad_norm": 0.7976472158239346,
      "learning_rate": 9.903839975903049e-06,
      "loss": 0.1964,
      "step": 3108
    },
    {
      "epoch": 0.09069957407083261,
      "grad_norm": 0.8758348784855697,
      "learning_rate": 9.903747745636912e-06,
      "loss": 0.2018,
      "step": 3109
    },
    {
      "epoch": 0.09072874730147616,
      "grad_norm": 0.9311384467244032,
      "learning_rate": 9.903655471591285e-06,
      "loss": 0.1821,
      "step": 3110
    },
    {
      "epoch": 0.09075792053211973,
      "grad_norm": 0.8383641189741342,
      "learning_rate": 9.903563153766992e-06,
      "loss": 0.1604,
      "step": 3111
    },
    {
      "epoch": 0.09078709376276328,
      "grad_norm": 0.7450909120524571,
      "learning_rate": 9.903470792164857e-06,
      "loss": 0.1886,
      "step": 3112
    },
    {
      "epoch": 0.09081626699340685,
      "grad_norm": 0.9430597561494886,
      "learning_rate": 9.903378386785707e-06,
      "loss": 0.2151,
      "step": 3113
    },
    {
      "epoch": 0.09084544022405042,
      "grad_norm": 1.0206645907134217,
      "learning_rate": 9.903285937630364e-06,
      "loss": 0.1744,
      "step": 3114
    },
    {
      "epoch": 0.09087461345469397,
      "grad_norm": 0.8434807430942434,
      "learning_rate": 9.903193444699656e-06,
      "loss": 0.1908,
      "step": 3115
    },
    {
      "epoch": 0.09090378668533754,
      "grad_norm": 0.9500140411201377,
      "learning_rate": 9.903100907994407e-06,
      "loss": 0.2084,
      "step": 3116
    },
    {
      "epoch": 0.09093295991598109,
      "grad_norm": 0.837192644453322,
      "learning_rate": 9.903008327515442e-06,
      "loss": 0.1884,
      "step": 3117
    },
    {
      "epoch": 0.09096213314662466,
      "grad_norm": 0.8378672650614899,
      "learning_rate": 9.902915703263591e-06,
      "loss": 0.1848,
      "step": 3118
    },
    {
      "epoch": 0.09099130637726822,
      "grad_norm": 0.9663374647208482,
      "learning_rate": 9.902823035239678e-06,
      "loss": 0.1759,
      "step": 3119
    },
    {
      "epoch": 0.09102047960791178,
      "grad_norm": 0.841080490878279,
      "learning_rate": 9.902730323444531e-06,
      "loss": 0.2047,
      "step": 3120
    },
    {
      "epoch": 0.09104965283855534,
      "grad_norm": 0.9822202779269245,
      "learning_rate": 9.902637567878979e-06,
      "loss": 0.1889,
      "step": 3121
    },
    {
      "epoch": 0.0910788260691989,
      "grad_norm": 0.8996420337880603,
      "learning_rate": 9.90254476854385e-06,
      "loss": 0.1784,
      "step": 3122
    },
    {
      "epoch": 0.09110799929984247,
      "grad_norm": 1.3390013111690215,
      "learning_rate": 9.90245192543997e-06,
      "loss": 0.1851,
      "step": 3123
    },
    {
      "epoch": 0.09113717253048603,
      "grad_norm": 0.8384136121624998,
      "learning_rate": 9.90235903856817e-06,
      "loss": 0.192,
      "step": 3124
    },
    {
      "epoch": 0.09116634576112959,
      "grad_norm": 0.8420761828985089,
      "learning_rate": 9.902266107929279e-06,
      "loss": 0.1807,
      "step": 3125
    },
    {
      "epoch": 0.09119551899177315,
      "grad_norm": 0.8803474977758997,
      "learning_rate": 9.902173133524125e-06,
      "loss": 0.1902,
      "step": 3126
    },
    {
      "epoch": 0.0912246922224167,
      "grad_norm": 0.9323609834892589,
      "learning_rate": 9.902080115353541e-06,
      "loss": 0.2006,
      "step": 3127
    },
    {
      "epoch": 0.09125386545306027,
      "grad_norm": 0.9239092626296833,
      "learning_rate": 9.901987053418355e-06,
      "loss": 0.1729,
      "step": 3128
    },
    {
      "epoch": 0.09128303868370383,
      "grad_norm": 0.9192162854775283,
      "learning_rate": 9.901893947719401e-06,
      "loss": 0.1734,
      "step": 3129
    },
    {
      "epoch": 0.0913122119143474,
      "grad_norm": 1.1515306150746933,
      "learning_rate": 9.901800798257506e-06,
      "loss": 0.2072,
      "step": 3130
    },
    {
      "epoch": 0.09134138514499096,
      "grad_norm": 0.9095002771703233,
      "learning_rate": 9.901707605033504e-06,
      "loss": 0.1883,
      "step": 3131
    },
    {
      "epoch": 0.09137055837563451,
      "grad_norm": 0.9494728333330158,
      "learning_rate": 9.901614368048226e-06,
      "loss": 0.1689,
      "step": 3132
    },
    {
      "epoch": 0.09139973160627808,
      "grad_norm": 1.0741251960370346,
      "learning_rate": 9.901521087302508e-06,
      "loss": 0.1848,
      "step": 3133
    },
    {
      "epoch": 0.09142890483692163,
      "grad_norm": 0.9276160324018725,
      "learning_rate": 9.901427762797176e-06,
      "loss": 0.201,
      "step": 3134
    },
    {
      "epoch": 0.0914580780675652,
      "grad_norm": 0.9020587477903261,
      "learning_rate": 9.901334394533069e-06,
      "loss": 0.1815,
      "step": 3135
    },
    {
      "epoch": 0.09148725129820877,
      "grad_norm": 0.9543040013265817,
      "learning_rate": 9.901240982511017e-06,
      "loss": 0.1853,
      "step": 3136
    },
    {
      "epoch": 0.09151642452885232,
      "grad_norm": 0.8074048966683778,
      "learning_rate": 9.901147526731857e-06,
      "loss": 0.2072,
      "step": 3137
    },
    {
      "epoch": 0.09154559775949589,
      "grad_norm": 0.8168688399715941,
      "learning_rate": 9.901054027196422e-06,
      "loss": 0.1705,
      "step": 3138
    },
    {
      "epoch": 0.09157477099013944,
      "grad_norm": 0.9696701967770042,
      "learning_rate": 9.900960483905546e-06,
      "loss": 0.1854,
      "step": 3139
    },
    {
      "epoch": 0.09160394422078301,
      "grad_norm": 0.8608991500956572,
      "learning_rate": 9.900866896860066e-06,
      "loss": 0.1764,
      "step": 3140
    },
    {
      "epoch": 0.09163311745142658,
      "grad_norm": 0.9563370412441334,
      "learning_rate": 9.900773266060814e-06,
      "loss": 0.2001,
      "step": 3141
    },
    {
      "epoch": 0.09166229068207013,
      "grad_norm": 1.0622492285855472,
      "learning_rate": 9.90067959150863e-06,
      "loss": 0.2042,
      "step": 3142
    },
    {
      "epoch": 0.0916914639127137,
      "grad_norm": 0.980733770469344,
      "learning_rate": 9.90058587320435e-06,
      "loss": 0.207,
      "step": 3143
    },
    {
      "epoch": 0.09172063714335725,
      "grad_norm": 0.9548998232870056,
      "learning_rate": 9.900492111148804e-06,
      "loss": 0.2022,
      "step": 3144
    },
    {
      "epoch": 0.09174981037400082,
      "grad_norm": 1.1523984929924487,
      "learning_rate": 9.900398305342838e-06,
      "loss": 0.1978,
      "step": 3145
    },
    {
      "epoch": 0.09177898360464438,
      "grad_norm": 0.9496215587216399,
      "learning_rate": 9.900304455787285e-06,
      "loss": 0.1852,
      "step": 3146
    },
    {
      "epoch": 0.09180815683528794,
      "grad_norm": 1.0291303492810868,
      "learning_rate": 9.900210562482985e-06,
      "loss": 0.1993,
      "step": 3147
    },
    {
      "epoch": 0.0918373300659315,
      "grad_norm": 1.1989672928651198,
      "learning_rate": 9.900116625430774e-06,
      "loss": 0.193,
      "step": 3148
    },
    {
      "epoch": 0.09186650329657506,
      "grad_norm": 0.9900630480878376,
      "learning_rate": 9.90002264463149e-06,
      "loss": 0.2284,
      "step": 3149
    },
    {
      "epoch": 0.09189567652721863,
      "grad_norm": 1.0363159213300737,
      "learning_rate": 9.899928620085975e-06,
      "loss": 0.196,
      "step": 3150
    },
    {
      "epoch": 0.09192484975786219,
      "grad_norm": 1.089681182274323,
      "learning_rate": 9.899834551795066e-06,
      "loss": 0.1909,
      "step": 3151
    },
    {
      "epoch": 0.09195402298850575,
      "grad_norm": 1.1716790411151727,
      "learning_rate": 9.899740439759605e-06,
      "loss": 0.1828,
      "step": 3152
    },
    {
      "epoch": 0.09198319621914931,
      "grad_norm": 0.8815311348464451,
      "learning_rate": 9.899646283980432e-06,
      "loss": 0.1781,
      "step": 3153
    },
    {
      "epoch": 0.09201236944979287,
      "grad_norm": 0.8823874358845932,
      "learning_rate": 9.899552084458383e-06,
      "loss": 0.2,
      "step": 3154
    },
    {
      "epoch": 0.09204154268043643,
      "grad_norm": 1.0980207624784954,
      "learning_rate": 9.899457841194307e-06,
      "loss": 0.1835,
      "step": 3155
    },
    {
      "epoch": 0.09207071591107999,
      "grad_norm": 1.09346749548337,
      "learning_rate": 9.899363554189038e-06,
      "loss": 0.1886,
      "step": 3156
    },
    {
      "epoch": 0.09209988914172355,
      "grad_norm": 0.832123474446183,
      "learning_rate": 9.899269223443421e-06,
      "loss": 0.2037,
      "step": 3157
    },
    {
      "epoch": 0.09212906237236712,
      "grad_norm": 0.9348866978298538,
      "learning_rate": 9.899174848958298e-06,
      "loss": 0.191,
      "step": 3158
    },
    {
      "epoch": 0.09215823560301067,
      "grad_norm": 0.8448691113518083,
      "learning_rate": 9.899080430734512e-06,
      "loss": 0.168,
      "step": 3159
    },
    {
      "epoch": 0.09218740883365424,
      "grad_norm": 0.9668811661491377,
      "learning_rate": 9.898985968772905e-06,
      "loss": 0.1911,
      "step": 3160
    },
    {
      "epoch": 0.0922165820642978,
      "grad_norm": 0.8822283021391198,
      "learning_rate": 9.898891463074321e-06,
      "loss": 0.2,
      "step": 3161
    },
    {
      "epoch": 0.09224575529494136,
      "grad_norm": 0.8299612372235152,
      "learning_rate": 9.898796913639605e-06,
      "loss": 0.1518,
      "step": 3162
    },
    {
      "epoch": 0.09227492852558493,
      "grad_norm": 1.1499537794875645,
      "learning_rate": 9.898702320469597e-06,
      "loss": 0.1837,
      "step": 3163
    },
    {
      "epoch": 0.09230410175622848,
      "grad_norm": 0.9581816435886256,
      "learning_rate": 9.898607683565146e-06,
      "loss": 0.1635,
      "step": 3164
    },
    {
      "epoch": 0.09233327498687205,
      "grad_norm": 0.776698703755397,
      "learning_rate": 9.898513002927094e-06,
      "loss": 0.1697,
      "step": 3165
    },
    {
      "epoch": 0.0923624482175156,
      "grad_norm": 1.1063066015427327,
      "learning_rate": 9.898418278556288e-06,
      "loss": 0.1696,
      "step": 3166
    },
    {
      "epoch": 0.09239162144815917,
      "grad_norm": 1.0075887363532354,
      "learning_rate": 9.898323510453571e-06,
      "loss": 0.1845,
      "step": 3167
    },
    {
      "epoch": 0.09242079467880274,
      "grad_norm": 0.9451382122340356,
      "learning_rate": 9.898228698619794e-06,
      "loss": 0.1791,
      "step": 3168
    },
    {
      "epoch": 0.09244996790944629,
      "grad_norm": 1.037499933351149,
      "learning_rate": 9.898133843055798e-06,
      "loss": 0.1729,
      "step": 3169
    },
    {
      "epoch": 0.09247914114008986,
      "grad_norm": 1.3896563982420864,
      "learning_rate": 9.898038943762434e-06,
      "loss": 0.1966,
      "step": 3170
    },
    {
      "epoch": 0.09250831437073341,
      "grad_norm": 1.009329571422478,
      "learning_rate": 9.897944000740547e-06,
      "loss": 0.2243,
      "step": 3171
    },
    {
      "epoch": 0.09253748760137698,
      "grad_norm": 0.8822312471226745,
      "learning_rate": 9.897849013990985e-06,
      "loss": 0.1789,
      "step": 3172
    },
    {
      "epoch": 0.09256666083202054,
      "grad_norm": 1.1137719163993303,
      "learning_rate": 9.897753983514595e-06,
      "loss": 0.222,
      "step": 3173
    },
    {
      "epoch": 0.0925958340626641,
      "grad_norm": 1.0172438228433538,
      "learning_rate": 9.897658909312229e-06,
      "loss": 0.1824,
      "step": 3174
    },
    {
      "epoch": 0.09262500729330767,
      "grad_norm": 0.9884739364426408,
      "learning_rate": 9.897563791384733e-06,
      "loss": 0.1768,
      "step": 3175
    },
    {
      "epoch": 0.09265418052395122,
      "grad_norm": 1.0481532690179143,
      "learning_rate": 9.897468629732956e-06,
      "loss": 0.2083,
      "step": 3176
    },
    {
      "epoch": 0.09268335375459479,
      "grad_norm": 0.9691419807415792,
      "learning_rate": 9.897373424357747e-06,
      "loss": 0.2084,
      "step": 3177
    },
    {
      "epoch": 0.09271252698523834,
      "grad_norm": 0.9862788336906526,
      "learning_rate": 9.897278175259959e-06,
      "loss": 0.1862,
      "step": 3178
    },
    {
      "epoch": 0.0927417002158819,
      "grad_norm": 0.9780930416857407,
      "learning_rate": 9.897182882440439e-06,
      "loss": 0.1723,
      "step": 3179
    },
    {
      "epoch": 0.09277087344652547,
      "grad_norm": 0.7931073017265083,
      "learning_rate": 9.897087545900039e-06,
      "loss": 0.1902,
      "step": 3180
    },
    {
      "epoch": 0.09280004667716903,
      "grad_norm": 0.8816903279426856,
      "learning_rate": 9.896992165639612e-06,
      "loss": 0.1863,
      "step": 3181
    },
    {
      "epoch": 0.0928292199078126,
      "grad_norm": 0.949194604791015,
      "learning_rate": 9.896896741660008e-06,
      "loss": 0.2192,
      "step": 3182
    },
    {
      "epoch": 0.09285839313845615,
      "grad_norm": 0.918235761083928,
      "learning_rate": 9.896801273962078e-06,
      "loss": 0.1992,
      "step": 3183
    },
    {
      "epoch": 0.09288756636909971,
      "grad_norm": 0.807178404781097,
      "learning_rate": 9.896705762546676e-06,
      "loss": 0.1997,
      "step": 3184
    },
    {
      "epoch": 0.09291673959974328,
      "grad_norm": 0.9321238823064868,
      "learning_rate": 9.896610207414654e-06,
      "loss": 0.2117,
      "step": 3185
    },
    {
      "epoch": 0.09294591283038683,
      "grad_norm": 0.9375778044105287,
      "learning_rate": 9.896514608566863e-06,
      "loss": 0.2062,
      "step": 3186
    },
    {
      "epoch": 0.0929750860610304,
      "grad_norm": 0.7205607205601595,
      "learning_rate": 9.896418966004159e-06,
      "loss": 0.1912,
      "step": 3187
    },
    {
      "epoch": 0.09300425929167395,
      "grad_norm": 0.8587312590346948,
      "learning_rate": 9.896323279727398e-06,
      "loss": 0.186,
      "step": 3188
    },
    {
      "epoch": 0.09303343252231752,
      "grad_norm": 1.0585477782466406,
      "learning_rate": 9.89622754973743e-06,
      "loss": 0.2327,
      "step": 3189
    },
    {
      "epoch": 0.09306260575296109,
      "grad_norm": 0.8824097003662679,
      "learning_rate": 9.896131776035111e-06,
      "loss": 0.1881,
      "step": 3190
    },
    {
      "epoch": 0.09309177898360464,
      "grad_norm": 1.0035868168095385,
      "learning_rate": 9.896035958621295e-06,
      "loss": 0.1937,
      "step": 3191
    },
    {
      "epoch": 0.09312095221424821,
      "grad_norm": 1.0261592643292214,
      "learning_rate": 9.89594009749684e-06,
      "loss": 0.2211,
      "step": 3192
    },
    {
      "epoch": 0.09315012544489176,
      "grad_norm": 0.8813262335652425,
      "learning_rate": 9.895844192662602e-06,
      "loss": 0.1801,
      "step": 3193
    },
    {
      "epoch": 0.09317929867553533,
      "grad_norm": 1.178278218707878,
      "learning_rate": 9.895748244119434e-06,
      "loss": 0.1958,
      "step": 3194
    },
    {
      "epoch": 0.0932084719061789,
      "grad_norm": 1.0313091909980319,
      "learning_rate": 9.895652251868196e-06,
      "loss": 0.2324,
      "step": 3195
    },
    {
      "epoch": 0.09323764513682245,
      "grad_norm": 0.9311020701092604,
      "learning_rate": 9.89555621590974e-06,
      "loss": 0.1788,
      "step": 3196
    },
    {
      "epoch": 0.09326681836746602,
      "grad_norm": 0.764970517720893,
      "learning_rate": 9.89546013624493e-06,
      "loss": 0.1872,
      "step": 3197
    },
    {
      "epoch": 0.09329599159810957,
      "grad_norm": 0.9470134867851716,
      "learning_rate": 9.89536401287462e-06,
      "loss": 0.1919,
      "step": 3198
    },
    {
      "epoch": 0.09332516482875314,
      "grad_norm": 0.8313124470837624,
      "learning_rate": 9.895267845799667e-06,
      "loss": 0.2011,
      "step": 3199
    },
    {
      "epoch": 0.09335433805939669,
      "grad_norm": 0.7609124467794467,
      "learning_rate": 9.895171635020933e-06,
      "loss": 0.1792,
      "step": 3200
    },
    {
      "epoch": 0.09338351129004026,
      "grad_norm": 0.8741796775430868,
      "learning_rate": 9.895075380539275e-06,
      "loss": 0.213,
      "step": 3201
    },
    {
      "epoch": 0.09341268452068383,
      "grad_norm": 0.9733134720207894,
      "learning_rate": 9.894979082355552e-06,
      "loss": 0.2125,
      "step": 3202
    },
    {
      "epoch": 0.09344185775132738,
      "grad_norm": 0.9272558824072925,
      "learning_rate": 9.894882740470625e-06,
      "loss": 0.2068,
      "step": 3203
    },
    {
      "epoch": 0.09347103098197095,
      "grad_norm": 0.8939817708889214,
      "learning_rate": 9.894786354885354e-06,
      "loss": 0.2177,
      "step": 3204
    },
    {
      "epoch": 0.0935002042126145,
      "grad_norm": 0.7836086036755353,
      "learning_rate": 9.894689925600596e-06,
      "loss": 0.1964,
      "step": 3205
    },
    {
      "epoch": 0.09352937744325807,
      "grad_norm": 0.8645969989270436,
      "learning_rate": 9.894593452617216e-06,
      "loss": 0.1979,
      "step": 3206
    },
    {
      "epoch": 0.09355855067390163,
      "grad_norm": 0.8637258641034835,
      "learning_rate": 9.894496935936076e-06,
      "loss": 0.179,
      "step": 3207
    },
    {
      "epoch": 0.09358772390454519,
      "grad_norm": 0.686666093078826,
      "learning_rate": 9.894400375558035e-06,
      "loss": 0.1797,
      "step": 3208
    },
    {
      "epoch": 0.09361689713518875,
      "grad_norm": 0.9946572338255079,
      "learning_rate": 9.894303771483955e-06,
      "loss": 0.1913,
      "step": 3209
    },
    {
      "epoch": 0.0936460703658323,
      "grad_norm": 0.7749649434714495,
      "learning_rate": 9.8942071237147e-06,
      "loss": 0.1663,
      "step": 3210
    },
    {
      "epoch": 0.09367524359647587,
      "grad_norm": 0.7833073905906534,
      "learning_rate": 9.894110432251131e-06,
      "loss": 0.1699,
      "step": 3211
    },
    {
      "epoch": 0.09370441682711944,
      "grad_norm": 0.9176131665834725,
      "learning_rate": 9.894013697094113e-06,
      "loss": 0.178,
      "step": 3212
    },
    {
      "epoch": 0.093733590057763,
      "grad_norm": 0.82519863993861,
      "learning_rate": 9.89391691824451e-06,
      "loss": 0.1909,
      "step": 3213
    },
    {
      "epoch": 0.09376276328840656,
      "grad_norm": 1.0785544956802007,
      "learning_rate": 9.893820095703185e-06,
      "loss": 0.1708,
      "step": 3214
    },
    {
      "epoch": 0.09379193651905011,
      "grad_norm": 0.8957478455051892,
      "learning_rate": 9.893723229471001e-06,
      "loss": 0.1815,
      "step": 3215
    },
    {
      "epoch": 0.09382110974969368,
      "grad_norm": 1.0309759483868939,
      "learning_rate": 9.893626319548823e-06,
      "loss": 0.2181,
      "step": 3216
    },
    {
      "epoch": 0.09385028298033725,
      "grad_norm": 1.112924509433906,
      "learning_rate": 9.89352936593752e-06,
      "loss": 0.1885,
      "step": 3217
    },
    {
      "epoch": 0.0938794562109808,
      "grad_norm": 1.0197801277631693,
      "learning_rate": 9.893432368637954e-06,
      "loss": 0.1724,
      "step": 3218
    },
    {
      "epoch": 0.09390862944162437,
      "grad_norm": 0.9052079172498365,
      "learning_rate": 9.893335327650992e-06,
      "loss": 0.1759,
      "step": 3219
    },
    {
      "epoch": 0.09393780267226792,
      "grad_norm": 1.0412474859763168,
      "learning_rate": 9.893238242977502e-06,
      "loss": 0.1901,
      "step": 3220
    },
    {
      "epoch": 0.09396697590291149,
      "grad_norm": 0.8339256080577501,
      "learning_rate": 9.893141114618348e-06,
      "loss": 0.1968,
      "step": 3221
    },
    {
      "epoch": 0.09399614913355504,
      "grad_norm": 0.8540496003410044,
      "learning_rate": 9.893043942574397e-06,
      "loss": 0.1831,
      "step": 3222
    },
    {
      "epoch": 0.09402532236419861,
      "grad_norm": 0.992501538161245,
      "learning_rate": 9.89294672684652e-06,
      "loss": 0.2003,
      "step": 3223
    },
    {
      "epoch": 0.09405449559484218,
      "grad_norm": 0.9560750673388776,
      "learning_rate": 9.89284946743558e-06,
      "loss": 0.197,
      "step": 3224
    },
    {
      "epoch": 0.09408366882548573,
      "grad_norm": 1.117693455821563,
      "learning_rate": 9.892752164342449e-06,
      "loss": 0.1927,
      "step": 3225
    },
    {
      "epoch": 0.0941128420561293,
      "grad_norm": 0.918814702440944,
      "learning_rate": 9.892654817567995e-06,
      "loss": 0.1923,
      "step": 3226
    },
    {
      "epoch": 0.09414201528677285,
      "grad_norm": 1.0096062228169131,
      "learning_rate": 9.892557427113087e-06,
      "loss": 0.1659,
      "step": 3227
    },
    {
      "epoch": 0.09417118851741642,
      "grad_norm": 0.9441484563863961,
      "learning_rate": 9.892459992978594e-06,
      "loss": 0.1872,
      "step": 3228
    },
    {
      "epoch": 0.09420036174805999,
      "grad_norm": 0.9177509239500861,
      "learning_rate": 9.892362515165386e-06,
      "loss": 0.2264,
      "step": 3229
    },
    {
      "epoch": 0.09422953497870354,
      "grad_norm": 0.898743746501821,
      "learning_rate": 9.892264993674334e-06,
      "loss": 0.189,
      "step": 3230
    },
    {
      "epoch": 0.0942587082093471,
      "grad_norm": 0.9162917180759605,
      "learning_rate": 9.892167428506307e-06,
      "loss": 0.1947,
      "step": 3231
    },
    {
      "epoch": 0.09428788143999066,
      "grad_norm": 1.1773099051391154,
      "learning_rate": 9.892069819662179e-06,
      "loss": 0.1942,
      "step": 3232
    },
    {
      "epoch": 0.09431705467063423,
      "grad_norm": 1.013402861911588,
      "learning_rate": 9.891972167142816e-06,
      "loss": 0.1939,
      "step": 3233
    },
    {
      "epoch": 0.0943462279012778,
      "grad_norm": 1.2777285577531594,
      "learning_rate": 9.891874470949095e-06,
      "loss": 0.1895,
      "step": 3234
    },
    {
      "epoch": 0.09437540113192135,
      "grad_norm": 0.9119440469852996,
      "learning_rate": 9.891776731081887e-06,
      "loss": 0.209,
      "step": 3235
    },
    {
      "epoch": 0.09440457436256491,
      "grad_norm": 0.8659368042696534,
      "learning_rate": 9.891678947542063e-06,
      "loss": 0.1967,
      "step": 3236
    },
    {
      "epoch": 0.09443374759320847,
      "grad_norm": 0.9496012057232084,
      "learning_rate": 9.891581120330498e-06,
      "loss": 0.1841,
      "step": 3237
    },
    {
      "epoch": 0.09446292082385203,
      "grad_norm": 0.8945707600040342,
      "learning_rate": 9.891483249448066e-06,
      "loss": 0.1724,
      "step": 3238
    },
    {
      "epoch": 0.0944920940544956,
      "grad_norm": 0.8099025961551439,
      "learning_rate": 9.891385334895637e-06,
      "loss": 0.1955,
      "step": 3239
    },
    {
      "epoch": 0.09452126728513915,
      "grad_norm": 0.8034245101934876,
      "learning_rate": 9.891287376674089e-06,
      "loss": 0.1944,
      "step": 3240
    },
    {
      "epoch": 0.09455044051578272,
      "grad_norm": 0.9094264512261659,
      "learning_rate": 9.891189374784294e-06,
      "loss": 0.1938,
      "step": 3241
    },
    {
      "epoch": 0.09457961374642627,
      "grad_norm": 1.0101400403658125,
      "learning_rate": 9.891091329227127e-06,
      "loss": 0.1916,
      "step": 3242
    },
    {
      "epoch": 0.09460878697706984,
      "grad_norm": 0.9529816607580996,
      "learning_rate": 9.890993240003465e-06,
      "loss": 0.1856,
      "step": 3243
    },
    {
      "epoch": 0.09463796020771341,
      "grad_norm": 0.7679335540210122,
      "learning_rate": 9.890895107114182e-06,
      "loss": 0.2018,
      "step": 3244
    },
    {
      "epoch": 0.09466713343835696,
      "grad_norm": 1.1404036593134883,
      "learning_rate": 9.890796930560156e-06,
      "loss": 0.1774,
      "step": 3245
    },
    {
      "epoch": 0.09469630666900053,
      "grad_norm": 0.8780786896423192,
      "learning_rate": 9.890698710342263e-06,
      "loss": 0.1967,
      "step": 3246
    },
    {
      "epoch": 0.09472547989964408,
      "grad_norm": 0.9576698836614522,
      "learning_rate": 9.89060044646138e-06,
      "loss": 0.1814,
      "step": 3247
    },
    {
      "epoch": 0.09475465313028765,
      "grad_norm": 0.8730570112400692,
      "learning_rate": 9.890502138918382e-06,
      "loss": 0.1873,
      "step": 3248
    },
    {
      "epoch": 0.0947838263609312,
      "grad_norm": 1.2602141128621513,
      "learning_rate": 9.890403787714148e-06,
      "loss": 0.1919,
      "step": 3249
    },
    {
      "epoch": 0.09481299959157477,
      "grad_norm": 1.0245830975735313,
      "learning_rate": 9.890305392849559e-06,
      "loss": 0.1758,
      "step": 3250
    },
    {
      "epoch": 0.09484217282221834,
      "grad_norm": 0.9221155138585105,
      "learning_rate": 9.89020695432549e-06,
      "loss": 0.2038,
      "step": 3251
    },
    {
      "epoch": 0.09487134605286189,
      "grad_norm": 1.4151546844713536,
      "learning_rate": 9.890108472142818e-06,
      "loss": 0.2032,
      "step": 3252
    },
    {
      "epoch": 0.09490051928350546,
      "grad_norm": 1.0559456413965769,
      "learning_rate": 9.890009946302429e-06,
      "loss": 0.2026,
      "step": 3253
    },
    {
      "epoch": 0.09492969251414901,
      "grad_norm": 1.0718071293408422,
      "learning_rate": 9.889911376805195e-06,
      "loss": 0.1714,
      "step": 3254
    },
    {
      "epoch": 0.09495886574479258,
      "grad_norm": 0.9089126402809143,
      "learning_rate": 9.889812763652002e-06,
      "loss": 0.2007,
      "step": 3255
    },
    {
      "epoch": 0.09498803897543615,
      "grad_norm": 1.1091010533807344,
      "learning_rate": 9.889714106843726e-06,
      "loss": 0.1913,
      "step": 3256
    },
    {
      "epoch": 0.0950172122060797,
      "grad_norm": 1.0009387069839508,
      "learning_rate": 9.889615406381252e-06,
      "loss": 0.1781,
      "step": 3257
    },
    {
      "epoch": 0.09504638543672327,
      "grad_norm": 0.9272485928702179,
      "learning_rate": 9.889516662265457e-06,
      "loss": 0.1754,
      "step": 3258
    },
    {
      "epoch": 0.09507555866736682,
      "grad_norm": 0.7614197847993052,
      "learning_rate": 9.889417874497225e-06,
      "loss": 0.1636,
      "step": 3259
    },
    {
      "epoch": 0.09510473189801039,
      "grad_norm": 0.9743955166343347,
      "learning_rate": 9.889319043077438e-06,
      "loss": 0.1972,
      "step": 3260
    },
    {
      "epoch": 0.09513390512865395,
      "grad_norm": 0.9910664775395345,
      "learning_rate": 9.889220168006977e-06,
      "loss": 0.2058,
      "step": 3261
    },
    {
      "epoch": 0.0951630783592975,
      "grad_norm": 0.7768260311895827,
      "learning_rate": 9.889121249286727e-06,
      "loss": 0.1661,
      "step": 3262
    },
    {
      "epoch": 0.09519225158994107,
      "grad_norm": 0.919702114574406,
      "learning_rate": 9.889022286917567e-06,
      "loss": 0.1687,
      "step": 3263
    },
    {
      "epoch": 0.09522142482058463,
      "grad_norm": 1.1063076369430893,
      "learning_rate": 9.888923280900385e-06,
      "loss": 0.1684,
      "step": 3264
    },
    {
      "epoch": 0.0952505980512282,
      "grad_norm": 0.9419642147128596,
      "learning_rate": 9.888824231236063e-06,
      "loss": 0.1717,
      "step": 3265
    },
    {
      "epoch": 0.09527977128187176,
      "grad_norm": 0.8241379303385615,
      "learning_rate": 9.888725137925484e-06,
      "loss": 0.1885,
      "step": 3266
    },
    {
      "epoch": 0.09530894451251531,
      "grad_norm": 1.0135462106994542,
      "learning_rate": 9.888626000969534e-06,
      "loss": 0.2109,
      "step": 3267
    },
    {
      "epoch": 0.09533811774315888,
      "grad_norm": 0.9170158099052814,
      "learning_rate": 9.8885268203691e-06,
      "loss": 0.1987,
      "step": 3268
    },
    {
      "epoch": 0.09536729097380243,
      "grad_norm": 1.0248116195809147,
      "learning_rate": 9.888427596125063e-06,
      "loss": 0.2015,
      "step": 3269
    },
    {
      "epoch": 0.095396464204446,
      "grad_norm": 0.9796239011512825,
      "learning_rate": 9.888328328238313e-06,
      "loss": 0.2114,
      "step": 3270
    },
    {
      "epoch": 0.09542563743508956,
      "grad_norm": 0.9266420074692738,
      "learning_rate": 9.888229016709735e-06,
      "loss": 0.1797,
      "step": 3271
    },
    {
      "epoch": 0.09545481066573312,
      "grad_norm": 1.0461543235954773,
      "learning_rate": 9.888129661540215e-06,
      "loss": 0.1819,
      "step": 3272
    },
    {
      "epoch": 0.09548398389637669,
      "grad_norm": 0.8013264228977466,
      "learning_rate": 9.88803026273064e-06,
      "loss": 0.171,
      "step": 3273
    },
    {
      "epoch": 0.09551315712702024,
      "grad_norm": 0.9709236879591128,
      "learning_rate": 9.887930820281896e-06,
      "loss": 0.1764,
      "step": 3274
    },
    {
      "epoch": 0.09554233035766381,
      "grad_norm": 0.9074604077154521,
      "learning_rate": 9.887831334194874e-06,
      "loss": 0.1848,
      "step": 3275
    },
    {
      "epoch": 0.09557150358830736,
      "grad_norm": 0.8539229277032876,
      "learning_rate": 9.887731804470462e-06,
      "loss": 0.1935,
      "step": 3276
    },
    {
      "epoch": 0.09560067681895093,
      "grad_norm": 0.9419424877500043,
      "learning_rate": 9.887632231109546e-06,
      "loss": 0.2011,
      "step": 3277
    },
    {
      "epoch": 0.0956298500495945,
      "grad_norm": 0.849111456865038,
      "learning_rate": 9.887532614113018e-06,
      "loss": 0.1814,
      "step": 3278
    },
    {
      "epoch": 0.09565902328023805,
      "grad_norm": 0.8042604791344427,
      "learning_rate": 9.887432953481762e-06,
      "loss": 0.1734,
      "step": 3279
    },
    {
      "epoch": 0.09568819651088162,
      "grad_norm": 0.8820753714732914,
      "learning_rate": 9.887333249216673e-06,
      "loss": 0.1808,
      "step": 3280
    },
    {
      "epoch": 0.09571736974152517,
      "grad_norm": 0.8229004316236141,
      "learning_rate": 9.88723350131864e-06,
      "loss": 0.2121,
      "step": 3281
    },
    {
      "epoch": 0.09574654297216874,
      "grad_norm": 0.8417919912196324,
      "learning_rate": 9.887133709788552e-06,
      "loss": 0.2052,
      "step": 3282
    },
    {
      "epoch": 0.0957757162028123,
      "grad_norm": 0.8615882879927675,
      "learning_rate": 9.887033874627303e-06,
      "loss": 0.2154,
      "step": 3283
    },
    {
      "epoch": 0.09580488943345586,
      "grad_norm": 1.0144257818867242,
      "learning_rate": 9.88693399583578e-06,
      "loss": 0.1894,
      "step": 3284
    },
    {
      "epoch": 0.09583406266409943,
      "grad_norm": 0.9307069346703885,
      "learning_rate": 9.886834073414878e-06,
      "loss": 0.1935,
      "step": 3285
    },
    {
      "epoch": 0.09586323589474298,
      "grad_norm": 0.9137495912927327,
      "learning_rate": 9.886734107365486e-06,
      "loss": 0.1805,
      "step": 3286
    },
    {
      "epoch": 0.09589240912538655,
      "grad_norm": 1.0047984497652105,
      "learning_rate": 9.8866340976885e-06,
      "loss": 0.2039,
      "step": 3287
    },
    {
      "epoch": 0.09592158235603011,
      "grad_norm": 0.9344310243993718,
      "learning_rate": 9.886534044384812e-06,
      "loss": 0.2172,
      "step": 3288
    },
    {
      "epoch": 0.09595075558667367,
      "grad_norm": 1.009365258653322,
      "learning_rate": 9.886433947455314e-06,
      "loss": 0.1894,
      "step": 3289
    },
    {
      "epoch": 0.09597992881731723,
      "grad_norm": 1.0553967057413909,
      "learning_rate": 9.886333806900901e-06,
      "loss": 0.1806,
      "step": 3290
    },
    {
      "epoch": 0.09600910204796079,
      "grad_norm": 1.0933924973594682,
      "learning_rate": 9.886233622722464e-06,
      "loss": 0.182,
      "step": 3291
    },
    {
      "epoch": 0.09603827527860435,
      "grad_norm": 0.8160425800459061,
      "learning_rate": 9.886133394920901e-06,
      "loss": 0.1702,
      "step": 3292
    },
    {
      "epoch": 0.09606744850924791,
      "grad_norm": 0.9554707412430091,
      "learning_rate": 9.886033123497106e-06,
      "loss": 0.1959,
      "step": 3293
    },
    {
      "epoch": 0.09609662173989147,
      "grad_norm": 0.7926178740748009,
      "learning_rate": 9.885932808451973e-06,
      "loss": 0.1772,
      "step": 3294
    },
    {
      "epoch": 0.09612579497053504,
      "grad_norm": 0.9356315460002099,
      "learning_rate": 9.885832449786398e-06,
      "loss": 0.1948,
      "step": 3295
    },
    {
      "epoch": 0.0961549682011786,
      "grad_norm": 0.8967607401124458,
      "learning_rate": 9.885732047501277e-06,
      "loss": 0.1947,
      "step": 3296
    },
    {
      "epoch": 0.09618414143182216,
      "grad_norm": 1.0259354476889229,
      "learning_rate": 9.885631601597508e-06,
      "loss": 0.173,
      "step": 3297
    },
    {
      "epoch": 0.09621331466246572,
      "grad_norm": 0.8528681011356201,
      "learning_rate": 9.885531112075986e-06,
      "loss": 0.1803,
      "step": 3298
    },
    {
      "epoch": 0.09624248789310928,
      "grad_norm": 0.7580319978031034,
      "learning_rate": 9.885430578937608e-06,
      "loss": 0.1766,
      "step": 3299
    },
    {
      "epoch": 0.09627166112375285,
      "grad_norm": 0.9766955848878668,
      "learning_rate": 9.88533000218327e-06,
      "loss": 0.2093,
      "step": 3300
    },
    {
      "epoch": 0.0963008343543964,
      "grad_norm": 0.6405171835639228,
      "learning_rate": 9.885229381813875e-06,
      "loss": 0.1709,
      "step": 3301
    },
    {
      "epoch": 0.09633000758503997,
      "grad_norm": 0.9008804603593166,
      "learning_rate": 9.885128717830317e-06,
      "loss": 0.2178,
      "step": 3302
    },
    {
      "epoch": 0.09635918081568352,
      "grad_norm": 0.8826419199815049,
      "learning_rate": 9.885028010233497e-06,
      "loss": 0.1689,
      "step": 3303
    },
    {
      "epoch": 0.09638835404632709,
      "grad_norm": 0.9761910585351266,
      "learning_rate": 9.884927259024311e-06,
      "loss": 0.1964,
      "step": 3304
    },
    {
      "epoch": 0.09641752727697066,
      "grad_norm": 0.8815602490417409,
      "learning_rate": 9.884826464203662e-06,
      "loss": 0.1975,
      "step": 3305
    },
    {
      "epoch": 0.09644670050761421,
      "grad_norm": 0.966632303420737,
      "learning_rate": 9.88472562577245e-06,
      "loss": 0.1909,
      "step": 3306
    },
    {
      "epoch": 0.09647587373825778,
      "grad_norm": 0.9525752124567378,
      "learning_rate": 9.88462474373157e-06,
      "loss": 0.1675,
      "step": 3307
    },
    {
      "epoch": 0.09650504696890133,
      "grad_norm": 0.9006113194578775,
      "learning_rate": 9.88452381808193e-06,
      "loss": 0.1781,
      "step": 3308
    },
    {
      "epoch": 0.0965342201995449,
      "grad_norm": 0.8680790723405596,
      "learning_rate": 9.884422848824424e-06,
      "loss": 0.1796,
      "step": 3309
    },
    {
      "epoch": 0.09656339343018847,
      "grad_norm": 1.6488054174957947,
      "learning_rate": 9.88432183595996e-06,
      "loss": 0.1566,
      "step": 3310
    },
    {
      "epoch": 0.09659256666083202,
      "grad_norm": 0.8059685590284299,
      "learning_rate": 9.884220779489435e-06,
      "loss": 0.1827,
      "step": 3311
    },
    {
      "epoch": 0.09662173989147559,
      "grad_norm": 0.9038782946036477,
      "learning_rate": 9.884119679413753e-06,
      "loss": 0.1832,
      "step": 3312
    },
    {
      "epoch": 0.09665091312211914,
      "grad_norm": 0.8444990222150676,
      "learning_rate": 9.884018535733816e-06,
      "loss": 0.2048,
      "step": 3313
    },
    {
      "epoch": 0.0966800863527627,
      "grad_norm": 0.9332736693317996,
      "learning_rate": 9.883917348450529e-06,
      "loss": 0.1994,
      "step": 3314
    },
    {
      "epoch": 0.09670925958340626,
      "grad_norm": 0.9741830425703921,
      "learning_rate": 9.883816117564792e-06,
      "loss": 0.184,
      "step": 3315
    },
    {
      "epoch": 0.09673843281404983,
      "grad_norm": 0.8552561903773729,
      "learning_rate": 9.883714843077512e-06,
      "loss": 0.1802,
      "step": 3316
    },
    {
      "epoch": 0.0967676060446934,
      "grad_norm": 1.017909983191464,
      "learning_rate": 9.883613524989591e-06,
      "loss": 0.2043,
      "step": 3317
    },
    {
      "epoch": 0.09679677927533695,
      "grad_norm": 0.8278959717775877,
      "learning_rate": 9.883512163301934e-06,
      "loss": 0.1761,
      "step": 3318
    },
    {
      "epoch": 0.09682595250598051,
      "grad_norm": 1.028255430484232,
      "learning_rate": 9.883410758015446e-06,
      "loss": 0.1853,
      "step": 3319
    },
    {
      "epoch": 0.09685512573662407,
      "grad_norm": 0.8815026666009607,
      "learning_rate": 9.883309309131032e-06,
      "loss": 0.168,
      "step": 3320
    },
    {
      "epoch": 0.09688429896726763,
      "grad_norm": 0.9173579267075747,
      "learning_rate": 9.883207816649599e-06,
      "loss": 0.1587,
      "step": 3321
    },
    {
      "epoch": 0.0969134721979112,
      "grad_norm": 0.8683491755852815,
      "learning_rate": 9.883106280572052e-06,
      "loss": 0.1983,
      "step": 3322
    },
    {
      "epoch": 0.09694264542855476,
      "grad_norm": 1.012331012789619,
      "learning_rate": 9.883004700899299e-06,
      "loss": 0.176,
      "step": 3323
    },
    {
      "epoch": 0.09697181865919832,
      "grad_norm": 0.8572340863566649,
      "learning_rate": 9.882903077632245e-06,
      "loss": 0.1965,
      "step": 3324
    },
    {
      "epoch": 0.09700099188984188,
      "grad_norm": 0.8623076186261965,
      "learning_rate": 9.882801410771798e-06,
      "loss": 0.166,
      "step": 3325
    },
    {
      "epoch": 0.09703016512048544,
      "grad_norm": 1.071937739099093,
      "learning_rate": 9.882699700318865e-06,
      "loss": 0.173,
      "step": 3326
    },
    {
      "epoch": 0.09705933835112901,
      "grad_norm": 1.0500033029737166,
      "learning_rate": 9.882597946274356e-06,
      "loss": 0.1951,
      "step": 3327
    },
    {
      "epoch": 0.09708851158177256,
      "grad_norm": 1.1095799706133407,
      "learning_rate": 9.882496148639178e-06,
      "loss": 0.1773,
      "step": 3328
    },
    {
      "epoch": 0.09711768481241613,
      "grad_norm": 0.8412458752082929,
      "learning_rate": 9.882394307414237e-06,
      "loss": 0.2105,
      "step": 3329
    },
    {
      "epoch": 0.09714685804305968,
      "grad_norm": 0.9602601915159229,
      "learning_rate": 9.88229242260045e-06,
      "loss": 0.1838,
      "step": 3330
    },
    {
      "epoch": 0.09717603127370325,
      "grad_norm": 1.056922883017463,
      "learning_rate": 9.882190494198718e-06,
      "loss": 0.1984,
      "step": 3331
    },
    {
      "epoch": 0.09720520450434682,
      "grad_norm": 0.9087468509235599,
      "learning_rate": 9.882088522209956e-06,
      "loss": 0.1755,
      "step": 3332
    },
    {
      "epoch": 0.09723437773499037,
      "grad_norm": 0.8542197272026406,
      "learning_rate": 9.881986506635073e-06,
      "loss": 0.1816,
      "step": 3333
    },
    {
      "epoch": 0.09726355096563394,
      "grad_norm": 0.9848855524266356,
      "learning_rate": 9.88188444747498e-06,
      "loss": 0.1975,
      "step": 3334
    },
    {
      "epoch": 0.09729272419627749,
      "grad_norm": 1.0557472764375053,
      "learning_rate": 9.881782344730588e-06,
      "loss": 0.1866,
      "step": 3335
    },
    {
      "epoch": 0.09732189742692106,
      "grad_norm": 0.9167491324108101,
      "learning_rate": 9.881680198402808e-06,
      "loss": 0.1833,
      "step": 3336
    },
    {
      "epoch": 0.09735107065756463,
      "grad_norm": 0.9669479835621975,
      "learning_rate": 9.881578008492554e-06,
      "loss": 0.2438,
      "step": 3337
    },
    {
      "epoch": 0.09738024388820818,
      "grad_norm": 1.132293288909696,
      "learning_rate": 9.881475775000735e-06,
      "loss": 0.2082,
      "step": 3338
    },
    {
      "epoch": 0.09740941711885175,
      "grad_norm": 1.0823698455225095,
      "learning_rate": 9.881373497928267e-06,
      "loss": 0.1738,
      "step": 3339
    },
    {
      "epoch": 0.0974385903494953,
      "grad_norm": 1.2544833097035346,
      "learning_rate": 9.881271177276061e-06,
      "loss": 0.2096,
      "step": 3340
    },
    {
      "epoch": 0.09746776358013887,
      "grad_norm": 1.025550403217825,
      "learning_rate": 9.881168813045032e-06,
      "loss": 0.1918,
      "step": 3341
    },
    {
      "epoch": 0.09749693681078242,
      "grad_norm": 0.7801058042312882,
      "learning_rate": 9.881066405236093e-06,
      "loss": 0.1862,
      "step": 3342
    },
    {
      "epoch": 0.09752611004142599,
      "grad_norm": 1.123647415104818,
      "learning_rate": 9.880963953850158e-06,
      "loss": 0.1944,
      "step": 3343
    },
    {
      "epoch": 0.09755528327206955,
      "grad_norm": 0.9789486274989931,
      "learning_rate": 9.880861458888141e-06,
      "loss": 0.2039,
      "step": 3344
    },
    {
      "epoch": 0.09758445650271311,
      "grad_norm": 0.8972913970938006,
      "learning_rate": 9.88075892035096e-06,
      "loss": 0.1849,
      "step": 3345
    },
    {
      "epoch": 0.09761362973335667,
      "grad_norm": 1.4062895258580415,
      "learning_rate": 9.880656338239527e-06,
      "loss": 0.1955,
      "step": 3346
    },
    {
      "epoch": 0.09764280296400023,
      "grad_norm": 0.9490073349859146,
      "learning_rate": 9.880553712554759e-06,
      "loss": 0.1881,
      "step": 3347
    },
    {
      "epoch": 0.0976719761946438,
      "grad_norm": 1.015122416593752,
      "learning_rate": 9.880451043297574e-06,
      "loss": 0.1759,
      "step": 3348
    },
    {
      "epoch": 0.09770114942528736,
      "grad_norm": 0.8943725267473936,
      "learning_rate": 9.880348330468885e-06,
      "loss": 0.1793,
      "step": 3349
    },
    {
      "epoch": 0.09773032265593092,
      "grad_norm": 0.969702487842303,
      "learning_rate": 9.880245574069613e-06,
      "loss": 0.1867,
      "step": 3350
    },
    {
      "epoch": 0.09775949588657448,
      "grad_norm": 1.2799073018645288,
      "learning_rate": 9.880142774100673e-06,
      "loss": 0.1787,
      "step": 3351
    },
    {
      "epoch": 0.09778866911721804,
      "grad_norm": 1.0941196809913207,
      "learning_rate": 9.880039930562983e-06,
      "loss": 0.1813,
      "step": 3352
    },
    {
      "epoch": 0.0978178423478616,
      "grad_norm": 1.0847507720429033,
      "learning_rate": 9.879937043457462e-06,
      "loss": 0.1697,
      "step": 3353
    },
    {
      "epoch": 0.09784701557850517,
      "grad_norm": 0.8460299768224915,
      "learning_rate": 9.879834112785028e-06,
      "loss": 0.2061,
      "step": 3354
    },
    {
      "epoch": 0.09787618880914872,
      "grad_norm": 1.1308387993487357,
      "learning_rate": 9.8797311385466e-06,
      "loss": 0.1928,
      "step": 3355
    },
    {
      "epoch": 0.09790536203979229,
      "grad_norm": 1.051652606247428,
      "learning_rate": 9.879628120743096e-06,
      "loss": 0.2231,
      "step": 3356
    },
    {
      "epoch": 0.09793453527043584,
      "grad_norm": 0.9733068669877226,
      "learning_rate": 9.879525059375438e-06,
      "loss": 0.198,
      "step": 3357
    },
    {
      "epoch": 0.09796370850107941,
      "grad_norm": 1.0361697472510567,
      "learning_rate": 9.879421954444546e-06,
      "loss": 0.1969,
      "step": 3358
    },
    {
      "epoch": 0.09799288173172298,
      "grad_norm": 0.9013273137220902,
      "learning_rate": 9.879318805951339e-06,
      "loss": 0.2174,
      "step": 3359
    },
    {
      "epoch": 0.09802205496236653,
      "grad_norm": 0.9373053268113689,
      "learning_rate": 9.879215613896737e-06,
      "loss": 0.1638,
      "step": 3360
    },
    {
      "epoch": 0.0980512281930101,
      "grad_norm": 1.0238897306994317,
      "learning_rate": 9.879112378281666e-06,
      "loss": 0.1904,
      "step": 3361
    },
    {
      "epoch": 0.09808040142365365,
      "grad_norm": 0.9165639559651162,
      "learning_rate": 9.879009099107042e-06,
      "loss": 0.1961,
      "step": 3362
    },
    {
      "epoch": 0.09810957465429722,
      "grad_norm": 1.13160027184167,
      "learning_rate": 9.87890577637379e-06,
      "loss": 0.2078,
      "step": 3363
    },
    {
      "epoch": 0.09813874788494077,
      "grad_norm": 1.0210775302186303,
      "learning_rate": 9.878802410082832e-06,
      "loss": 0.1955,
      "step": 3364
    },
    {
      "epoch": 0.09816792111558434,
      "grad_norm": 0.8217896526873605,
      "learning_rate": 9.87869900023509e-06,
      "loss": 0.1682,
      "step": 3365
    },
    {
      "epoch": 0.0981970943462279,
      "grad_norm": 1.1233133348841937,
      "learning_rate": 9.87859554683149e-06,
      "loss": 0.1885,
      "step": 3366
    },
    {
      "epoch": 0.09822626757687146,
      "grad_norm": 1.0795927439787238,
      "learning_rate": 9.878492049872951e-06,
      "loss": 0.1978,
      "step": 3367
    },
    {
      "epoch": 0.09825544080751503,
      "grad_norm": 1.2355938655652003,
      "learning_rate": 9.8783885093604e-06,
      "loss": 0.2049,
      "step": 3368
    },
    {
      "epoch": 0.09828461403815858,
      "grad_norm": 1.0884883466673847,
      "learning_rate": 9.878284925294763e-06,
      "loss": 0.1823,
      "step": 3369
    },
    {
      "epoch": 0.09831378726880215,
      "grad_norm": 1.2905904713936949,
      "learning_rate": 9.87818129767696e-06,
      "loss": 0.2198,
      "step": 3370
    },
    {
      "epoch": 0.09834296049944571,
      "grad_norm": 0.8099165654820559,
      "learning_rate": 9.878077626507921e-06,
      "loss": 0.1829,
      "step": 3371
    },
    {
      "epoch": 0.09837213373008927,
      "grad_norm": 1.206895294436484,
      "learning_rate": 9.877973911788569e-06,
      "loss": 0.2136,
      "step": 3372
    },
    {
      "epoch": 0.09840130696073283,
      "grad_norm": 1.284751688336377,
      "learning_rate": 9.87787015351983e-06,
      "loss": 0.1921,
      "step": 3373
    },
    {
      "epoch": 0.09843048019137639,
      "grad_norm": 0.7459196499352689,
      "learning_rate": 9.877766351702631e-06,
      "loss": 0.1664,
      "step": 3374
    },
    {
      "epoch": 0.09845965342201995,
      "grad_norm": 1.2156262081437241,
      "learning_rate": 9.877662506337898e-06,
      "loss": 0.1964,
      "step": 3375
    },
    {
      "epoch": 0.09848882665266352,
      "grad_norm": 0.9004909046762573,
      "learning_rate": 9.877558617426558e-06,
      "loss": 0.2037,
      "step": 3376
    },
    {
      "epoch": 0.09851799988330708,
      "grad_norm": 0.8786610661004547,
      "learning_rate": 9.877454684969541e-06,
      "loss": 0.1864,
      "step": 3377
    },
    {
      "epoch": 0.09854717311395064,
      "grad_norm": 0.8747028960856301,
      "learning_rate": 9.87735070896777e-06,
      "loss": 0.1912,
      "step": 3378
    },
    {
      "epoch": 0.0985763463445942,
      "grad_norm": 0.9535249120129909,
      "learning_rate": 9.87724668942218e-06,
      "loss": 0.1775,
      "step": 3379
    },
    {
      "epoch": 0.09860551957523776,
      "grad_norm": 0.7698045496984315,
      "learning_rate": 9.877142626333692e-06,
      "loss": 0.1693,
      "step": 3380
    },
    {
      "epoch": 0.09863469280588133,
      "grad_norm": 0.8464332139561874,
      "learning_rate": 9.87703851970324e-06,
      "loss": 0.1714,
      "step": 3381
    },
    {
      "epoch": 0.09866386603652488,
      "grad_norm": 0.9022198071684441,
      "learning_rate": 9.876934369531754e-06,
      "loss": 0.1952,
      "step": 3382
    },
    {
      "epoch": 0.09869303926716845,
      "grad_norm": 0.9859271685129993,
      "learning_rate": 9.87683017582016e-06,
      "loss": 0.1888,
      "step": 3383
    },
    {
      "epoch": 0.098722212497812,
      "grad_norm": 1.026625590481748,
      "learning_rate": 9.876725938569392e-06,
      "loss": 0.1818,
      "step": 3384
    },
    {
      "epoch": 0.09875138572845557,
      "grad_norm": 0.9036674784035109,
      "learning_rate": 9.876621657780378e-06,
      "loss": 0.1709,
      "step": 3385
    },
    {
      "epoch": 0.09878055895909912,
      "grad_norm": 1.0830903552836817,
      "learning_rate": 9.876517333454051e-06,
      "loss": 0.1864,
      "step": 3386
    },
    {
      "epoch": 0.09880973218974269,
      "grad_norm": 1.0448141036868295,
      "learning_rate": 9.876412965591343e-06,
      "loss": 0.1951,
      "step": 3387
    },
    {
      "epoch": 0.09883890542038626,
      "grad_norm": 0.9688155387079148,
      "learning_rate": 9.876308554193182e-06,
      "loss": 0.1695,
      "step": 3388
    },
    {
      "epoch": 0.09886807865102981,
      "grad_norm": 0.8334733215835574,
      "learning_rate": 9.876204099260501e-06,
      "loss": 0.1769,
      "step": 3389
    },
    {
      "epoch": 0.09889725188167338,
      "grad_norm": 0.9606360477202657,
      "learning_rate": 9.876099600794236e-06,
      "loss": 0.1955,
      "step": 3390
    },
    {
      "epoch": 0.09892642511231693,
      "grad_norm": 0.8205109808036659,
      "learning_rate": 9.875995058795316e-06,
      "loss": 0.2048,
      "step": 3391
    },
    {
      "epoch": 0.0989555983429605,
      "grad_norm": 0.9982764097769949,
      "learning_rate": 9.875890473264678e-06,
      "loss": 0.2095,
      "step": 3392
    },
    {
      "epoch": 0.09898477157360407,
      "grad_norm": 1.0626266558414459,
      "learning_rate": 9.875785844203251e-06,
      "loss": 0.2021,
      "step": 3393
    },
    {
      "epoch": 0.09901394480424762,
      "grad_norm": 0.9973487185751021,
      "learning_rate": 9.875681171611974e-06,
      "loss": 0.1683,
      "step": 3394
    },
    {
      "epoch": 0.09904311803489119,
      "grad_norm": 1.0140220788974939,
      "learning_rate": 9.87557645549178e-06,
      "loss": 0.1782,
      "step": 3395
    },
    {
      "epoch": 0.09907229126553474,
      "grad_norm": 0.9630179068400443,
      "learning_rate": 9.875471695843603e-06,
      "loss": 0.169,
      "step": 3396
    },
    {
      "epoch": 0.09910146449617831,
      "grad_norm": 0.7429839039648668,
      "learning_rate": 9.875366892668376e-06,
      "loss": 0.1653,
      "step": 3397
    },
    {
      "epoch": 0.09913063772682187,
      "grad_norm": 1.075112707108095,
      "learning_rate": 9.87526204596704e-06,
      "loss": 0.1951,
      "step": 3398
    },
    {
      "epoch": 0.09915981095746543,
      "grad_norm": 0.9306430813422164,
      "learning_rate": 9.875157155740528e-06,
      "loss": 0.1878,
      "step": 3399
    },
    {
      "epoch": 0.099188984188109,
      "grad_norm": 0.81734598225374,
      "learning_rate": 9.875052221989777e-06,
      "loss": 0.182,
      "step": 3400
    },
    {
      "epoch": 0.09921815741875255,
      "grad_norm": 0.885771193859634,
      "learning_rate": 9.874947244715722e-06,
      "loss": 0.1704,
      "step": 3401
    },
    {
      "epoch": 0.09924733064939611,
      "grad_norm": 0.8940409062009961,
      "learning_rate": 9.874842223919303e-06,
      "loss": 0.1752,
      "step": 3402
    },
    {
      "epoch": 0.09927650388003968,
      "grad_norm": 0.8175311045028216,
      "learning_rate": 9.874737159601455e-06,
      "loss": 0.1635,
      "step": 3403
    },
    {
      "epoch": 0.09930567711068324,
      "grad_norm": 0.9443309919729741,
      "learning_rate": 9.87463205176312e-06,
      "loss": 0.2026,
      "step": 3404
    },
    {
      "epoch": 0.0993348503413268,
      "grad_norm": 0.919398290601415,
      "learning_rate": 9.87452690040523e-06,
      "loss": 0.1726,
      "step": 3405
    },
    {
      "epoch": 0.09936402357197036,
      "grad_norm": 0.8357805180322369,
      "learning_rate": 9.87442170552873e-06,
      "loss": 0.1836,
      "step": 3406
    },
    {
      "epoch": 0.09939319680261392,
      "grad_norm": 0.885788982003115,
      "learning_rate": 9.874316467134557e-06,
      "loss": 0.1737,
      "step": 3407
    },
    {
      "epoch": 0.09942237003325748,
      "grad_norm": 0.9252449527546076,
      "learning_rate": 9.874211185223649e-06,
      "loss": 0.2005,
      "step": 3408
    },
    {
      "epoch": 0.09945154326390104,
      "grad_norm": 1.4076580398560004,
      "learning_rate": 9.874105859796947e-06,
      "loss": 0.2026,
      "step": 3409
    },
    {
      "epoch": 0.09948071649454461,
      "grad_norm": 0.9840688774481136,
      "learning_rate": 9.87400049085539e-06,
      "loss": 0.185,
      "step": 3410
    },
    {
      "epoch": 0.09950988972518816,
      "grad_norm": 0.8966402285038693,
      "learning_rate": 9.873895078399925e-06,
      "loss": 0.1624,
      "step": 3411
    },
    {
      "epoch": 0.09953906295583173,
      "grad_norm": 0.8245966853829566,
      "learning_rate": 9.873789622431484e-06,
      "loss": 0.1746,
      "step": 3412
    },
    {
      "epoch": 0.09956823618647528,
      "grad_norm": 0.941991276321919,
      "learning_rate": 9.873684122951013e-06,
      "loss": 0.1717,
      "step": 3413
    },
    {
      "epoch": 0.09959740941711885,
      "grad_norm": 0.9563024015671291,
      "learning_rate": 9.873578579959456e-06,
      "loss": 0.1923,
      "step": 3414
    },
    {
      "epoch": 0.09962658264776242,
      "grad_norm": 1.0495462587066298,
      "learning_rate": 9.87347299345775e-06,
      "loss": 0.2,
      "step": 3415
    },
    {
      "epoch": 0.09965575587840597,
      "grad_norm": 1.082272418818284,
      "learning_rate": 9.873367363446843e-06,
      "loss": 0.1999,
      "step": 3416
    },
    {
      "epoch": 0.09968492910904954,
      "grad_norm": 0.9170445275393765,
      "learning_rate": 9.873261689927674e-06,
      "loss": 0.1873,
      "step": 3417
    },
    {
      "epoch": 0.09971410233969309,
      "grad_norm": 0.9607984396105024,
      "learning_rate": 9.873155972901187e-06,
      "loss": 0.1884,
      "step": 3418
    },
    {
      "epoch": 0.09974327557033666,
      "grad_norm": 0.878612477667233,
      "learning_rate": 9.87305021236833e-06,
      "loss": 0.1969,
      "step": 3419
    },
    {
      "epoch": 0.09977244880098023,
      "grad_norm": 0.8412296816249966,
      "learning_rate": 9.87294440833004e-06,
      "loss": 0.1816,
      "step": 3420
    },
    {
      "epoch": 0.09980162203162378,
      "grad_norm": 1.023308551384643,
      "learning_rate": 9.872838560787269e-06,
      "loss": 0.1765,
      "step": 3421
    },
    {
      "epoch": 0.09983079526226735,
      "grad_norm": 0.7906909622655207,
      "learning_rate": 9.872732669740956e-06,
      "loss": 0.1857,
      "step": 3422
    },
    {
      "epoch": 0.0998599684929109,
      "grad_norm": 0.8820929409110352,
      "learning_rate": 9.87262673519205e-06,
      "loss": 0.2048,
      "step": 3423
    },
    {
      "epoch": 0.09988914172355447,
      "grad_norm": 0.968160720640558,
      "learning_rate": 9.872520757141497e-06,
      "loss": 0.1854,
      "step": 3424
    },
    {
      "epoch": 0.09991831495419803,
      "grad_norm": 0.70461981116057,
      "learning_rate": 9.87241473559024e-06,
      "loss": 0.183,
      "step": 3425
    },
    {
      "epoch": 0.09994748818484159,
      "grad_norm": 0.6835017711956095,
      "learning_rate": 9.872308670539229e-06,
      "loss": 0.1572,
      "step": 3426
    },
    {
      "epoch": 0.09997666141548515,
      "grad_norm": 0.8106605104603117,
      "learning_rate": 9.872202561989409e-06,
      "loss": 0.188,
      "step": 3427
    },
    {
      "epoch": 0.10000583464612871,
      "grad_norm": 1.0264724096549127,
      "learning_rate": 9.872096409941726e-06,
      "loss": 0.1971,
      "step": 3428
    },
    {
      "epoch": 0.10003500787677228,
      "grad_norm": 0.7914214116749334,
      "learning_rate": 9.871990214397131e-06,
      "loss": 0.183,
      "step": 3429
    },
    {
      "epoch": 0.10006418110741584,
      "grad_norm": 0.9188258887963189,
      "learning_rate": 9.871883975356568e-06,
      "loss": 0.1761,
      "step": 3430
    },
    {
      "epoch": 0.1000933543380594,
      "grad_norm": 0.8394724893678543,
      "learning_rate": 9.87177769282099e-06,
      "loss": 0.1804,
      "step": 3431
    },
    {
      "epoch": 0.10012252756870296,
      "grad_norm": 1.2163310379645849,
      "learning_rate": 9.871671366791344e-06,
      "loss": 0.1958,
      "step": 3432
    },
    {
      "epoch": 0.10015170079934652,
      "grad_norm": 0.8273408849172802,
      "learning_rate": 9.87156499726858e-06,
      "loss": 0.1693,
      "step": 3433
    },
    {
      "epoch": 0.10018087402999008,
      "grad_norm": 0.9024040961441147,
      "learning_rate": 9.871458584253644e-06,
      "loss": 0.1908,
      "step": 3434
    },
    {
      "epoch": 0.10021004726063364,
      "grad_norm": 0.8330899285828883,
      "learning_rate": 9.871352127747489e-06,
      "loss": 0.1711,
      "step": 3435
    },
    {
      "epoch": 0.1002392204912772,
      "grad_norm": 0.9509143420825992,
      "learning_rate": 9.871245627751067e-06,
      "loss": 0.1846,
      "step": 3436
    },
    {
      "epoch": 0.10026839372192077,
      "grad_norm": 1.0883350035256232,
      "learning_rate": 9.871139084265324e-06,
      "loss": 0.2013,
      "step": 3437
    },
    {
      "epoch": 0.10029756695256432,
      "grad_norm": 1.1309254010693355,
      "learning_rate": 9.871032497291217e-06,
      "loss": 0.2145,
      "step": 3438
    },
    {
      "epoch": 0.10032674018320789,
      "grad_norm": 0.9576681093904603,
      "learning_rate": 9.870925866829692e-06,
      "loss": 0.1901,
      "step": 3439
    },
    {
      "epoch": 0.10035591341385144,
      "grad_norm": 1.1262857177310994,
      "learning_rate": 9.870819192881707e-06,
      "loss": 0.1655,
      "step": 3440
    },
    {
      "epoch": 0.10038508664449501,
      "grad_norm": 0.9866849846226996,
      "learning_rate": 9.870712475448207e-06,
      "loss": 0.1817,
      "step": 3441
    },
    {
      "epoch": 0.10041425987513858,
      "grad_norm": 1.0529171418373293,
      "learning_rate": 9.870605714530152e-06,
      "loss": 0.1915,
      "step": 3442
    },
    {
      "epoch": 0.10044343310578213,
      "grad_norm": 1.0145462296288652,
      "learning_rate": 9.870498910128492e-06,
      "loss": 0.189,
      "step": 3443
    },
    {
      "epoch": 0.1004726063364257,
      "grad_norm": 0.8909609759811756,
      "learning_rate": 9.870392062244178e-06,
      "loss": 0.1554,
      "step": 3444
    },
    {
      "epoch": 0.10050177956706925,
      "grad_norm": 1.1927000558620366,
      "learning_rate": 9.870285170878167e-06,
      "loss": 0.177,
      "step": 3445
    },
    {
      "epoch": 0.10053095279771282,
      "grad_norm": 1.178151159469842,
      "learning_rate": 9.870178236031413e-06,
      "loss": 0.2003,
      "step": 3446
    },
    {
      "epoch": 0.10056012602835639,
      "grad_norm": 0.975324090964805,
      "learning_rate": 9.870071257704871e-06,
      "loss": 0.1837,
      "step": 3447
    },
    {
      "epoch": 0.10058929925899994,
      "grad_norm": 1.1448229783115247,
      "learning_rate": 9.869964235899494e-06,
      "loss": 0.174,
      "step": 3448
    },
    {
      "epoch": 0.1006184724896435,
      "grad_norm": 0.9514149119948482,
      "learning_rate": 9.86985717061624e-06,
      "loss": 0.1821,
      "step": 3449
    },
    {
      "epoch": 0.10064764572028706,
      "grad_norm": 0.847909054724833,
      "learning_rate": 9.869750061856063e-06,
      "loss": 0.1681,
      "step": 3450
    },
    {
      "epoch": 0.10067681895093063,
      "grad_norm": 0.9218506513046638,
      "learning_rate": 9.869642909619921e-06,
      "loss": 0.1883,
      "step": 3451
    },
    {
      "epoch": 0.1007059921815742,
      "grad_norm": 0.9041931059317265,
      "learning_rate": 9.869535713908768e-06,
      "loss": 0.1958,
      "step": 3452
    },
    {
      "epoch": 0.10073516541221775,
      "grad_norm": 0.8896465975703093,
      "learning_rate": 9.869428474723563e-06,
      "loss": 0.1794,
      "step": 3453
    },
    {
      "epoch": 0.10076433864286131,
      "grad_norm": 0.9377163592942124,
      "learning_rate": 9.869321192065264e-06,
      "loss": 0.1734,
      "step": 3454
    },
    {
      "epoch": 0.10079351187350487,
      "grad_norm": 1.0246636715401394,
      "learning_rate": 9.869213865934827e-06,
      "loss": 0.1938,
      "step": 3455
    },
    {
      "epoch": 0.10082268510414844,
      "grad_norm": 0.8475687952188002,
      "learning_rate": 9.869106496333213e-06,
      "loss": 0.2209,
      "step": 3456
    },
    {
      "epoch": 0.10085185833479199,
      "grad_norm": 0.9379196203907476,
      "learning_rate": 9.868999083261377e-06,
      "loss": 0.1892,
      "step": 3457
    },
    {
      "epoch": 0.10088103156543556,
      "grad_norm": 0.9441790698051824,
      "learning_rate": 9.868891626720279e-06,
      "loss": 0.2129,
      "step": 3458
    },
    {
      "epoch": 0.10091020479607912,
      "grad_norm": 0.7326471551649248,
      "learning_rate": 9.868784126710878e-06,
      "loss": 0.1785,
      "step": 3459
    },
    {
      "epoch": 0.10093937802672268,
      "grad_norm": 0.818500409204287,
      "learning_rate": 9.868676583234136e-06,
      "loss": 0.1778,
      "step": 3460
    },
    {
      "epoch": 0.10096855125736624,
      "grad_norm": 0.8706269163312929,
      "learning_rate": 9.868568996291013e-06,
      "loss": 0.1668,
      "step": 3461
    },
    {
      "epoch": 0.1009977244880098,
      "grad_norm": 1.3126513024180493,
      "learning_rate": 9.868461365882465e-06,
      "loss": 0.1815,
      "step": 3462
    },
    {
      "epoch": 0.10102689771865336,
      "grad_norm": 0.8610585831478037,
      "learning_rate": 9.868353692009458e-06,
      "loss": 0.177,
      "step": 3463
    },
    {
      "epoch": 0.10105607094929693,
      "grad_norm": 1.011781175607378,
      "learning_rate": 9.868245974672952e-06,
      "loss": 0.1965,
      "step": 3464
    },
    {
      "epoch": 0.10108524417994048,
      "grad_norm": 0.833371986012559,
      "learning_rate": 9.868138213873908e-06,
      "loss": 0.172,
      "step": 3465
    },
    {
      "epoch": 0.10111441741058405,
      "grad_norm": 1.022504922983816,
      "learning_rate": 9.868030409613286e-06,
      "loss": 0.1783,
      "step": 3466
    },
    {
      "epoch": 0.1011435906412276,
      "grad_norm": 1.0568751412016992,
      "learning_rate": 9.867922561892053e-06,
      "loss": 0.2108,
      "step": 3467
    },
    {
      "epoch": 0.10117276387187117,
      "grad_norm": 0.9318373715013006,
      "learning_rate": 9.86781467071117e-06,
      "loss": 0.1764,
      "step": 3468
    },
    {
      "epoch": 0.10120193710251474,
      "grad_norm": 0.9056729730442917,
      "learning_rate": 9.867706736071596e-06,
      "loss": 0.1759,
      "step": 3469
    },
    {
      "epoch": 0.10123111033315829,
      "grad_norm": 0.8599796170135051,
      "learning_rate": 9.867598757974302e-06,
      "loss": 0.1844,
      "step": 3470
    },
    {
      "epoch": 0.10126028356380186,
      "grad_norm": 0.9620661176977179,
      "learning_rate": 9.867490736420245e-06,
      "loss": 0.2018,
      "step": 3471
    },
    {
      "epoch": 0.10128945679444541,
      "grad_norm": 0.9284833683199802,
      "learning_rate": 9.867382671410395e-06,
      "loss": 0.1757,
      "step": 3472
    },
    {
      "epoch": 0.10131863002508898,
      "grad_norm": 0.8619858597108815,
      "learning_rate": 9.867274562945713e-06,
      "loss": 0.2095,
      "step": 3473
    },
    {
      "epoch": 0.10134780325573255,
      "grad_norm": 1.040972216388095,
      "learning_rate": 9.867166411027167e-06,
      "loss": 0.1842,
      "step": 3474
    },
    {
      "epoch": 0.1013769764863761,
      "grad_norm": 1.008087937414507,
      "learning_rate": 9.867058215655721e-06,
      "loss": 0.1659,
      "step": 3475
    },
    {
      "epoch": 0.10140614971701967,
      "grad_norm": 0.8469254202320418,
      "learning_rate": 9.86694997683234e-06,
      "loss": 0.1902,
      "step": 3476
    },
    {
      "epoch": 0.10143532294766322,
      "grad_norm": 0.7941411979341042,
      "learning_rate": 9.866841694557993e-06,
      "loss": 0.2063,
      "step": 3477
    },
    {
      "epoch": 0.10146449617830679,
      "grad_norm": 0.8950825419085655,
      "learning_rate": 9.866733368833643e-06,
      "loss": 0.186,
      "step": 3478
    },
    {
      "epoch": 0.10149366940895034,
      "grad_norm": 1.14569798578479,
      "learning_rate": 9.866624999660262e-06,
      "loss": 0.2022,
      "step": 3479
    },
    {
      "epoch": 0.10152284263959391,
      "grad_norm": 0.7917867480873769,
      "learning_rate": 9.866516587038813e-06,
      "loss": 0.1754,
      "step": 3480
    },
    {
      "epoch": 0.10155201587023747,
      "grad_norm": 0.8642967705541632,
      "learning_rate": 9.866408130970267e-06,
      "loss": 0.1829,
      "step": 3481
    },
    {
      "epoch": 0.10158118910088103,
      "grad_norm": 1.5467627532716057,
      "learning_rate": 9.86629963145559e-06,
      "loss": 0.2343,
      "step": 3482
    },
    {
      "epoch": 0.1016103623315246,
      "grad_norm": 0.8266053130281075,
      "learning_rate": 9.86619108849575e-06,
      "loss": 0.1737,
      "step": 3483
    },
    {
      "epoch": 0.10163953556216815,
      "grad_norm": 0.9367688798654689,
      "learning_rate": 9.86608250209172e-06,
      "loss": 0.1775,
      "step": 3484
    },
    {
      "epoch": 0.10166870879281172,
      "grad_norm": 0.9155674535316602,
      "learning_rate": 9.865973872244466e-06,
      "loss": 0.1667,
      "step": 3485
    },
    {
      "epoch": 0.10169788202345528,
      "grad_norm": 0.9847455876401546,
      "learning_rate": 9.865865198954959e-06,
      "loss": 0.2094,
      "step": 3486
    },
    {
      "epoch": 0.10172705525409884,
      "grad_norm": 0.8308552008359891,
      "learning_rate": 9.865756482224169e-06,
      "loss": 0.1995,
      "step": 3487
    },
    {
      "epoch": 0.1017562284847424,
      "grad_norm": 0.8410268218827373,
      "learning_rate": 9.865647722053066e-06,
      "loss": 0.1962,
      "step": 3488
    },
    {
      "epoch": 0.10178540171538596,
      "grad_norm": 1.0087615518486086,
      "learning_rate": 9.865538918442624e-06,
      "loss": 0.1779,
      "step": 3489
    },
    {
      "epoch": 0.10181457494602952,
      "grad_norm": 1.041963448367663,
      "learning_rate": 9.86543007139381e-06,
      "loss": 0.1771,
      "step": 3490
    },
    {
      "epoch": 0.10184374817667309,
      "grad_norm": 1.0238256726632478,
      "learning_rate": 9.865321180907597e-06,
      "loss": 0.167,
      "step": 3491
    },
    {
      "epoch": 0.10187292140731664,
      "grad_norm": 1.161048222587803,
      "learning_rate": 9.86521224698496e-06,
      "loss": 0.205,
      "step": 3492
    },
    {
      "epoch": 0.10190209463796021,
      "grad_norm": 0.9043069579942137,
      "learning_rate": 9.865103269626868e-06,
      "loss": 0.1717,
      "step": 3493
    },
    {
      "epoch": 0.10193126786860376,
      "grad_norm": 0.8923616361442843,
      "learning_rate": 9.864994248834297e-06,
      "loss": 0.1919,
      "step": 3494
    },
    {
      "epoch": 0.10196044109924733,
      "grad_norm": 0.7993846930318241,
      "learning_rate": 9.864885184608217e-06,
      "loss": 0.2093,
      "step": 3495
    },
    {
      "epoch": 0.1019896143298909,
      "grad_norm": 0.9328576530882595,
      "learning_rate": 9.864776076949604e-06,
      "loss": 0.2036,
      "step": 3496
    },
    {
      "epoch": 0.10201878756053445,
      "grad_norm": 0.9497224570949078,
      "learning_rate": 9.864666925859432e-06,
      "loss": 0.1852,
      "step": 3497
    },
    {
      "epoch": 0.10204796079117802,
      "grad_norm": 0.8879533396575509,
      "learning_rate": 9.864557731338675e-06,
      "loss": 0.1747,
      "step": 3498
    },
    {
      "epoch": 0.10207713402182157,
      "grad_norm": 0.8937655185804683,
      "learning_rate": 9.864448493388307e-06,
      "loss": 0.1946,
      "step": 3499
    },
    {
      "epoch": 0.10210630725246514,
      "grad_norm": 0.9357675307242985,
      "learning_rate": 9.864339212009304e-06,
      "loss": 0.197,
      "step": 3500
    },
    {
      "epoch": 0.10213548048310869,
      "grad_norm": 0.8261058627027291,
      "learning_rate": 9.864229887202643e-06,
      "loss": 0.1909,
      "step": 3501
    },
    {
      "epoch": 0.10216465371375226,
      "grad_norm": 1.0006517067419611,
      "learning_rate": 9.864120518969298e-06,
      "loss": 0.1902,
      "step": 3502
    },
    {
      "epoch": 0.10219382694439583,
      "grad_norm": 1.0604154204135914,
      "learning_rate": 9.864011107310246e-06,
      "loss": 0.1867,
      "step": 3503
    },
    {
      "epoch": 0.10222300017503938,
      "grad_norm": 0.8545044363306173,
      "learning_rate": 9.863901652226464e-06,
      "loss": 0.2068,
      "step": 3504
    },
    {
      "epoch": 0.10225217340568295,
      "grad_norm": 1.0859515433427762,
      "learning_rate": 9.86379215371893e-06,
      "loss": 0.1637,
      "step": 3505
    },
    {
      "epoch": 0.1022813466363265,
      "grad_norm": 1.1918579655623438,
      "learning_rate": 9.86368261178862e-06,
      "loss": 0.2019,
      "step": 3506
    },
    {
      "epoch": 0.10231051986697007,
      "grad_norm": 1.0430591125132915,
      "learning_rate": 9.863573026436513e-06,
      "loss": 0.1965,
      "step": 3507
    },
    {
      "epoch": 0.10233969309761363,
      "grad_norm": 1.221202207806947,
      "learning_rate": 9.863463397663587e-06,
      "loss": 0.1915,
      "step": 3508
    },
    {
      "epoch": 0.10236886632825719,
      "grad_norm": 1.142944732564729,
      "learning_rate": 9.863353725470822e-06,
      "loss": 0.2163,
      "step": 3509
    },
    {
      "epoch": 0.10239803955890076,
      "grad_norm": 0.9364772931896006,
      "learning_rate": 9.863244009859194e-06,
      "loss": 0.1711,
      "step": 3510
    },
    {
      "epoch": 0.10242721278954431,
      "grad_norm": 1.052710404061729,
      "learning_rate": 9.863134250829685e-06,
      "loss": 0.1801,
      "step": 3511
    },
    {
      "epoch": 0.10245638602018788,
      "grad_norm": 1.0758791904026912,
      "learning_rate": 9.863024448383273e-06,
      "loss": 0.1846,
      "step": 3512
    },
    {
      "epoch": 0.10248555925083144,
      "grad_norm": 1.038212081530209,
      "learning_rate": 9.86291460252094e-06,
      "loss": 0.1926,
      "step": 3513
    },
    {
      "epoch": 0.102514732481475,
      "grad_norm": 1.134331392771612,
      "learning_rate": 9.862804713243667e-06,
      "loss": 0.1836,
      "step": 3514
    },
    {
      "epoch": 0.10254390571211856,
      "grad_norm": 1.0703474061020763,
      "learning_rate": 9.862694780552435e-06,
      "loss": 0.1736,
      "step": 3515
    },
    {
      "epoch": 0.10257307894276212,
      "grad_norm": 1.0638348901860075,
      "learning_rate": 9.862584804448226e-06,
      "loss": 0.2009,
      "step": 3516
    },
    {
      "epoch": 0.10260225217340568,
      "grad_norm": 0.8599113957013631,
      "learning_rate": 9.862474784932018e-06,
      "loss": 0.1828,
      "step": 3517
    },
    {
      "epoch": 0.10263142540404925,
      "grad_norm": 0.9874231174873996,
      "learning_rate": 9.862364722004798e-06,
      "loss": 0.1715,
      "step": 3518
    },
    {
      "epoch": 0.1026605986346928,
      "grad_norm": 0.8885320043956563,
      "learning_rate": 9.862254615667546e-06,
      "loss": 0.1908,
      "step": 3519
    },
    {
      "epoch": 0.10268977186533637,
      "grad_norm": 1.073164181263853,
      "learning_rate": 9.862144465921244e-06,
      "loss": 0.2105,
      "step": 3520
    },
    {
      "epoch": 0.10271894509597992,
      "grad_norm": 0.8240112977118601,
      "learning_rate": 9.862034272766879e-06,
      "loss": 0.1767,
      "step": 3521
    },
    {
      "epoch": 0.10274811832662349,
      "grad_norm": 0.8359128423414961,
      "learning_rate": 9.86192403620543e-06,
      "loss": 0.1746,
      "step": 3522
    },
    {
      "epoch": 0.10277729155726706,
      "grad_norm": 0.9401539512117901,
      "learning_rate": 9.861813756237886e-06,
      "loss": 0.2089,
      "step": 3523
    },
    {
      "epoch": 0.10280646478791061,
      "grad_norm": 0.7972123494742941,
      "learning_rate": 9.861703432865228e-06,
      "loss": 0.1778,
      "step": 3524
    },
    {
      "epoch": 0.10283563801855418,
      "grad_norm": 0.8325761377041836,
      "learning_rate": 9.861593066088444e-06,
      "loss": 0.1787,
      "step": 3525
    },
    {
      "epoch": 0.10286481124919773,
      "grad_norm": 0.820495223309961,
      "learning_rate": 9.861482655908517e-06,
      "loss": 0.2027,
      "step": 3526
    },
    {
      "epoch": 0.1028939844798413,
      "grad_norm": 0.7924336240807823,
      "learning_rate": 9.861372202326432e-06,
      "loss": 0.1723,
      "step": 3527
    },
    {
      "epoch": 0.10292315771048485,
      "grad_norm": 0.7817727912241805,
      "learning_rate": 9.861261705343178e-06,
      "loss": 0.1683,
      "step": 3528
    },
    {
      "epoch": 0.10295233094112842,
      "grad_norm": 0.86152589055737,
      "learning_rate": 9.861151164959738e-06,
      "loss": 0.1749,
      "step": 3529
    },
    {
      "epoch": 0.10298150417177199,
      "grad_norm": 0.6543986493202684,
      "learning_rate": 9.861040581177103e-06,
      "loss": 0.1547,
      "step": 3530
    },
    {
      "epoch": 0.10301067740241554,
      "grad_norm": 1.0808079835968676,
      "learning_rate": 9.86092995399626e-06,
      "loss": 0.1648,
      "step": 3531
    },
    {
      "epoch": 0.10303985063305911,
      "grad_norm": 0.8424842609109987,
      "learning_rate": 9.860819283418192e-06,
      "loss": 0.1878,
      "step": 3532
    },
    {
      "epoch": 0.10306902386370266,
      "grad_norm": 0.8238721639152723,
      "learning_rate": 9.860708569443888e-06,
      "loss": 0.1892,
      "step": 3533
    },
    {
      "epoch": 0.10309819709434623,
      "grad_norm": 0.8802106856130075,
      "learning_rate": 9.860597812074343e-06,
      "loss": 0.1705,
      "step": 3534
    },
    {
      "epoch": 0.1031273703249898,
      "grad_norm": 0.7550766242911688,
      "learning_rate": 9.860487011310537e-06,
      "loss": 0.1472,
      "step": 3535
    },
    {
      "epoch": 0.10315654355563335,
      "grad_norm": 0.9375274970501286,
      "learning_rate": 9.860376167153466e-06,
      "loss": 0.1783,
      "step": 3536
    },
    {
      "epoch": 0.10318571678627692,
      "grad_norm": 0.9816305146749097,
      "learning_rate": 9.860265279604114e-06,
      "loss": 0.2091,
      "step": 3537
    },
    {
      "epoch": 0.10321489001692047,
      "grad_norm": 0.9361128396163539,
      "learning_rate": 9.860154348663476e-06,
      "loss": 0.1865,
      "step": 3538
    },
    {
      "epoch": 0.10324406324756404,
      "grad_norm": 0.9431525304148766,
      "learning_rate": 9.86004337433254e-06,
      "loss": 0.1921,
      "step": 3539
    },
    {
      "epoch": 0.1032732364782076,
      "grad_norm": 1.1496564048652087,
      "learning_rate": 9.859932356612297e-06,
      "loss": 0.1886,
      "step": 3540
    },
    {
      "epoch": 0.10330240970885116,
      "grad_norm": 0.9096912734203451,
      "learning_rate": 9.859821295503736e-06,
      "loss": 0.2,
      "step": 3541
    },
    {
      "epoch": 0.10333158293949472,
      "grad_norm": 0.7937785251353193,
      "learning_rate": 9.859710191007851e-06,
      "loss": 0.2068,
      "step": 3542
    },
    {
      "epoch": 0.10336075617013828,
      "grad_norm": 1.0477861995956441,
      "learning_rate": 9.859599043125636e-06,
      "loss": 0.1765,
      "step": 3543
    },
    {
      "epoch": 0.10338992940078184,
      "grad_norm": 0.8480879574997857,
      "learning_rate": 9.85948785185808e-06,
      "loss": 0.1709,
      "step": 3544
    },
    {
      "epoch": 0.10341910263142541,
      "grad_norm": 0.7737043039154154,
      "learning_rate": 9.859376617206175e-06,
      "loss": 0.1727,
      "step": 3545
    },
    {
      "epoch": 0.10344827586206896,
      "grad_norm": 0.8627681389248744,
      "learning_rate": 9.859265339170918e-06,
      "loss": 0.1763,
      "step": 3546
    },
    {
      "epoch": 0.10347744909271253,
      "grad_norm": 0.7419522433195506,
      "learning_rate": 9.859154017753299e-06,
      "loss": 0.1755,
      "step": 3547
    },
    {
      "epoch": 0.10350662232335608,
      "grad_norm": 0.9000903192887774,
      "learning_rate": 9.859042652954312e-06,
      "loss": 0.1811,
      "step": 3548
    },
    {
      "epoch": 0.10353579555399965,
      "grad_norm": 0.8256068494585156,
      "learning_rate": 9.858931244774952e-06,
      "loss": 0.1972,
      "step": 3549
    },
    {
      "epoch": 0.1035649687846432,
      "grad_norm": 0.8784834109295333,
      "learning_rate": 9.858819793216214e-06,
      "loss": 0.1571,
      "step": 3550
    },
    {
      "epoch": 0.10359414201528677,
      "grad_norm": 0.7339011855047681,
      "learning_rate": 9.858708298279094e-06,
      "loss": 0.1682,
      "step": 3551
    },
    {
      "epoch": 0.10362331524593034,
      "grad_norm": 0.776599733420472,
      "learning_rate": 9.858596759964586e-06,
      "loss": 0.1947,
      "step": 3552
    },
    {
      "epoch": 0.10365248847657389,
      "grad_norm": 1.0010043951440706,
      "learning_rate": 9.858485178273684e-06,
      "loss": 0.2001,
      "step": 3553
    },
    {
      "epoch": 0.10368166170721746,
      "grad_norm": 0.9808689083488781,
      "learning_rate": 9.858373553207387e-06,
      "loss": 0.1782,
      "step": 3554
    },
    {
      "epoch": 0.10371083493786101,
      "grad_norm": 0.7748226788065447,
      "learning_rate": 9.858261884766693e-06,
      "loss": 0.1694,
      "step": 3555
    },
    {
      "epoch": 0.10374000816850458,
      "grad_norm": 0.9663212703640649,
      "learning_rate": 9.858150172952594e-06,
      "loss": 0.1901,
      "step": 3556
    },
    {
      "epoch": 0.10376918139914815,
      "grad_norm": 0.9912394499988492,
      "learning_rate": 9.85803841776609e-06,
      "loss": 0.1836,
      "step": 3557
    },
    {
      "epoch": 0.1037983546297917,
      "grad_norm": 1.1206233748183603,
      "learning_rate": 9.857926619208181e-06,
      "loss": 0.1824,
      "step": 3558
    },
    {
      "epoch": 0.10382752786043527,
      "grad_norm": 0.8513351639284373,
      "learning_rate": 9.857814777279861e-06,
      "loss": 0.1862,
      "step": 3559
    },
    {
      "epoch": 0.10385670109107882,
      "grad_norm": 1.1036362299937632,
      "learning_rate": 9.85770289198213e-06,
      "loss": 0.173,
      "step": 3560
    },
    {
      "epoch": 0.10388587432172239,
      "grad_norm": 0.917666980493957,
      "learning_rate": 9.85759096331599e-06,
      "loss": 0.1692,
      "step": 3561
    },
    {
      "epoch": 0.10391504755236596,
      "grad_norm": 0.9200122465756295,
      "learning_rate": 9.857478991282434e-06,
      "loss": 0.1897,
      "step": 3562
    },
    {
      "epoch": 0.10394422078300951,
      "grad_norm": 0.9203765928255002,
      "learning_rate": 9.857366975882468e-06,
      "loss": 0.21,
      "step": 3563
    },
    {
      "epoch": 0.10397339401365308,
      "grad_norm": 0.8525297570143247,
      "learning_rate": 9.857254917117087e-06,
      "loss": 0.1877,
      "step": 3564
    },
    {
      "epoch": 0.10400256724429663,
      "grad_norm": 0.7806261111855988,
      "learning_rate": 9.857142814987295e-06,
      "loss": 0.1892,
      "step": 3565
    },
    {
      "epoch": 0.1040317404749402,
      "grad_norm": 0.7587133703653338,
      "learning_rate": 9.85703066949409e-06,
      "loss": 0.1845,
      "step": 3566
    },
    {
      "epoch": 0.10406091370558376,
      "grad_norm": 1.026226746266854,
      "learning_rate": 9.856918480638476e-06,
      "loss": 0.191,
      "step": 3567
    },
    {
      "epoch": 0.10409008693622732,
      "grad_norm": 0.7792006638213195,
      "learning_rate": 9.856806248421453e-06,
      "loss": 0.1728,
      "step": 3568
    },
    {
      "epoch": 0.10411926016687088,
      "grad_norm": 0.8558516870616105,
      "learning_rate": 9.856693972844022e-06,
      "loss": 0.1595,
      "step": 3569
    },
    {
      "epoch": 0.10414843339751444,
      "grad_norm": 0.8568316620101666,
      "learning_rate": 9.856581653907188e-06,
      "loss": 0.1845,
      "step": 3570
    },
    {
      "epoch": 0.104177606628158,
      "grad_norm": 0.9358208121428494,
      "learning_rate": 9.856469291611953e-06,
      "loss": 0.1891,
      "step": 3571
    },
    {
      "epoch": 0.10420677985880156,
      "grad_norm": 0.9295522384551842,
      "learning_rate": 9.856356885959318e-06,
      "loss": 0.1942,
      "step": 3572
    },
    {
      "epoch": 0.10423595308944512,
      "grad_norm": 0.8160735448647487,
      "learning_rate": 9.856244436950287e-06,
      "loss": 0.1781,
      "step": 3573
    },
    {
      "epoch": 0.10426512632008869,
      "grad_norm": 1.0885212326819285,
      "learning_rate": 9.856131944585867e-06,
      "loss": 0.201,
      "step": 3574
    },
    {
      "epoch": 0.10429429955073224,
      "grad_norm": 0.9730661270887382,
      "learning_rate": 9.85601940886706e-06,
      "loss": 0.1903,
      "step": 3575
    },
    {
      "epoch": 0.10432347278137581,
      "grad_norm": 0.7703858602175028,
      "learning_rate": 9.85590682979487e-06,
      "loss": 0.1936,
      "step": 3576
    },
    {
      "epoch": 0.10435264601201936,
      "grad_norm": 0.804187780846293,
      "learning_rate": 9.855794207370305e-06,
      "loss": 0.1974,
      "step": 3577
    },
    {
      "epoch": 0.10438181924266293,
      "grad_norm": 0.8588128990261085,
      "learning_rate": 9.855681541594367e-06,
      "loss": 0.226,
      "step": 3578
    },
    {
      "epoch": 0.1044109924733065,
      "grad_norm": 0.9682983627913089,
      "learning_rate": 9.855568832468063e-06,
      "loss": 0.185,
      "step": 3579
    },
    {
      "epoch": 0.10444016570395005,
      "grad_norm": 0.8972332881015471,
      "learning_rate": 9.8554560799924e-06,
      "loss": 0.1866,
      "step": 3580
    },
    {
      "epoch": 0.10446933893459362,
      "grad_norm": 0.817740618854365,
      "learning_rate": 9.855343284168384e-06,
      "loss": 0.1768,
      "step": 3581
    },
    {
      "epoch": 0.10449851216523717,
      "grad_norm": 0.8671290101837122,
      "learning_rate": 9.855230444997021e-06,
      "loss": 0.1845,
      "step": 3582
    },
    {
      "epoch": 0.10452768539588074,
      "grad_norm": 1.0124203321984675,
      "learning_rate": 9.855117562479321e-06,
      "loss": 0.1987,
      "step": 3583
    },
    {
      "epoch": 0.10455685862652431,
      "grad_norm": 1.0825051900887046,
      "learning_rate": 9.855004636616293e-06,
      "loss": 0.1724,
      "step": 3584
    },
    {
      "epoch": 0.10458603185716786,
      "grad_norm": 0.9814958449489769,
      "learning_rate": 9.85489166740894e-06,
      "loss": 0.2068,
      "step": 3585
    },
    {
      "epoch": 0.10461520508781143,
      "grad_norm": 0.9602983957181478,
      "learning_rate": 9.854778654858272e-06,
      "loss": 0.1795,
      "step": 3586
    },
    {
      "epoch": 0.10464437831845498,
      "grad_norm": 0.9920054261851653,
      "learning_rate": 9.854665598965301e-06,
      "loss": 0.176,
      "step": 3587
    },
    {
      "epoch": 0.10467355154909855,
      "grad_norm": 0.9678449489202484,
      "learning_rate": 9.854552499731032e-06,
      "loss": 0.1929,
      "step": 3588
    },
    {
      "epoch": 0.10470272477974212,
      "grad_norm": 0.9525309788802895,
      "learning_rate": 9.85443935715648e-06,
      "loss": 0.1992,
      "step": 3589
    },
    {
      "epoch": 0.10473189801038567,
      "grad_norm": 0.8838725559384846,
      "learning_rate": 9.854326171242651e-06,
      "loss": 0.1708,
      "step": 3590
    },
    {
      "epoch": 0.10476107124102924,
      "grad_norm": 0.9580701114468362,
      "learning_rate": 9.854212941990557e-06,
      "loss": 0.1706,
      "step": 3591
    },
    {
      "epoch": 0.10479024447167279,
      "grad_norm": 1.1849188690459378,
      "learning_rate": 9.854099669401209e-06,
      "loss": 0.1909,
      "step": 3592
    },
    {
      "epoch": 0.10481941770231636,
      "grad_norm": 0.8369489004436974,
      "learning_rate": 9.853986353475618e-06,
      "loss": 0.1949,
      "step": 3593
    },
    {
      "epoch": 0.10484859093295991,
      "grad_norm": 0.8283792487854817,
      "learning_rate": 9.853872994214794e-06,
      "loss": 0.1601,
      "step": 3594
    },
    {
      "epoch": 0.10487776416360348,
      "grad_norm": 0.9514420257774472,
      "learning_rate": 9.853759591619752e-06,
      "loss": 0.1886,
      "step": 3595
    },
    {
      "epoch": 0.10490693739424704,
      "grad_norm": 0.7027861315228137,
      "learning_rate": 9.853646145691502e-06,
      "loss": 0.1724,
      "step": 3596
    },
    {
      "epoch": 0.1049361106248906,
      "grad_norm": 0.821053437805854,
      "learning_rate": 9.85353265643106e-06,
      "loss": 0.2092,
      "step": 3597
    },
    {
      "epoch": 0.10496528385553416,
      "grad_norm": 0.961109184519753,
      "learning_rate": 9.853419123839434e-06,
      "loss": 0.1811,
      "step": 3598
    },
    {
      "epoch": 0.10499445708617772,
      "grad_norm": 0.8388968563304371,
      "learning_rate": 9.853305547917643e-06,
      "loss": 0.2077,
      "step": 3599
    },
    {
      "epoch": 0.10502363031682128,
      "grad_norm": 0.8995002274805877,
      "learning_rate": 9.853191928666699e-06,
      "loss": 0.2083,
      "step": 3600
    },
    {
      "epoch": 0.10505280354746485,
      "grad_norm": 0.8278156054714256,
      "learning_rate": 9.853078266087615e-06,
      "loss": 0.1739,
      "step": 3601
    },
    {
      "epoch": 0.1050819767781084,
      "grad_norm": 0.8207064823983518,
      "learning_rate": 9.852964560181406e-06,
      "loss": 0.173,
      "step": 3602
    },
    {
      "epoch": 0.10511115000875197,
      "grad_norm": 0.778077752028872,
      "learning_rate": 9.852850810949088e-06,
      "loss": 0.1978,
      "step": 3603
    },
    {
      "epoch": 0.10514032323939553,
      "grad_norm": 0.7439386492724069,
      "learning_rate": 9.852737018391678e-06,
      "loss": 0.1907,
      "step": 3604
    },
    {
      "epoch": 0.10516949647003909,
      "grad_norm": 0.8941220101624787,
      "learning_rate": 9.85262318251019e-06,
      "loss": 0.1684,
      "step": 3605
    },
    {
      "epoch": 0.10519866970068266,
      "grad_norm": 0.920803984158187,
      "learning_rate": 9.85250930330564e-06,
      "loss": 0.2096,
      "step": 3606
    },
    {
      "epoch": 0.10522784293132621,
      "grad_norm": 0.8530089595896981,
      "learning_rate": 9.852395380779045e-06,
      "loss": 0.1747,
      "step": 3607
    },
    {
      "epoch": 0.10525701616196978,
      "grad_norm": 0.7817955853277232,
      "learning_rate": 9.852281414931422e-06,
      "loss": 0.2058,
      "step": 3608
    },
    {
      "epoch": 0.10528618939261333,
      "grad_norm": 0.8349454842830898,
      "learning_rate": 9.852167405763791e-06,
      "loss": 0.182,
      "step": 3609
    },
    {
      "epoch": 0.1053153626232569,
      "grad_norm": 0.9498919429639381,
      "learning_rate": 9.852053353277166e-06,
      "loss": 0.1861,
      "step": 3610
    },
    {
      "epoch": 0.10534453585390047,
      "grad_norm": 1.010670781742482,
      "learning_rate": 9.851939257472567e-06,
      "loss": 0.1714,
      "step": 3611
    },
    {
      "epoch": 0.10537370908454402,
      "grad_norm": 0.8576945237017012,
      "learning_rate": 9.851825118351012e-06,
      "loss": 0.1748,
      "step": 3612
    },
    {
      "epoch": 0.10540288231518759,
      "grad_norm": 0.8817090284343435,
      "learning_rate": 9.851710935913522e-06,
      "loss": 0.1497,
      "step": 3613
    },
    {
      "epoch": 0.10543205554583114,
      "grad_norm": 0.9768218949533995,
      "learning_rate": 9.851596710161115e-06,
      "loss": 0.1873,
      "step": 3614
    },
    {
      "epoch": 0.10546122877647471,
      "grad_norm": 0.969484927400703,
      "learning_rate": 9.851482441094809e-06,
      "loss": 0.1937,
      "step": 3615
    },
    {
      "epoch": 0.10549040200711828,
      "grad_norm": 0.8436609685314721,
      "learning_rate": 9.851368128715627e-06,
      "loss": 0.1608,
      "step": 3616
    },
    {
      "epoch": 0.10551957523776183,
      "grad_norm": 0.8973571802412434,
      "learning_rate": 9.85125377302459e-06,
      "loss": 0.2042,
      "step": 3617
    },
    {
      "epoch": 0.1055487484684054,
      "grad_norm": 0.9805136113686406,
      "learning_rate": 9.851139374022715e-06,
      "loss": 0.1761,
      "step": 3618
    },
    {
      "epoch": 0.10557792169904895,
      "grad_norm": 0.8950355394942241,
      "learning_rate": 9.851024931711026e-06,
      "loss": 0.1988,
      "step": 3619
    },
    {
      "epoch": 0.10560709492969252,
      "grad_norm": 0.9967364634792623,
      "learning_rate": 9.850910446090545e-06,
      "loss": 0.1736,
      "step": 3620
    },
    {
      "epoch": 0.10563626816033607,
      "grad_norm": 0.8152349956595033,
      "learning_rate": 9.850795917162295e-06,
      "loss": 0.1721,
      "step": 3621
    },
    {
      "epoch": 0.10566544139097964,
      "grad_norm": 0.9117315910839804,
      "learning_rate": 9.850681344927295e-06,
      "loss": 0.2011,
      "step": 3622
    },
    {
      "epoch": 0.1056946146216232,
      "grad_norm": 0.9183673562073597,
      "learning_rate": 9.85056672938657e-06,
      "loss": 0.1913,
      "step": 3623
    },
    {
      "epoch": 0.10572378785226676,
      "grad_norm": 0.8907167001631543,
      "learning_rate": 9.850452070541145e-06,
      "loss": 0.1997,
      "step": 3624
    },
    {
      "epoch": 0.10575296108291032,
      "grad_norm": 0.7452388870196537,
      "learning_rate": 9.85033736839204e-06,
      "loss": 0.1567,
      "step": 3625
    },
    {
      "epoch": 0.10578213431355388,
      "grad_norm": 1.0293733016375348,
      "learning_rate": 9.850222622940282e-06,
      "loss": 0.1786,
      "step": 3626
    },
    {
      "epoch": 0.10581130754419744,
      "grad_norm": 0.8947462098266787,
      "learning_rate": 9.850107834186893e-06,
      "loss": 0.1807,
      "step": 3627
    },
    {
      "epoch": 0.10584048077484101,
      "grad_norm": 0.8708521042151944,
      "learning_rate": 9.8499930021329e-06,
      "loss": 0.1698,
      "step": 3628
    },
    {
      "epoch": 0.10586965400548456,
      "grad_norm": 0.9404817004403054,
      "learning_rate": 9.849878126779326e-06,
      "loss": 0.219,
      "step": 3629
    },
    {
      "epoch": 0.10589882723612813,
      "grad_norm": 0.8760941174090979,
      "learning_rate": 9.8497632081272e-06,
      "loss": 0.2157,
      "step": 3630
    },
    {
      "epoch": 0.10592800046677169,
      "grad_norm": 0.9371641097270601,
      "learning_rate": 9.849648246177544e-06,
      "loss": 0.1963,
      "step": 3631
    },
    {
      "epoch": 0.10595717369741525,
      "grad_norm": 0.8086527270226705,
      "learning_rate": 9.849533240931388e-06,
      "loss": 0.1569,
      "step": 3632
    },
    {
      "epoch": 0.10598634692805882,
      "grad_norm": 0.9406634070880344,
      "learning_rate": 9.849418192389755e-06,
      "loss": 0.1793,
      "step": 3633
    },
    {
      "epoch": 0.10601552015870237,
      "grad_norm": 1.010354893817405,
      "learning_rate": 9.849303100553675e-06,
      "loss": 0.2215,
      "step": 3634
    },
    {
      "epoch": 0.10604469338934594,
      "grad_norm": 1.1892070123249876,
      "learning_rate": 9.849187965424174e-06,
      "loss": 0.1973,
      "step": 3635
    },
    {
      "epoch": 0.1060738666199895,
      "grad_norm": 1.0311238063426773,
      "learning_rate": 9.849072787002281e-06,
      "loss": 0.1823,
      "step": 3636
    },
    {
      "epoch": 0.10610303985063306,
      "grad_norm": 0.8037583422842794,
      "learning_rate": 9.848957565289024e-06,
      "loss": 0.173,
      "step": 3637
    },
    {
      "epoch": 0.10613221308127663,
      "grad_norm": 0.9914643598253442,
      "learning_rate": 9.84884230028543e-06,
      "loss": 0.1733,
      "step": 3638
    },
    {
      "epoch": 0.10616138631192018,
      "grad_norm": 0.7477706637644134,
      "learning_rate": 9.84872699199253e-06,
      "loss": 0.1967,
      "step": 3639
    },
    {
      "epoch": 0.10619055954256375,
      "grad_norm": 0.8397578799055514,
      "learning_rate": 9.848611640411355e-06,
      "loss": 0.1821,
      "step": 3640
    },
    {
      "epoch": 0.1062197327732073,
      "grad_norm": 0.8983672869267227,
      "learning_rate": 9.848496245542928e-06,
      "loss": 0.1913,
      "step": 3641
    },
    {
      "epoch": 0.10624890600385087,
      "grad_norm": 0.8500491520526038,
      "learning_rate": 9.848380807388287e-06,
      "loss": 0.1673,
      "step": 3642
    },
    {
      "epoch": 0.10627807923449442,
      "grad_norm": 0.8896732084116462,
      "learning_rate": 9.84826532594846e-06,
      "loss": 0.1905,
      "step": 3643
    },
    {
      "epoch": 0.10630725246513799,
      "grad_norm": 0.7485932722651284,
      "learning_rate": 9.848149801224478e-06,
      "loss": 0.1849,
      "step": 3644
    },
    {
      "epoch": 0.10633642569578156,
      "grad_norm": 1.967401684554678,
      "learning_rate": 9.84803423321737e-06,
      "loss": 0.1847,
      "step": 3645
    },
    {
      "epoch": 0.10636559892642511,
      "grad_norm": 0.8288106003515783,
      "learning_rate": 9.84791862192817e-06,
      "loss": 0.1721,
      "step": 3646
    },
    {
      "epoch": 0.10639477215706868,
      "grad_norm": 1.146949706729043,
      "learning_rate": 9.84780296735791e-06,
      "loss": 0.1993,
      "step": 3647
    },
    {
      "epoch": 0.10642394538771223,
      "grad_norm": 0.8806391390682924,
      "learning_rate": 9.847687269507624e-06,
      "loss": 0.1798,
      "step": 3648
    },
    {
      "epoch": 0.1064531186183558,
      "grad_norm": 1.0032036286894994,
      "learning_rate": 9.847571528378342e-06,
      "loss": 0.1659,
      "step": 3649
    },
    {
      "epoch": 0.10648229184899936,
      "grad_norm": 0.8126100534228001,
      "learning_rate": 9.8474557439711e-06,
      "loss": 0.1796,
      "step": 3650
    },
    {
      "epoch": 0.10651146507964292,
      "grad_norm": 0.8152182091978051,
      "learning_rate": 9.847339916286928e-06,
      "loss": 0.1589,
      "step": 3651
    },
    {
      "epoch": 0.10654063831028648,
      "grad_norm": 0.7248696144602788,
      "learning_rate": 9.847224045326864e-06,
      "loss": 0.1737,
      "step": 3652
    },
    {
      "epoch": 0.10656981154093004,
      "grad_norm": 0.9821676998680899,
      "learning_rate": 9.84710813109194e-06,
      "loss": 0.1755,
      "step": 3653
    },
    {
      "epoch": 0.1065989847715736,
      "grad_norm": 0.9874156412488563,
      "learning_rate": 9.846992173583193e-06,
      "loss": 0.1744,
      "step": 3654
    },
    {
      "epoch": 0.10662815800221717,
      "grad_norm": 0.9935917984752777,
      "learning_rate": 9.846876172801653e-06,
      "loss": 0.1875,
      "step": 3655
    },
    {
      "epoch": 0.10665733123286072,
      "grad_norm": 0.8547818384954391,
      "learning_rate": 9.846760128748363e-06,
      "loss": 0.2255,
      "step": 3656
    },
    {
      "epoch": 0.10668650446350429,
      "grad_norm": 1.1420768220006505,
      "learning_rate": 9.846644041424357e-06,
      "loss": 0.1973,
      "step": 3657
    },
    {
      "epoch": 0.10671567769414785,
      "grad_norm": 0.9489048749425871,
      "learning_rate": 9.846527910830666e-06,
      "loss": 0.189,
      "step": 3658
    },
    {
      "epoch": 0.10674485092479141,
      "grad_norm": 0.7622933975836242,
      "learning_rate": 9.846411736968334e-06,
      "loss": 0.1802,
      "step": 3659
    },
    {
      "epoch": 0.10677402415543498,
      "grad_norm": 0.9875678375622912,
      "learning_rate": 9.846295519838393e-06,
      "loss": 0.2019,
      "step": 3660
    },
    {
      "epoch": 0.10680319738607853,
      "grad_norm": 0.9658618910429523,
      "learning_rate": 9.846179259441884e-06,
      "loss": 0.1736,
      "step": 3661
    },
    {
      "epoch": 0.1068323706167221,
      "grad_norm": 0.7838056532062962,
      "learning_rate": 9.846062955779843e-06,
      "loss": 0.1678,
      "step": 3662
    },
    {
      "epoch": 0.10686154384736565,
      "grad_norm": 0.9063951910550987,
      "learning_rate": 9.845946608853307e-06,
      "loss": 0.1751,
      "step": 3663
    },
    {
      "epoch": 0.10689071707800922,
      "grad_norm": 0.954163078224032,
      "learning_rate": 9.845830218663319e-06,
      "loss": 0.1974,
      "step": 3664
    },
    {
      "epoch": 0.10691989030865277,
      "grad_norm": 0.9078999281041293,
      "learning_rate": 9.845713785210915e-06,
      "loss": 0.1807,
      "step": 3665
    },
    {
      "epoch": 0.10694906353929634,
      "grad_norm": 0.9482398997349659,
      "learning_rate": 9.845597308497134e-06,
      "loss": 0.1603,
      "step": 3666
    },
    {
      "epoch": 0.10697823676993991,
      "grad_norm": 0.9527446241527082,
      "learning_rate": 9.845480788523018e-06,
      "loss": 0.1867,
      "step": 3667
    },
    {
      "epoch": 0.10700741000058346,
      "grad_norm": 0.8204347964831162,
      "learning_rate": 9.845364225289606e-06,
      "loss": 0.163,
      "step": 3668
    },
    {
      "epoch": 0.10703658323122703,
      "grad_norm": 0.7344762053758938,
      "learning_rate": 9.845247618797938e-06,
      "loss": 0.1621,
      "step": 3669
    },
    {
      "epoch": 0.10706575646187058,
      "grad_norm": 1.0051301924257035,
      "learning_rate": 9.845130969049057e-06,
      "loss": 0.2151,
      "step": 3670
    },
    {
      "epoch": 0.10709492969251415,
      "grad_norm": 0.9033577606481228,
      "learning_rate": 9.845014276044002e-06,
      "loss": 0.1864,
      "step": 3671
    },
    {
      "epoch": 0.10712410292315772,
      "grad_norm": 1.0478524524363073,
      "learning_rate": 9.844897539783817e-06,
      "loss": 0.1952,
      "step": 3672
    },
    {
      "epoch": 0.10715327615380127,
      "grad_norm": 0.9434465268313577,
      "learning_rate": 9.844780760269543e-06,
      "loss": 0.1921,
      "step": 3673
    },
    {
      "epoch": 0.10718244938444484,
      "grad_norm": 1.185492328938468,
      "learning_rate": 9.844663937502225e-06,
      "loss": 0.1987,
      "step": 3674
    },
    {
      "epoch": 0.10721162261508839,
      "grad_norm": 0.9408758168148659,
      "learning_rate": 9.844547071482902e-06,
      "loss": 0.1827,
      "step": 3675
    },
    {
      "epoch": 0.10724079584573196,
      "grad_norm": 0.9133147091973878,
      "learning_rate": 9.844430162212619e-06,
      "loss": 0.1894,
      "step": 3676
    },
    {
      "epoch": 0.10726996907637552,
      "grad_norm": 0.8748009274424542,
      "learning_rate": 9.84431320969242e-06,
      "loss": 0.2018,
      "step": 3677
    },
    {
      "epoch": 0.10729914230701908,
      "grad_norm": 1.0174652467453873,
      "learning_rate": 9.84419621392335e-06,
      "loss": 0.1778,
      "step": 3678
    },
    {
      "epoch": 0.10732831553766264,
      "grad_norm": 0.7728782072356283,
      "learning_rate": 9.844079174906453e-06,
      "loss": 0.1831,
      "step": 3679
    },
    {
      "epoch": 0.1073574887683062,
      "grad_norm": 1.108675218183585,
      "learning_rate": 9.843962092642772e-06,
      "loss": 0.1937,
      "step": 3680
    },
    {
      "epoch": 0.10738666199894976,
      "grad_norm": 0.8191061791296362,
      "learning_rate": 9.843844967133353e-06,
      "loss": 0.1666,
      "step": 3681
    },
    {
      "epoch": 0.10741583522959333,
      "grad_norm": 1.093219952602174,
      "learning_rate": 9.843727798379245e-06,
      "loss": 0.1834,
      "step": 3682
    },
    {
      "epoch": 0.10744500846023688,
      "grad_norm": 0.8429401948794337,
      "learning_rate": 9.843610586381491e-06,
      "loss": 0.1951,
      "step": 3683
    },
    {
      "epoch": 0.10747418169088045,
      "grad_norm": 1.0067529057383613,
      "learning_rate": 9.843493331141136e-06,
      "loss": 0.2011,
      "step": 3684
    },
    {
      "epoch": 0.107503354921524,
      "grad_norm": 0.7733939536939644,
      "learning_rate": 9.843376032659231e-06,
      "loss": 0.1728,
      "step": 3685
    },
    {
      "epoch": 0.10753252815216757,
      "grad_norm": 0.9664776166046107,
      "learning_rate": 9.84325869093682e-06,
      "loss": 0.1712,
      "step": 3686
    },
    {
      "epoch": 0.10756170138281113,
      "grad_norm": 0.8594469761438377,
      "learning_rate": 9.843141305974951e-06,
      "loss": 0.2086,
      "step": 3687
    },
    {
      "epoch": 0.10759087461345469,
      "grad_norm": 1.0214638134518348,
      "learning_rate": 9.843023877774673e-06,
      "loss": 0.1671,
      "step": 3688
    },
    {
      "epoch": 0.10762004784409826,
      "grad_norm": 0.9576001938898379,
      "learning_rate": 9.842906406337034e-06,
      "loss": 0.1923,
      "step": 3689
    },
    {
      "epoch": 0.10764922107474181,
      "grad_norm": 0.867883200304093,
      "learning_rate": 9.842788891663085e-06,
      "loss": 0.1962,
      "step": 3690
    },
    {
      "epoch": 0.10767839430538538,
      "grad_norm": 0.891562337545845,
      "learning_rate": 9.84267133375387e-06,
      "loss": 0.1782,
      "step": 3691
    },
    {
      "epoch": 0.10770756753602893,
      "grad_norm": 0.9513846342371838,
      "learning_rate": 9.842553732610442e-06,
      "loss": 0.1972,
      "step": 3692
    },
    {
      "epoch": 0.1077367407666725,
      "grad_norm": 0.8740119196856326,
      "learning_rate": 9.842436088233851e-06,
      "loss": 0.2128,
      "step": 3693
    },
    {
      "epoch": 0.10776591399731607,
      "grad_norm": 0.9505336722522594,
      "learning_rate": 9.842318400625145e-06,
      "loss": 0.2001,
      "step": 3694
    },
    {
      "epoch": 0.10779508722795962,
      "grad_norm": 0.9032592869121997,
      "learning_rate": 9.842200669785378e-06,
      "loss": 0.1871,
      "step": 3695
    },
    {
      "epoch": 0.10782426045860319,
      "grad_norm": 1.0805069099010487,
      "learning_rate": 9.842082895715598e-06,
      "loss": 0.1887,
      "step": 3696
    },
    {
      "epoch": 0.10785343368924674,
      "grad_norm": 0.9110385262451008,
      "learning_rate": 9.84196507841686e-06,
      "loss": 0.2106,
      "step": 3697
    },
    {
      "epoch": 0.10788260691989031,
      "grad_norm": 1.0107292562277639,
      "learning_rate": 9.84184721789021e-06,
      "loss": 0.1758,
      "step": 3698
    },
    {
      "epoch": 0.10791178015053388,
      "grad_norm": 1.1430490413573193,
      "learning_rate": 9.841729314136707e-06,
      "loss": 0.1807,
      "step": 3699
    },
    {
      "epoch": 0.10794095338117743,
      "grad_norm": 1.2589913484771265,
      "learning_rate": 9.8416113671574e-06,
      "loss": 0.1829,
      "step": 3700
    },
    {
      "epoch": 0.107970126611821,
      "grad_norm": 0.9674064446555021,
      "learning_rate": 9.841493376953341e-06,
      "loss": 0.1862,
      "step": 3701
    },
    {
      "epoch": 0.10799929984246455,
      "grad_norm": 1.00196352427155,
      "learning_rate": 9.841375343525586e-06,
      "loss": 0.2146,
      "step": 3702
    },
    {
      "epoch": 0.10802847307310812,
      "grad_norm": 0.8110697783459644,
      "learning_rate": 9.841257266875187e-06,
      "loss": 0.1696,
      "step": 3703
    },
    {
      "epoch": 0.10805764630375168,
      "grad_norm": 1.0770057636871617,
      "learning_rate": 9.8411391470032e-06,
      "loss": 0.1775,
      "step": 3704
    },
    {
      "epoch": 0.10808681953439524,
      "grad_norm": 1.0268880636789652,
      "learning_rate": 9.841020983910675e-06,
      "loss": 0.1761,
      "step": 3705
    },
    {
      "epoch": 0.1081159927650388,
      "grad_norm": 0.8155987790917397,
      "learning_rate": 9.840902777598675e-06,
      "loss": 0.1679,
      "step": 3706
    },
    {
      "epoch": 0.10814516599568236,
      "grad_norm": 0.8870319716724403,
      "learning_rate": 9.840784528068248e-06,
      "loss": 0.1854,
      "step": 3707
    },
    {
      "epoch": 0.10817433922632592,
      "grad_norm": 0.8624835876854527,
      "learning_rate": 9.840666235320453e-06,
      "loss": 0.1896,
      "step": 3708
    },
    {
      "epoch": 0.10820351245696949,
      "grad_norm": 0.9718767513383666,
      "learning_rate": 9.840547899356344e-06,
      "loss": 0.1817,
      "step": 3709
    },
    {
      "epoch": 0.10823268568761304,
      "grad_norm": 0.9234147792364222,
      "learning_rate": 9.840429520176981e-06,
      "loss": 0.2356,
      "step": 3710
    },
    {
      "epoch": 0.10826185891825661,
      "grad_norm": 0.8759859936433729,
      "learning_rate": 9.84031109778342e-06,
      "loss": 0.1804,
      "step": 3711
    },
    {
      "epoch": 0.10829103214890017,
      "grad_norm": 0.9744025451650687,
      "learning_rate": 9.840192632176714e-06,
      "loss": 0.1864,
      "step": 3712
    },
    {
      "epoch": 0.10832020537954373,
      "grad_norm": 1.1145841227908508,
      "learning_rate": 9.840074123357924e-06,
      "loss": 0.185,
      "step": 3713
    },
    {
      "epoch": 0.10834937861018729,
      "grad_norm": 0.9164931550793644,
      "learning_rate": 9.839955571328108e-06,
      "loss": 0.1762,
      "step": 3714
    },
    {
      "epoch": 0.10837855184083085,
      "grad_norm": 1.3306792105766259,
      "learning_rate": 9.839836976088326e-06,
      "loss": 0.2067,
      "step": 3715
    },
    {
      "epoch": 0.10840772507147442,
      "grad_norm": 1.0025956067578259,
      "learning_rate": 9.839718337639633e-06,
      "loss": 0.1904,
      "step": 3716
    },
    {
      "epoch": 0.10843689830211797,
      "grad_norm": 0.9590885285899383,
      "learning_rate": 9.83959965598309e-06,
      "loss": 0.1753,
      "step": 3717
    },
    {
      "epoch": 0.10846607153276154,
      "grad_norm": 0.8519876240715798,
      "learning_rate": 9.839480931119756e-06,
      "loss": 0.1655,
      "step": 3718
    },
    {
      "epoch": 0.1084952447634051,
      "grad_norm": 1.0216085372450623,
      "learning_rate": 9.839362163050692e-06,
      "loss": 0.2068,
      "step": 3719
    },
    {
      "epoch": 0.10852441799404866,
      "grad_norm": 1.015218478793388,
      "learning_rate": 9.839243351776959e-06,
      "loss": 0.2108,
      "step": 3720
    },
    {
      "epoch": 0.10855359122469223,
      "grad_norm": 0.8833693071316394,
      "learning_rate": 9.839124497299614e-06,
      "loss": 0.1814,
      "step": 3721
    },
    {
      "epoch": 0.10858276445533578,
      "grad_norm": 0.9985068447394757,
      "learning_rate": 9.839005599619723e-06,
      "loss": 0.1952,
      "step": 3722
    },
    {
      "epoch": 0.10861193768597935,
      "grad_norm": 0.9275513908128796,
      "learning_rate": 9.838886658738345e-06,
      "loss": 0.1816,
      "step": 3723
    },
    {
      "epoch": 0.1086411109166229,
      "grad_norm": 1.0624517703707523,
      "learning_rate": 9.838767674656541e-06,
      "loss": 0.2028,
      "step": 3724
    },
    {
      "epoch": 0.10867028414726647,
      "grad_norm": 1.0070792855763442,
      "learning_rate": 9.838648647375375e-06,
      "loss": 0.1519,
      "step": 3725
    },
    {
      "epoch": 0.10869945737791004,
      "grad_norm": 0.8410867942920129,
      "learning_rate": 9.83852957689591e-06,
      "loss": 0.1704,
      "step": 3726
    },
    {
      "epoch": 0.10872863060855359,
      "grad_norm": 1.1809385178616498,
      "learning_rate": 9.838410463219206e-06,
      "loss": 0.2194,
      "step": 3727
    },
    {
      "epoch": 0.10875780383919716,
      "grad_norm": 1.01928972420757,
      "learning_rate": 9.838291306346329e-06,
      "loss": 0.1862,
      "step": 3728
    },
    {
      "epoch": 0.10878697706984071,
      "grad_norm": 0.8529205792076802,
      "learning_rate": 9.838172106278344e-06,
      "loss": 0.171,
      "step": 3729
    },
    {
      "epoch": 0.10881615030048428,
      "grad_norm": 0.9092029669728684,
      "learning_rate": 9.83805286301631e-06,
      "loss": 0.1764,
      "step": 3730
    },
    {
      "epoch": 0.10884532353112784,
      "grad_norm": 0.9078365624779545,
      "learning_rate": 9.837933576561297e-06,
      "loss": 0.1843,
      "step": 3731
    },
    {
      "epoch": 0.1088744967617714,
      "grad_norm": 0.8989173614593705,
      "learning_rate": 9.837814246914367e-06,
      "loss": 0.2223,
      "step": 3732
    },
    {
      "epoch": 0.10890366999241496,
      "grad_norm": 0.8498264796992692,
      "learning_rate": 9.83769487407659e-06,
      "loss": 0.1806,
      "step": 3733
    },
    {
      "epoch": 0.10893284322305852,
      "grad_norm": 1.1470141639434395,
      "learning_rate": 9.837575458049023e-06,
      "loss": 0.1977,
      "step": 3734
    },
    {
      "epoch": 0.10896201645370208,
      "grad_norm": 1.0260437543873395,
      "learning_rate": 9.83745599883274e-06,
      "loss": 0.2063,
      "step": 3735
    },
    {
      "epoch": 0.10899118968434564,
      "grad_norm": 0.9272079305040548,
      "learning_rate": 9.837336496428804e-06,
      "loss": 0.1625,
      "step": 3736
    },
    {
      "epoch": 0.1090203629149892,
      "grad_norm": 0.7823327792556027,
      "learning_rate": 9.837216950838282e-06,
      "loss": 0.1914,
      "step": 3737
    },
    {
      "epoch": 0.10904953614563277,
      "grad_norm": 0.9446827984937581,
      "learning_rate": 9.83709736206224e-06,
      "loss": 0.1744,
      "step": 3738
    },
    {
      "epoch": 0.10907870937627633,
      "grad_norm": 1.0576705550162075,
      "learning_rate": 9.836977730101751e-06,
      "loss": 0.2053,
      "step": 3739
    },
    {
      "epoch": 0.10910788260691989,
      "grad_norm": 0.7897544097238344,
      "learning_rate": 9.836858054957879e-06,
      "loss": 0.1837,
      "step": 3740
    },
    {
      "epoch": 0.10913705583756345,
      "grad_norm": 0.7602633907190607,
      "learning_rate": 9.83673833663169e-06,
      "loss": 0.1868,
      "step": 3741
    },
    {
      "epoch": 0.10916622906820701,
      "grad_norm": 0.8498575573800358,
      "learning_rate": 9.836618575124259e-06,
      "loss": 0.1652,
      "step": 3742
    },
    {
      "epoch": 0.10919540229885058,
      "grad_norm": 0.8334433786361117,
      "learning_rate": 9.836498770436652e-06,
      "loss": 0.1701,
      "step": 3743
    },
    {
      "epoch": 0.10922457552949413,
      "grad_norm": 0.8748325644408049,
      "learning_rate": 9.836378922569935e-06,
      "loss": 0.1678,
      "step": 3744
    },
    {
      "epoch": 0.1092537487601377,
      "grad_norm": 1.1032192674675612,
      "learning_rate": 9.836259031525184e-06,
      "loss": 0.196,
      "step": 3745
    },
    {
      "epoch": 0.10928292199078125,
      "grad_norm": 0.8929200675933284,
      "learning_rate": 9.836139097303468e-06,
      "loss": 0.176,
      "step": 3746
    },
    {
      "epoch": 0.10931209522142482,
      "grad_norm": 0.9147053940989985,
      "learning_rate": 9.836019119905856e-06,
      "loss": 0.1866,
      "step": 3747
    },
    {
      "epoch": 0.10934126845206839,
      "grad_norm": 0.8795091301665815,
      "learning_rate": 9.835899099333418e-06,
      "loss": 0.1712,
      "step": 3748
    },
    {
      "epoch": 0.10937044168271194,
      "grad_norm": 0.8008882443825363,
      "learning_rate": 9.835779035587228e-06,
      "loss": 0.1879,
      "step": 3749
    },
    {
      "epoch": 0.10939961491335551,
      "grad_norm": 0.9958444752671687,
      "learning_rate": 9.835658928668356e-06,
      "loss": 0.2044,
      "step": 3750
    },
    {
      "epoch": 0.10942878814399906,
      "grad_norm": 0.778564098885269,
      "learning_rate": 9.835538778577877e-06,
      "loss": 0.1832,
      "step": 3751
    },
    {
      "epoch": 0.10945796137464263,
      "grad_norm": 0.9398409858222706,
      "learning_rate": 9.835418585316863e-06,
      "loss": 0.1748,
      "step": 3752
    },
    {
      "epoch": 0.1094871346052862,
      "grad_norm": 0.8758560899904604,
      "learning_rate": 9.835298348886386e-06,
      "loss": 0.16,
      "step": 3753
    },
    {
      "epoch": 0.10951630783592975,
      "grad_norm": 0.8911949385467063,
      "learning_rate": 9.835178069287519e-06,
      "loss": 0.1743,
      "step": 3754
    },
    {
      "epoch": 0.10954548106657332,
      "grad_norm": 0.8900291916817913,
      "learning_rate": 9.835057746521335e-06,
      "loss": 0.1839,
      "step": 3755
    },
    {
      "epoch": 0.10957465429721687,
      "grad_norm": 0.9417581588384263,
      "learning_rate": 9.83493738058891e-06,
      "loss": 0.1781,
      "step": 3756
    },
    {
      "epoch": 0.10960382752786044,
      "grad_norm": 0.9872104931867929,
      "learning_rate": 9.834816971491322e-06,
      "loss": 0.1767,
      "step": 3757
    },
    {
      "epoch": 0.10963300075850399,
      "grad_norm": 0.8309452719909529,
      "learning_rate": 9.834696519229638e-06,
      "loss": 0.1591,
      "step": 3758
    },
    {
      "epoch": 0.10966217398914756,
      "grad_norm": 0.8356111273091072,
      "learning_rate": 9.83457602380494e-06,
      "loss": 0.188,
      "step": 3759
    },
    {
      "epoch": 0.10969134721979112,
      "grad_norm": 1.2821605838808534,
      "learning_rate": 9.8344554852183e-06,
      "loss": 0.1767,
      "step": 3760
    },
    {
      "epoch": 0.10972052045043468,
      "grad_norm": 0.7899581477687933,
      "learning_rate": 9.834334903470796e-06,
      "loss": 0.1861,
      "step": 3761
    },
    {
      "epoch": 0.10974969368107824,
      "grad_norm": 0.8060093967417578,
      "learning_rate": 9.834214278563503e-06,
      "loss": 0.1719,
      "step": 3762
    },
    {
      "epoch": 0.1097788669117218,
      "grad_norm": 0.9629195746359068,
      "learning_rate": 9.834093610497501e-06,
      "loss": 0.1846,
      "step": 3763
    },
    {
      "epoch": 0.10980804014236537,
      "grad_norm": 0.8498193523692971,
      "learning_rate": 9.833972899273863e-06,
      "loss": 0.1865,
      "step": 3764
    },
    {
      "epoch": 0.10983721337300893,
      "grad_norm": 0.9510094692083803,
      "learning_rate": 9.83385214489367e-06,
      "loss": 0.2186,
      "step": 3765
    },
    {
      "epoch": 0.10986638660365249,
      "grad_norm": 0.9199366240798985,
      "learning_rate": 9.833731347358e-06,
      "loss": 0.1844,
      "step": 3766
    },
    {
      "epoch": 0.10989555983429605,
      "grad_norm": 0.9649402924819324,
      "learning_rate": 9.83361050666793e-06,
      "loss": 0.2012,
      "step": 3767
    },
    {
      "epoch": 0.1099247330649396,
      "grad_norm": 0.8969514360865897,
      "learning_rate": 9.833489622824537e-06,
      "loss": 0.1543,
      "step": 3768
    },
    {
      "epoch": 0.10995390629558317,
      "grad_norm": 0.9774128949200379,
      "learning_rate": 9.833368695828905e-06,
      "loss": 0.1777,
      "step": 3769
    },
    {
      "epoch": 0.10998307952622674,
      "grad_norm": 0.8418971392788187,
      "learning_rate": 9.833247725682111e-06,
      "loss": 0.1741,
      "step": 3770
    },
    {
      "epoch": 0.1100122527568703,
      "grad_norm": 0.8379352874568623,
      "learning_rate": 9.833126712385234e-06,
      "loss": 0.1655,
      "step": 3771
    },
    {
      "epoch": 0.11004142598751386,
      "grad_norm": 0.8231065469113594,
      "learning_rate": 9.833005655939356e-06,
      "loss": 0.1836,
      "step": 3772
    },
    {
      "epoch": 0.11007059921815741,
      "grad_norm": 0.8191344525780554,
      "learning_rate": 9.832884556345556e-06,
      "loss": 0.1689,
      "step": 3773
    },
    {
      "epoch": 0.11009977244880098,
      "grad_norm": 0.9611187222121934,
      "learning_rate": 9.832763413604918e-06,
      "loss": 0.1933,
      "step": 3774
    },
    {
      "epoch": 0.11012894567944455,
      "grad_norm": 0.832741343395637,
      "learning_rate": 9.832642227718522e-06,
      "loss": 0.1874,
      "step": 3775
    },
    {
      "epoch": 0.1101581189100881,
      "grad_norm": 0.897739888953288,
      "learning_rate": 9.83252099868745e-06,
      "loss": 0.1721,
      "step": 3776
    },
    {
      "epoch": 0.11018729214073167,
      "grad_norm": 0.9764722624378016,
      "learning_rate": 9.832399726512783e-06,
      "loss": 0.1987,
      "step": 3777
    },
    {
      "epoch": 0.11021646537137522,
      "grad_norm": 0.8605112724821691,
      "learning_rate": 9.832278411195606e-06,
      "loss": 0.1985,
      "step": 3778
    },
    {
      "epoch": 0.11024563860201879,
      "grad_norm": 0.9433795788950949,
      "learning_rate": 9.832157052737e-06,
      "loss": 0.1808,
      "step": 3779
    },
    {
      "epoch": 0.11027481183266234,
      "grad_norm": 1.1215045418125138,
      "learning_rate": 9.83203565113805e-06,
      "loss": 0.1788,
      "step": 3780
    },
    {
      "epoch": 0.11030398506330591,
      "grad_norm": 0.9726536245052037,
      "learning_rate": 9.831914206399837e-06,
      "loss": 0.1605,
      "step": 3781
    },
    {
      "epoch": 0.11033315829394948,
      "grad_norm": 0.9226634528504349,
      "learning_rate": 9.831792718523449e-06,
      "loss": 0.1787,
      "step": 3782
    },
    {
      "epoch": 0.11036233152459303,
      "grad_norm": 0.81507376458163,
      "learning_rate": 9.83167118750997e-06,
      "loss": 0.2006,
      "step": 3783
    },
    {
      "epoch": 0.1103915047552366,
      "grad_norm": 0.9089075165787985,
      "learning_rate": 9.831549613360482e-06,
      "loss": 0.198,
      "step": 3784
    },
    {
      "epoch": 0.11042067798588015,
      "grad_norm": 0.9859611143876775,
      "learning_rate": 9.831427996076074e-06,
      "loss": 0.1777,
      "step": 3785
    },
    {
      "epoch": 0.11044985121652372,
      "grad_norm": 0.7837485554712099,
      "learning_rate": 9.83130633565783e-06,
      "loss": 0.1565,
      "step": 3786
    },
    {
      "epoch": 0.11047902444716728,
      "grad_norm": 0.9260998754003568,
      "learning_rate": 9.831184632106837e-06,
      "loss": 0.2108,
      "step": 3787
    },
    {
      "epoch": 0.11050819767781084,
      "grad_norm": 0.8565242221498323,
      "learning_rate": 9.831062885424181e-06,
      "loss": 0.1886,
      "step": 3788
    },
    {
      "epoch": 0.1105373709084544,
      "grad_norm": 0.8681149920558899,
      "learning_rate": 9.830941095610948e-06,
      "loss": 0.1839,
      "step": 3789
    },
    {
      "epoch": 0.11056654413909796,
      "grad_norm": 0.857683535434253,
      "learning_rate": 9.830819262668225e-06,
      "loss": 0.1857,
      "step": 3790
    },
    {
      "epoch": 0.11059571736974153,
      "grad_norm": 1.810542375779002,
      "learning_rate": 9.830697386597102e-06,
      "loss": 0.2115,
      "step": 3791
    },
    {
      "epoch": 0.11062489060038509,
      "grad_norm": 0.9987769624881818,
      "learning_rate": 9.830575467398666e-06,
      "loss": 0.1878,
      "step": 3792
    },
    {
      "epoch": 0.11065406383102865,
      "grad_norm": 0.8863324438702518,
      "learning_rate": 9.830453505074005e-06,
      "loss": 0.1875,
      "step": 3793
    },
    {
      "epoch": 0.11068323706167221,
      "grad_norm": 0.9043276789551575,
      "learning_rate": 9.830331499624208e-06,
      "loss": 0.1657,
      "step": 3794
    },
    {
      "epoch": 0.11071241029231577,
      "grad_norm": 1.0448868343342503,
      "learning_rate": 9.830209451050365e-06,
      "loss": 0.1816,
      "step": 3795
    },
    {
      "epoch": 0.11074158352295933,
      "grad_norm": 1.1265263659396727,
      "learning_rate": 9.830087359353566e-06,
      "loss": 0.1772,
      "step": 3796
    },
    {
      "epoch": 0.1107707567536029,
      "grad_norm": 0.9817894511712082,
      "learning_rate": 9.829965224534899e-06,
      "loss": 0.194,
      "step": 3797
    },
    {
      "epoch": 0.11079992998424645,
      "grad_norm": 0.8576839815110606,
      "learning_rate": 9.829843046595455e-06,
      "loss": 0.1879,
      "step": 3798
    },
    {
      "epoch": 0.11082910321489002,
      "grad_norm": 0.9716186224016058,
      "learning_rate": 9.829720825536327e-06,
      "loss": 0.1745,
      "step": 3799
    },
    {
      "epoch": 0.11085827644553357,
      "grad_norm": 1.000444412402128,
      "learning_rate": 9.829598561358602e-06,
      "loss": 0.1658,
      "step": 3800
    },
    {
      "epoch": 0.11088744967617714,
      "grad_norm": 0.9179903393874113,
      "learning_rate": 9.829476254063376e-06,
      "loss": 0.1732,
      "step": 3801
    },
    {
      "epoch": 0.11091662290682071,
      "grad_norm": 1.0004830831060219,
      "learning_rate": 9.829353903651739e-06,
      "loss": 0.1831,
      "step": 3802
    },
    {
      "epoch": 0.11094579613746426,
      "grad_norm": 1.0181127662768221,
      "learning_rate": 9.829231510124782e-06,
      "loss": 0.1746,
      "step": 3803
    },
    {
      "epoch": 0.11097496936810783,
      "grad_norm": 1.0400139495645497,
      "learning_rate": 9.829109073483598e-06,
      "loss": 0.2311,
      "step": 3804
    },
    {
      "epoch": 0.11100414259875138,
      "grad_norm": 0.8718607152673257,
      "learning_rate": 9.828986593729283e-06,
      "loss": 0.2045,
      "step": 3805
    },
    {
      "epoch": 0.11103331582939495,
      "grad_norm": 0.92743147372795,
      "learning_rate": 9.828864070862927e-06,
      "loss": 0.2088,
      "step": 3806
    },
    {
      "epoch": 0.1110624890600385,
      "grad_norm": 0.9482139382413433,
      "learning_rate": 9.828741504885627e-06,
      "loss": 0.1533,
      "step": 3807
    },
    {
      "epoch": 0.11109166229068207,
      "grad_norm": 0.8801976295218055,
      "learning_rate": 9.828618895798474e-06,
      "loss": 0.1647,
      "step": 3808
    },
    {
      "epoch": 0.11112083552132564,
      "grad_norm": 0.8325714634226374,
      "learning_rate": 9.828496243602566e-06,
      "loss": 0.1695,
      "step": 3809
    },
    {
      "epoch": 0.11115000875196919,
      "grad_norm": 0.9163576438397082,
      "learning_rate": 9.828373548298994e-06,
      "loss": 0.1638,
      "step": 3810
    },
    {
      "epoch": 0.11117918198261276,
      "grad_norm": 1.1032582438441247,
      "learning_rate": 9.828250809888857e-06,
      "loss": 0.1789,
      "step": 3811
    },
    {
      "epoch": 0.11120835521325631,
      "grad_norm": 0.9469312949880322,
      "learning_rate": 9.828128028373249e-06,
      "loss": 0.1896,
      "step": 3812
    },
    {
      "epoch": 0.11123752844389988,
      "grad_norm": 0.9198554637930745,
      "learning_rate": 9.828005203753266e-06,
      "loss": 0.1759,
      "step": 3813
    },
    {
      "epoch": 0.11126670167454344,
      "grad_norm": 0.9820440764944599,
      "learning_rate": 9.827882336030005e-06,
      "loss": 0.1659,
      "step": 3814
    },
    {
      "epoch": 0.111295874905187,
      "grad_norm": 0.8816234477053786,
      "learning_rate": 9.827759425204563e-06,
      "loss": 0.1752,
      "step": 3815
    },
    {
      "epoch": 0.11132504813583056,
      "grad_norm": 0.8578308886705731,
      "learning_rate": 9.82763647127804e-06,
      "loss": 0.1835,
      "step": 3816
    },
    {
      "epoch": 0.11135422136647412,
      "grad_norm": 1.1081784354192596,
      "learning_rate": 9.827513474251527e-06,
      "loss": 0.1889,
      "step": 3817
    },
    {
      "epoch": 0.11138339459711769,
      "grad_norm": 1.2342896315640153,
      "learning_rate": 9.827390434126128e-06,
      "loss": 0.1766,
      "step": 3818
    },
    {
      "epoch": 0.11141256782776125,
      "grad_norm": 0.9570697493862371,
      "learning_rate": 9.82726735090294e-06,
      "loss": 0.1937,
      "step": 3819
    },
    {
      "epoch": 0.1114417410584048,
      "grad_norm": 1.0175854183995052,
      "learning_rate": 9.827144224583061e-06,
      "loss": 0.181,
      "step": 3820
    },
    {
      "epoch": 0.11147091428904837,
      "grad_norm": 1.0069047586440978,
      "learning_rate": 9.827021055167591e-06,
      "loss": 0.1795,
      "step": 3821
    },
    {
      "epoch": 0.11150008751969193,
      "grad_norm": 0.8170535531556331,
      "learning_rate": 9.82689784265763e-06,
      "loss": 0.1958,
      "step": 3822
    },
    {
      "epoch": 0.1115292607503355,
      "grad_norm": 0.9762549144270604,
      "learning_rate": 9.826774587054274e-06,
      "loss": 0.1887,
      "step": 3823
    },
    {
      "epoch": 0.11155843398097906,
      "grad_norm": 0.9765942416669872,
      "learning_rate": 9.826651288358631e-06,
      "loss": 0.1836,
      "step": 3824
    },
    {
      "epoch": 0.11158760721162261,
      "grad_norm": 0.8275904962695941,
      "learning_rate": 9.826527946571796e-06,
      "loss": 0.1638,
      "step": 3825
    },
    {
      "epoch": 0.11161678044226618,
      "grad_norm": 0.7480591770664048,
      "learning_rate": 9.826404561694872e-06,
      "loss": 0.1949,
      "step": 3826
    },
    {
      "epoch": 0.11164595367290973,
      "grad_norm": 0.8127582086929548,
      "learning_rate": 9.82628113372896e-06,
      "loss": 0.1724,
      "step": 3827
    },
    {
      "epoch": 0.1116751269035533,
      "grad_norm": 0.9590706890411163,
      "learning_rate": 9.82615766267516e-06,
      "loss": 0.1797,
      "step": 3828
    },
    {
      "epoch": 0.11170430013419685,
      "grad_norm": 0.9691927534950275,
      "learning_rate": 9.826034148534578e-06,
      "loss": 0.1907,
      "step": 3829
    },
    {
      "epoch": 0.11173347336484042,
      "grad_norm": 0.9479541205797285,
      "learning_rate": 9.825910591308316e-06,
      "loss": 0.1862,
      "step": 3830
    },
    {
      "epoch": 0.11176264659548399,
      "grad_norm": 0.8363688236797676,
      "learning_rate": 9.825786990997474e-06,
      "loss": 0.1813,
      "step": 3831
    },
    {
      "epoch": 0.11179181982612754,
      "grad_norm": 1.0214010560599478,
      "learning_rate": 9.82566334760316e-06,
      "loss": 0.1632,
      "step": 3832
    },
    {
      "epoch": 0.11182099305677111,
      "grad_norm": 0.9408923245091138,
      "learning_rate": 9.825539661126476e-06,
      "loss": 0.1951,
      "step": 3833
    },
    {
      "epoch": 0.11185016628741466,
      "grad_norm": 0.8505465960882488,
      "learning_rate": 9.825415931568525e-06,
      "loss": 0.1886,
      "step": 3834
    },
    {
      "epoch": 0.11187933951805823,
      "grad_norm": 0.9581726090741551,
      "learning_rate": 9.825292158930414e-06,
      "loss": 0.2082,
      "step": 3835
    },
    {
      "epoch": 0.1119085127487018,
      "grad_norm": 1.0289200179538978,
      "learning_rate": 9.825168343213244e-06,
      "loss": 0.1919,
      "step": 3836
    },
    {
      "epoch": 0.11193768597934535,
      "grad_norm": 0.8372072909384086,
      "learning_rate": 9.825044484418123e-06,
      "loss": 0.1879,
      "step": 3837
    },
    {
      "epoch": 0.11196685920998892,
      "grad_norm": 0.9287659409067983,
      "learning_rate": 9.824920582546157e-06,
      "loss": 0.1985,
      "step": 3838
    },
    {
      "epoch": 0.11199603244063247,
      "grad_norm": 1.1754128764959069,
      "learning_rate": 9.824796637598452e-06,
      "loss": 0.1935,
      "step": 3839
    },
    {
      "epoch": 0.11202520567127604,
      "grad_norm": 0.7114469733950892,
      "learning_rate": 9.824672649576114e-06,
      "loss": 0.1542,
      "step": 3840
    },
    {
      "epoch": 0.1120543789019196,
      "grad_norm": 0.9652819565295031,
      "learning_rate": 9.824548618480251e-06,
      "loss": 0.1697,
      "step": 3841
    },
    {
      "epoch": 0.11208355213256316,
      "grad_norm": 0.8632809183926381,
      "learning_rate": 9.82442454431197e-06,
      "loss": 0.1877,
      "step": 3842
    },
    {
      "epoch": 0.11211272536320672,
      "grad_norm": 0.8925510327530022,
      "learning_rate": 9.824300427072379e-06,
      "loss": 0.1937,
      "step": 3843
    },
    {
      "epoch": 0.11214189859385028,
      "grad_norm": 1.0306300289579706,
      "learning_rate": 9.824176266762584e-06,
      "loss": 0.1879,
      "step": 3844
    },
    {
      "epoch": 0.11217107182449385,
      "grad_norm": 0.7272399866168803,
      "learning_rate": 9.824052063383696e-06,
      "loss": 0.174,
      "step": 3845
    },
    {
      "epoch": 0.11220024505513741,
      "grad_norm": 0.7953291749523506,
      "learning_rate": 9.823927816936823e-06,
      "loss": 0.1594,
      "step": 3846
    },
    {
      "epoch": 0.11222941828578097,
      "grad_norm": 0.9242746273139414,
      "learning_rate": 9.823803527423073e-06,
      "loss": 0.1844,
      "step": 3847
    },
    {
      "epoch": 0.11225859151642453,
      "grad_norm": 0.7378429952935994,
      "learning_rate": 9.823679194843556e-06,
      "loss": 0.1838,
      "step": 3848
    },
    {
      "epoch": 0.11228776474706809,
      "grad_norm": 0.8491326506415591,
      "learning_rate": 9.823554819199383e-06,
      "loss": 0.1739,
      "step": 3849
    },
    {
      "epoch": 0.11231693797771165,
      "grad_norm": 0.8026960017082256,
      "learning_rate": 9.823430400491665e-06,
      "loss": 0.1633,
      "step": 3850
    },
    {
      "epoch": 0.1123461112083552,
      "grad_norm": 0.6599642453313245,
      "learning_rate": 9.823305938721511e-06,
      "loss": 0.1662,
      "step": 3851
    },
    {
      "epoch": 0.11237528443899877,
      "grad_norm": 0.8515874900467115,
      "learning_rate": 9.823181433890033e-06,
      "loss": 0.1814,
      "step": 3852
    },
    {
      "epoch": 0.11240445766964234,
      "grad_norm": 0.8802819705576921,
      "learning_rate": 9.823056885998344e-06,
      "loss": 0.1618,
      "step": 3853
    },
    {
      "epoch": 0.1124336309002859,
      "grad_norm": 0.9681653255674283,
      "learning_rate": 9.822932295047552e-06,
      "loss": 0.1708,
      "step": 3854
    },
    {
      "epoch": 0.11246280413092946,
      "grad_norm": 0.8629397185868605,
      "learning_rate": 9.822807661038774e-06,
      "loss": 0.2135,
      "step": 3855
    },
    {
      "epoch": 0.11249197736157301,
      "grad_norm": 0.9201597321229085,
      "learning_rate": 9.82268298397312e-06,
      "loss": 0.1723,
      "step": 3856
    },
    {
      "epoch": 0.11252115059221658,
      "grad_norm": 1.0763162141258271,
      "learning_rate": 9.822558263851703e-06,
      "loss": 0.1741,
      "step": 3857
    },
    {
      "epoch": 0.11255032382286015,
      "grad_norm": 0.8981623376821546,
      "learning_rate": 9.822433500675637e-06,
      "loss": 0.1767,
      "step": 3858
    },
    {
      "epoch": 0.1125794970535037,
      "grad_norm": 0.9258733393847015,
      "learning_rate": 9.822308694446036e-06,
      "loss": 0.1978,
      "step": 3859
    },
    {
      "epoch": 0.11260867028414727,
      "grad_norm": 1.4218574899201408,
      "learning_rate": 9.822183845164016e-06,
      "loss": 0.1732,
      "step": 3860
    },
    {
      "epoch": 0.11263784351479082,
      "grad_norm": 1.0251844248860928,
      "learning_rate": 9.822058952830687e-06,
      "loss": 0.1897,
      "step": 3861
    },
    {
      "epoch": 0.11266701674543439,
      "grad_norm": 0.9119394006279787,
      "learning_rate": 9.821934017447167e-06,
      "loss": 0.1806,
      "step": 3862
    },
    {
      "epoch": 0.11269618997607796,
      "grad_norm": 0.9380165588659504,
      "learning_rate": 9.82180903901457e-06,
      "loss": 0.1742,
      "step": 3863
    },
    {
      "epoch": 0.11272536320672151,
      "grad_norm": 1.03819331935346,
      "learning_rate": 9.821684017534016e-06,
      "loss": 0.193,
      "step": 3864
    },
    {
      "epoch": 0.11275453643736508,
      "grad_norm": 1.5310012087194778,
      "learning_rate": 9.821558953006618e-06,
      "loss": 0.1743,
      "step": 3865
    },
    {
      "epoch": 0.11278370966800863,
      "grad_norm": 0.9863703381047424,
      "learning_rate": 9.821433845433492e-06,
      "loss": 0.1805,
      "step": 3866
    },
    {
      "epoch": 0.1128128828986522,
      "grad_norm": 0.9499991563289474,
      "learning_rate": 9.821308694815757e-06,
      "loss": 0.2043,
      "step": 3867
    },
    {
      "epoch": 0.11284205612929576,
      "grad_norm": 0.9947653672530437,
      "learning_rate": 9.821183501154526e-06,
      "loss": 0.1884,
      "step": 3868
    },
    {
      "epoch": 0.11287122935993932,
      "grad_norm": 1.0925399041803332,
      "learning_rate": 9.821058264450921e-06,
      "loss": 0.1881,
      "step": 3869
    },
    {
      "epoch": 0.11290040259058289,
      "grad_norm": 0.7692636279572528,
      "learning_rate": 9.82093298470606e-06,
      "loss": 0.1623,
      "step": 3870
    },
    {
      "epoch": 0.11292957582122644,
      "grad_norm": 0.9293704198720617,
      "learning_rate": 9.820807661921057e-06,
      "loss": 0.1785,
      "step": 3871
    },
    {
      "epoch": 0.11295874905187,
      "grad_norm": 1.1286801579823407,
      "learning_rate": 9.820682296097038e-06,
      "loss": 0.1883,
      "step": 3872
    },
    {
      "epoch": 0.11298792228251356,
      "grad_norm": 0.99191725635705,
      "learning_rate": 9.820556887235115e-06,
      "loss": 0.1793,
      "step": 3873
    },
    {
      "epoch": 0.11301709551315713,
      "grad_norm": 1.1514342529400776,
      "learning_rate": 9.820431435336412e-06,
      "loss": 0.1794,
      "step": 3874
    },
    {
      "epoch": 0.11304626874380069,
      "grad_norm": 0.8792916962165257,
      "learning_rate": 9.820305940402046e-06,
      "loss": 0.1748,
      "step": 3875
    },
    {
      "epoch": 0.11307544197444425,
      "grad_norm": 0.9190598137431197,
      "learning_rate": 9.82018040243314e-06,
      "loss": 0.2084,
      "step": 3876
    },
    {
      "epoch": 0.11310461520508781,
      "grad_norm": 1.036820131371785,
      "learning_rate": 9.820054821430818e-06,
      "loss": 0.1797,
      "step": 3877
    },
    {
      "epoch": 0.11313378843573137,
      "grad_norm": 1.0797855853546763,
      "learning_rate": 9.819929197396193e-06,
      "loss": 0.1821,
      "step": 3878
    },
    {
      "epoch": 0.11316296166637493,
      "grad_norm": 0.9713902491663778,
      "learning_rate": 9.819803530330393e-06,
      "loss": 0.2093,
      "step": 3879
    },
    {
      "epoch": 0.1131921348970185,
      "grad_norm": 0.9679103759968202,
      "learning_rate": 9.819677820234536e-06,
      "loss": 0.1933,
      "step": 3880
    },
    {
      "epoch": 0.11322130812766205,
      "grad_norm": 0.9074006853552428,
      "learning_rate": 9.819552067109748e-06,
      "loss": 0.1826,
      "step": 3881
    },
    {
      "epoch": 0.11325048135830562,
      "grad_norm": 0.7600454088624697,
      "learning_rate": 9.819426270957148e-06,
      "loss": 0.1755,
      "step": 3882
    },
    {
      "epoch": 0.11327965458894917,
      "grad_norm": 0.8660801771507908,
      "learning_rate": 9.819300431777861e-06,
      "loss": 0.1847,
      "step": 3883
    },
    {
      "epoch": 0.11330882781959274,
      "grad_norm": 0.8873507212821025,
      "learning_rate": 9.81917454957301e-06,
      "loss": 0.1881,
      "step": 3884
    },
    {
      "epoch": 0.11333800105023631,
      "grad_norm": 0.9727903192838452,
      "learning_rate": 9.819048624343718e-06,
      "loss": 0.172,
      "step": 3885
    },
    {
      "epoch": 0.11336717428087986,
      "grad_norm": 0.9129466865242515,
      "learning_rate": 9.818922656091113e-06,
      "loss": 0.1991,
      "step": 3886
    },
    {
      "epoch": 0.11339634751152343,
      "grad_norm": 0.7584331411062796,
      "learning_rate": 9.818796644816315e-06,
      "loss": 0.1902,
      "step": 3887
    },
    {
      "epoch": 0.11342552074216698,
      "grad_norm": 0.9276645585519963,
      "learning_rate": 9.818670590520452e-06,
      "loss": 0.1941,
      "step": 3888
    },
    {
      "epoch": 0.11345469397281055,
      "grad_norm": 0.7732179536792737,
      "learning_rate": 9.818544493204647e-06,
      "loss": 0.1913,
      "step": 3889
    },
    {
      "epoch": 0.11348386720345412,
      "grad_norm": 0.7812702008459518,
      "learning_rate": 9.818418352870028e-06,
      "loss": 0.1911,
      "step": 3890
    },
    {
      "epoch": 0.11351304043409767,
      "grad_norm": 0.8628150701202014,
      "learning_rate": 9.81829216951772e-06,
      "loss": 0.179,
      "step": 3891
    },
    {
      "epoch": 0.11354221366474124,
      "grad_norm": 0.6979059627359,
      "learning_rate": 9.81816594314885e-06,
      "loss": 0.1722,
      "step": 3892
    },
    {
      "epoch": 0.11357138689538479,
      "grad_norm": 0.9831420334341316,
      "learning_rate": 9.818039673764543e-06,
      "loss": 0.1881,
      "step": 3893
    },
    {
      "epoch": 0.11360056012602836,
      "grad_norm": 0.7137533847401493,
      "learning_rate": 9.817913361365931e-06,
      "loss": 0.1912,
      "step": 3894
    },
    {
      "epoch": 0.11362973335667192,
      "grad_norm": 0.7777777033964453,
      "learning_rate": 9.817787005954136e-06,
      "loss": 0.1655,
      "step": 3895
    },
    {
      "epoch": 0.11365890658731548,
      "grad_norm": 0.8134838142745925,
      "learning_rate": 9.81766060753029e-06,
      "loss": 0.1768,
      "step": 3896
    },
    {
      "epoch": 0.11368807981795905,
      "grad_norm": 0.8617382337355313,
      "learning_rate": 9.817534166095519e-06,
      "loss": 0.1702,
      "step": 3897
    },
    {
      "epoch": 0.1137172530486026,
      "grad_norm": 0.7574232328325251,
      "learning_rate": 9.817407681650955e-06,
      "loss": 0.178,
      "step": 3898
    },
    {
      "epoch": 0.11374642627924617,
      "grad_norm": 0.967889985141796,
      "learning_rate": 9.817281154197725e-06,
      "loss": 0.1813,
      "step": 3899
    },
    {
      "epoch": 0.11377559950988972,
      "grad_norm": 0.8882074160499374,
      "learning_rate": 9.817154583736956e-06,
      "loss": 0.2091,
      "step": 3900
    },
    {
      "epoch": 0.11380477274053329,
      "grad_norm": 0.8488250995262521,
      "learning_rate": 9.817027970269783e-06,
      "loss": 0.1617,
      "step": 3901
    },
    {
      "epoch": 0.11383394597117685,
      "grad_norm": 0.9773972648043582,
      "learning_rate": 9.816901313797333e-06,
      "loss": 0.1658,
      "step": 3902
    },
    {
      "epoch": 0.1138631192018204,
      "grad_norm": 0.8364426964259626,
      "learning_rate": 9.81677461432074e-06,
      "loss": 0.1725,
      "step": 3903
    },
    {
      "epoch": 0.11389229243246397,
      "grad_norm": 0.9219903731469782,
      "learning_rate": 9.816647871841132e-06,
      "loss": 0.1689,
      "step": 3904
    },
    {
      "epoch": 0.11392146566310753,
      "grad_norm": 0.8414022118558264,
      "learning_rate": 9.816521086359641e-06,
      "loss": 0.1664,
      "step": 3905
    },
    {
      "epoch": 0.1139506388937511,
      "grad_norm": 1.1454968057716195,
      "learning_rate": 9.8163942578774e-06,
      "loss": 0.2033,
      "step": 3906
    },
    {
      "epoch": 0.11397981212439466,
      "grad_norm": 1.0569777052292275,
      "learning_rate": 9.816267386395542e-06,
      "loss": 0.182,
      "step": 3907
    },
    {
      "epoch": 0.11400898535503821,
      "grad_norm": 0.8936548661951315,
      "learning_rate": 9.816140471915196e-06,
      "loss": 0.1924,
      "step": 3908
    },
    {
      "epoch": 0.11403815858568178,
      "grad_norm": 1.0896106836331243,
      "learning_rate": 9.8160135144375e-06,
      "loss": 0.2064,
      "step": 3909
    },
    {
      "epoch": 0.11406733181632533,
      "grad_norm": 0.8439333244411857,
      "learning_rate": 9.815886513963584e-06,
      "loss": 0.1767,
      "step": 3910
    },
    {
      "epoch": 0.1140965050469689,
      "grad_norm": 1.012792895535858,
      "learning_rate": 9.815759470494582e-06,
      "loss": 0.1896,
      "step": 3911
    },
    {
      "epoch": 0.11412567827761247,
      "grad_norm": 1.1158822214582353,
      "learning_rate": 9.81563238403163e-06,
      "loss": 0.2073,
      "step": 3912
    },
    {
      "epoch": 0.11415485150825602,
      "grad_norm": 0.8168770507793907,
      "learning_rate": 9.815505254575862e-06,
      "loss": 0.1929,
      "step": 3913
    },
    {
      "epoch": 0.11418402473889959,
      "grad_norm": 1.0465396613520146,
      "learning_rate": 9.815378082128414e-06,
      "loss": 0.1842,
      "step": 3914
    },
    {
      "epoch": 0.11421319796954314,
      "grad_norm": 1.03863955639095,
      "learning_rate": 9.815250866690418e-06,
      "loss": 0.1921,
      "step": 3915
    },
    {
      "epoch": 0.11424237120018671,
      "grad_norm": 0.8759339958631058,
      "learning_rate": 9.815123608263011e-06,
      "loss": 0.1971,
      "step": 3916
    },
    {
      "epoch": 0.11427154443083028,
      "grad_norm": 0.9576286845939498,
      "learning_rate": 9.81499630684733e-06,
      "loss": 0.1914,
      "step": 3917
    },
    {
      "epoch": 0.11430071766147383,
      "grad_norm": 0.8924321060739058,
      "learning_rate": 9.814868962444512e-06,
      "loss": 0.1733,
      "step": 3918
    },
    {
      "epoch": 0.1143298908921174,
      "grad_norm": 0.9533798172660076,
      "learning_rate": 9.814741575055694e-06,
      "loss": 0.201,
      "step": 3919
    },
    {
      "epoch": 0.11435906412276095,
      "grad_norm": 0.8569490934644632,
      "learning_rate": 9.814614144682014e-06,
      "loss": 0.1709,
      "step": 3920
    },
    {
      "epoch": 0.11438823735340452,
      "grad_norm": 0.9917094580774355,
      "learning_rate": 9.814486671324604e-06,
      "loss": 0.2006,
      "step": 3921
    },
    {
      "epoch": 0.11441741058404807,
      "grad_norm": 0.9987004050751437,
      "learning_rate": 9.81435915498461e-06,
      "loss": 0.1792,
      "step": 3922
    },
    {
      "epoch": 0.11444658381469164,
      "grad_norm": 0.9412265153598919,
      "learning_rate": 9.814231595663165e-06,
      "loss": 0.1784,
      "step": 3923
    },
    {
      "epoch": 0.1144757570453352,
      "grad_norm": 0.9343015684978182,
      "learning_rate": 9.81410399336141e-06,
      "loss": 0.1866,
      "step": 3924
    },
    {
      "epoch": 0.11450493027597876,
      "grad_norm": 1.0180321162969372,
      "learning_rate": 9.813976348080484e-06,
      "loss": 0.2045,
      "step": 3925
    },
    {
      "epoch": 0.11453410350662233,
      "grad_norm": 0.7786413912112332,
      "learning_rate": 9.813848659821526e-06,
      "loss": 0.1665,
      "step": 3926
    },
    {
      "epoch": 0.11456327673726588,
      "grad_norm": 0.9569943966338017,
      "learning_rate": 9.813720928585678e-06,
      "loss": 0.1968,
      "step": 3927
    },
    {
      "epoch": 0.11459244996790945,
      "grad_norm": 0.8815021786382026,
      "learning_rate": 9.813593154374075e-06,
      "loss": 0.1662,
      "step": 3928
    },
    {
      "epoch": 0.11462162319855301,
      "grad_norm": 0.8900614397711096,
      "learning_rate": 9.813465337187864e-06,
      "loss": 0.1853,
      "step": 3929
    },
    {
      "epoch": 0.11465079642919657,
      "grad_norm": 0.8984565850328089,
      "learning_rate": 9.813337477028184e-06,
      "loss": 0.1685,
      "step": 3930
    },
    {
      "epoch": 0.11467996965984013,
      "grad_norm": 1.4587227900082587,
      "learning_rate": 9.813209573896175e-06,
      "loss": 0.2095,
      "step": 3931
    },
    {
      "epoch": 0.11470914289048369,
      "grad_norm": 1.0989382820231854,
      "learning_rate": 9.81308162779298e-06,
      "loss": 0.1766,
      "step": 3932
    },
    {
      "epoch": 0.11473831612112725,
      "grad_norm": 0.7076470715977727,
      "learning_rate": 9.812953638719741e-06,
      "loss": 0.1852,
      "step": 3933
    },
    {
      "epoch": 0.11476748935177082,
      "grad_norm": 0.8461530718765761,
      "learning_rate": 9.812825606677601e-06,
      "loss": 0.2041,
      "step": 3934
    },
    {
      "epoch": 0.11479666258241437,
      "grad_norm": 0.807544292357872,
      "learning_rate": 9.812697531667704e-06,
      "loss": 0.1848,
      "step": 3935
    },
    {
      "epoch": 0.11482583581305794,
      "grad_norm": 0.934358375278067,
      "learning_rate": 9.812569413691191e-06,
      "loss": 0.1939,
      "step": 3936
    },
    {
      "epoch": 0.1148550090437015,
      "grad_norm": 0.6569794659233321,
      "learning_rate": 9.812441252749207e-06,
      "loss": 0.176,
      "step": 3937
    },
    {
      "epoch": 0.11488418227434506,
      "grad_norm": 0.865850056802861,
      "learning_rate": 9.812313048842896e-06,
      "loss": 0.19,
      "step": 3938
    },
    {
      "epoch": 0.11491335550498863,
      "grad_norm": 1.4734153159013528,
      "learning_rate": 9.812184801973405e-06,
      "loss": 0.1671,
      "step": 3939
    },
    {
      "epoch": 0.11494252873563218,
      "grad_norm": 0.7067043431495279,
      "learning_rate": 9.812056512141875e-06,
      "loss": 0.1888,
      "step": 3940
    },
    {
      "epoch": 0.11497170196627575,
      "grad_norm": 0.7949118566718023,
      "learning_rate": 9.811928179349455e-06,
      "loss": 0.1906,
      "step": 3941
    },
    {
      "epoch": 0.1150008751969193,
      "grad_norm": 0.8324770315797557,
      "learning_rate": 9.811799803597286e-06,
      "loss": 0.1859,
      "step": 3942
    },
    {
      "epoch": 0.11503004842756287,
      "grad_norm": 0.7840093644842944,
      "learning_rate": 9.811671384886518e-06,
      "loss": 0.1884,
      "step": 3943
    },
    {
      "epoch": 0.11505922165820642,
      "grad_norm": 1.0144230590449386,
      "learning_rate": 9.811542923218298e-06,
      "loss": 0.1879,
      "step": 3944
    },
    {
      "epoch": 0.11508839488884999,
      "grad_norm": 0.9499532149031065,
      "learning_rate": 9.811414418593771e-06,
      "loss": 0.2091,
      "step": 3945
    },
    {
      "epoch": 0.11511756811949356,
      "grad_norm": 1.845968864637907,
      "learning_rate": 9.811285871014084e-06,
      "loss": 0.1768,
      "step": 3946
    },
    {
      "epoch": 0.11514674135013711,
      "grad_norm": 0.915737179055813,
      "learning_rate": 9.811157280480386e-06,
      "loss": 0.1724,
      "step": 3947
    },
    {
      "epoch": 0.11517591458078068,
      "grad_norm": 0.8715096334382696,
      "learning_rate": 9.811028646993823e-06,
      "loss": 0.1861,
      "step": 3948
    },
    {
      "epoch": 0.11520508781142423,
      "grad_norm": 1.1633558319535617,
      "learning_rate": 9.810899970555547e-06,
      "loss": 0.2188,
      "step": 3949
    },
    {
      "epoch": 0.1152342610420678,
      "grad_norm": 1.0185924503742674,
      "learning_rate": 9.810771251166702e-06,
      "loss": 0.1959,
      "step": 3950
    },
    {
      "epoch": 0.11526343427271137,
      "grad_norm": 0.9597127919256301,
      "learning_rate": 9.810642488828442e-06,
      "loss": 0.1864,
      "step": 3951
    },
    {
      "epoch": 0.11529260750335492,
      "grad_norm": 0.934634222614499,
      "learning_rate": 9.810513683541913e-06,
      "loss": 0.1681,
      "step": 3952
    },
    {
      "epoch": 0.11532178073399849,
      "grad_norm": 0.9237840016353603,
      "learning_rate": 9.810384835308266e-06,
      "loss": 0.1939,
      "step": 3953
    },
    {
      "epoch": 0.11535095396464204,
      "grad_norm": 0.849446720945563,
      "learning_rate": 9.810255944128651e-06,
      "loss": 0.1653,
      "step": 3954
    },
    {
      "epoch": 0.1153801271952856,
      "grad_norm": 1.084779195091929,
      "learning_rate": 9.81012701000422e-06,
      "loss": 0.1699,
      "step": 3955
    },
    {
      "epoch": 0.11540930042592917,
      "grad_norm": 0.944132426603004,
      "learning_rate": 9.809998032936123e-06,
      "loss": 0.1733,
      "step": 3956
    },
    {
      "epoch": 0.11543847365657273,
      "grad_norm": 0.961987504303707,
      "learning_rate": 9.809869012925512e-06,
      "loss": 0.1782,
      "step": 3957
    },
    {
      "epoch": 0.1154676468872163,
      "grad_norm": 1.0617644175619754,
      "learning_rate": 9.80973994997354e-06,
      "loss": 0.1813,
      "step": 3958
    },
    {
      "epoch": 0.11549682011785985,
      "grad_norm": 0.9413351711979291,
      "learning_rate": 9.809610844081357e-06,
      "loss": 0.167,
      "step": 3959
    },
    {
      "epoch": 0.11552599334850341,
      "grad_norm": 0.8095112512951378,
      "learning_rate": 9.809481695250116e-06,
      "loss": 0.1549,
      "step": 3960
    },
    {
      "epoch": 0.11555516657914698,
      "grad_norm": 1.0105866694575891,
      "learning_rate": 9.80935250348097e-06,
      "loss": 0.1953,
      "step": 3961
    },
    {
      "epoch": 0.11558433980979053,
      "grad_norm": 1.177664778950724,
      "learning_rate": 9.809223268775074e-06,
      "loss": 0.1852,
      "step": 3962
    },
    {
      "epoch": 0.1156135130404341,
      "grad_norm": 0.7748362871271275,
      "learning_rate": 9.80909399113358e-06,
      "loss": 0.1861,
      "step": 3963
    },
    {
      "epoch": 0.11564268627107765,
      "grad_norm": 0.8218146313453756,
      "learning_rate": 9.808964670557643e-06,
      "loss": 0.1603,
      "step": 3964
    },
    {
      "epoch": 0.11567185950172122,
      "grad_norm": 1.1794113164782682,
      "learning_rate": 9.80883530704842e-06,
      "loss": 0.1827,
      "step": 3965
    },
    {
      "epoch": 0.11570103273236478,
      "grad_norm": 0.7130535387940719,
      "learning_rate": 9.808705900607058e-06,
      "loss": 0.1541,
      "step": 3966
    },
    {
      "epoch": 0.11573020596300834,
      "grad_norm": 0.9235360139481987,
      "learning_rate": 9.808576451234721e-06,
      "loss": 0.179,
      "step": 3967
    },
    {
      "epoch": 0.11575937919365191,
      "grad_norm": 1.1301699140384471,
      "learning_rate": 9.80844695893256e-06,
      "loss": 0.1914,
      "step": 3968
    },
    {
      "epoch": 0.11578855242429546,
      "grad_norm": 0.8260365779849368,
      "learning_rate": 9.808317423701735e-06,
      "loss": 0.1531,
      "step": 3969
    },
    {
      "epoch": 0.11581772565493903,
      "grad_norm": 0.8304113067468187,
      "learning_rate": 9.808187845543397e-06,
      "loss": 0.1969,
      "step": 3970
    },
    {
      "epoch": 0.11584689888558258,
      "grad_norm": 0.9679207163452974,
      "learning_rate": 9.808058224458708e-06,
      "loss": 0.1897,
      "step": 3971
    },
    {
      "epoch": 0.11587607211622615,
      "grad_norm": 0.8470787888221096,
      "learning_rate": 9.807928560448822e-06,
      "loss": 0.1729,
      "step": 3972
    },
    {
      "epoch": 0.11590524534686972,
      "grad_norm": 0.9483774764824003,
      "learning_rate": 9.807798853514898e-06,
      "loss": 0.1717,
      "step": 3973
    },
    {
      "epoch": 0.11593441857751327,
      "grad_norm": 0.8434783474399654,
      "learning_rate": 9.807669103658092e-06,
      "loss": 0.187,
      "step": 3974
    },
    {
      "epoch": 0.11596359180815684,
      "grad_norm": 1.138839474688897,
      "learning_rate": 9.807539310879566e-06,
      "loss": 0.2028,
      "step": 3975
    },
    {
      "epoch": 0.11599276503880039,
      "grad_norm": 0.8098813997017877,
      "learning_rate": 9.807409475180476e-06,
      "loss": 0.1715,
      "step": 3976
    },
    {
      "epoch": 0.11602193826944396,
      "grad_norm": 0.8476295399786722,
      "learning_rate": 9.80727959656198e-06,
      "loss": 0.1742,
      "step": 3977
    },
    {
      "epoch": 0.11605111150008753,
      "grad_norm": 1.0189550480133291,
      "learning_rate": 9.807149675025242e-06,
      "loss": 0.1936,
      "step": 3978
    },
    {
      "epoch": 0.11608028473073108,
      "grad_norm": 0.9583739475972958,
      "learning_rate": 9.807019710571418e-06,
      "loss": 0.1925,
      "step": 3979
    },
    {
      "epoch": 0.11610945796137465,
      "grad_norm": 0.947373128348233,
      "learning_rate": 9.80688970320167e-06,
      "loss": 0.184,
      "step": 3980
    },
    {
      "epoch": 0.1161386311920182,
      "grad_norm": 1.0217865886958784,
      "learning_rate": 9.806759652917157e-06,
      "loss": 0.194,
      "step": 3981
    },
    {
      "epoch": 0.11616780442266177,
      "grad_norm": 0.9091592305269642,
      "learning_rate": 9.806629559719042e-06,
      "loss": 0.1809,
      "step": 3982
    },
    {
      "epoch": 0.11619697765330533,
      "grad_norm": 0.927284884626031,
      "learning_rate": 9.806499423608486e-06,
      "loss": 0.1788,
      "step": 3983
    },
    {
      "epoch": 0.11622615088394889,
      "grad_norm": 1.0614083593915533,
      "learning_rate": 9.80636924458665e-06,
      "loss": 0.1802,
      "step": 3984
    },
    {
      "epoch": 0.11625532411459245,
      "grad_norm": 0.8477671183172,
      "learning_rate": 9.806239022654699e-06,
      "loss": 0.2047,
      "step": 3985
    },
    {
      "epoch": 0.11628449734523601,
      "grad_norm": 1.0228600825612724,
      "learning_rate": 9.80610875781379e-06,
      "loss": 0.1742,
      "step": 3986
    },
    {
      "epoch": 0.11631367057587957,
      "grad_norm": 0.992616764441537,
      "learning_rate": 9.805978450065092e-06,
      "loss": 0.1681,
      "step": 3987
    },
    {
      "epoch": 0.11634284380652314,
      "grad_norm": 0.9306633160170079,
      "learning_rate": 9.805848099409765e-06,
      "loss": 0.1861,
      "step": 3988
    },
    {
      "epoch": 0.1163720170371667,
      "grad_norm": 1.0939088043077234,
      "learning_rate": 9.805717705848972e-06,
      "loss": 0.1747,
      "step": 3989
    },
    {
      "epoch": 0.11640119026781026,
      "grad_norm": 0.8550016996193792,
      "learning_rate": 9.805587269383881e-06,
      "loss": 0.1892,
      "step": 3990
    },
    {
      "epoch": 0.11643036349845381,
      "grad_norm": 1.0985057177862256,
      "learning_rate": 9.805456790015652e-06,
      "loss": 0.1952,
      "step": 3991
    },
    {
      "epoch": 0.11645953672909738,
      "grad_norm": 1.2032884200598375,
      "learning_rate": 9.805326267745451e-06,
      "loss": 0.1862,
      "step": 3992
    },
    {
      "epoch": 0.11648870995974094,
      "grad_norm": 0.8801044670755954,
      "learning_rate": 9.805195702574446e-06,
      "loss": 0.1677,
      "step": 3993
    },
    {
      "epoch": 0.1165178831903845,
      "grad_norm": 0.9838842619636844,
      "learning_rate": 9.805065094503801e-06,
      "loss": 0.2165,
      "step": 3994
    },
    {
      "epoch": 0.11654705642102807,
      "grad_norm": 1.036174236277911,
      "learning_rate": 9.804934443534682e-06,
      "loss": 0.1717,
      "step": 3995
    },
    {
      "epoch": 0.11657622965167162,
      "grad_norm": 1.0365517572217218,
      "learning_rate": 9.804803749668254e-06,
      "loss": 0.182,
      "step": 3996
    },
    {
      "epoch": 0.11660540288231519,
      "grad_norm": 1.2389885107002385,
      "learning_rate": 9.804673012905686e-06,
      "loss": 0.2106,
      "step": 3997
    },
    {
      "epoch": 0.11663457611295874,
      "grad_norm": 0.908669887314025,
      "learning_rate": 9.804542233248144e-06,
      "loss": 0.1792,
      "step": 3998
    },
    {
      "epoch": 0.11666374934360231,
      "grad_norm": 0.7735821706347971,
      "learning_rate": 9.804411410696797e-06,
      "loss": 0.1682,
      "step": 3999
    },
    {
      "epoch": 0.11669292257424588,
      "grad_norm": 0.9761508165663874,
      "learning_rate": 9.804280545252812e-06,
      "loss": 0.1963,
      "step": 4000
    },
    {
      "epoch": 0.11672209580488943,
      "grad_norm": 0.8864981232126092,
      "learning_rate": 9.804149636917355e-06,
      "loss": 0.1899,
      "step": 4001
    },
    {
      "epoch": 0.116751269035533,
      "grad_norm": 0.8001310128164034,
      "learning_rate": 9.8040186856916e-06,
      "loss": 0.1836,
      "step": 4002
    },
    {
      "epoch": 0.11678044226617655,
      "grad_norm": 0.8814383640876319,
      "learning_rate": 9.80388769157671e-06,
      "loss": 0.1788,
      "step": 4003
    },
    {
      "epoch": 0.11680961549682012,
      "grad_norm": 0.8520770186053725,
      "learning_rate": 9.803756654573857e-06,
      "loss": 0.1771,
      "step": 4004
    },
    {
      "epoch": 0.11683878872746369,
      "grad_norm": 0.880540957617837,
      "learning_rate": 9.803625574684213e-06,
      "loss": 0.1825,
      "step": 4005
    },
    {
      "epoch": 0.11686796195810724,
      "grad_norm": 0.9309486508322999,
      "learning_rate": 9.803494451908946e-06,
      "loss": 0.1881,
      "step": 4006
    },
    {
      "epoch": 0.1168971351887508,
      "grad_norm": 0.8613043448542246,
      "learning_rate": 9.803363286249228e-06,
      "loss": 0.1665,
      "step": 4007
    },
    {
      "epoch": 0.11692630841939436,
      "grad_norm": 0.96365789331625,
      "learning_rate": 9.803232077706229e-06,
      "loss": 0.1835,
      "step": 4008
    },
    {
      "epoch": 0.11695548165003793,
      "grad_norm": 0.8145166726519757,
      "learning_rate": 9.80310082628112e-06,
      "loss": 0.1949,
      "step": 4009
    },
    {
      "epoch": 0.1169846548806815,
      "grad_norm": 0.8066981054531861,
      "learning_rate": 9.802969531975074e-06,
      "loss": 0.1774,
      "step": 4010
    },
    {
      "epoch": 0.11701382811132505,
      "grad_norm": 0.8025106983913187,
      "learning_rate": 9.802838194789264e-06,
      "loss": 0.1775,
      "step": 4011
    },
    {
      "epoch": 0.11704300134196861,
      "grad_norm": 0.7545697971668403,
      "learning_rate": 9.802706814724857e-06,
      "loss": 0.1826,
      "step": 4012
    },
    {
      "epoch": 0.11707217457261217,
      "grad_norm": 0.8854062913148097,
      "learning_rate": 9.802575391783033e-06,
      "loss": 0.1763,
      "step": 4013
    },
    {
      "epoch": 0.11710134780325573,
      "grad_norm": 0.886431520686121,
      "learning_rate": 9.802443925964963e-06,
      "loss": 0.1914,
      "step": 4014
    },
    {
      "epoch": 0.11713052103389929,
      "grad_norm": 0.819842833339432,
      "learning_rate": 9.80231241727182e-06,
      "loss": 0.1723,
      "step": 4015
    },
    {
      "epoch": 0.11715969426454285,
      "grad_norm": 0.777312991272999,
      "learning_rate": 9.802180865704775e-06,
      "loss": 0.1766,
      "step": 4016
    },
    {
      "epoch": 0.11718886749518642,
      "grad_norm": 0.9257122523737836,
      "learning_rate": 9.80204927126501e-06,
      "loss": 0.1825,
      "step": 4017
    },
    {
      "epoch": 0.11721804072582998,
      "grad_norm": 0.8258775242878535,
      "learning_rate": 9.801917633953693e-06,
      "loss": 0.1899,
      "step": 4018
    },
    {
      "epoch": 0.11724721395647354,
      "grad_norm": 0.7616173750474217,
      "learning_rate": 9.801785953772001e-06,
      "loss": 0.1886,
      "step": 4019
    },
    {
      "epoch": 0.1172763871871171,
      "grad_norm": 0.9280170484458173,
      "learning_rate": 9.801654230721111e-06,
      "loss": 0.2046,
      "step": 4020
    },
    {
      "epoch": 0.11730556041776066,
      "grad_norm": 0.9799221946806403,
      "learning_rate": 9.801522464802199e-06,
      "loss": 0.1842,
      "step": 4021
    },
    {
      "epoch": 0.11733473364840423,
      "grad_norm": 0.8179915557194807,
      "learning_rate": 9.80139065601644e-06,
      "loss": 0.1696,
      "step": 4022
    },
    {
      "epoch": 0.11736390687904778,
      "grad_norm": 0.8595657911189106,
      "learning_rate": 9.801258804365013e-06,
      "loss": 0.1735,
      "step": 4023
    },
    {
      "epoch": 0.11739308010969135,
      "grad_norm": 0.9495398536792797,
      "learning_rate": 9.80112690984909e-06,
      "loss": 0.1671,
      "step": 4024
    },
    {
      "epoch": 0.1174222533403349,
      "grad_norm": 1.096707425460663,
      "learning_rate": 9.800994972469855e-06,
      "loss": 0.1657,
      "step": 4025
    },
    {
      "epoch": 0.11745142657097847,
      "grad_norm": 0.8307152374186523,
      "learning_rate": 9.800862992228481e-06,
      "loss": 0.1806,
      "step": 4026
    },
    {
      "epoch": 0.11748059980162204,
      "grad_norm": 0.6971424405215413,
      "learning_rate": 9.800730969126151e-06,
      "loss": 0.1627,
      "step": 4027
    },
    {
      "epoch": 0.11750977303226559,
      "grad_norm": 1.1698168537208269,
      "learning_rate": 9.800598903164039e-06,
      "loss": 0.1918,
      "step": 4028
    },
    {
      "epoch": 0.11753894626290916,
      "grad_norm": 0.8671636053545344,
      "learning_rate": 9.800466794343326e-06,
      "loss": 0.2005,
      "step": 4029
    },
    {
      "epoch": 0.11756811949355271,
      "grad_norm": 0.7690461245297601,
      "learning_rate": 9.800334642665193e-06,
      "loss": 0.1822,
      "step": 4030
    },
    {
      "epoch": 0.11759729272419628,
      "grad_norm": 0.8673928340155402,
      "learning_rate": 9.800202448130816e-06,
      "loss": 0.1686,
      "step": 4031
    },
    {
      "epoch": 0.11762646595483985,
      "grad_norm": 1.0113940236773769,
      "learning_rate": 9.80007021074138e-06,
      "loss": 0.1915,
      "step": 4032
    },
    {
      "epoch": 0.1176556391854834,
      "grad_norm": 0.835804990942079,
      "learning_rate": 9.79993793049806e-06,
      "loss": 0.1878,
      "step": 4033
    },
    {
      "epoch": 0.11768481241612697,
      "grad_norm": 1.055479587939155,
      "learning_rate": 9.799805607402042e-06,
      "loss": 0.1573,
      "step": 4034
    },
    {
      "epoch": 0.11771398564677052,
      "grad_norm": 0.9922774040757671,
      "learning_rate": 9.799673241454504e-06,
      "loss": 0.1709,
      "step": 4035
    },
    {
      "epoch": 0.11774315887741409,
      "grad_norm": 0.8400929161992129,
      "learning_rate": 9.79954083265663e-06,
      "loss": 0.1608,
      "step": 4036
    },
    {
      "epoch": 0.11777233210805764,
      "grad_norm": 1.0264751256358728,
      "learning_rate": 9.7994083810096e-06,
      "loss": 0.184,
      "step": 4037
    },
    {
      "epoch": 0.1178015053387012,
      "grad_norm": 0.9068214960259822,
      "learning_rate": 9.799275886514599e-06,
      "loss": 0.1754,
      "step": 4038
    },
    {
      "epoch": 0.11783067856934477,
      "grad_norm": 1.096105259722257,
      "learning_rate": 9.799143349172809e-06,
      "loss": 0.1583,
      "step": 4039
    },
    {
      "epoch": 0.11785985179998833,
      "grad_norm": 0.7592187551813588,
      "learning_rate": 9.799010768985413e-06,
      "loss": 0.1569,
      "step": 4040
    },
    {
      "epoch": 0.1178890250306319,
      "grad_norm": 0.9843277352889542,
      "learning_rate": 9.798878145953592e-06,
      "loss": 0.2049,
      "step": 4041
    },
    {
      "epoch": 0.11791819826127545,
      "grad_norm": 1.2825858480708008,
      "learning_rate": 9.798745480078535e-06,
      "loss": 0.1993,
      "step": 4042
    },
    {
      "epoch": 0.11794737149191901,
      "grad_norm": 0.8415361714071162,
      "learning_rate": 9.798612771361423e-06,
      "loss": 0.1884,
      "step": 4043
    },
    {
      "epoch": 0.11797654472256258,
      "grad_norm": 0.68541036268047,
      "learning_rate": 9.798480019803442e-06,
      "loss": 0.1677,
      "step": 4044
    },
    {
      "epoch": 0.11800571795320614,
      "grad_norm": 0.8833576539239999,
      "learning_rate": 9.798347225405777e-06,
      "loss": 0.1775,
      "step": 4045
    },
    {
      "epoch": 0.1180348911838497,
      "grad_norm": 0.7764367365558691,
      "learning_rate": 9.798214388169613e-06,
      "loss": 0.1793,
      "step": 4046
    },
    {
      "epoch": 0.11806406441449326,
      "grad_norm": 0.7572082043070054,
      "learning_rate": 9.798081508096135e-06,
      "loss": 0.1672,
      "step": 4047
    },
    {
      "epoch": 0.11809323764513682,
      "grad_norm": 0.7607281881986289,
      "learning_rate": 9.797948585186533e-06,
      "loss": 0.178,
      "step": 4048
    },
    {
      "epoch": 0.11812241087578039,
      "grad_norm": 1.0620435903387817,
      "learning_rate": 9.79781561944199e-06,
      "loss": 0.2079,
      "step": 4049
    },
    {
      "epoch": 0.11815158410642394,
      "grad_norm": 0.7446112541942896,
      "learning_rate": 9.797682610863695e-06,
      "loss": 0.171,
      "step": 4050
    },
    {
      "epoch": 0.11818075733706751,
      "grad_norm": 0.7523181824036657,
      "learning_rate": 9.797549559452835e-06,
      "loss": 0.1609,
      "step": 4051
    },
    {
      "epoch": 0.11820993056771106,
      "grad_norm": 0.7258478653299077,
      "learning_rate": 9.797416465210599e-06,
      "loss": 0.1728,
      "step": 4052
    },
    {
      "epoch": 0.11823910379835463,
      "grad_norm": 0.788248929247024,
      "learning_rate": 9.797283328138172e-06,
      "loss": 0.1783,
      "step": 4053
    },
    {
      "epoch": 0.1182682770289982,
      "grad_norm": 1.0221899508346437,
      "learning_rate": 9.797150148236744e-06,
      "loss": 0.1922,
      "step": 4054
    },
    {
      "epoch": 0.11829745025964175,
      "grad_norm": 0.8504084637091053,
      "learning_rate": 9.797016925507507e-06,
      "loss": 0.195,
      "step": 4055
    },
    {
      "epoch": 0.11832662349028532,
      "grad_norm": 0.8017278695647965,
      "learning_rate": 9.796883659951648e-06,
      "loss": 0.1966,
      "step": 4056
    },
    {
      "epoch": 0.11835579672092887,
      "grad_norm": 1.1323548778598158,
      "learning_rate": 9.796750351570355e-06,
      "loss": 0.1683,
      "step": 4057
    },
    {
      "epoch": 0.11838496995157244,
      "grad_norm": 1.1258246439497628,
      "learning_rate": 9.79661700036482e-06,
      "loss": 0.1936,
      "step": 4058
    },
    {
      "epoch": 0.11841414318221599,
      "grad_norm": 0.8904428235802615,
      "learning_rate": 9.796483606336235e-06,
      "loss": 0.1841,
      "step": 4059
    },
    {
      "epoch": 0.11844331641285956,
      "grad_norm": 1.1806870222346555,
      "learning_rate": 9.796350169485789e-06,
      "loss": 0.1864,
      "step": 4060
    },
    {
      "epoch": 0.11847248964350313,
      "grad_norm": 1.0329696619723054,
      "learning_rate": 9.796216689814672e-06,
      "loss": 0.1982,
      "step": 4061
    },
    {
      "epoch": 0.11850166287414668,
      "grad_norm": 0.9676735357582843,
      "learning_rate": 9.79608316732408e-06,
      "loss": 0.1914,
      "step": 4062
    },
    {
      "epoch": 0.11853083610479025,
      "grad_norm": 0.8864614157739367,
      "learning_rate": 9.7959496020152e-06,
      "loss": 0.1786,
      "step": 4063
    },
    {
      "epoch": 0.1185600093354338,
      "grad_norm": 1.0882041126738569,
      "learning_rate": 9.795815993889229e-06,
      "loss": 0.1964,
      "step": 4064
    },
    {
      "epoch": 0.11858918256607737,
      "grad_norm": 0.8471911828856352,
      "learning_rate": 9.795682342947356e-06,
      "loss": 0.1833,
      "step": 4065
    },
    {
      "epoch": 0.11861835579672093,
      "grad_norm": 0.7909408846675637,
      "learning_rate": 9.795548649190777e-06,
      "loss": 0.1761,
      "step": 4066
    },
    {
      "epoch": 0.11864752902736449,
      "grad_norm": 0.9649875727522704,
      "learning_rate": 9.795414912620685e-06,
      "loss": 0.1947,
      "step": 4067
    },
    {
      "epoch": 0.11867670225800805,
      "grad_norm": 0.8506455621724606,
      "learning_rate": 9.79528113323827e-06,
      "loss": 0.1786,
      "step": 4068
    },
    {
      "epoch": 0.11870587548865161,
      "grad_norm": 0.7179979463357128,
      "learning_rate": 9.795147311044732e-06,
      "loss": 0.1874,
      "step": 4069
    },
    {
      "epoch": 0.11873504871929517,
      "grad_norm": 0.8867828001423357,
      "learning_rate": 9.795013446041264e-06,
      "loss": 0.177,
      "step": 4070
    },
    {
      "epoch": 0.11876422194993874,
      "grad_norm": 0.7854471935045214,
      "learning_rate": 9.79487953822906e-06,
      "loss": 0.1785,
      "step": 4071
    },
    {
      "epoch": 0.1187933951805823,
      "grad_norm": 0.8486496225340521,
      "learning_rate": 9.794745587609318e-06,
      "loss": 0.1735,
      "step": 4072
    },
    {
      "epoch": 0.11882256841122586,
      "grad_norm": 0.8685026532048004,
      "learning_rate": 9.794611594183229e-06,
      "loss": 0.1722,
      "step": 4073
    },
    {
      "epoch": 0.11885174164186942,
      "grad_norm": 0.9528898964468909,
      "learning_rate": 9.794477557951993e-06,
      "loss": 0.1803,
      "step": 4074
    },
    {
      "epoch": 0.11888091487251298,
      "grad_norm": 0.8342324505777576,
      "learning_rate": 9.794343478916807e-06,
      "loss": 0.1779,
      "step": 4075
    },
    {
      "epoch": 0.11891008810315655,
      "grad_norm": 0.7742586727436434,
      "learning_rate": 9.794209357078867e-06,
      "loss": 0.2007,
      "step": 4076
    },
    {
      "epoch": 0.1189392613338001,
      "grad_norm": 0.9091465661381676,
      "learning_rate": 9.79407519243937e-06,
      "loss": 0.1842,
      "step": 4077
    },
    {
      "epoch": 0.11896843456444367,
      "grad_norm": 0.7469480493965509,
      "learning_rate": 9.793940984999512e-06,
      "loss": 0.1462,
      "step": 4078
    },
    {
      "epoch": 0.11899760779508722,
      "grad_norm": 0.8886994889203368,
      "learning_rate": 9.793806734760496e-06,
      "loss": 0.1947,
      "step": 4079
    },
    {
      "epoch": 0.11902678102573079,
      "grad_norm": 0.965534648887828,
      "learning_rate": 9.793672441723515e-06,
      "loss": 0.1827,
      "step": 4080
    },
    {
      "epoch": 0.11905595425637436,
      "grad_norm": 0.9257063083089873,
      "learning_rate": 9.793538105889775e-06,
      "loss": 0.1761,
      "step": 4081
    },
    {
      "epoch": 0.11908512748701791,
      "grad_norm": 1.2208151541428913,
      "learning_rate": 9.79340372726047e-06,
      "loss": 0.1908,
      "step": 4082
    },
    {
      "epoch": 0.11911430071766148,
      "grad_norm": 0.9993951765731436,
      "learning_rate": 9.793269305836799e-06,
      "loss": 0.191,
      "step": 4083
    },
    {
      "epoch": 0.11914347394830503,
      "grad_norm": 0.8280227146034735,
      "learning_rate": 9.793134841619964e-06,
      "loss": 0.1623,
      "step": 4084
    },
    {
      "epoch": 0.1191726471789486,
      "grad_norm": 1.3120390396897854,
      "learning_rate": 9.793000334611166e-06,
      "loss": 0.1563,
      "step": 4085
    },
    {
      "epoch": 0.11920182040959215,
      "grad_norm": 0.8928568609837696,
      "learning_rate": 9.792865784811604e-06,
      "loss": 0.1685,
      "step": 4086
    },
    {
      "epoch": 0.11923099364023572,
      "grad_norm": 1.0876694985665638,
      "learning_rate": 9.792731192222482e-06,
      "loss": 0.1704,
      "step": 4087
    },
    {
      "epoch": 0.11926016687087929,
      "grad_norm": 0.8772309262531918,
      "learning_rate": 9.792596556845e-06,
      "loss": 0.1743,
      "step": 4088
    },
    {
      "epoch": 0.11928934010152284,
      "grad_norm": 0.8882982565596025,
      "learning_rate": 9.79246187868036e-06,
      "loss": 0.1548,
      "step": 4089
    },
    {
      "epoch": 0.1193185133321664,
      "grad_norm": 0.7598220770302359,
      "learning_rate": 9.792327157729762e-06,
      "loss": 0.1719,
      "step": 4090
    },
    {
      "epoch": 0.11934768656280996,
      "grad_norm": 0.8979567040089305,
      "learning_rate": 9.792192393994415e-06,
      "loss": 0.1703,
      "step": 4091
    },
    {
      "epoch": 0.11937685979345353,
      "grad_norm": 1.0627609739083699,
      "learning_rate": 9.792057587475516e-06,
      "loss": 0.1821,
      "step": 4092
    },
    {
      "epoch": 0.1194060330240971,
      "grad_norm": 0.9182593943569555,
      "learning_rate": 9.79192273817427e-06,
      "loss": 0.1948,
      "step": 4093
    },
    {
      "epoch": 0.11943520625474065,
      "grad_norm": 0.8349549529579332,
      "learning_rate": 9.791787846091883e-06,
      "loss": 0.199,
      "step": 4094
    },
    {
      "epoch": 0.11946437948538421,
      "grad_norm": 0.9436604013645415,
      "learning_rate": 9.79165291122956e-06,
      "loss": 0.192,
      "step": 4095
    },
    {
      "epoch": 0.11949355271602777,
      "grad_norm": 0.8038645009853541,
      "learning_rate": 9.7915179335885e-06,
      "loss": 0.1638,
      "step": 4096
    },
    {
      "epoch": 0.11952272594667133,
      "grad_norm": 0.9705876176402898,
      "learning_rate": 9.791382913169913e-06,
      "loss": 0.1903,
      "step": 4097
    },
    {
      "epoch": 0.1195518991773149,
      "grad_norm": 0.8509804031952122,
      "learning_rate": 9.791247849975003e-06,
      "loss": 0.2078,
      "step": 4098
    },
    {
      "epoch": 0.11958107240795846,
      "grad_norm": 1.141298404017863,
      "learning_rate": 9.791112744004979e-06,
      "loss": 0.1825,
      "step": 4099
    },
    {
      "epoch": 0.11961024563860202,
      "grad_norm": 0.8444499592388437,
      "learning_rate": 9.79097759526104e-06,
      "loss": 0.1968,
      "step": 4100
    },
    {
      "epoch": 0.11963941886924558,
      "grad_norm": 0.8467549660289233,
      "learning_rate": 9.790842403744398e-06,
      "loss": 0.1741,
      "step": 4101
    },
    {
      "epoch": 0.11966859209988914,
      "grad_norm": 0.8479821220200611,
      "learning_rate": 9.79070716945626e-06,
      "loss": 0.1772,
      "step": 4102
    },
    {
      "epoch": 0.11969776533053271,
      "grad_norm": 0.8502502276293883,
      "learning_rate": 9.79057189239783e-06,
      "loss": 0.1899,
      "step": 4103
    },
    {
      "epoch": 0.11972693856117626,
      "grad_norm": 0.7865772133425387,
      "learning_rate": 9.790436572570319e-06,
      "loss": 0.1626,
      "step": 4104
    },
    {
      "epoch": 0.11975611179181983,
      "grad_norm": 0.962058000668101,
      "learning_rate": 9.790301209974932e-06,
      "loss": 0.1824,
      "step": 4105
    },
    {
      "epoch": 0.11978528502246338,
      "grad_norm": 0.7708385284061801,
      "learning_rate": 9.790165804612882e-06,
      "loss": 0.1852,
      "step": 4106
    },
    {
      "epoch": 0.11981445825310695,
      "grad_norm": 0.9232131298020444,
      "learning_rate": 9.790030356485374e-06,
      "loss": 0.1766,
      "step": 4107
    },
    {
      "epoch": 0.1198436314837505,
      "grad_norm": 0.8298980115415907,
      "learning_rate": 9.789894865593619e-06,
      "loss": 0.1662,
      "step": 4108
    },
    {
      "epoch": 0.11987280471439407,
      "grad_norm": 0.7722703020740381,
      "learning_rate": 9.789759331938826e-06,
      "loss": 0.165,
      "step": 4109
    },
    {
      "epoch": 0.11990197794503764,
      "grad_norm": 1.0514972642459577,
      "learning_rate": 9.789623755522204e-06,
      "loss": 0.1822,
      "step": 4110
    },
    {
      "epoch": 0.11993115117568119,
      "grad_norm": 0.6943035354131065,
      "learning_rate": 9.789488136344966e-06,
      "loss": 0.1652,
      "step": 4111
    },
    {
      "epoch": 0.11996032440632476,
      "grad_norm": 0.8219632672017334,
      "learning_rate": 9.78935247440832e-06,
      "loss": 0.1632,
      "step": 4112
    },
    {
      "epoch": 0.11998949763696831,
      "grad_norm": 0.9181735980006912,
      "learning_rate": 9.789216769713479e-06,
      "loss": 0.1691,
      "step": 4113
    },
    {
      "epoch": 0.12001867086761188,
      "grad_norm": 1.042590637169085,
      "learning_rate": 9.789081022261654e-06,
      "loss": 0.2045,
      "step": 4114
    },
    {
      "epoch": 0.12004784409825545,
      "grad_norm": 0.7952694115501756,
      "learning_rate": 9.788945232054056e-06,
      "loss": 0.1585,
      "step": 4115
    },
    {
      "epoch": 0.120077017328899,
      "grad_norm": 0.6729379380871152,
      "learning_rate": 9.788809399091899e-06,
      "loss": 0.1787,
      "step": 4116
    },
    {
      "epoch": 0.12010619055954257,
      "grad_norm": 1.5782806905729891,
      "learning_rate": 9.788673523376396e-06,
      "loss": 0.1917,
      "step": 4117
    },
    {
      "epoch": 0.12013536379018612,
      "grad_norm": 0.9286855269409086,
      "learning_rate": 9.788537604908756e-06,
      "loss": 0.1906,
      "step": 4118
    },
    {
      "epoch": 0.12016453702082969,
      "grad_norm": 1.1332457540301297,
      "learning_rate": 9.788401643690197e-06,
      "loss": 0.1729,
      "step": 4119
    },
    {
      "epoch": 0.12019371025147325,
      "grad_norm": 0.9568786266958319,
      "learning_rate": 9.788265639721932e-06,
      "loss": 0.1785,
      "step": 4120
    },
    {
      "epoch": 0.12022288348211681,
      "grad_norm": 0.8098561999190378,
      "learning_rate": 9.788129593005174e-06,
      "loss": 0.19,
      "step": 4121
    },
    {
      "epoch": 0.12025205671276037,
      "grad_norm": 1.0386085787577433,
      "learning_rate": 9.787993503541137e-06,
      "loss": 0.1847,
      "step": 4122
    },
    {
      "epoch": 0.12028122994340393,
      "grad_norm": 0.792221227936919,
      "learning_rate": 9.787857371331039e-06,
      "loss": 0.2145,
      "step": 4123
    },
    {
      "epoch": 0.1203104031740475,
      "grad_norm": 1.0271046503439851,
      "learning_rate": 9.787721196376092e-06,
      "loss": 0.1812,
      "step": 4124
    },
    {
      "epoch": 0.12033957640469106,
      "grad_norm": 0.8288151132963502,
      "learning_rate": 9.787584978677514e-06,
      "loss": 0.192,
      "step": 4125
    },
    {
      "epoch": 0.12036874963533462,
      "grad_norm": 0.8788552241854136,
      "learning_rate": 9.787448718236519e-06,
      "loss": 0.1728,
      "step": 4126
    },
    {
      "epoch": 0.12039792286597818,
      "grad_norm": 0.9221041884990281,
      "learning_rate": 9.787312415054325e-06,
      "loss": 0.184,
      "step": 4127
    },
    {
      "epoch": 0.12042709609662174,
      "grad_norm": 0.8464018185918841,
      "learning_rate": 9.787176069132149e-06,
      "loss": 0.1595,
      "step": 4128
    },
    {
      "epoch": 0.1204562693272653,
      "grad_norm": 0.9061550624397733,
      "learning_rate": 9.787039680471206e-06,
      "loss": 0.1811,
      "step": 4129
    },
    {
      "epoch": 0.12048544255790886,
      "grad_norm": 0.9436511449768542,
      "learning_rate": 9.786903249072717e-06,
      "loss": 0.1601,
      "step": 4130
    },
    {
      "epoch": 0.12051461578855242,
      "grad_norm": 0.8521825288934504,
      "learning_rate": 9.786766774937898e-06,
      "loss": 0.206,
      "step": 4131
    },
    {
      "epoch": 0.12054378901919599,
      "grad_norm": 1.3319908623471917,
      "learning_rate": 9.78663025806797e-06,
      "loss": 0.166,
      "step": 4132
    },
    {
      "epoch": 0.12057296224983954,
      "grad_norm": 1.3676056687058058,
      "learning_rate": 9.786493698464149e-06,
      "loss": 0.1735,
      "step": 4133
    },
    {
      "epoch": 0.12060213548048311,
      "grad_norm": 0.9383909096438793,
      "learning_rate": 9.786357096127652e-06,
      "loss": 0.1817,
      "step": 4134
    },
    {
      "epoch": 0.12063130871112666,
      "grad_norm": 1.2318892210635184,
      "learning_rate": 9.786220451059704e-06,
      "loss": 0.1979,
      "step": 4135
    },
    {
      "epoch": 0.12066048194177023,
      "grad_norm": 0.9014131174008452,
      "learning_rate": 9.786083763261522e-06,
      "loss": 0.1712,
      "step": 4136
    },
    {
      "epoch": 0.1206896551724138,
      "grad_norm": 0.8132711903801089,
      "learning_rate": 9.785947032734326e-06,
      "loss": 0.1873,
      "step": 4137
    },
    {
      "epoch": 0.12071882840305735,
      "grad_norm": 0.8642170649113723,
      "learning_rate": 9.785810259479337e-06,
      "loss": 0.1724,
      "step": 4138
    },
    {
      "epoch": 0.12074800163370092,
      "grad_norm": 0.8133536176102205,
      "learning_rate": 9.785673443497779e-06,
      "loss": 0.1635,
      "step": 4139
    },
    {
      "epoch": 0.12077717486434447,
      "grad_norm": 0.8421766063801384,
      "learning_rate": 9.785536584790869e-06,
      "loss": 0.1992,
      "step": 4140
    },
    {
      "epoch": 0.12080634809498804,
      "grad_norm": 0.8301407227670122,
      "learning_rate": 9.78539968335983e-06,
      "loss": 0.1728,
      "step": 4141
    },
    {
      "epoch": 0.1208355213256316,
      "grad_norm": 0.8329694079632481,
      "learning_rate": 9.785262739205887e-06,
      "loss": 0.172,
      "step": 4142
    },
    {
      "epoch": 0.12086469455627516,
      "grad_norm": 0.8015332815545945,
      "learning_rate": 9.78512575233026e-06,
      "loss": 0.1815,
      "step": 4143
    },
    {
      "epoch": 0.12089386778691873,
      "grad_norm": 0.7640230894037681,
      "learning_rate": 9.784988722734172e-06,
      "loss": 0.1662,
      "step": 4144
    },
    {
      "epoch": 0.12092304101756228,
      "grad_norm": 0.7414765506748662,
      "learning_rate": 9.784851650418847e-06,
      "loss": 0.1756,
      "step": 4145
    },
    {
      "epoch": 0.12095221424820585,
      "grad_norm": 0.8478198725277386,
      "learning_rate": 9.784714535385509e-06,
      "loss": 0.1594,
      "step": 4146
    },
    {
      "epoch": 0.12098138747884941,
      "grad_norm": 1.2695178645519447,
      "learning_rate": 9.784577377635382e-06,
      "loss": 0.1631,
      "step": 4147
    },
    {
      "epoch": 0.12101056070949297,
      "grad_norm": 0.7650605253914328,
      "learning_rate": 9.784440177169689e-06,
      "loss": 0.1609,
      "step": 4148
    },
    {
      "epoch": 0.12103973394013653,
      "grad_norm": 1.0276395784864185,
      "learning_rate": 9.784302933989657e-06,
      "loss": 0.2238,
      "step": 4149
    },
    {
      "epoch": 0.12106890717078009,
      "grad_norm": 0.8494424714848874,
      "learning_rate": 9.784165648096514e-06,
      "loss": 0.1882,
      "step": 4150
    },
    {
      "epoch": 0.12109808040142366,
      "grad_norm": 0.8593437139387499,
      "learning_rate": 9.784028319491478e-06,
      "loss": 0.1743,
      "step": 4151
    },
    {
      "epoch": 0.12112725363206721,
      "grad_norm": 0.8446351444485749,
      "learning_rate": 9.78389094817578e-06,
      "loss": 0.1742,
      "step": 4152
    },
    {
      "epoch": 0.12115642686271078,
      "grad_norm": 0.906404467522756,
      "learning_rate": 9.783753534150646e-06,
      "loss": 0.1892,
      "step": 4153
    },
    {
      "epoch": 0.12118560009335434,
      "grad_norm": 0.7928934533824235,
      "learning_rate": 9.783616077417301e-06,
      "loss": 0.1764,
      "step": 4154
    },
    {
      "epoch": 0.1212147733239979,
      "grad_norm": 1.011787013239762,
      "learning_rate": 9.783478577976976e-06,
      "loss": 0.1746,
      "step": 4155
    },
    {
      "epoch": 0.12124394655464146,
      "grad_norm": 0.8092316016257747,
      "learning_rate": 9.783341035830895e-06,
      "loss": 0.1744,
      "step": 4156
    },
    {
      "epoch": 0.12127311978528502,
      "grad_norm": 0.7880658319412299,
      "learning_rate": 9.783203450980287e-06,
      "loss": 0.1754,
      "step": 4157
    },
    {
      "epoch": 0.12130229301592858,
      "grad_norm": 0.9193699472202896,
      "learning_rate": 9.78306582342638e-06,
      "loss": 0.1914,
      "step": 4158
    },
    {
      "epoch": 0.12133146624657215,
      "grad_norm": 0.782109094769802,
      "learning_rate": 9.782928153170403e-06,
      "loss": 0.1599,
      "step": 4159
    },
    {
      "epoch": 0.1213606394772157,
      "grad_norm": 1.131680214875881,
      "learning_rate": 9.782790440213587e-06,
      "loss": 0.197,
      "step": 4160
    },
    {
      "epoch": 0.12138981270785927,
      "grad_norm": 0.8343074545424928,
      "learning_rate": 9.782652684557158e-06,
      "loss": 0.1773,
      "step": 4161
    },
    {
      "epoch": 0.12141898593850282,
      "grad_norm": 0.9572818164206963,
      "learning_rate": 9.78251488620235e-06,
      "loss": 0.174,
      "step": 4162
    },
    {
      "epoch": 0.12144815916914639,
      "grad_norm": 0.7420543684606417,
      "learning_rate": 9.782377045150387e-06,
      "loss": 0.168,
      "step": 4163
    },
    {
      "epoch": 0.12147733239978996,
      "grad_norm": 0.8974813647430494,
      "learning_rate": 9.782239161402505e-06,
      "loss": 0.1609,
      "step": 4164
    },
    {
      "epoch": 0.12150650563043351,
      "grad_norm": 0.8375675889457979,
      "learning_rate": 9.782101234959935e-06,
      "loss": 0.1808,
      "step": 4165
    },
    {
      "epoch": 0.12153567886107708,
      "grad_norm": 1.038493570210264,
      "learning_rate": 9.781963265823905e-06,
      "loss": 0.1869,
      "step": 4166
    },
    {
      "epoch": 0.12156485209172063,
      "grad_norm": 0.9711615425737522,
      "learning_rate": 9.78182525399565e-06,
      "loss": 0.178,
      "step": 4167
    },
    {
      "epoch": 0.1215940253223642,
      "grad_norm": 0.7962882984557913,
      "learning_rate": 9.781687199476399e-06,
      "loss": 0.1514,
      "step": 4168
    },
    {
      "epoch": 0.12162319855300777,
      "grad_norm": 0.7542737816784294,
      "learning_rate": 9.781549102267387e-06,
      "loss": 0.1632,
      "step": 4169
    },
    {
      "epoch": 0.12165237178365132,
      "grad_norm": 0.9970523601200407,
      "learning_rate": 9.781410962369846e-06,
      "loss": 0.1702,
      "step": 4170
    },
    {
      "epoch": 0.12168154501429489,
      "grad_norm": 1.074944699922814,
      "learning_rate": 9.78127277978501e-06,
      "loss": 0.1836,
      "step": 4171
    },
    {
      "epoch": 0.12171071824493844,
      "grad_norm": 1.052545248224118,
      "learning_rate": 9.781134554514108e-06,
      "loss": 0.1778,
      "step": 4172
    },
    {
      "epoch": 0.12173989147558201,
      "grad_norm": 0.8528179908215335,
      "learning_rate": 9.780996286558382e-06,
      "loss": 0.1685,
      "step": 4173
    },
    {
      "epoch": 0.12176906470622557,
      "grad_norm": 1.047169045200767,
      "learning_rate": 9.780857975919063e-06,
      "loss": 0.1955,
      "step": 4174
    },
    {
      "epoch": 0.12179823793686913,
      "grad_norm": 0.9103774911103129,
      "learning_rate": 9.780719622597383e-06,
      "loss": 0.1881,
      "step": 4175
    },
    {
      "epoch": 0.1218274111675127,
      "grad_norm": 0.6978801978572893,
      "learning_rate": 9.78058122659458e-06,
      "loss": 0.1521,
      "step": 4176
    },
    {
      "epoch": 0.12185658439815625,
      "grad_norm": 0.8262896383805954,
      "learning_rate": 9.780442787911891e-06,
      "loss": 0.1668,
      "step": 4177
    },
    {
      "epoch": 0.12188575762879982,
      "grad_norm": 0.8353675530022704,
      "learning_rate": 9.780304306550547e-06,
      "loss": 0.1743,
      "step": 4178
    },
    {
      "epoch": 0.12191493085944337,
      "grad_norm": 1.269227532562418,
      "learning_rate": 9.78016578251179e-06,
      "loss": 0.2063,
      "step": 4179
    },
    {
      "epoch": 0.12194410409008694,
      "grad_norm": 0.800200258529141,
      "learning_rate": 9.780027215796853e-06,
      "loss": 0.1675,
      "step": 4180
    },
    {
      "epoch": 0.1219732773207305,
      "grad_norm": 0.862996763044887,
      "learning_rate": 9.779888606406974e-06,
      "loss": 0.1682,
      "step": 4181
    },
    {
      "epoch": 0.12200245055137406,
      "grad_norm": 1.0524814671615295,
      "learning_rate": 9.77974995434339e-06,
      "loss": 0.1711,
      "step": 4182
    },
    {
      "epoch": 0.12203162378201762,
      "grad_norm": 0.7834384166130912,
      "learning_rate": 9.77961125960734e-06,
      "loss": 0.1766,
      "step": 4183
    },
    {
      "epoch": 0.12206079701266118,
      "grad_norm": 1.0000136967483966,
      "learning_rate": 9.779472522200063e-06,
      "loss": 0.1928,
      "step": 4184
    },
    {
      "epoch": 0.12208997024330474,
      "grad_norm": 0.8285754849884852,
      "learning_rate": 9.779333742122792e-06,
      "loss": 0.207,
      "step": 4185
    },
    {
      "epoch": 0.12211914347394831,
      "grad_norm": 0.9061836895942934,
      "learning_rate": 9.779194919376774e-06,
      "loss": 0.1918,
      "step": 4186
    },
    {
      "epoch": 0.12214831670459186,
      "grad_norm": 0.9974874319325114,
      "learning_rate": 9.779056053963243e-06,
      "loss": 0.1832,
      "step": 4187
    },
    {
      "epoch": 0.12217748993523543,
      "grad_norm": 0.79496352702967,
      "learning_rate": 9.778917145883441e-06,
      "loss": 0.1974,
      "step": 4188
    },
    {
      "epoch": 0.12220666316587898,
      "grad_norm": 1.1376640314406414,
      "learning_rate": 9.778778195138609e-06,
      "loss": 0.1795,
      "step": 4189
    },
    {
      "epoch": 0.12223583639652255,
      "grad_norm": 0.8717399481860646,
      "learning_rate": 9.778639201729985e-06,
      "loss": 0.1883,
      "step": 4190
    },
    {
      "epoch": 0.12226500962716612,
      "grad_norm": 0.8262123753517054,
      "learning_rate": 9.77850016565881e-06,
      "loss": 0.1858,
      "step": 4191
    },
    {
      "epoch": 0.12229418285780967,
      "grad_norm": 1.0161112536908263,
      "learning_rate": 9.778361086926327e-06,
      "loss": 0.2038,
      "step": 4192
    },
    {
      "epoch": 0.12232335608845324,
      "grad_norm": 0.9989675131884603,
      "learning_rate": 9.778221965533776e-06,
      "loss": 0.1889,
      "step": 4193
    },
    {
      "epoch": 0.12235252931909679,
      "grad_norm": 0.886147768004657,
      "learning_rate": 9.778082801482402e-06,
      "loss": 0.1553,
      "step": 4194
    },
    {
      "epoch": 0.12238170254974036,
      "grad_norm": 0.9443788967680901,
      "learning_rate": 9.777943594773443e-06,
      "loss": 0.1862,
      "step": 4195
    },
    {
      "epoch": 0.12241087578038393,
      "grad_norm": 0.7090528132935483,
      "learning_rate": 9.777804345408146e-06,
      "loss": 0.1859,
      "step": 4196
    },
    {
      "epoch": 0.12244004901102748,
      "grad_norm": 0.9017168604179734,
      "learning_rate": 9.77766505338775e-06,
      "loss": 0.1881,
      "step": 4197
    },
    {
      "epoch": 0.12246922224167105,
      "grad_norm": 0.9176129179618445,
      "learning_rate": 9.777525718713503e-06,
      "loss": 0.2047,
      "step": 4198
    },
    {
      "epoch": 0.1224983954723146,
      "grad_norm": 0.8592543265106798,
      "learning_rate": 9.777386341386647e-06,
      "loss": 0.1971,
      "step": 4199
    },
    {
      "epoch": 0.12252756870295817,
      "grad_norm": 0.9434533936174614,
      "learning_rate": 9.777246921408426e-06,
      "loss": 0.1667,
      "step": 4200
    },
    {
      "epoch": 0.12255674193360172,
      "grad_norm": 0.927354638086785,
      "learning_rate": 9.777107458780084e-06,
      "loss": 0.1919,
      "step": 4201
    },
    {
      "epoch": 0.12258591516424529,
      "grad_norm": 0.9226859426751418,
      "learning_rate": 9.776967953502869e-06,
      "loss": 0.2093,
      "step": 4202
    },
    {
      "epoch": 0.12261508839488885,
      "grad_norm": 0.9161039871550587,
      "learning_rate": 9.776828405578023e-06,
      "loss": 0.1823,
      "step": 4203
    },
    {
      "epoch": 0.12264426162553241,
      "grad_norm": 0.9020688434530008,
      "learning_rate": 9.776688815006792e-06,
      "loss": 0.1651,
      "step": 4204
    },
    {
      "epoch": 0.12267343485617598,
      "grad_norm": 1.037436081549395,
      "learning_rate": 9.776549181790424e-06,
      "loss": 0.1836,
      "step": 4205
    },
    {
      "epoch": 0.12270260808681953,
      "grad_norm": 0.8719546752081956,
      "learning_rate": 9.776409505930167e-06,
      "loss": 0.1694,
      "step": 4206
    },
    {
      "epoch": 0.1227317813174631,
      "grad_norm": 0.8810485573848786,
      "learning_rate": 9.776269787427266e-06,
      "loss": 0.1796,
      "step": 4207
    },
    {
      "epoch": 0.12276095454810666,
      "grad_norm": 0.9978537568358294,
      "learning_rate": 9.776130026282968e-06,
      "loss": 0.1915,
      "step": 4208
    },
    {
      "epoch": 0.12279012777875022,
      "grad_norm": 0.7555369809030872,
      "learning_rate": 9.77599022249852e-06,
      "loss": 0.1708,
      "step": 4209
    },
    {
      "epoch": 0.12281930100939378,
      "grad_norm": 0.9692423805565872,
      "learning_rate": 9.775850376075174e-06,
      "loss": 0.1572,
      "step": 4210
    },
    {
      "epoch": 0.12284847424003734,
      "grad_norm": 1.0618323249689408,
      "learning_rate": 9.775710487014172e-06,
      "loss": 0.2008,
      "step": 4211
    },
    {
      "epoch": 0.1228776474706809,
      "grad_norm": 1.040945500262105,
      "learning_rate": 9.77557055531677e-06,
      "loss": 0.1839,
      "step": 4212
    },
    {
      "epoch": 0.12290682070132447,
      "grad_norm": 1.037133168332038,
      "learning_rate": 9.775430580984213e-06,
      "loss": 0.2114,
      "step": 4213
    },
    {
      "epoch": 0.12293599393196802,
      "grad_norm": 0.8061472566653828,
      "learning_rate": 9.775290564017752e-06,
      "loss": 0.1874,
      "step": 4214
    },
    {
      "epoch": 0.12296516716261159,
      "grad_norm": 0.8963591424698387,
      "learning_rate": 9.775150504418638e-06,
      "loss": 0.1713,
      "step": 4215
    },
    {
      "epoch": 0.12299434039325514,
      "grad_norm": 0.8630866206082455,
      "learning_rate": 9.775010402188119e-06,
      "loss": 0.189,
      "step": 4216
    },
    {
      "epoch": 0.12302351362389871,
      "grad_norm": 0.9263597454549111,
      "learning_rate": 9.774870257327447e-06,
      "loss": 0.2046,
      "step": 4217
    },
    {
      "epoch": 0.12305268685454228,
      "grad_norm": 1.071914955078211,
      "learning_rate": 9.774730069837872e-06,
      "loss": 0.1956,
      "step": 4218
    },
    {
      "epoch": 0.12308186008518583,
      "grad_norm": 0.8329010322263537,
      "learning_rate": 9.774589839720649e-06,
      "loss": 0.1758,
      "step": 4219
    },
    {
      "epoch": 0.1231110333158294,
      "grad_norm": 0.7920640763112636,
      "learning_rate": 9.774449566977027e-06,
      "loss": 0.1977,
      "step": 4220
    },
    {
      "epoch": 0.12314020654647295,
      "grad_norm": 1.008236458276534,
      "learning_rate": 9.774309251608259e-06,
      "loss": 0.2016,
      "step": 4221
    },
    {
      "epoch": 0.12316937977711652,
      "grad_norm": 0.9347482520652315,
      "learning_rate": 9.774168893615597e-06,
      "loss": 0.1833,
      "step": 4222
    },
    {
      "epoch": 0.12319855300776007,
      "grad_norm": 0.8640234539806687,
      "learning_rate": 9.774028493000295e-06,
      "loss": 0.1681,
      "step": 4223
    },
    {
      "epoch": 0.12322772623840364,
      "grad_norm": 1.1631087202336545,
      "learning_rate": 9.773888049763606e-06,
      "loss": 0.2026,
      "step": 4224
    },
    {
      "epoch": 0.12325689946904721,
      "grad_norm": 1.0334941698045452,
      "learning_rate": 9.773747563906785e-06,
      "loss": 0.1759,
      "step": 4225
    },
    {
      "epoch": 0.12328607269969076,
      "grad_norm": 0.9064466530876436,
      "learning_rate": 9.773607035431085e-06,
      "loss": 0.1854,
      "step": 4226
    },
    {
      "epoch": 0.12331524593033433,
      "grad_norm": 1.819916819047121,
      "learning_rate": 9.77346646433776e-06,
      "loss": 0.1936,
      "step": 4227
    },
    {
      "epoch": 0.12334441916097788,
      "grad_norm": 0.9761879810370655,
      "learning_rate": 9.773325850628069e-06,
      "loss": 0.1885,
      "step": 4228
    },
    {
      "epoch": 0.12337359239162145,
      "grad_norm": 0.7388657787543375,
      "learning_rate": 9.77318519430326e-06,
      "loss": 0.1658,
      "step": 4229
    },
    {
      "epoch": 0.12340276562226501,
      "grad_norm": 0.991855344225325,
      "learning_rate": 9.773044495364596e-06,
      "loss": 0.1685,
      "step": 4230
    },
    {
      "epoch": 0.12343193885290857,
      "grad_norm": 0.8834022854048016,
      "learning_rate": 9.77290375381333e-06,
      "loss": 0.2044,
      "step": 4231
    },
    {
      "epoch": 0.12346111208355214,
      "grad_norm": 0.8811956457462701,
      "learning_rate": 9.772762969650717e-06,
      "loss": 0.2013,
      "step": 4232
    },
    {
      "epoch": 0.12349028531419569,
      "grad_norm": 0.9784935254653434,
      "learning_rate": 9.772622142878016e-06,
      "loss": 0.1691,
      "step": 4233
    },
    {
      "epoch": 0.12351945854483926,
      "grad_norm": 0.8224157593540263,
      "learning_rate": 9.772481273496486e-06,
      "loss": 0.1745,
      "step": 4234
    },
    {
      "epoch": 0.12354863177548282,
      "grad_norm": 0.9826511369228231,
      "learning_rate": 9.77234036150738e-06,
      "loss": 0.1704,
      "step": 4235
    },
    {
      "epoch": 0.12357780500612638,
      "grad_norm": 0.8707132641162341,
      "learning_rate": 9.77219940691196e-06,
      "loss": 0.1594,
      "step": 4236
    },
    {
      "epoch": 0.12360697823676994,
      "grad_norm": 0.8486161938388265,
      "learning_rate": 9.77205840971148e-06,
      "loss": 0.2133,
      "step": 4237
    },
    {
      "epoch": 0.1236361514674135,
      "grad_norm": 0.9949223452883215,
      "learning_rate": 9.771917369907206e-06,
      "loss": 0.18,
      "step": 4238
    },
    {
      "epoch": 0.12366532469805706,
      "grad_norm": 1.0058494158528097,
      "learning_rate": 9.77177628750039e-06,
      "loss": 0.2059,
      "step": 4239
    },
    {
      "epoch": 0.12369449792870063,
      "grad_norm": 0.9213454019566658,
      "learning_rate": 9.771635162492296e-06,
      "loss": 0.1639,
      "step": 4240
    },
    {
      "epoch": 0.12372367115934418,
      "grad_norm": 1.0291911621537848,
      "learning_rate": 9.771493994884182e-06,
      "loss": 0.1898,
      "step": 4241
    },
    {
      "epoch": 0.12375284438998775,
      "grad_norm": 0.8649075291704439,
      "learning_rate": 9.771352784677309e-06,
      "loss": 0.1783,
      "step": 4242
    },
    {
      "epoch": 0.1237820176206313,
      "grad_norm": 0.8785762286609688,
      "learning_rate": 9.771211531872935e-06,
      "loss": 0.1514,
      "step": 4243
    },
    {
      "epoch": 0.12381119085127487,
      "grad_norm": 0.823081519999211,
      "learning_rate": 9.771070236472324e-06,
      "loss": 0.1826,
      "step": 4244
    },
    {
      "epoch": 0.12384036408191842,
      "grad_norm": 0.7215599451247365,
      "learning_rate": 9.77092889847674e-06,
      "loss": 0.1731,
      "step": 4245
    },
    {
      "epoch": 0.12386953731256199,
      "grad_norm": 0.9713610710868089,
      "learning_rate": 9.770787517887439e-06,
      "loss": 0.1993,
      "step": 4246
    },
    {
      "epoch": 0.12389871054320556,
      "grad_norm": 0.8715957915372682,
      "learning_rate": 9.770646094705687e-06,
      "loss": 0.1883,
      "step": 4247
    },
    {
      "epoch": 0.12392788377384911,
      "grad_norm": 0.8420458576843002,
      "learning_rate": 9.770504628932744e-06,
      "loss": 0.1868,
      "step": 4248
    },
    {
      "epoch": 0.12395705700449268,
      "grad_norm": 0.9451975101715953,
      "learning_rate": 9.770363120569876e-06,
      "loss": 0.191,
      "step": 4249
    },
    {
      "epoch": 0.12398623023513623,
      "grad_norm": 0.9607044740168983,
      "learning_rate": 9.770221569618343e-06,
      "loss": 0.1633,
      "step": 4250
    },
    {
      "epoch": 0.1240154034657798,
      "grad_norm": 0.7286356854872065,
      "learning_rate": 9.770079976079414e-06,
      "loss": 0.1686,
      "step": 4251
    },
    {
      "epoch": 0.12404457669642337,
      "grad_norm": 0.8874022088049638,
      "learning_rate": 9.769938339954347e-06,
      "loss": 0.1866,
      "step": 4252
    },
    {
      "epoch": 0.12407374992706692,
      "grad_norm": 1.0625998134826966,
      "learning_rate": 9.76979666124441e-06,
      "loss": 0.1889,
      "step": 4253
    },
    {
      "epoch": 0.12410292315771049,
      "grad_norm": 0.8184826542103033,
      "learning_rate": 9.769654939950866e-06,
      "loss": 0.1633,
      "step": 4254
    },
    {
      "epoch": 0.12413209638835404,
      "grad_norm": 1.2131584622111504,
      "learning_rate": 9.769513176074982e-06,
      "loss": 0.1826,
      "step": 4255
    },
    {
      "epoch": 0.12416126961899761,
      "grad_norm": 0.9988584186668532,
      "learning_rate": 9.769371369618023e-06,
      "loss": 0.1853,
      "step": 4256
    },
    {
      "epoch": 0.12419044284964117,
      "grad_norm": 1.180325587882098,
      "learning_rate": 9.769229520581256e-06,
      "loss": 0.1509,
      "step": 4257
    },
    {
      "epoch": 0.12421961608028473,
      "grad_norm": 0.7234165745053014,
      "learning_rate": 9.769087628965945e-06,
      "loss": 0.1889,
      "step": 4258
    },
    {
      "epoch": 0.1242487893109283,
      "grad_norm": 0.8663462398173085,
      "learning_rate": 9.768945694773358e-06,
      "loss": 0.173,
      "step": 4259
    },
    {
      "epoch": 0.12427796254157185,
      "grad_norm": 0.7336133954693259,
      "learning_rate": 9.768803718004764e-06,
      "loss": 0.182,
      "step": 4260
    },
    {
      "epoch": 0.12430713577221542,
      "grad_norm": 0.7633816702741599,
      "learning_rate": 9.768661698661427e-06,
      "loss": 0.1595,
      "step": 4261
    },
    {
      "epoch": 0.12433630900285898,
      "grad_norm": 0.89792804587649,
      "learning_rate": 9.768519636744618e-06,
      "loss": 0.1715,
      "step": 4262
    },
    {
      "epoch": 0.12436548223350254,
      "grad_norm": 0.8208558511054609,
      "learning_rate": 9.768377532255602e-06,
      "loss": 0.2037,
      "step": 4263
    },
    {
      "epoch": 0.1243946554641461,
      "grad_norm": 1.213932174256665,
      "learning_rate": 9.768235385195653e-06,
      "loss": 0.1795,
      "step": 4264
    },
    {
      "epoch": 0.12442382869478966,
      "grad_norm": 0.7904329914917131,
      "learning_rate": 9.768093195566033e-06,
      "loss": 0.1611,
      "step": 4265
    },
    {
      "epoch": 0.12445300192543322,
      "grad_norm": 0.7345958432260812,
      "learning_rate": 9.767950963368018e-06,
      "loss": 0.1863,
      "step": 4266
    },
    {
      "epoch": 0.12448217515607679,
      "grad_norm": 0.872169523460647,
      "learning_rate": 9.767808688602873e-06,
      "loss": 0.159,
      "step": 4267
    },
    {
      "epoch": 0.12451134838672034,
      "grad_norm": 0.8695611591130454,
      "learning_rate": 9.767666371271871e-06,
      "loss": 0.1784,
      "step": 4268
    },
    {
      "epoch": 0.12454052161736391,
      "grad_norm": 0.6730778413023714,
      "learning_rate": 9.767524011376283e-06,
      "loss": 0.174,
      "step": 4269
    },
    {
      "epoch": 0.12456969484800746,
      "grad_norm": 0.9220896534051399,
      "learning_rate": 9.767381608917377e-06,
      "loss": 0.1973,
      "step": 4270
    },
    {
      "epoch": 0.12459886807865103,
      "grad_norm": 0.871915000359137,
      "learning_rate": 9.767239163896427e-06,
      "loss": 0.1691,
      "step": 4271
    },
    {
      "epoch": 0.12462804130929458,
      "grad_norm": 0.8734391780543423,
      "learning_rate": 9.767096676314703e-06,
      "loss": 0.194,
      "step": 4272
    },
    {
      "epoch": 0.12465721453993815,
      "grad_norm": 0.8891960491574288,
      "learning_rate": 9.76695414617348e-06,
      "loss": 0.1925,
      "step": 4273
    },
    {
      "epoch": 0.12468638777058172,
      "grad_norm": 0.8286255539685276,
      "learning_rate": 9.766811573474026e-06,
      "loss": 0.1743,
      "step": 4274
    },
    {
      "epoch": 0.12471556100122527,
      "grad_norm": 1.1132476951309755,
      "learning_rate": 9.766668958217617e-06,
      "loss": 0.1531,
      "step": 4275
    },
    {
      "epoch": 0.12474473423186884,
      "grad_norm": 0.9154750259971727,
      "learning_rate": 9.766526300405525e-06,
      "loss": 0.1677,
      "step": 4276
    },
    {
      "epoch": 0.12477390746251239,
      "grad_norm": 0.9013459160814709,
      "learning_rate": 9.766383600039025e-06,
      "loss": 0.1823,
      "step": 4277
    },
    {
      "epoch": 0.12480308069315596,
      "grad_norm": 0.9060290447522402,
      "learning_rate": 9.76624085711939e-06,
      "loss": 0.1581,
      "step": 4278
    },
    {
      "epoch": 0.12483225392379953,
      "grad_norm": 0.9803903375051782,
      "learning_rate": 9.766098071647892e-06,
      "loss": 0.1683,
      "step": 4279
    },
    {
      "epoch": 0.12486142715444308,
      "grad_norm": 0.8788098055543385,
      "learning_rate": 9.765955243625811e-06,
      "loss": 0.2036,
      "step": 4280
    },
    {
      "epoch": 0.12489060038508665,
      "grad_norm": 0.9587287730384955,
      "learning_rate": 9.765812373054418e-06,
      "loss": 0.1957,
      "step": 4281
    },
    {
      "epoch": 0.1249197736157302,
      "grad_norm": 0.9823218003703186,
      "learning_rate": 9.76566945993499e-06,
      "loss": 0.1852,
      "step": 4282
    },
    {
      "epoch": 0.12494894684637377,
      "grad_norm": 1.0552717781488987,
      "learning_rate": 9.765526504268803e-06,
      "loss": 0.1767,
      "step": 4283
    },
    {
      "epoch": 0.12497812007701734,
      "grad_norm": 1.0440471798962738,
      "learning_rate": 9.765383506057134e-06,
      "loss": 0.1985,
      "step": 4284
    },
    {
      "epoch": 0.1250072933076609,
      "grad_norm": 0.8833607271954884,
      "learning_rate": 9.765240465301256e-06,
      "loss": 0.1695,
      "step": 4285
    },
    {
      "epoch": 0.12503646653830444,
      "grad_norm": 1.0199050563499885,
      "learning_rate": 9.765097382002451e-06,
      "loss": 0.202,
      "step": 4286
    },
    {
      "epoch": 0.12506563976894802,
      "grad_norm": 1.00389863995398,
      "learning_rate": 9.764954256161994e-06,
      "loss": 0.1864,
      "step": 4287
    },
    {
      "epoch": 0.12509481299959158,
      "grad_norm": 1.0983481360195875,
      "learning_rate": 9.76481108778116e-06,
      "loss": 0.1737,
      "step": 4288
    },
    {
      "epoch": 0.12512398623023513,
      "grad_norm": 0.9268256651823165,
      "learning_rate": 9.764667876861234e-06,
      "loss": 0.1745,
      "step": 4289
    },
    {
      "epoch": 0.1251531594608787,
      "grad_norm": 0.7470529526993203,
      "learning_rate": 9.764524623403488e-06,
      "loss": 0.1788,
      "step": 4290
    },
    {
      "epoch": 0.12518233269152226,
      "grad_norm": 1.1216784539669975,
      "learning_rate": 9.764381327409204e-06,
      "loss": 0.176,
      "step": 4291
    },
    {
      "epoch": 0.12521150592216582,
      "grad_norm": 0.9260657882814729,
      "learning_rate": 9.764237988879663e-06,
      "loss": 0.1994,
      "step": 4292
    },
    {
      "epoch": 0.12524067915280937,
      "grad_norm": 0.8801649593751968,
      "learning_rate": 9.76409460781614e-06,
      "loss": 0.1698,
      "step": 4293
    },
    {
      "epoch": 0.12526985238345295,
      "grad_norm": 0.7524840407802879,
      "learning_rate": 9.76395118421992e-06,
      "loss": 0.1665,
      "step": 4294
    },
    {
      "epoch": 0.1252990256140965,
      "grad_norm": 0.9182771897339053,
      "learning_rate": 9.763807718092278e-06,
      "loss": 0.203,
      "step": 4295
    },
    {
      "epoch": 0.12532819884474006,
      "grad_norm": 0.9437971667408273,
      "learning_rate": 9.7636642094345e-06,
      "loss": 0.1744,
      "step": 4296
    },
    {
      "epoch": 0.12535737207538364,
      "grad_norm": 0.8554139785337814,
      "learning_rate": 9.763520658247866e-06,
      "loss": 0.1656,
      "step": 4297
    },
    {
      "epoch": 0.1253865453060272,
      "grad_norm": 1.0251410191592603,
      "learning_rate": 9.763377064533654e-06,
      "loss": 0.1814,
      "step": 4298
    },
    {
      "epoch": 0.12541571853667074,
      "grad_norm": 0.8584107009287739,
      "learning_rate": 9.76323342829315e-06,
      "loss": 0.1878,
      "step": 4299
    },
    {
      "epoch": 0.12544489176731433,
      "grad_norm": 1.298075008577799,
      "learning_rate": 9.763089749527635e-06,
      "loss": 0.1914,
      "step": 4300
    },
    {
      "epoch": 0.12547406499795788,
      "grad_norm": 0.8654377573473938,
      "learning_rate": 9.762946028238391e-06,
      "loss": 0.1741,
      "step": 4301
    },
    {
      "epoch": 0.12550323822860143,
      "grad_norm": 0.7753543465818352,
      "learning_rate": 9.762802264426703e-06,
      "loss": 0.1893,
      "step": 4302
    },
    {
      "epoch": 0.12553241145924499,
      "grad_norm": 1.3344574668330917,
      "learning_rate": 9.762658458093852e-06,
      "loss": 0.1782,
      "step": 4303
    },
    {
      "epoch": 0.12556158468988857,
      "grad_norm": 0.9834481709401277,
      "learning_rate": 9.762514609241124e-06,
      "loss": 0.1892,
      "step": 4304
    },
    {
      "epoch": 0.12559075792053212,
      "grad_norm": 0.8555403034672496,
      "learning_rate": 9.762370717869804e-06,
      "loss": 0.1835,
      "step": 4305
    },
    {
      "epoch": 0.12561993115117567,
      "grad_norm": 1.0901077663869283,
      "learning_rate": 9.762226783981174e-06,
      "loss": 0.1929,
      "step": 4306
    },
    {
      "epoch": 0.12564910438181925,
      "grad_norm": 0.9769178606454748,
      "learning_rate": 9.762082807576518e-06,
      "loss": 0.1909,
      "step": 4307
    },
    {
      "epoch": 0.1256782776124628,
      "grad_norm": 0.7929454063432803,
      "learning_rate": 9.761938788657127e-06,
      "loss": 0.1983,
      "step": 4308
    },
    {
      "epoch": 0.12570745084310636,
      "grad_norm": 0.8921832339132862,
      "learning_rate": 9.761794727224281e-06,
      "loss": 0.2156,
      "step": 4309
    },
    {
      "epoch": 0.12573662407374991,
      "grad_norm": 0.8545715424189586,
      "learning_rate": 9.761650623279269e-06,
      "loss": 0.1535,
      "step": 4310
    },
    {
      "epoch": 0.1257657973043935,
      "grad_norm": 0.8778341189523261,
      "learning_rate": 9.761506476823377e-06,
      "loss": 0.1788,
      "step": 4311
    },
    {
      "epoch": 0.12579497053503705,
      "grad_norm": 0.7884318379167016,
      "learning_rate": 9.761362287857891e-06,
      "loss": 0.1773,
      "step": 4312
    },
    {
      "epoch": 0.1258241437656806,
      "grad_norm": 0.8426840395974396,
      "learning_rate": 9.761218056384102e-06,
      "loss": 0.1728,
      "step": 4313
    },
    {
      "epoch": 0.12585331699632418,
      "grad_norm": 0.9073579849848015,
      "learning_rate": 9.761073782403291e-06,
      "loss": 0.1814,
      "step": 4314
    },
    {
      "epoch": 0.12588249022696774,
      "grad_norm": 0.9179197291915331,
      "learning_rate": 9.760929465916752e-06,
      "loss": 0.1588,
      "step": 4315
    },
    {
      "epoch": 0.1259116634576113,
      "grad_norm": 0.8223070944303513,
      "learning_rate": 9.76078510692577e-06,
      "loss": 0.1725,
      "step": 4316
    },
    {
      "epoch": 0.12594083668825487,
      "grad_norm": 0.9131029632198148,
      "learning_rate": 9.760640705431636e-06,
      "loss": 0.1931,
      "step": 4317
    },
    {
      "epoch": 0.12597000991889842,
      "grad_norm": 0.8772096245229108,
      "learning_rate": 9.76049626143564e-06,
      "loss": 0.1472,
      "step": 4318
    },
    {
      "epoch": 0.12599918314954198,
      "grad_norm": 0.7007596844463523,
      "learning_rate": 9.760351774939068e-06,
      "loss": 0.1809,
      "step": 4319
    },
    {
      "epoch": 0.12602835638018553,
      "grad_norm": 0.8300390578037848,
      "learning_rate": 9.76020724594321e-06,
      "loss": 0.1905,
      "step": 4320
    },
    {
      "epoch": 0.1260575296108291,
      "grad_norm": 1.0328419104468722,
      "learning_rate": 9.76006267444936e-06,
      "loss": 0.1781,
      "step": 4321
    },
    {
      "epoch": 0.12608670284147266,
      "grad_norm": 0.7480386916897522,
      "learning_rate": 9.759918060458807e-06,
      "loss": 0.1619,
      "step": 4322
    },
    {
      "epoch": 0.12611587607211622,
      "grad_norm": 0.8698351781302592,
      "learning_rate": 9.75977340397284e-06,
      "loss": 0.1793,
      "step": 4323
    },
    {
      "epoch": 0.1261450493027598,
      "grad_norm": 0.8172360464952455,
      "learning_rate": 9.759628704992754e-06,
      "loss": 0.1735,
      "step": 4324
    },
    {
      "epoch": 0.12617422253340335,
      "grad_norm": 0.8593913726772656,
      "learning_rate": 9.75948396351984e-06,
      "loss": 0.1738,
      "step": 4325
    },
    {
      "epoch": 0.1262033957640469,
      "grad_norm": 0.8322858175517441,
      "learning_rate": 9.759339179555387e-06,
      "loss": 0.1669,
      "step": 4326
    },
    {
      "epoch": 0.12623256899469046,
      "grad_norm": 0.8524071023669001,
      "learning_rate": 9.759194353100692e-06,
      "loss": 0.1765,
      "step": 4327
    },
    {
      "epoch": 0.12626174222533404,
      "grad_norm": 0.8115798583704614,
      "learning_rate": 9.759049484157045e-06,
      "loss": 0.1711,
      "step": 4328
    },
    {
      "epoch": 0.1262909154559776,
      "grad_norm": 0.830874898071875,
      "learning_rate": 9.758904572725739e-06,
      "loss": 0.1997,
      "step": 4329
    },
    {
      "epoch": 0.12632008868662115,
      "grad_norm": 0.8784167964965695,
      "learning_rate": 9.758759618808071e-06,
      "loss": 0.1765,
      "step": 4330
    },
    {
      "epoch": 0.12634926191726473,
      "grad_norm": 0.7548423758563226,
      "learning_rate": 9.75861462240533e-06,
      "loss": 0.1848,
      "step": 4331
    },
    {
      "epoch": 0.12637843514790828,
      "grad_norm": 0.844675211204546,
      "learning_rate": 9.758469583518819e-06,
      "loss": 0.1617,
      "step": 4332
    },
    {
      "epoch": 0.12640760837855183,
      "grad_norm": 0.8814391936112137,
      "learning_rate": 9.758324502149824e-06,
      "loss": 0.1688,
      "step": 4333
    },
    {
      "epoch": 0.12643678160919541,
      "grad_norm": 0.9049808953261231,
      "learning_rate": 9.758179378299644e-06,
      "loss": 0.1648,
      "step": 4334
    },
    {
      "epoch": 0.12646595483983897,
      "grad_norm": 0.941452688339845,
      "learning_rate": 9.758034211969573e-06,
      "loss": 0.1932,
      "step": 4335
    },
    {
      "epoch": 0.12649512807048252,
      "grad_norm": 0.7424095216186802,
      "learning_rate": 9.757889003160912e-06,
      "loss": 0.1624,
      "step": 4336
    },
    {
      "epoch": 0.12652430130112607,
      "grad_norm": 0.853858928136336,
      "learning_rate": 9.757743751874951e-06,
      "loss": 0.1705,
      "step": 4337
    },
    {
      "epoch": 0.12655347453176966,
      "grad_norm": 0.83593321941497,
      "learning_rate": 9.757598458112991e-06,
      "loss": 0.1698,
      "step": 4338
    },
    {
      "epoch": 0.1265826477624132,
      "grad_norm": 0.7729304014754222,
      "learning_rate": 9.757453121876327e-06,
      "loss": 0.1504,
      "step": 4339
    },
    {
      "epoch": 0.12661182099305676,
      "grad_norm": 1.2136177458516852,
      "learning_rate": 9.757307743166259e-06,
      "loss": 0.1568,
      "step": 4340
    },
    {
      "epoch": 0.12664099422370034,
      "grad_norm": 0.8488242378339931,
      "learning_rate": 9.757162321984079e-06,
      "loss": 0.1737,
      "step": 4341
    },
    {
      "epoch": 0.1266701674543439,
      "grad_norm": 0.8264301337138541,
      "learning_rate": 9.757016858331092e-06,
      "loss": 0.1948,
      "step": 4342
    },
    {
      "epoch": 0.12669934068498745,
      "grad_norm": 1.1684518053757948,
      "learning_rate": 9.756871352208594e-06,
      "loss": 0.1746,
      "step": 4343
    },
    {
      "epoch": 0.12672851391563103,
      "grad_norm": 0.9713931040968758,
      "learning_rate": 9.756725803617883e-06,
      "loss": 0.2024,
      "step": 4344
    },
    {
      "epoch": 0.12675768714627458,
      "grad_norm": 0.903833122560911,
      "learning_rate": 9.756580212560261e-06,
      "loss": 0.1805,
      "step": 4345
    },
    {
      "epoch": 0.12678686037691814,
      "grad_norm": 1.0932615376906702,
      "learning_rate": 9.756434579037027e-06,
      "loss": 0.1978,
      "step": 4346
    },
    {
      "epoch": 0.1268160336075617,
      "grad_norm": 1.019024004897646,
      "learning_rate": 9.75628890304948e-06,
      "loss": 0.1823,
      "step": 4347
    },
    {
      "epoch": 0.12684520683820527,
      "grad_norm": 0.7929642521823096,
      "learning_rate": 9.756143184598919e-06,
      "loss": 0.182,
      "step": 4348
    },
    {
      "epoch": 0.12687438006884882,
      "grad_norm": 0.839591692305866,
      "learning_rate": 9.755997423686649e-06,
      "loss": 0.1728,
      "step": 4349
    },
    {
      "epoch": 0.12690355329949238,
      "grad_norm": 0.9423603267135233,
      "learning_rate": 9.75585162031397e-06,
      "loss": 0.1765,
      "step": 4350
    },
    {
      "epoch": 0.12693272653013596,
      "grad_norm": 0.8079607281592949,
      "learning_rate": 9.75570577448218e-06,
      "loss": 0.1857,
      "step": 4351
    },
    {
      "epoch": 0.1269618997607795,
      "grad_norm": 0.8822615224554138,
      "learning_rate": 9.755559886192586e-06,
      "loss": 0.1741,
      "step": 4352
    },
    {
      "epoch": 0.12699107299142307,
      "grad_norm": 0.6578728844839694,
      "learning_rate": 9.75541395544649e-06,
      "loss": 0.1711,
      "step": 4353
    },
    {
      "epoch": 0.12702024622206662,
      "grad_norm": 0.7544640852412733,
      "learning_rate": 9.755267982245192e-06,
      "loss": 0.1746,
      "step": 4354
    },
    {
      "epoch": 0.1270494194527102,
      "grad_norm": 0.8261389581100625,
      "learning_rate": 9.755121966589996e-06,
      "loss": 0.1552,
      "step": 4355
    },
    {
      "epoch": 0.12707859268335375,
      "grad_norm": 0.7913161637905148,
      "learning_rate": 9.754975908482207e-06,
      "loss": 0.1747,
      "step": 4356
    },
    {
      "epoch": 0.1271077659139973,
      "grad_norm": 0.8225646514876421,
      "learning_rate": 9.75482980792313e-06,
      "loss": 0.1879,
      "step": 4357
    },
    {
      "epoch": 0.1271369391446409,
      "grad_norm": 0.876059043782913,
      "learning_rate": 9.754683664914064e-06,
      "loss": 0.2059,
      "step": 4358
    },
    {
      "epoch": 0.12716611237528444,
      "grad_norm": 0.8546291935416853,
      "learning_rate": 9.75453747945632e-06,
      "loss": 0.169,
      "step": 4359
    },
    {
      "epoch": 0.127195285605928,
      "grad_norm": 0.9040342126127462,
      "learning_rate": 9.754391251551199e-06,
      "loss": 0.1804,
      "step": 4360
    },
    {
      "epoch": 0.12722445883657157,
      "grad_norm": 0.7900388267349989,
      "learning_rate": 9.754244981200007e-06,
      "loss": 0.1708,
      "step": 4361
    },
    {
      "epoch": 0.12725363206721513,
      "grad_norm": 0.7686265863281533,
      "learning_rate": 9.754098668404053e-06,
      "loss": 0.1904,
      "step": 4362
    },
    {
      "epoch": 0.12728280529785868,
      "grad_norm": 0.8614124048181928,
      "learning_rate": 9.753952313164639e-06,
      "loss": 0.1962,
      "step": 4363
    },
    {
      "epoch": 0.12731197852850223,
      "grad_norm": 0.8364488152172986,
      "learning_rate": 9.753805915483076e-06,
      "loss": 0.1648,
      "step": 4364
    },
    {
      "epoch": 0.12734115175914582,
      "grad_norm": 1.1712213109837195,
      "learning_rate": 9.753659475360666e-06,
      "loss": 0.1745,
      "step": 4365
    },
    {
      "epoch": 0.12737032498978937,
      "grad_norm": 0.823577484205447,
      "learning_rate": 9.75351299279872e-06,
      "loss": 0.2046,
      "step": 4366
    },
    {
      "epoch": 0.12739949822043292,
      "grad_norm": 0.8274937620034111,
      "learning_rate": 9.753366467798545e-06,
      "loss": 0.1854,
      "step": 4367
    },
    {
      "epoch": 0.1274286714510765,
      "grad_norm": 0.7816174048127913,
      "learning_rate": 9.753219900361449e-06,
      "loss": 0.1667,
      "step": 4368
    },
    {
      "epoch": 0.12745784468172006,
      "grad_norm": 0.9266467808636973,
      "learning_rate": 9.75307329048874e-06,
      "loss": 0.1933,
      "step": 4369
    },
    {
      "epoch": 0.1274870179123636,
      "grad_norm": 0.8210069172112148,
      "learning_rate": 9.752926638181728e-06,
      "loss": 0.1567,
      "step": 4370
    },
    {
      "epoch": 0.1275161911430072,
      "grad_norm": 0.8234231612323969,
      "learning_rate": 9.75277994344172e-06,
      "loss": 0.1563,
      "step": 4371
    },
    {
      "epoch": 0.12754536437365074,
      "grad_norm": 0.9439437784972723,
      "learning_rate": 9.75263320627003e-06,
      "loss": 0.1654,
      "step": 4372
    },
    {
      "epoch": 0.1275745376042943,
      "grad_norm": 1.0884616616947242,
      "learning_rate": 9.752486426667963e-06,
      "loss": 0.187,
      "step": 4373
    },
    {
      "epoch": 0.12760371083493785,
      "grad_norm": 1.0232977420602125,
      "learning_rate": 9.752339604636832e-06,
      "loss": 0.1978,
      "step": 4374
    },
    {
      "epoch": 0.12763288406558143,
      "grad_norm": 0.8515421355954126,
      "learning_rate": 9.752192740177948e-06,
      "loss": 0.2103,
      "step": 4375
    },
    {
      "epoch": 0.12766205729622498,
      "grad_norm": 0.8976042839791545,
      "learning_rate": 9.752045833292622e-06,
      "loss": 0.167,
      "step": 4376
    },
    {
      "epoch": 0.12769123052686854,
      "grad_norm": 1.000535780969401,
      "learning_rate": 9.751898883982164e-06,
      "loss": 0.1835,
      "step": 4377
    },
    {
      "epoch": 0.12772040375751212,
      "grad_norm": 0.8509745941678003,
      "learning_rate": 9.751751892247888e-06,
      "loss": 0.2075,
      "step": 4378
    },
    {
      "epoch": 0.12774957698815567,
      "grad_norm": 1.074620987838019,
      "learning_rate": 9.751604858091106e-06,
      "loss": 0.2284,
      "step": 4379
    },
    {
      "epoch": 0.12777875021879923,
      "grad_norm": 0.8236903064117794,
      "learning_rate": 9.75145778151313e-06,
      "loss": 0.1752,
      "step": 4380
    },
    {
      "epoch": 0.12780792344944278,
      "grad_norm": 0.9561425146550531,
      "learning_rate": 9.751310662515271e-06,
      "loss": 0.1694,
      "step": 4381
    },
    {
      "epoch": 0.12783709668008636,
      "grad_norm": 1.0155410375377456,
      "learning_rate": 9.751163501098847e-06,
      "loss": 0.1628,
      "step": 4382
    },
    {
      "epoch": 0.1278662699107299,
      "grad_norm": 0.8281766624317595,
      "learning_rate": 9.751016297265168e-06,
      "loss": 0.1641,
      "step": 4383
    },
    {
      "epoch": 0.12789544314137347,
      "grad_norm": 1.0099441175090207,
      "learning_rate": 9.75086905101555e-06,
      "loss": 0.1824,
      "step": 4384
    },
    {
      "epoch": 0.12792461637201705,
      "grad_norm": 0.7999313130963264,
      "learning_rate": 9.750721762351308e-06,
      "loss": 0.1763,
      "step": 4385
    },
    {
      "epoch": 0.1279537896026606,
      "grad_norm": 1.054702610948299,
      "learning_rate": 9.750574431273756e-06,
      "loss": 0.1679,
      "step": 4386
    },
    {
      "epoch": 0.12798296283330415,
      "grad_norm": 0.9946926871128318,
      "learning_rate": 9.75042705778421e-06,
      "loss": 0.1783,
      "step": 4387
    },
    {
      "epoch": 0.12801213606394773,
      "grad_norm": 0.7788516142655981,
      "learning_rate": 9.750279641883985e-06,
      "loss": 0.1932,
      "step": 4388
    },
    {
      "epoch": 0.1280413092945913,
      "grad_norm": 1.0465756204115466,
      "learning_rate": 9.750132183574395e-06,
      "loss": 0.1718,
      "step": 4389
    },
    {
      "epoch": 0.12807048252523484,
      "grad_norm": 0.9532329728436199,
      "learning_rate": 9.749984682856762e-06,
      "loss": 0.1865,
      "step": 4390
    },
    {
      "epoch": 0.1280996557558784,
      "grad_norm": 0.8816784919587766,
      "learning_rate": 9.749837139732398e-06,
      "loss": 0.1822,
      "step": 4391
    },
    {
      "epoch": 0.12812882898652198,
      "grad_norm": 1.217591959923788,
      "learning_rate": 9.749689554202621e-06,
      "loss": 0.1874,
      "step": 4392
    },
    {
      "epoch": 0.12815800221716553,
      "grad_norm": 0.9820674644439095,
      "learning_rate": 9.74954192626875e-06,
      "loss": 0.1637,
      "step": 4393
    },
    {
      "epoch": 0.12818717544780908,
      "grad_norm": 0.7625453602481914,
      "learning_rate": 9.749394255932105e-06,
      "loss": 0.1606,
      "step": 4394
    },
    {
      "epoch": 0.12821634867845266,
      "grad_norm": 1.0725278108650127,
      "learning_rate": 9.749246543194e-06,
      "loss": 0.208,
      "step": 4395
    },
    {
      "epoch": 0.12824552190909622,
      "grad_norm": 0.731477959555322,
      "learning_rate": 9.749098788055755e-06,
      "loss": 0.1826,
      "step": 4396
    },
    {
      "epoch": 0.12827469513973977,
      "grad_norm": 0.881191244196159,
      "learning_rate": 9.748950990518691e-06,
      "loss": 0.2128,
      "step": 4397
    },
    {
      "epoch": 0.12830386837038332,
      "grad_norm": 1.0291776455260686,
      "learning_rate": 9.748803150584125e-06,
      "loss": 0.1821,
      "step": 4398
    },
    {
      "epoch": 0.1283330416010269,
      "grad_norm": 0.935685919356452,
      "learning_rate": 9.74865526825338e-06,
      "loss": 0.1605,
      "step": 4399
    },
    {
      "epoch": 0.12836221483167046,
      "grad_norm": 0.8613374160537525,
      "learning_rate": 9.748507343527772e-06,
      "loss": 0.1797,
      "step": 4400
    },
    {
      "epoch": 0.128391388062314,
      "grad_norm": 0.9866279611483154,
      "learning_rate": 9.748359376408625e-06,
      "loss": 0.1987,
      "step": 4401
    },
    {
      "epoch": 0.1284205612929576,
      "grad_norm": 0.8524349648703374,
      "learning_rate": 9.74821136689726e-06,
      "loss": 0.1823,
      "step": 4402
    },
    {
      "epoch": 0.12844973452360114,
      "grad_norm": 0.8114088040798733,
      "learning_rate": 9.748063314994995e-06,
      "loss": 0.1784,
      "step": 4403
    },
    {
      "epoch": 0.1284789077542447,
      "grad_norm": 1.0120858097085856,
      "learning_rate": 9.747915220703157e-06,
      "loss": 0.1882,
      "step": 4404
    },
    {
      "epoch": 0.12850808098488828,
      "grad_norm": 0.906713513376724,
      "learning_rate": 9.747767084023063e-06,
      "loss": 0.1698,
      "step": 4405
    },
    {
      "epoch": 0.12853725421553183,
      "grad_norm": 0.808284806868891,
      "learning_rate": 9.74761890495604e-06,
      "loss": 0.1788,
      "step": 4406
    },
    {
      "epoch": 0.12856642744617539,
      "grad_norm": 0.9157060587186935,
      "learning_rate": 9.747470683503407e-06,
      "loss": 0.1552,
      "step": 4407
    },
    {
      "epoch": 0.12859560067681894,
      "grad_norm": 0.7299765371223875,
      "learning_rate": 9.74732241966649e-06,
      "loss": 0.1783,
      "step": 4408
    },
    {
      "epoch": 0.12862477390746252,
      "grad_norm": 0.794202062630329,
      "learning_rate": 9.747174113446612e-06,
      "loss": 0.1805,
      "step": 4409
    },
    {
      "epoch": 0.12865394713810607,
      "grad_norm": 0.9529590201063353,
      "learning_rate": 9.747025764845095e-06,
      "loss": 0.1579,
      "step": 4410
    },
    {
      "epoch": 0.12868312036874963,
      "grad_norm": 0.815325678211468,
      "learning_rate": 9.746877373863265e-06,
      "loss": 0.173,
      "step": 4411
    },
    {
      "epoch": 0.1287122935993932,
      "grad_norm": 0.7731034448423537,
      "learning_rate": 9.74672894050245e-06,
      "loss": 0.1658,
      "step": 4412
    },
    {
      "epoch": 0.12874146683003676,
      "grad_norm": 0.9692188961092176,
      "learning_rate": 9.74658046476397e-06,
      "loss": 0.1806,
      "step": 4413
    },
    {
      "epoch": 0.1287706400606803,
      "grad_norm": 0.8441353019429177,
      "learning_rate": 9.746431946649153e-06,
      "loss": 0.1703,
      "step": 4414
    },
    {
      "epoch": 0.1287998132913239,
      "grad_norm": 1.4495784388670117,
      "learning_rate": 9.746283386159326e-06,
      "loss": 0.1687,
      "step": 4415
    },
    {
      "epoch": 0.12882898652196745,
      "grad_norm": 0.844591601552637,
      "learning_rate": 9.746134783295813e-06,
      "loss": 0.1833,
      "step": 4416
    },
    {
      "epoch": 0.128858159752611,
      "grad_norm": 0.8216297326240757,
      "learning_rate": 9.745986138059941e-06,
      "loss": 0.183,
      "step": 4417
    },
    {
      "epoch": 0.12888733298325455,
      "grad_norm": 0.7837085449321946,
      "learning_rate": 9.745837450453036e-06,
      "loss": 0.1558,
      "step": 4418
    },
    {
      "epoch": 0.12891650621389814,
      "grad_norm": 0.9203670229578482,
      "learning_rate": 9.745688720476431e-06,
      "loss": 0.171,
      "step": 4419
    },
    {
      "epoch": 0.1289456794445417,
      "grad_norm": 0.9020978895465857,
      "learning_rate": 9.745539948131449e-06,
      "loss": 0.1813,
      "step": 4420
    },
    {
      "epoch": 0.12897485267518524,
      "grad_norm": 0.8931646113379528,
      "learning_rate": 9.745391133419417e-06,
      "loss": 0.2127,
      "step": 4421
    },
    {
      "epoch": 0.12900402590582882,
      "grad_norm": 1.1000981484583716,
      "learning_rate": 9.74524227634167e-06,
      "loss": 0.16,
      "step": 4422
    },
    {
      "epoch": 0.12903319913647238,
      "grad_norm": 1.0698500351691078,
      "learning_rate": 9.745093376899528e-06,
      "loss": 0.1701,
      "step": 4423
    },
    {
      "epoch": 0.12906237236711593,
      "grad_norm": 0.9110995597732715,
      "learning_rate": 9.744944435094327e-06,
      "loss": 0.2185,
      "step": 4424
    },
    {
      "epoch": 0.12909154559775948,
      "grad_norm": 0.9159848910732172,
      "learning_rate": 9.744795450927395e-06,
      "loss": 0.2024,
      "step": 4425
    },
    {
      "epoch": 0.12912071882840306,
      "grad_norm": 0.8734090453282745,
      "learning_rate": 9.744646424400062e-06,
      "loss": 0.1625,
      "step": 4426
    },
    {
      "epoch": 0.12914989205904662,
      "grad_norm": 0.950467843389007,
      "learning_rate": 9.744497355513658e-06,
      "loss": 0.1709,
      "step": 4427
    },
    {
      "epoch": 0.12917906528969017,
      "grad_norm": 0.823200969360653,
      "learning_rate": 9.744348244269515e-06,
      "loss": 0.1609,
      "step": 4428
    },
    {
      "epoch": 0.12920823852033375,
      "grad_norm": 0.8237340072156272,
      "learning_rate": 9.744199090668963e-06,
      "loss": 0.1603,
      "step": 4429
    },
    {
      "epoch": 0.1292374117509773,
      "grad_norm": 0.8998333669220802,
      "learning_rate": 9.744049894713334e-06,
      "loss": 0.1796,
      "step": 4430
    },
    {
      "epoch": 0.12926658498162086,
      "grad_norm": 1.099354717803711,
      "learning_rate": 9.74390065640396e-06,
      "loss": 0.1642,
      "step": 4431
    },
    {
      "epoch": 0.12929575821226444,
      "grad_norm": 0.8097038713295855,
      "learning_rate": 9.743751375742171e-06,
      "loss": 0.1733,
      "step": 4432
    },
    {
      "epoch": 0.129324931442908,
      "grad_norm": 0.8592724449828005,
      "learning_rate": 9.743602052729307e-06,
      "loss": 0.1872,
      "step": 4433
    },
    {
      "epoch": 0.12935410467355155,
      "grad_norm": 1.0984868393219305,
      "learning_rate": 9.743452687366692e-06,
      "loss": 0.1914,
      "step": 4434
    },
    {
      "epoch": 0.1293832779041951,
      "grad_norm": 0.8896499347836703,
      "learning_rate": 9.743303279655666e-06,
      "loss": 0.1964,
      "step": 4435
    },
    {
      "epoch": 0.12941245113483868,
      "grad_norm": 1.0362742045145739,
      "learning_rate": 9.74315382959756e-06,
      "loss": 0.1828,
      "step": 4436
    },
    {
      "epoch": 0.12944162436548223,
      "grad_norm": 0.7021469494672792,
      "learning_rate": 9.743004337193708e-06,
      "loss": 0.1617,
      "step": 4437
    },
    {
      "epoch": 0.1294707975961258,
      "grad_norm": 0.8919651586134387,
      "learning_rate": 9.742854802445447e-06,
      "loss": 0.2237,
      "step": 4438
    },
    {
      "epoch": 0.12949997082676937,
      "grad_norm": 0.9760275214910902,
      "learning_rate": 9.74270522535411e-06,
      "loss": 0.1749,
      "step": 4439
    },
    {
      "epoch": 0.12952914405741292,
      "grad_norm": 0.856505135887569,
      "learning_rate": 9.742555605921033e-06,
      "loss": 0.177,
      "step": 4440
    },
    {
      "epoch": 0.12955831728805647,
      "grad_norm": 0.8350006804059159,
      "learning_rate": 9.742405944147552e-06,
      "loss": 0.1582,
      "step": 4441
    },
    {
      "epoch": 0.12958749051870003,
      "grad_norm": 0.7715643717784217,
      "learning_rate": 9.742256240035001e-06,
      "loss": 0.1942,
      "step": 4442
    },
    {
      "epoch": 0.1296166637493436,
      "grad_norm": 0.9894542110207976,
      "learning_rate": 9.74210649358472e-06,
      "loss": 0.2079,
      "step": 4443
    },
    {
      "epoch": 0.12964583697998716,
      "grad_norm": 0.9973062047101183,
      "learning_rate": 9.741956704798045e-06,
      "loss": 0.1907,
      "step": 4444
    },
    {
      "epoch": 0.12967501021063071,
      "grad_norm": 0.8975359268533455,
      "learning_rate": 9.741806873676311e-06,
      "loss": 0.1728,
      "step": 4445
    },
    {
      "epoch": 0.1297041834412743,
      "grad_norm": 0.9650392929427682,
      "learning_rate": 9.741657000220858e-06,
      "loss": 0.1688,
      "step": 4446
    },
    {
      "epoch": 0.12973335667191785,
      "grad_norm": 0.8087828182320042,
      "learning_rate": 9.741507084433024e-06,
      "loss": 0.1742,
      "step": 4447
    },
    {
      "epoch": 0.1297625299025614,
      "grad_norm": 0.8646356277152982,
      "learning_rate": 9.741357126314146e-06,
      "loss": 0.1692,
      "step": 4448
    },
    {
      "epoch": 0.12979170313320498,
      "grad_norm": 1.2893248191042816,
      "learning_rate": 9.741207125865562e-06,
      "loss": 0.199,
      "step": 4449
    },
    {
      "epoch": 0.12982087636384854,
      "grad_norm": 0.8472904394910414,
      "learning_rate": 9.741057083088614e-06,
      "loss": 0.1643,
      "step": 4450
    },
    {
      "epoch": 0.1298500495944921,
      "grad_norm": 0.6933867492193225,
      "learning_rate": 9.74090699798464e-06,
      "loss": 0.1597,
      "step": 4451
    },
    {
      "epoch": 0.12987922282513564,
      "grad_norm": 0.8737035311152308,
      "learning_rate": 9.740756870554979e-06,
      "loss": 0.1838,
      "step": 4452
    },
    {
      "epoch": 0.12990839605577922,
      "grad_norm": 0.7910643982888277,
      "learning_rate": 9.740606700800974e-06,
      "loss": 0.1497,
      "step": 4453
    },
    {
      "epoch": 0.12993756928642278,
      "grad_norm": 0.8199743996277233,
      "learning_rate": 9.740456488723964e-06,
      "loss": 0.1917,
      "step": 4454
    },
    {
      "epoch": 0.12996674251706633,
      "grad_norm": 0.7722978469757387,
      "learning_rate": 9.74030623432529e-06,
      "loss": 0.1695,
      "step": 4455
    },
    {
      "epoch": 0.1299959157477099,
      "grad_norm": 1.308533333466574,
      "learning_rate": 9.740155937606291e-06,
      "loss": 0.1709,
      "step": 4456
    },
    {
      "epoch": 0.13002508897835346,
      "grad_norm": 0.7990116703827624,
      "learning_rate": 9.740005598568314e-06,
      "loss": 0.1745,
      "step": 4457
    },
    {
      "epoch": 0.13005426220899702,
      "grad_norm": 1.0282780284630753,
      "learning_rate": 9.739855217212699e-06,
      "loss": 0.2187,
      "step": 4458
    },
    {
      "epoch": 0.1300834354396406,
      "grad_norm": 0.8502975395774706,
      "learning_rate": 9.739704793540786e-06,
      "loss": 0.1778,
      "step": 4459
    },
    {
      "epoch": 0.13011260867028415,
      "grad_norm": 0.9208482895620984,
      "learning_rate": 9.739554327553922e-06,
      "loss": 0.178,
      "step": 4460
    },
    {
      "epoch": 0.1301417819009277,
      "grad_norm": 0.9264866785624601,
      "learning_rate": 9.739403819253447e-06,
      "loss": 0.1888,
      "step": 4461
    },
    {
      "epoch": 0.13017095513157126,
      "grad_norm": 0.8794463549541848,
      "learning_rate": 9.739253268640705e-06,
      "loss": 0.1728,
      "step": 4462
    },
    {
      "epoch": 0.13020012836221484,
      "grad_norm": 0.9188176098897562,
      "learning_rate": 9.739102675717044e-06,
      "loss": 0.1627,
      "step": 4463
    },
    {
      "epoch": 0.1302293015928584,
      "grad_norm": 0.9793199473961062,
      "learning_rate": 9.738952040483804e-06,
      "loss": 0.178,
      "step": 4464
    },
    {
      "epoch": 0.13025847482350195,
      "grad_norm": 0.9646800594319975,
      "learning_rate": 9.738801362942332e-06,
      "loss": 0.1757,
      "step": 4465
    },
    {
      "epoch": 0.13028764805414553,
      "grad_norm": 1.1586724336458754,
      "learning_rate": 9.738650643093972e-06,
      "loss": 0.1992,
      "step": 4466
    },
    {
      "epoch": 0.13031682128478908,
      "grad_norm": 1.1308472705818482,
      "learning_rate": 9.738499880940071e-06,
      "loss": 0.167,
      "step": 4467
    },
    {
      "epoch": 0.13034599451543263,
      "grad_norm": 0.901168776397259,
      "learning_rate": 9.738349076481975e-06,
      "loss": 0.193,
      "step": 4468
    },
    {
      "epoch": 0.1303751677460762,
      "grad_norm": 0.9144738545975689,
      "learning_rate": 9.738198229721028e-06,
      "loss": 0.1824,
      "step": 4469
    },
    {
      "epoch": 0.13040434097671977,
      "grad_norm": 0.8880825342168955,
      "learning_rate": 9.738047340658578e-06,
      "loss": 0.2157,
      "step": 4470
    },
    {
      "epoch": 0.13043351420736332,
      "grad_norm": 0.7563688378989818,
      "learning_rate": 9.737896409295974e-06,
      "loss": 0.1791,
      "step": 4471
    },
    {
      "epoch": 0.13046268743800687,
      "grad_norm": 0.9776378723900304,
      "learning_rate": 9.73774543563456e-06,
      "loss": 0.1844,
      "step": 4472
    },
    {
      "epoch": 0.13049186066865046,
      "grad_norm": 0.7359968535887115,
      "learning_rate": 9.737594419675687e-06,
      "loss": 0.1737,
      "step": 4473
    },
    {
      "epoch": 0.130521033899294,
      "grad_norm": 0.7716440186722187,
      "learning_rate": 9.737443361420702e-06,
      "loss": 0.1718,
      "step": 4474
    },
    {
      "epoch": 0.13055020712993756,
      "grad_norm": 0.9520845386723726,
      "learning_rate": 9.737292260870954e-06,
      "loss": 0.1585,
      "step": 4475
    },
    {
      "epoch": 0.13057938036058114,
      "grad_norm": 0.721596082019839,
      "learning_rate": 9.737141118027791e-06,
      "loss": 0.1939,
      "step": 4476
    },
    {
      "epoch": 0.1306085535912247,
      "grad_norm": 0.8360220237465885,
      "learning_rate": 9.736989932892564e-06,
      "loss": 0.2028,
      "step": 4477
    },
    {
      "epoch": 0.13063772682186825,
      "grad_norm": 1.0491231446619385,
      "learning_rate": 9.73683870546662e-06,
      "loss": 0.1842,
      "step": 4478
    },
    {
      "epoch": 0.1306669000525118,
      "grad_norm": 0.8288794621008557,
      "learning_rate": 9.736687435751311e-06,
      "loss": 0.1732,
      "step": 4479
    },
    {
      "epoch": 0.13069607328315538,
      "grad_norm": 0.9218598572374194,
      "learning_rate": 9.736536123747989e-06,
      "loss": 0.1645,
      "step": 4480
    },
    {
      "epoch": 0.13072524651379894,
      "grad_norm": 0.8530519074164498,
      "learning_rate": 9.736384769458e-06,
      "loss": 0.1692,
      "step": 4481
    },
    {
      "epoch": 0.1307544197444425,
      "grad_norm": 0.8975258785313561,
      "learning_rate": 9.736233372882701e-06,
      "loss": 0.1797,
      "step": 4482
    },
    {
      "epoch": 0.13078359297508607,
      "grad_norm": 0.7132316423037498,
      "learning_rate": 9.73608193402344e-06,
      "loss": 0.184,
      "step": 4483
    },
    {
      "epoch": 0.13081276620572962,
      "grad_norm": 0.6603410548513219,
      "learning_rate": 9.735930452881571e-06,
      "loss": 0.1705,
      "step": 4484
    },
    {
      "epoch": 0.13084193943637318,
      "grad_norm": 0.9550476682958892,
      "learning_rate": 9.735778929458446e-06,
      "loss": 0.1967,
      "step": 4485
    },
    {
      "epoch": 0.13087111266701676,
      "grad_norm": 0.9924511364030971,
      "learning_rate": 9.735627363755415e-06,
      "loss": 0.1805,
      "step": 4486
    },
    {
      "epoch": 0.1309002858976603,
      "grad_norm": 0.887661578594931,
      "learning_rate": 9.735475755773836e-06,
      "loss": 0.1785,
      "step": 4487
    },
    {
      "epoch": 0.13092945912830387,
      "grad_norm": 0.7651320938224379,
      "learning_rate": 9.735324105515059e-06,
      "loss": 0.1823,
      "step": 4488
    },
    {
      "epoch": 0.13095863235894742,
      "grad_norm": 0.7714400056966344,
      "learning_rate": 9.735172412980439e-06,
      "loss": 0.1636,
      "step": 4489
    },
    {
      "epoch": 0.130987805589591,
      "grad_norm": 0.8132703483276422,
      "learning_rate": 9.735020678171327e-06,
      "loss": 0.1729,
      "step": 4490
    },
    {
      "epoch": 0.13101697882023455,
      "grad_norm": 0.7972538544908403,
      "learning_rate": 9.734868901089084e-06,
      "loss": 0.1672,
      "step": 4491
    },
    {
      "epoch": 0.1310461520508781,
      "grad_norm": 0.8735972701058112,
      "learning_rate": 9.734717081735061e-06,
      "loss": 0.1592,
      "step": 4492
    },
    {
      "epoch": 0.1310753252815217,
      "grad_norm": 0.9802666760505124,
      "learning_rate": 9.734565220110614e-06,
      "loss": 0.1874,
      "step": 4493
    },
    {
      "epoch": 0.13110449851216524,
      "grad_norm": 0.9545295838676167,
      "learning_rate": 9.7344133162171e-06,
      "loss": 0.1914,
      "step": 4494
    },
    {
      "epoch": 0.1311336717428088,
      "grad_norm": 0.7092996313604574,
      "learning_rate": 9.734261370055873e-06,
      "loss": 0.1794,
      "step": 4495
    },
    {
      "epoch": 0.13116284497345235,
      "grad_norm": 0.8287097252576615,
      "learning_rate": 9.734109381628289e-06,
      "loss": 0.1775,
      "step": 4496
    },
    {
      "epoch": 0.13119201820409593,
      "grad_norm": 0.9295568582263029,
      "learning_rate": 9.73395735093571e-06,
      "loss": 0.1621,
      "step": 4497
    },
    {
      "epoch": 0.13122119143473948,
      "grad_norm": 0.7091365935560728,
      "learning_rate": 9.733805277979488e-06,
      "loss": 0.1803,
      "step": 4498
    },
    {
      "epoch": 0.13125036466538303,
      "grad_norm": 0.8390669649611711,
      "learning_rate": 9.733653162760984e-06,
      "loss": 0.1791,
      "step": 4499
    },
    {
      "epoch": 0.13127953789602662,
      "grad_norm": 0.9664208370062155,
      "learning_rate": 9.733501005281552e-06,
      "loss": 0.1748,
      "step": 4500
    },
    {
      "epoch": 0.13130871112667017,
      "grad_norm": 0.8713257616137842,
      "learning_rate": 9.733348805542555e-06,
      "loss": 0.198,
      "step": 4501
    },
    {
      "epoch": 0.13133788435731372,
      "grad_norm": 1.0050067027036291,
      "learning_rate": 9.73319656354535e-06,
      "loss": 0.166,
      "step": 4502
    },
    {
      "epoch": 0.1313670575879573,
      "grad_norm": 0.8355747823438511,
      "learning_rate": 9.733044279291293e-06,
      "loss": 0.1673,
      "step": 4503
    },
    {
      "epoch": 0.13139623081860086,
      "grad_norm": 0.7444021287462947,
      "learning_rate": 9.73289195278175e-06,
      "loss": 0.1845,
      "step": 4504
    },
    {
      "epoch": 0.1314254040492444,
      "grad_norm": 0.9151521681337017,
      "learning_rate": 9.732739584018074e-06,
      "loss": 0.181,
      "step": 4505
    },
    {
      "epoch": 0.13145457727988796,
      "grad_norm": 0.7972987826597281,
      "learning_rate": 9.732587173001631e-06,
      "loss": 0.1592,
      "step": 4506
    },
    {
      "epoch": 0.13148375051053154,
      "grad_norm": 0.8646593079278234,
      "learning_rate": 9.732434719733782e-06,
      "loss": 0.1704,
      "step": 4507
    },
    {
      "epoch": 0.1315129237411751,
      "grad_norm": 0.8241084619506681,
      "learning_rate": 9.732282224215881e-06,
      "loss": 0.1901,
      "step": 4508
    },
    {
      "epoch": 0.13154209697181865,
      "grad_norm": 0.9892414621713331,
      "learning_rate": 9.732129686449296e-06,
      "loss": 0.1651,
      "step": 4509
    },
    {
      "epoch": 0.13157127020246223,
      "grad_norm": 0.9819990085102668,
      "learning_rate": 9.731977106435387e-06,
      "loss": 0.188,
      "step": 4510
    },
    {
      "epoch": 0.13160044343310578,
      "grad_norm": 0.9171421502044307,
      "learning_rate": 9.731824484175516e-06,
      "loss": 0.1876,
      "step": 4511
    },
    {
      "epoch": 0.13162961666374934,
      "grad_norm": 0.8218702361406134,
      "learning_rate": 9.731671819671045e-06,
      "loss": 0.179,
      "step": 4512
    },
    {
      "epoch": 0.1316587898943929,
      "grad_norm": 1.2322820415795661,
      "learning_rate": 9.731519112923338e-06,
      "loss": 0.2002,
      "step": 4513
    },
    {
      "epoch": 0.13168796312503647,
      "grad_norm": 0.9227080892573732,
      "learning_rate": 9.731366363933759e-06,
      "loss": 0.1608,
      "step": 4514
    },
    {
      "epoch": 0.13171713635568003,
      "grad_norm": 0.9087240830849821,
      "learning_rate": 9.731213572703668e-06,
      "loss": 0.1818,
      "step": 4515
    },
    {
      "epoch": 0.13174630958632358,
      "grad_norm": 0.8235324858729057,
      "learning_rate": 9.731060739234433e-06,
      "loss": 0.1703,
      "step": 4516
    },
    {
      "epoch": 0.13177548281696716,
      "grad_norm": 0.9362396933008768,
      "learning_rate": 9.730907863527417e-06,
      "loss": 0.1648,
      "step": 4517
    },
    {
      "epoch": 0.1318046560476107,
      "grad_norm": 1.1912283627279456,
      "learning_rate": 9.730754945583985e-06,
      "loss": 0.1865,
      "step": 4518
    },
    {
      "epoch": 0.13183382927825427,
      "grad_norm": 0.8935139894178387,
      "learning_rate": 9.730601985405502e-06,
      "loss": 0.1914,
      "step": 4519
    },
    {
      "epoch": 0.13186300250889785,
      "grad_norm": 0.973250479275411,
      "learning_rate": 9.730448982993335e-06,
      "loss": 0.184,
      "step": 4520
    },
    {
      "epoch": 0.1318921757395414,
      "grad_norm": 0.9207900746444148,
      "learning_rate": 9.730295938348847e-06,
      "loss": 0.1975,
      "step": 4521
    },
    {
      "epoch": 0.13192134897018495,
      "grad_norm": 0.783714243575158,
      "learning_rate": 9.730142851473407e-06,
      "loss": 0.1571,
      "step": 4522
    },
    {
      "epoch": 0.1319505222008285,
      "grad_norm": 0.9324602659366575,
      "learning_rate": 9.729989722368381e-06,
      "loss": 0.1912,
      "step": 4523
    },
    {
      "epoch": 0.1319796954314721,
      "grad_norm": 0.808382609467356,
      "learning_rate": 9.729836551035134e-06,
      "loss": 0.1559,
      "step": 4524
    },
    {
      "epoch": 0.13200886866211564,
      "grad_norm": 0.7921957445975134,
      "learning_rate": 9.729683337475037e-06,
      "loss": 0.1716,
      "step": 4525
    },
    {
      "epoch": 0.1320380418927592,
      "grad_norm": 0.8852867994342315,
      "learning_rate": 9.729530081689456e-06,
      "loss": 0.1857,
      "step": 4526
    },
    {
      "epoch": 0.13206721512340278,
      "grad_norm": 0.8749446727151079,
      "learning_rate": 9.72937678367976e-06,
      "loss": 0.1931,
      "step": 4527
    },
    {
      "epoch": 0.13209638835404633,
      "grad_norm": 0.7229377908847272,
      "learning_rate": 9.729223443447318e-06,
      "loss": 0.1641,
      "step": 4528
    },
    {
      "epoch": 0.13212556158468988,
      "grad_norm": 0.9158433909304731,
      "learning_rate": 9.729070060993495e-06,
      "loss": 0.1791,
      "step": 4529
    },
    {
      "epoch": 0.13215473481533346,
      "grad_norm": 0.8377665759811016,
      "learning_rate": 9.728916636319666e-06,
      "loss": 0.1791,
      "step": 4530
    },
    {
      "epoch": 0.13218390804597702,
      "grad_norm": 0.6941753010427415,
      "learning_rate": 9.728763169427197e-06,
      "loss": 0.1618,
      "step": 4531
    },
    {
      "epoch": 0.13221308127662057,
      "grad_norm": 0.8049438878497239,
      "learning_rate": 9.72860966031746e-06,
      "loss": 0.1604,
      "step": 4532
    },
    {
      "epoch": 0.13224225450726412,
      "grad_norm": 0.8760066339377347,
      "learning_rate": 9.728456108991824e-06,
      "loss": 0.1704,
      "step": 4533
    },
    {
      "epoch": 0.1322714277379077,
      "grad_norm": 0.7424387588543098,
      "learning_rate": 9.728302515451661e-06,
      "loss": 0.1493,
      "step": 4534
    },
    {
      "epoch": 0.13230060096855126,
      "grad_norm": 0.9813900623889721,
      "learning_rate": 9.728148879698341e-06,
      "loss": 0.1796,
      "step": 4535
    },
    {
      "epoch": 0.1323297741991948,
      "grad_norm": 0.8126444801411503,
      "learning_rate": 9.727995201733238e-06,
      "loss": 0.1783,
      "step": 4536
    },
    {
      "epoch": 0.1323589474298384,
      "grad_norm": 0.8546638488854917,
      "learning_rate": 9.727841481557722e-06,
      "loss": 0.2128,
      "step": 4537
    },
    {
      "epoch": 0.13238812066048194,
      "grad_norm": 0.6899971193858305,
      "learning_rate": 9.727687719173164e-06,
      "loss": 0.1565,
      "step": 4538
    },
    {
      "epoch": 0.1324172938911255,
      "grad_norm": 1.0858397714441617,
      "learning_rate": 9.727533914580941e-06,
      "loss": 0.1578,
      "step": 4539
    },
    {
      "epoch": 0.13244646712176905,
      "grad_norm": 0.9089000914262344,
      "learning_rate": 9.727380067782424e-06,
      "loss": 0.1679,
      "step": 4540
    },
    {
      "epoch": 0.13247564035241263,
      "grad_norm": 0.9199188357334842,
      "learning_rate": 9.727226178778985e-06,
      "loss": 0.1622,
      "step": 4541
    },
    {
      "epoch": 0.13250481358305619,
      "grad_norm": 1.0201509071058543,
      "learning_rate": 9.727072247572e-06,
      "loss": 0.1826,
      "step": 4542
    },
    {
      "epoch": 0.13253398681369974,
      "grad_norm": 0.8936142567745634,
      "learning_rate": 9.726918274162841e-06,
      "loss": 0.1627,
      "step": 4543
    },
    {
      "epoch": 0.13256316004434332,
      "grad_norm": 0.8517450630339168,
      "learning_rate": 9.726764258552885e-06,
      "loss": 0.1519,
      "step": 4544
    },
    {
      "epoch": 0.13259233327498687,
      "grad_norm": 0.9178315720289905,
      "learning_rate": 9.726610200743505e-06,
      "loss": 0.1757,
      "step": 4545
    },
    {
      "epoch": 0.13262150650563043,
      "grad_norm": 1.0190663322135842,
      "learning_rate": 9.726456100736079e-06,
      "loss": 0.1758,
      "step": 4546
    },
    {
      "epoch": 0.132650679736274,
      "grad_norm": 1.0820984922683805,
      "learning_rate": 9.72630195853198e-06,
      "loss": 0.2015,
      "step": 4547
    },
    {
      "epoch": 0.13267985296691756,
      "grad_norm": 1.0041173467043631,
      "learning_rate": 9.726147774132588e-06,
      "loss": 0.1603,
      "step": 4548
    },
    {
      "epoch": 0.13270902619756111,
      "grad_norm": 0.8375708058908469,
      "learning_rate": 9.725993547539274e-06,
      "loss": 0.1822,
      "step": 4549
    },
    {
      "epoch": 0.13273819942820467,
      "grad_norm": 1.068467284790401,
      "learning_rate": 9.72583927875342e-06,
      "loss": 0.1669,
      "step": 4550
    },
    {
      "epoch": 0.13276737265884825,
      "grad_norm": 0.6462988019926467,
      "learning_rate": 9.725684967776398e-06,
      "loss": 0.1908,
      "step": 4551
    },
    {
      "epoch": 0.1327965458894918,
      "grad_norm": 0.9865048607505865,
      "learning_rate": 9.725530614609592e-06,
      "loss": 0.187,
      "step": 4552
    },
    {
      "epoch": 0.13282571912013535,
      "grad_norm": 0.9761082259552106,
      "learning_rate": 9.725376219254374e-06,
      "loss": 0.1827,
      "step": 4553
    },
    {
      "epoch": 0.13285489235077894,
      "grad_norm": 0.7043053307124566,
      "learning_rate": 9.725221781712128e-06,
      "loss": 0.1714,
      "step": 4554
    },
    {
      "epoch": 0.1328840655814225,
      "grad_norm": 0.7991076445944068,
      "learning_rate": 9.725067301984228e-06,
      "loss": 0.1495,
      "step": 4555
    },
    {
      "epoch": 0.13291323881206604,
      "grad_norm": 0.8925024714989009,
      "learning_rate": 9.724912780072055e-06,
      "loss": 0.186,
      "step": 4556
    },
    {
      "epoch": 0.13294241204270962,
      "grad_norm": 0.9022041240709835,
      "learning_rate": 9.72475821597699e-06,
      "loss": 0.1866,
      "step": 4557
    },
    {
      "epoch": 0.13297158527335318,
      "grad_norm": 0.8633724589768932,
      "learning_rate": 9.724603609700409e-06,
      "loss": 0.1978,
      "step": 4558
    },
    {
      "epoch": 0.13300075850399673,
      "grad_norm": 0.9573618809816482,
      "learning_rate": 9.724448961243698e-06,
      "loss": 0.1618,
      "step": 4559
    },
    {
      "epoch": 0.13302993173464028,
      "grad_norm": 0.9401676823193406,
      "learning_rate": 9.724294270608232e-06,
      "loss": 0.1531,
      "step": 4560
    },
    {
      "epoch": 0.13305910496528386,
      "grad_norm": 0.7827004137553876,
      "learning_rate": 9.724139537795396e-06,
      "loss": 0.1691,
      "step": 4561
    },
    {
      "epoch": 0.13308827819592742,
      "grad_norm": 0.8200385903586284,
      "learning_rate": 9.72398476280657e-06,
      "loss": 0.1893,
      "step": 4562
    },
    {
      "epoch": 0.13311745142657097,
      "grad_norm": 0.9458018411150592,
      "learning_rate": 9.723829945643135e-06,
      "loss": 0.1903,
      "step": 4563
    },
    {
      "epoch": 0.13314662465721455,
      "grad_norm": 0.9489788729222979,
      "learning_rate": 9.723675086306474e-06,
      "loss": 0.1685,
      "step": 4564
    },
    {
      "epoch": 0.1331757978878581,
      "grad_norm": 0.935321272037622,
      "learning_rate": 9.72352018479797e-06,
      "loss": 0.1712,
      "step": 4565
    },
    {
      "epoch": 0.13320497111850166,
      "grad_norm": 1.0357709357010814,
      "learning_rate": 9.723365241119004e-06,
      "loss": 0.1975,
      "step": 4566
    },
    {
      "epoch": 0.1332341443491452,
      "grad_norm": 1.2735266730368744,
      "learning_rate": 9.723210255270962e-06,
      "loss": 0.1677,
      "step": 4567
    },
    {
      "epoch": 0.1332633175797888,
      "grad_norm": 0.9795401224102253,
      "learning_rate": 9.723055227255227e-06,
      "loss": 0.1783,
      "step": 4568
    },
    {
      "epoch": 0.13329249081043235,
      "grad_norm": 1.169361910063188,
      "learning_rate": 9.722900157073181e-06,
      "loss": 0.1731,
      "step": 4569
    },
    {
      "epoch": 0.1333216640410759,
      "grad_norm": 0.7712404266060171,
      "learning_rate": 9.72274504472621e-06,
      "loss": 0.1732,
      "step": 4570
    },
    {
      "epoch": 0.13335083727171948,
      "grad_norm": 0.9771102635200523,
      "learning_rate": 9.722589890215699e-06,
      "loss": 0.201,
      "step": 4571
    },
    {
      "epoch": 0.13338001050236303,
      "grad_norm": 0.9443789669268178,
      "learning_rate": 9.722434693543032e-06,
      "loss": 0.1674,
      "step": 4572
    },
    {
      "epoch": 0.1334091837330066,
      "grad_norm": 1.069382026402891,
      "learning_rate": 9.722279454709596e-06,
      "loss": 0.1968,
      "step": 4573
    },
    {
      "epoch": 0.13343835696365017,
      "grad_norm": 1.0769538937358414,
      "learning_rate": 9.722124173716776e-06,
      "loss": 0.1645,
      "step": 4574
    },
    {
      "epoch": 0.13346753019429372,
      "grad_norm": 0.923235470376459,
      "learning_rate": 9.72196885056596e-06,
      "loss": 0.1918,
      "step": 4575
    },
    {
      "epoch": 0.13349670342493727,
      "grad_norm": 0.7941395869754817,
      "learning_rate": 9.721813485258533e-06,
      "loss": 0.1563,
      "step": 4576
    },
    {
      "epoch": 0.13352587665558083,
      "grad_norm": 1.0700898790958382,
      "learning_rate": 9.72165807779588e-06,
      "loss": 0.1865,
      "step": 4577
    },
    {
      "epoch": 0.1335550498862244,
      "grad_norm": 0.9833498497939681,
      "learning_rate": 9.721502628179394e-06,
      "loss": 0.1802,
      "step": 4578
    },
    {
      "epoch": 0.13358422311686796,
      "grad_norm": 0.944078838232898,
      "learning_rate": 9.721347136410458e-06,
      "loss": 0.1633,
      "step": 4579
    },
    {
      "epoch": 0.13361339634751151,
      "grad_norm": 1.1012213330837473,
      "learning_rate": 9.721191602490463e-06,
      "loss": 0.1904,
      "step": 4580
    },
    {
      "epoch": 0.1336425695781551,
      "grad_norm": 1.1939507249414758,
      "learning_rate": 9.721036026420795e-06,
      "loss": 0.1858,
      "step": 4581
    },
    {
      "epoch": 0.13367174280879865,
      "grad_norm": 1.0069483261326841,
      "learning_rate": 9.720880408202844e-06,
      "loss": 0.1618,
      "step": 4582
    },
    {
      "epoch": 0.1337009160394422,
      "grad_norm": 1.124695251461858,
      "learning_rate": 9.720724747838002e-06,
      "loss": 0.1894,
      "step": 4583
    },
    {
      "epoch": 0.13373008927008576,
      "grad_norm": 1.2071403039976019,
      "learning_rate": 9.720569045327655e-06,
      "loss": 0.15,
      "step": 4584
    },
    {
      "epoch": 0.13375926250072934,
      "grad_norm": 0.8810175876828813,
      "learning_rate": 9.720413300673194e-06,
      "loss": 0.1794,
      "step": 4585
    },
    {
      "epoch": 0.1337884357313729,
      "grad_norm": 1.0453791704302176,
      "learning_rate": 9.72025751387601e-06,
      "loss": 0.213,
      "step": 4586
    },
    {
      "epoch": 0.13381760896201644,
      "grad_norm": 0.927080338413851,
      "learning_rate": 9.720101684937494e-06,
      "loss": 0.2021,
      "step": 4587
    },
    {
      "epoch": 0.13384678219266002,
      "grad_norm": 0.779984080753791,
      "learning_rate": 9.719945813859037e-06,
      "loss": 0.1872,
      "step": 4588
    },
    {
      "epoch": 0.13387595542330358,
      "grad_norm": 0.7996838449590193,
      "learning_rate": 9.719789900642031e-06,
      "loss": 0.1697,
      "step": 4589
    },
    {
      "epoch": 0.13390512865394713,
      "grad_norm": 1.0471543096218647,
      "learning_rate": 9.719633945287867e-06,
      "loss": 0.19,
      "step": 4590
    },
    {
      "epoch": 0.1339343018845907,
      "grad_norm": 0.8589292071998451,
      "learning_rate": 9.719477947797938e-06,
      "loss": 0.1916,
      "step": 4591
    },
    {
      "epoch": 0.13396347511523427,
      "grad_norm": 0.9015449264921895,
      "learning_rate": 9.719321908173636e-06,
      "loss": 0.1639,
      "step": 4592
    },
    {
      "epoch": 0.13399264834587782,
      "grad_norm": 0.7964675837696863,
      "learning_rate": 9.719165826416354e-06,
      "loss": 0.17,
      "step": 4593
    },
    {
      "epoch": 0.13402182157652137,
      "grad_norm": 0.7635652503837955,
      "learning_rate": 9.719009702527488e-06,
      "loss": 0.1807,
      "step": 4594
    },
    {
      "epoch": 0.13405099480716495,
      "grad_norm": 0.8480596163432714,
      "learning_rate": 9.718853536508428e-06,
      "loss": 0.1802,
      "step": 4595
    },
    {
      "epoch": 0.1340801680378085,
      "grad_norm": 0.9808208161559423,
      "learning_rate": 9.718697328360571e-06,
      "loss": 0.1766,
      "step": 4596
    },
    {
      "epoch": 0.13410934126845206,
      "grad_norm": 0.7814399820174374,
      "learning_rate": 9.71854107808531e-06,
      "loss": 0.1716,
      "step": 4597
    },
    {
      "epoch": 0.13413851449909564,
      "grad_norm": 1.0404926267332295,
      "learning_rate": 9.718384785684043e-06,
      "loss": 0.19,
      "step": 4598
    },
    {
      "epoch": 0.1341676877297392,
      "grad_norm": 0.8586317864673153,
      "learning_rate": 9.71822845115816e-06,
      "loss": 0.193,
      "step": 4599
    },
    {
      "epoch": 0.13419686096038275,
      "grad_norm": 0.8304034883877234,
      "learning_rate": 9.718072074509061e-06,
      "loss": 0.1874,
      "step": 4600
    },
    {
      "epoch": 0.13422603419102633,
      "grad_norm": 0.9164153625205795,
      "learning_rate": 9.717915655738142e-06,
      "loss": 0.2338,
      "step": 4601
    },
    {
      "epoch": 0.13425520742166988,
      "grad_norm": 1.1430108183267886,
      "learning_rate": 9.717759194846797e-06,
      "loss": 0.1782,
      "step": 4602
    },
    {
      "epoch": 0.13428438065231343,
      "grad_norm": 0.9137431537210429,
      "learning_rate": 9.717602691836423e-06,
      "loss": 0.1561,
      "step": 4603
    },
    {
      "epoch": 0.134313553882957,
      "grad_norm": 0.7274491086036875,
      "learning_rate": 9.717446146708421e-06,
      "loss": 0.1889,
      "step": 4604
    },
    {
      "epoch": 0.13434272711360057,
      "grad_norm": 0.9989608522592939,
      "learning_rate": 9.717289559464185e-06,
      "loss": 0.1808,
      "step": 4605
    },
    {
      "epoch": 0.13437190034424412,
      "grad_norm": 0.8142491245683972,
      "learning_rate": 9.717132930105114e-06,
      "loss": 0.1676,
      "step": 4606
    },
    {
      "epoch": 0.13440107357488768,
      "grad_norm": 0.9805855579751301,
      "learning_rate": 9.716976258632604e-06,
      "loss": 0.1949,
      "step": 4607
    },
    {
      "epoch": 0.13443024680553126,
      "grad_norm": 0.8075337425541218,
      "learning_rate": 9.716819545048058e-06,
      "loss": 0.168,
      "step": 4608
    },
    {
      "epoch": 0.1344594200361748,
      "grad_norm": 1.0539063299461362,
      "learning_rate": 9.716662789352872e-06,
      "loss": 0.1948,
      "step": 4609
    },
    {
      "epoch": 0.13448859326681836,
      "grad_norm": 0.8805078573360297,
      "learning_rate": 9.716505991548448e-06,
      "loss": 0.1681,
      "step": 4610
    },
    {
      "epoch": 0.13451776649746192,
      "grad_norm": 0.9125942018569365,
      "learning_rate": 9.716349151636183e-06,
      "loss": 0.1731,
      "step": 4611
    },
    {
      "epoch": 0.1345469397281055,
      "grad_norm": 0.9474453748762206,
      "learning_rate": 9.716192269617482e-06,
      "loss": 0.1584,
      "step": 4612
    },
    {
      "epoch": 0.13457611295874905,
      "grad_norm": 0.8860912041637936,
      "learning_rate": 9.71603534549374e-06,
      "loss": 0.1658,
      "step": 4613
    },
    {
      "epoch": 0.1346052861893926,
      "grad_norm": 0.8443991560150967,
      "learning_rate": 9.715878379266359e-06,
      "loss": 0.1585,
      "step": 4614
    },
    {
      "epoch": 0.13463445942003618,
      "grad_norm": 1.0155062221408329,
      "learning_rate": 9.715721370936742e-06,
      "loss": 0.1795,
      "step": 4615
    },
    {
      "epoch": 0.13466363265067974,
      "grad_norm": 0.8834955589918384,
      "learning_rate": 9.715564320506292e-06,
      "loss": 0.1727,
      "step": 4616
    },
    {
      "epoch": 0.1346928058813233,
      "grad_norm": 0.9188399044549099,
      "learning_rate": 9.715407227976408e-06,
      "loss": 0.1717,
      "step": 4617
    },
    {
      "epoch": 0.13472197911196687,
      "grad_norm": 1.028223764093895,
      "learning_rate": 9.715250093348494e-06,
      "loss": 0.1784,
      "step": 4618
    },
    {
      "epoch": 0.13475115234261043,
      "grad_norm": 0.844104848499323,
      "learning_rate": 9.715092916623954e-06,
      "loss": 0.1813,
      "step": 4619
    },
    {
      "epoch": 0.13478032557325398,
      "grad_norm": 0.9158693434032174,
      "learning_rate": 9.714935697804188e-06,
      "loss": 0.1856,
      "step": 4620
    },
    {
      "epoch": 0.13480949880389753,
      "grad_norm": 0.9538583470221261,
      "learning_rate": 9.714778436890604e-06,
      "loss": 0.1723,
      "step": 4621
    },
    {
      "epoch": 0.1348386720345411,
      "grad_norm": 0.9928564515885795,
      "learning_rate": 9.7146211338846e-06,
      "loss": 0.1954,
      "step": 4622
    },
    {
      "epoch": 0.13486784526518467,
      "grad_norm": 0.9387120450307491,
      "learning_rate": 9.714463788787588e-06,
      "loss": 0.1698,
      "step": 4623
    },
    {
      "epoch": 0.13489701849582822,
      "grad_norm": 1.0527771502712409,
      "learning_rate": 9.714306401600967e-06,
      "loss": 0.1907,
      "step": 4624
    },
    {
      "epoch": 0.1349261917264718,
      "grad_norm": 0.8933751834117948,
      "learning_rate": 9.714148972326144e-06,
      "loss": 0.1804,
      "step": 4625
    },
    {
      "epoch": 0.13495536495711535,
      "grad_norm": 0.6514000135480227,
      "learning_rate": 9.713991500964524e-06,
      "loss": 0.1694,
      "step": 4626
    },
    {
      "epoch": 0.1349845381877589,
      "grad_norm": 1.0321308868196355,
      "learning_rate": 9.713833987517514e-06,
      "loss": 0.1553,
      "step": 4627
    },
    {
      "epoch": 0.13501371141840246,
      "grad_norm": 0.8747041239954297,
      "learning_rate": 9.713676431986518e-06,
      "loss": 0.1678,
      "step": 4628
    },
    {
      "epoch": 0.13504288464904604,
      "grad_norm": 0.7833899639069093,
      "learning_rate": 9.713518834372946e-06,
      "loss": 0.198,
      "step": 4629
    },
    {
      "epoch": 0.1350720578796896,
      "grad_norm": 0.76908563457908,
      "learning_rate": 9.713361194678201e-06,
      "loss": 0.1672,
      "step": 4630
    },
    {
      "epoch": 0.13510123111033315,
      "grad_norm": 0.9875628885189603,
      "learning_rate": 9.713203512903695e-06,
      "loss": 0.1817,
      "step": 4631
    },
    {
      "epoch": 0.13513040434097673,
      "grad_norm": 0.9629519042401639,
      "learning_rate": 9.71304578905083e-06,
      "loss": 0.1672,
      "step": 4632
    },
    {
      "epoch": 0.13515957757162028,
      "grad_norm": 0.9857470706999932,
      "learning_rate": 9.71288802312102e-06,
      "loss": 0.186,
      "step": 4633
    },
    {
      "epoch": 0.13518875080226384,
      "grad_norm": 0.730565997339259,
      "learning_rate": 9.712730215115668e-06,
      "loss": 0.1552,
      "step": 4634
    },
    {
      "epoch": 0.13521792403290742,
      "grad_norm": 1.1446007527994253,
      "learning_rate": 9.71257236503619e-06,
      "loss": 0.1915,
      "step": 4635
    },
    {
      "epoch": 0.13524709726355097,
      "grad_norm": 0.9749549218703435,
      "learning_rate": 9.712414472883987e-06,
      "loss": 0.1812,
      "step": 4636
    },
    {
      "epoch": 0.13527627049419452,
      "grad_norm": 0.7799690275589842,
      "learning_rate": 9.712256538660474e-06,
      "loss": 0.1578,
      "step": 4637
    },
    {
      "epoch": 0.13530544372483808,
      "grad_norm": 0.7807747971278297,
      "learning_rate": 9.712098562367059e-06,
      "loss": 0.1758,
      "step": 4638
    },
    {
      "epoch": 0.13533461695548166,
      "grad_norm": 0.8710834872583403,
      "learning_rate": 9.711940544005154e-06,
      "loss": 0.1832,
      "step": 4639
    },
    {
      "epoch": 0.1353637901861252,
      "grad_norm": 0.7614578950485028,
      "learning_rate": 9.711782483576168e-06,
      "loss": 0.1732,
      "step": 4640
    },
    {
      "epoch": 0.13539296341676876,
      "grad_norm": 0.8424303678910273,
      "learning_rate": 9.711624381081513e-06,
      "loss": 0.1992,
      "step": 4641
    },
    {
      "epoch": 0.13542213664741234,
      "grad_norm": 0.865337928104798,
      "learning_rate": 9.711466236522599e-06,
      "loss": 0.1697,
      "step": 4642
    },
    {
      "epoch": 0.1354513098780559,
      "grad_norm": 0.9408710848620837,
      "learning_rate": 9.71130804990084e-06,
      "loss": 0.187,
      "step": 4643
    },
    {
      "epoch": 0.13548048310869945,
      "grad_norm": 0.8181683362796547,
      "learning_rate": 9.711149821217648e-06,
      "loss": 0.1815,
      "step": 4644
    },
    {
      "epoch": 0.13550965633934303,
      "grad_norm": 0.8425298921105472,
      "learning_rate": 9.710991550474435e-06,
      "loss": 0.1558,
      "step": 4645
    },
    {
      "epoch": 0.13553882956998659,
      "grad_norm": 0.8238656748496306,
      "learning_rate": 9.710833237672612e-06,
      "loss": 0.1592,
      "step": 4646
    },
    {
      "epoch": 0.13556800280063014,
      "grad_norm": 1.0303863480310418,
      "learning_rate": 9.710674882813598e-06,
      "loss": 0.1952,
      "step": 4647
    },
    {
      "epoch": 0.1355971760312737,
      "grad_norm": 0.9194943781256945,
      "learning_rate": 9.7105164858988e-06,
      "loss": 0.198,
      "step": 4648
    },
    {
      "epoch": 0.13562634926191727,
      "grad_norm": 0.8206790543242434,
      "learning_rate": 9.710358046929636e-06,
      "loss": 0.1733,
      "step": 4649
    },
    {
      "epoch": 0.13565552249256083,
      "grad_norm": 1.107013105208648,
      "learning_rate": 9.710199565907521e-06,
      "loss": 0.1918,
      "step": 4650
    },
    {
      "epoch": 0.13568469572320438,
      "grad_norm": 0.9489735749586852,
      "learning_rate": 9.710041042833869e-06,
      "loss": 0.1512,
      "step": 4651
    },
    {
      "epoch": 0.13571386895384796,
      "grad_norm": 0.7685001734325031,
      "learning_rate": 9.709882477710093e-06,
      "loss": 0.1822,
      "step": 4652
    },
    {
      "epoch": 0.1357430421844915,
      "grad_norm": 0.9211035392852889,
      "learning_rate": 9.709723870537613e-06,
      "loss": 0.1782,
      "step": 4653
    },
    {
      "epoch": 0.13577221541513507,
      "grad_norm": 0.9523188363240555,
      "learning_rate": 9.70956522131784e-06,
      "loss": 0.1606,
      "step": 4654
    },
    {
      "epoch": 0.13580138864577862,
      "grad_norm": 0.9060661785416497,
      "learning_rate": 9.709406530052194e-06,
      "loss": 0.1775,
      "step": 4655
    },
    {
      "epoch": 0.1358305618764222,
      "grad_norm": 0.9669192154217833,
      "learning_rate": 9.709247796742091e-06,
      "loss": 0.187,
      "step": 4656
    },
    {
      "epoch": 0.13585973510706575,
      "grad_norm": 0.9399764473212696,
      "learning_rate": 9.709089021388947e-06,
      "loss": 0.195,
      "step": 4657
    },
    {
      "epoch": 0.1358889083377093,
      "grad_norm": 0.9661483210094688,
      "learning_rate": 9.708930203994182e-06,
      "loss": 0.1798,
      "step": 4658
    },
    {
      "epoch": 0.1359180815683529,
      "grad_norm": 0.9124454167609475,
      "learning_rate": 9.708771344559212e-06,
      "loss": 0.1684,
      "step": 4659
    },
    {
      "epoch": 0.13594725479899644,
      "grad_norm": 1.0215319227945203,
      "learning_rate": 9.708612443085454e-06,
      "loss": 0.1919,
      "step": 4660
    },
    {
      "epoch": 0.13597642802964,
      "grad_norm": 1.0114839591203058,
      "learning_rate": 9.708453499574328e-06,
      "loss": 0.1734,
      "step": 4661
    },
    {
      "epoch": 0.13600560126028358,
      "grad_norm": 0.9298959485935899,
      "learning_rate": 9.708294514027255e-06,
      "loss": 0.177,
      "step": 4662
    },
    {
      "epoch": 0.13603477449092713,
      "grad_norm": 0.9542062169030524,
      "learning_rate": 9.708135486445652e-06,
      "loss": 0.1771,
      "step": 4663
    },
    {
      "epoch": 0.13606394772157068,
      "grad_norm": 0.858838796975156,
      "learning_rate": 9.707976416830938e-06,
      "loss": 0.1553,
      "step": 4664
    },
    {
      "epoch": 0.13609312095221424,
      "grad_norm": 0.7526944223238617,
      "learning_rate": 9.707817305184535e-06,
      "loss": 0.18,
      "step": 4665
    },
    {
      "epoch": 0.13612229418285782,
      "grad_norm": 0.7810099686966809,
      "learning_rate": 9.707658151507864e-06,
      "loss": 0.1637,
      "step": 4666
    },
    {
      "epoch": 0.13615146741350137,
      "grad_norm": 0.9086215597119266,
      "learning_rate": 9.707498955802343e-06,
      "loss": 0.1872,
      "step": 4667
    },
    {
      "epoch": 0.13618064064414492,
      "grad_norm": 0.9508524355128216,
      "learning_rate": 9.707339718069397e-06,
      "loss": 0.1891,
      "step": 4668
    },
    {
      "epoch": 0.1362098138747885,
      "grad_norm": 0.7195667254981255,
      "learning_rate": 9.707180438310446e-06,
      "loss": 0.1848,
      "step": 4669
    },
    {
      "epoch": 0.13623898710543206,
      "grad_norm": 0.8184403771567913,
      "learning_rate": 9.707021116526908e-06,
      "loss": 0.182,
      "step": 4670
    },
    {
      "epoch": 0.1362681603360756,
      "grad_norm": 0.9658791864775528,
      "learning_rate": 9.706861752720213e-06,
      "loss": 0.1559,
      "step": 4671
    },
    {
      "epoch": 0.1362973335667192,
      "grad_norm": 0.7212660341141374,
      "learning_rate": 9.706702346891778e-06,
      "loss": 0.1546,
      "step": 4672
    },
    {
      "epoch": 0.13632650679736275,
      "grad_norm": 0.8810046665043058,
      "learning_rate": 9.70654289904303e-06,
      "loss": 0.1698,
      "step": 4673
    },
    {
      "epoch": 0.1363556800280063,
      "grad_norm": 0.8165448817841481,
      "learning_rate": 9.70638340917539e-06,
      "loss": 0.1819,
      "step": 4674
    },
    {
      "epoch": 0.13638485325864985,
      "grad_norm": 1.0664393599702064,
      "learning_rate": 9.706223877290282e-06,
      "loss": 0.1813,
      "step": 4675
    },
    {
      "epoch": 0.13641402648929343,
      "grad_norm": 0.7238391073438882,
      "learning_rate": 9.70606430338913e-06,
      "loss": 0.1828,
      "step": 4676
    },
    {
      "epoch": 0.136443199719937,
      "grad_norm": 1.005694470360366,
      "learning_rate": 9.70590468747336e-06,
      "loss": 0.1973,
      "step": 4677
    },
    {
      "epoch": 0.13647237295058054,
      "grad_norm": 0.9241377635739217,
      "learning_rate": 9.705745029544396e-06,
      "loss": 0.1871,
      "step": 4678
    },
    {
      "epoch": 0.13650154618122412,
      "grad_norm": 0.8179031785167225,
      "learning_rate": 9.705585329603664e-06,
      "loss": 0.1804,
      "step": 4679
    },
    {
      "epoch": 0.13653071941186767,
      "grad_norm": 0.7308893257140879,
      "learning_rate": 9.705425587652589e-06,
      "loss": 0.1732,
      "step": 4680
    },
    {
      "epoch": 0.13655989264251123,
      "grad_norm": 0.7528912940315003,
      "learning_rate": 9.705265803692597e-06,
      "loss": 0.1722,
      "step": 4681
    },
    {
      "epoch": 0.13658906587315478,
      "grad_norm": 1.052690632300158,
      "learning_rate": 9.705105977725117e-06,
      "loss": 0.175,
      "step": 4682
    },
    {
      "epoch": 0.13661823910379836,
      "grad_norm": 0.8706043181573576,
      "learning_rate": 9.704946109751572e-06,
      "loss": 0.1625,
      "step": 4683
    },
    {
      "epoch": 0.13664741233444191,
      "grad_norm": 0.7828218637229405,
      "learning_rate": 9.704786199773392e-06,
      "loss": 0.1767,
      "step": 4684
    },
    {
      "epoch": 0.13667658556508547,
      "grad_norm": 1.6359450019303676,
      "learning_rate": 9.704626247792006e-06,
      "loss": 0.1768,
      "step": 4685
    },
    {
      "epoch": 0.13670575879572905,
      "grad_norm": 0.8522023527214693,
      "learning_rate": 9.704466253808837e-06,
      "loss": 0.1467,
      "step": 4686
    },
    {
      "epoch": 0.1367349320263726,
      "grad_norm": 0.9438185753052197,
      "learning_rate": 9.70430621782532e-06,
      "loss": 0.1943,
      "step": 4687
    },
    {
      "epoch": 0.13676410525701616,
      "grad_norm": 0.8967662864085583,
      "learning_rate": 9.704146139842876e-06,
      "loss": 0.1958,
      "step": 4688
    },
    {
      "epoch": 0.13679327848765974,
      "grad_norm": 0.8176074719163361,
      "learning_rate": 9.70398601986294e-06,
      "loss": 0.2017,
      "step": 4689
    },
    {
      "epoch": 0.1368224517183033,
      "grad_norm": 1.0065416187094867,
      "learning_rate": 9.70382585788694e-06,
      "loss": 0.2056,
      "step": 4690
    },
    {
      "epoch": 0.13685162494894684,
      "grad_norm": 0.8540913616045407,
      "learning_rate": 9.703665653916306e-06,
      "loss": 0.1881,
      "step": 4691
    },
    {
      "epoch": 0.1368807981795904,
      "grad_norm": 0.7964480986863118,
      "learning_rate": 9.703505407952467e-06,
      "loss": 0.1611,
      "step": 4692
    },
    {
      "epoch": 0.13690997141023398,
      "grad_norm": 1.0276098799259779,
      "learning_rate": 9.703345119996854e-06,
      "loss": 0.2064,
      "step": 4693
    },
    {
      "epoch": 0.13693914464087753,
      "grad_norm": 0.8780151029964625,
      "learning_rate": 9.7031847900509e-06,
      "loss": 0.181,
      "step": 4694
    },
    {
      "epoch": 0.13696831787152108,
      "grad_norm": 0.9988535269115225,
      "learning_rate": 9.703024418116035e-06,
      "loss": 0.1828,
      "step": 4695
    },
    {
      "epoch": 0.13699749110216466,
      "grad_norm": 0.8217531980589999,
      "learning_rate": 9.702864004193689e-06,
      "loss": 0.1904,
      "step": 4696
    },
    {
      "epoch": 0.13702666433280822,
      "grad_norm": 0.9369455041215813,
      "learning_rate": 9.702703548285297e-06,
      "loss": 0.194,
      "step": 4697
    },
    {
      "epoch": 0.13705583756345177,
      "grad_norm": 1.0206640713725064,
      "learning_rate": 9.702543050392289e-06,
      "loss": 0.1897,
      "step": 4698
    },
    {
      "epoch": 0.13708501079409532,
      "grad_norm": 0.905883141033335,
      "learning_rate": 9.702382510516101e-06,
      "loss": 0.1785,
      "step": 4699
    },
    {
      "epoch": 0.1371141840247389,
      "grad_norm": 0.822309423685823,
      "learning_rate": 9.702221928658162e-06,
      "loss": 0.1628,
      "step": 4700
    },
    {
      "epoch": 0.13714335725538246,
      "grad_norm": 0.830246206302412,
      "learning_rate": 9.702061304819912e-06,
      "loss": 0.1513,
      "step": 4701
    },
    {
      "epoch": 0.137172530486026,
      "grad_norm": 0.844493568165159,
      "learning_rate": 9.70190063900278e-06,
      "loss": 0.1923,
      "step": 4702
    },
    {
      "epoch": 0.1372017037166696,
      "grad_norm": 0.8159300219183769,
      "learning_rate": 9.701739931208199e-06,
      "loss": 0.1595,
      "step": 4703
    },
    {
      "epoch": 0.13723087694731315,
      "grad_norm": 0.9591670417352346,
      "learning_rate": 9.701579181437608e-06,
      "loss": 0.1657,
      "step": 4704
    },
    {
      "epoch": 0.1372600501779567,
      "grad_norm": 0.9397254357533835,
      "learning_rate": 9.701418389692441e-06,
      "loss": 0.1744,
      "step": 4705
    },
    {
      "epoch": 0.13728922340860028,
      "grad_norm": 0.8273778307663063,
      "learning_rate": 9.701257555974131e-06,
      "loss": 0.187,
      "step": 4706
    },
    {
      "epoch": 0.13731839663924383,
      "grad_norm": 1.003808430533134,
      "learning_rate": 9.701096680284119e-06,
      "loss": 0.1812,
      "step": 4707
    },
    {
      "epoch": 0.1373475698698874,
      "grad_norm": 0.7965113465415409,
      "learning_rate": 9.700935762623835e-06,
      "loss": 0.1621,
      "step": 4708
    },
    {
      "epoch": 0.13737674310053094,
      "grad_norm": 0.8984695013894336,
      "learning_rate": 9.700774802994721e-06,
      "loss": 0.1799,
      "step": 4709
    },
    {
      "epoch": 0.13740591633117452,
      "grad_norm": 1.0562346510337288,
      "learning_rate": 9.700613801398209e-06,
      "loss": 0.2136,
      "step": 4710
    },
    {
      "epoch": 0.13743508956181807,
      "grad_norm": 1.0092671338183845,
      "learning_rate": 9.700452757835741e-06,
      "loss": 0.179,
      "step": 4711
    },
    {
      "epoch": 0.13746426279246163,
      "grad_norm": 0.8689706657713749,
      "learning_rate": 9.700291672308752e-06,
      "loss": 0.1666,
      "step": 4712
    },
    {
      "epoch": 0.1374934360231052,
      "grad_norm": 0.9547444978433681,
      "learning_rate": 9.700130544818682e-06,
      "loss": 0.2214,
      "step": 4713
    },
    {
      "epoch": 0.13752260925374876,
      "grad_norm": 0.898366732252034,
      "learning_rate": 9.69996937536697e-06,
      "loss": 0.1915,
      "step": 4714
    },
    {
      "epoch": 0.13755178248439232,
      "grad_norm": 0.7665205658010267,
      "learning_rate": 9.69980816395505e-06,
      "loss": 0.1653,
      "step": 4715
    },
    {
      "epoch": 0.1375809557150359,
      "grad_norm": 0.8091459372871365,
      "learning_rate": 9.699646910584366e-06,
      "loss": 0.185,
      "step": 4716
    },
    {
      "epoch": 0.13761012894567945,
      "grad_norm": 0.9247503942827152,
      "learning_rate": 9.699485615256357e-06,
      "loss": 0.1824,
      "step": 4717
    },
    {
      "epoch": 0.137639302176323,
      "grad_norm": 0.8717922752137528,
      "learning_rate": 9.699324277972462e-06,
      "loss": 0.1819,
      "step": 4718
    },
    {
      "epoch": 0.13766847540696656,
      "grad_norm": 1.0773589783394717,
      "learning_rate": 9.699162898734122e-06,
      "loss": 0.1925,
      "step": 4719
    },
    {
      "epoch": 0.13769764863761014,
      "grad_norm": 1.0901931893968568,
      "learning_rate": 9.699001477542775e-06,
      "loss": 0.197,
      "step": 4720
    },
    {
      "epoch": 0.1377268218682537,
      "grad_norm": 1.1835489899861442,
      "learning_rate": 9.698840014399867e-06,
      "loss": 0.1804,
      "step": 4721
    },
    {
      "epoch": 0.13775599509889724,
      "grad_norm": 0.9021954966986981,
      "learning_rate": 9.698678509306836e-06,
      "loss": 0.207,
      "step": 4722
    },
    {
      "epoch": 0.13778516832954082,
      "grad_norm": 1.0469311591030825,
      "learning_rate": 9.698516962265125e-06,
      "loss": 0.175,
      "step": 4723
    },
    {
      "epoch": 0.13781434156018438,
      "grad_norm": 0.8073322173220286,
      "learning_rate": 9.698355373276178e-06,
      "loss": 0.1755,
      "step": 4724
    },
    {
      "epoch": 0.13784351479082793,
      "grad_norm": 0.8685317986389969,
      "learning_rate": 9.698193742341434e-06,
      "loss": 0.173,
      "step": 4725
    },
    {
      "epoch": 0.13787268802147148,
      "grad_norm": 0.8256525508359017,
      "learning_rate": 9.698032069462338e-06,
      "loss": 0.1755,
      "step": 4726
    },
    {
      "epoch": 0.13790186125211507,
      "grad_norm": 0.9431652155983686,
      "learning_rate": 9.697870354640334e-06,
      "loss": 0.1899,
      "step": 4727
    },
    {
      "epoch": 0.13793103448275862,
      "grad_norm": 1.0830611080645738,
      "learning_rate": 9.697708597876863e-06,
      "loss": 0.1504,
      "step": 4728
    },
    {
      "epoch": 0.13796020771340217,
      "grad_norm": 0.7177082561141378,
      "learning_rate": 9.697546799173372e-06,
      "loss": 0.1696,
      "step": 4729
    },
    {
      "epoch": 0.13798938094404575,
      "grad_norm": 1.0145093897760615,
      "learning_rate": 9.697384958531307e-06,
      "loss": 0.1684,
      "step": 4730
    },
    {
      "epoch": 0.1380185541746893,
      "grad_norm": 0.9537271286997424,
      "learning_rate": 9.697223075952107e-06,
      "loss": 0.2042,
      "step": 4731
    },
    {
      "epoch": 0.13804772740533286,
      "grad_norm": 0.8615635927849443,
      "learning_rate": 9.697061151437223e-06,
      "loss": 0.1744,
      "step": 4732
    },
    {
      "epoch": 0.13807690063597644,
      "grad_norm": 1.0196870703943546,
      "learning_rate": 9.696899184988097e-06,
      "loss": 0.164,
      "step": 4733
    },
    {
      "epoch": 0.13810607386662,
      "grad_norm": 0.7820486500981627,
      "learning_rate": 9.696737176606177e-06,
      "loss": 0.1603,
      "step": 4734
    },
    {
      "epoch": 0.13813524709726355,
      "grad_norm": 1.0072738160492847,
      "learning_rate": 9.696575126292908e-06,
      "loss": 0.1887,
      "step": 4735
    },
    {
      "epoch": 0.1381644203279071,
      "grad_norm": 1.002552660392895,
      "learning_rate": 9.696413034049738e-06,
      "loss": 0.1781,
      "step": 4736
    },
    {
      "epoch": 0.13819359355855068,
      "grad_norm": 0.8051411523838466,
      "learning_rate": 9.696250899878114e-06,
      "loss": 0.174,
      "step": 4737
    },
    {
      "epoch": 0.13822276678919423,
      "grad_norm": 0.8422472358201145,
      "learning_rate": 9.696088723779481e-06,
      "loss": 0.2058,
      "step": 4738
    },
    {
      "epoch": 0.1382519400198378,
      "grad_norm": 1.0801041751659373,
      "learning_rate": 9.695926505755291e-06,
      "loss": 0.1796,
      "step": 4739
    },
    {
      "epoch": 0.13828111325048137,
      "grad_norm": 0.7600740521139456,
      "learning_rate": 9.695764245806989e-06,
      "loss": 0.1877,
      "step": 4740
    },
    {
      "epoch": 0.13831028648112492,
      "grad_norm": 0.7945319040741914,
      "learning_rate": 9.695601943936026e-06,
      "loss": 0.1549,
      "step": 4741
    },
    {
      "epoch": 0.13833945971176848,
      "grad_norm": 0.992848090373319,
      "learning_rate": 9.69543960014385e-06,
      "loss": 0.1774,
      "step": 4742
    },
    {
      "epoch": 0.13836863294241206,
      "grad_norm": 0.7954009770454634,
      "learning_rate": 9.695277214431909e-06,
      "loss": 0.1525,
      "step": 4743
    },
    {
      "epoch": 0.1383978061730556,
      "grad_norm": 1.0887109705714113,
      "learning_rate": 9.695114786801654e-06,
      "loss": 0.1813,
      "step": 4744
    },
    {
      "epoch": 0.13842697940369916,
      "grad_norm": 0.7643587553064333,
      "learning_rate": 9.694952317254535e-06,
      "loss": 0.1788,
      "step": 4745
    },
    {
      "epoch": 0.13845615263434272,
      "grad_norm": 1.371828272599082,
      "learning_rate": 9.694789805792001e-06,
      "loss": 0.1648,
      "step": 4746
    },
    {
      "epoch": 0.1384853258649863,
      "grad_norm": 0.8498365739317533,
      "learning_rate": 9.694627252415507e-06,
      "loss": 0.1668,
      "step": 4747
    },
    {
      "epoch": 0.13851449909562985,
      "grad_norm": 1.2725773426236568,
      "learning_rate": 9.6944646571265e-06,
      "loss": 0.1729,
      "step": 4748
    },
    {
      "epoch": 0.1385436723262734,
      "grad_norm": 0.8315757480574972,
      "learning_rate": 9.694302019926433e-06,
      "loss": 0.1991,
      "step": 4749
    },
    {
      "epoch": 0.13857284555691698,
      "grad_norm": 1.1038174200605573,
      "learning_rate": 9.69413934081676e-06,
      "loss": 0.1822,
      "step": 4750
    },
    {
      "epoch": 0.13860201878756054,
      "grad_norm": 0.9394178159287982,
      "learning_rate": 9.69397661979893e-06,
      "loss": 0.1935,
      "step": 4751
    },
    {
      "epoch": 0.1386311920182041,
      "grad_norm": 0.9224771336747449,
      "learning_rate": 9.693813856874399e-06,
      "loss": 0.181,
      "step": 4752
    },
    {
      "epoch": 0.13866036524884764,
      "grad_norm": 1.0966758332974174,
      "learning_rate": 9.693651052044617e-06,
      "loss": 0.201,
      "step": 4753
    },
    {
      "epoch": 0.13868953847949123,
      "grad_norm": 1.0613697191543172,
      "learning_rate": 9.693488205311039e-06,
      "loss": 0.1764,
      "step": 4754
    },
    {
      "epoch": 0.13871871171013478,
      "grad_norm": 0.8008712692693377,
      "learning_rate": 9.693325316675118e-06,
      "loss": 0.1506,
      "step": 4755
    },
    {
      "epoch": 0.13874788494077833,
      "grad_norm": 0.9747624024045326,
      "learning_rate": 9.69316238613831e-06,
      "loss": 0.1886,
      "step": 4756
    },
    {
      "epoch": 0.1387770581714219,
      "grad_norm": 1.0202675067414013,
      "learning_rate": 9.69299941370207e-06,
      "loss": 0.1833,
      "step": 4757
    },
    {
      "epoch": 0.13880623140206547,
      "grad_norm": 0.8653840356432823,
      "learning_rate": 9.692836399367849e-06,
      "loss": 0.1441,
      "step": 4758
    },
    {
      "epoch": 0.13883540463270902,
      "grad_norm": 0.8570360870727849,
      "learning_rate": 9.692673343137105e-06,
      "loss": 0.1548,
      "step": 4759
    },
    {
      "epoch": 0.1388645778633526,
      "grad_norm": 0.6883782771617556,
      "learning_rate": 9.692510245011295e-06,
      "loss": 0.1604,
      "step": 4760
    },
    {
      "epoch": 0.13889375109399615,
      "grad_norm": 0.8918098074714871,
      "learning_rate": 9.692347104991872e-06,
      "loss": 0.1833,
      "step": 4761
    },
    {
      "epoch": 0.1389229243246397,
      "grad_norm": 0.8063028934656418,
      "learning_rate": 9.692183923080296e-06,
      "loss": 0.2015,
      "step": 4762
    },
    {
      "epoch": 0.13895209755528326,
      "grad_norm": 0.8285488144862405,
      "learning_rate": 9.692020699278022e-06,
      "loss": 0.2058,
      "step": 4763
    },
    {
      "epoch": 0.13898127078592684,
      "grad_norm": 0.9826245249457514,
      "learning_rate": 9.691857433586506e-06,
      "loss": 0.1755,
      "step": 4764
    },
    {
      "epoch": 0.1390104440165704,
      "grad_norm": 1.0692972601624349,
      "learning_rate": 9.691694126007207e-06,
      "loss": 0.1621,
      "step": 4765
    },
    {
      "epoch": 0.13903961724721395,
      "grad_norm": 0.7243879299682265,
      "learning_rate": 9.691530776541584e-06,
      "loss": 0.1718,
      "step": 4766
    },
    {
      "epoch": 0.13906879047785753,
      "grad_norm": 1.469514154173167,
      "learning_rate": 9.691367385191092e-06,
      "loss": 0.1684,
      "step": 4767
    },
    {
      "epoch": 0.13909796370850108,
      "grad_norm": 1.007738393673632,
      "learning_rate": 9.691203951957195e-06,
      "loss": 0.1612,
      "step": 4768
    },
    {
      "epoch": 0.13912713693914464,
      "grad_norm": 0.7627352180329647,
      "learning_rate": 9.691040476841347e-06,
      "loss": 0.1872,
      "step": 4769
    },
    {
      "epoch": 0.1391563101697882,
      "grad_norm": 1.0794946403128456,
      "learning_rate": 9.69087695984501e-06,
      "loss": 0.1836,
      "step": 4770
    },
    {
      "epoch": 0.13918548340043177,
      "grad_norm": 0.8567896408660971,
      "learning_rate": 9.690713400969643e-06,
      "loss": 0.1692,
      "step": 4771
    },
    {
      "epoch": 0.13921465663107532,
      "grad_norm": 0.7951352571820303,
      "learning_rate": 9.690549800216707e-06,
      "loss": 0.1707,
      "step": 4772
    },
    {
      "epoch": 0.13924382986171888,
      "grad_norm": 0.8863994923542984,
      "learning_rate": 9.69038615758766e-06,
      "loss": 0.1804,
      "step": 4773
    },
    {
      "epoch": 0.13927300309236246,
      "grad_norm": 1.00564861230173,
      "learning_rate": 9.690222473083969e-06,
      "loss": 0.1781,
      "step": 4774
    },
    {
      "epoch": 0.139302176323006,
      "grad_norm": 0.940740162116202,
      "learning_rate": 9.690058746707088e-06,
      "loss": 0.157,
      "step": 4775
    },
    {
      "epoch": 0.13933134955364956,
      "grad_norm": 0.8229699866809916,
      "learning_rate": 9.689894978458483e-06,
      "loss": 0.1905,
      "step": 4776
    },
    {
      "epoch": 0.13936052278429314,
      "grad_norm": 0.8355385203041596,
      "learning_rate": 9.689731168339617e-06,
      "loss": 0.1481,
      "step": 4777
    },
    {
      "epoch": 0.1393896960149367,
      "grad_norm": 1.0131008965133432,
      "learning_rate": 9.689567316351948e-06,
      "loss": 0.1554,
      "step": 4778
    },
    {
      "epoch": 0.13941886924558025,
      "grad_norm": 0.922450578193256,
      "learning_rate": 9.689403422496943e-06,
      "loss": 0.1821,
      "step": 4779
    },
    {
      "epoch": 0.1394480424762238,
      "grad_norm": 0.8739646449136426,
      "learning_rate": 9.689239486776062e-06,
      "loss": 0.2,
      "step": 4780
    },
    {
      "epoch": 0.13947721570686739,
      "grad_norm": 0.7958490554860915,
      "learning_rate": 9.689075509190773e-06,
      "loss": 0.1892,
      "step": 4781
    },
    {
      "epoch": 0.13950638893751094,
      "grad_norm": 1.081798561341606,
      "learning_rate": 9.688911489742536e-06,
      "loss": 0.1629,
      "step": 4782
    },
    {
      "epoch": 0.1395355621681545,
      "grad_norm": 1.0034315642502445,
      "learning_rate": 9.688747428432817e-06,
      "loss": 0.1786,
      "step": 4783
    },
    {
      "epoch": 0.13956473539879807,
      "grad_norm": 1.1513257134225778,
      "learning_rate": 9.68858332526308e-06,
      "loss": 0.1918,
      "step": 4784
    },
    {
      "epoch": 0.13959390862944163,
      "grad_norm": 0.931118991810217,
      "learning_rate": 9.68841918023479e-06,
      "loss": 0.1708,
      "step": 4785
    },
    {
      "epoch": 0.13962308186008518,
      "grad_norm": 0.9032086134251692,
      "learning_rate": 9.688254993349413e-06,
      "loss": 0.1728,
      "step": 4786
    },
    {
      "epoch": 0.13965225509072876,
      "grad_norm": 0.8804102543714083,
      "learning_rate": 9.688090764608414e-06,
      "loss": 0.1724,
      "step": 4787
    },
    {
      "epoch": 0.13968142832137231,
      "grad_norm": 0.9761342170497085,
      "learning_rate": 9.68792649401326e-06,
      "loss": 0.1996,
      "step": 4788
    },
    {
      "epoch": 0.13971060155201587,
      "grad_norm": 1.1328109314819435,
      "learning_rate": 9.687762181565417e-06,
      "loss": 0.1751,
      "step": 4789
    },
    {
      "epoch": 0.13973977478265942,
      "grad_norm": 0.6669056177756585,
      "learning_rate": 9.687597827266355e-06,
      "loss": 0.1617,
      "step": 4790
    },
    {
      "epoch": 0.139768948013303,
      "grad_norm": 1.0562784085060106,
      "learning_rate": 9.687433431117536e-06,
      "loss": 0.1762,
      "step": 4791
    },
    {
      "epoch": 0.13979812124394655,
      "grad_norm": 0.9796484153565788,
      "learning_rate": 9.68726899312043e-06,
      "loss": 0.1845,
      "step": 4792
    },
    {
      "epoch": 0.1398272944745901,
      "grad_norm": 0.8837581374484309,
      "learning_rate": 9.687104513276506e-06,
      "loss": 0.1808,
      "step": 4793
    },
    {
      "epoch": 0.1398564677052337,
      "grad_norm": 0.9029351835677035,
      "learning_rate": 9.686939991587231e-06,
      "loss": 0.1809,
      "step": 4794
    },
    {
      "epoch": 0.13988564093587724,
      "grad_norm": 1.073322187155975,
      "learning_rate": 9.686775428054077e-06,
      "loss": 0.1855,
      "step": 4795
    },
    {
      "epoch": 0.1399148141665208,
      "grad_norm": 0.872777369415657,
      "learning_rate": 9.68661082267851e-06,
      "loss": 0.1758,
      "step": 4796
    },
    {
      "epoch": 0.13994398739716435,
      "grad_norm": 1.220254433923574,
      "learning_rate": 9.686446175462e-06,
      "loss": 0.1992,
      "step": 4797
    },
    {
      "epoch": 0.13997316062780793,
      "grad_norm": 1.0710973421363612,
      "learning_rate": 9.686281486406016e-06,
      "loss": 0.1957,
      "step": 4798
    },
    {
      "epoch": 0.14000233385845148,
      "grad_norm": 0.8878692918807475,
      "learning_rate": 9.68611675551203e-06,
      "loss": 0.1805,
      "step": 4799
    },
    {
      "epoch": 0.14003150708909504,
      "grad_norm": 1.2768733842676163,
      "learning_rate": 9.685951982781515e-06,
      "loss": 0.1702,
      "step": 4800
    },
    {
      "epoch": 0.14006068031973862,
      "grad_norm": 0.7965365572186565,
      "learning_rate": 9.685787168215936e-06,
      "loss": 0.1667,
      "step": 4801
    },
    {
      "epoch": 0.14008985355038217,
      "grad_norm": 0.9463080448399828,
      "learning_rate": 9.68562231181677e-06,
      "loss": 0.1803,
      "step": 4802
    },
    {
      "epoch": 0.14011902678102572,
      "grad_norm": 0.8209363619499964,
      "learning_rate": 9.685457413585485e-06,
      "loss": 0.1546,
      "step": 4803
    },
    {
      "epoch": 0.1401482000116693,
      "grad_norm": 0.9216286184168969,
      "learning_rate": 9.685292473523556e-06,
      "loss": 0.195,
      "step": 4804
    },
    {
      "epoch": 0.14017737324231286,
      "grad_norm": 0.7379346868485538,
      "learning_rate": 9.685127491632453e-06,
      "loss": 0.2126,
      "step": 4805
    },
    {
      "epoch": 0.1402065464729564,
      "grad_norm": 0.8735232330808408,
      "learning_rate": 9.68496246791365e-06,
      "loss": 0.1843,
      "step": 4806
    },
    {
      "epoch": 0.14023571970359996,
      "grad_norm": 0.7222127071955207,
      "learning_rate": 9.684797402368622e-06,
      "loss": 0.1806,
      "step": 4807
    },
    {
      "epoch": 0.14026489293424355,
      "grad_norm": 0.7931973528469856,
      "learning_rate": 9.684632294998839e-06,
      "loss": 0.1766,
      "step": 4808
    },
    {
      "epoch": 0.1402940661648871,
      "grad_norm": 0.8743287978832351,
      "learning_rate": 9.68446714580578e-06,
      "loss": 0.1581,
      "step": 4809
    },
    {
      "epoch": 0.14032323939553065,
      "grad_norm": 0.8218092003905312,
      "learning_rate": 9.684301954790914e-06,
      "loss": 0.1527,
      "step": 4810
    },
    {
      "epoch": 0.14035241262617423,
      "grad_norm": 0.8003358263025518,
      "learning_rate": 9.68413672195572e-06,
      "loss": 0.1785,
      "step": 4811
    },
    {
      "epoch": 0.1403815858568178,
      "grad_norm": 0.8468720761596434,
      "learning_rate": 9.683971447301672e-06,
      "loss": 0.1701,
      "step": 4812
    },
    {
      "epoch": 0.14041075908746134,
      "grad_norm": 0.8887877124128191,
      "learning_rate": 9.683806130830243e-06,
      "loss": 0.1741,
      "step": 4813
    },
    {
      "epoch": 0.1404399323181049,
      "grad_norm": 1.0279176973479827,
      "learning_rate": 9.683640772542913e-06,
      "loss": 0.1731,
      "step": 4814
    },
    {
      "epoch": 0.14046910554874847,
      "grad_norm": 0.7195582202001497,
      "learning_rate": 9.683475372441154e-06,
      "loss": 0.1852,
      "step": 4815
    },
    {
      "epoch": 0.14049827877939203,
      "grad_norm": 0.8020805476616278,
      "learning_rate": 9.683309930526447e-06,
      "loss": 0.1855,
      "step": 4816
    },
    {
      "epoch": 0.14052745201003558,
      "grad_norm": 0.9048510224345565,
      "learning_rate": 9.683144446800265e-06,
      "loss": 0.195,
      "step": 4817
    },
    {
      "epoch": 0.14055662524067916,
      "grad_norm": 0.9783988267007112,
      "learning_rate": 9.682978921264091e-06,
      "loss": 0.2009,
      "step": 4818
    },
    {
      "epoch": 0.14058579847132271,
      "grad_norm": 0.854084900659183,
      "learning_rate": 9.682813353919395e-06,
      "loss": 0.1682,
      "step": 4819
    },
    {
      "epoch": 0.14061497170196627,
      "grad_norm": 0.9144785598322361,
      "learning_rate": 9.68264774476766e-06,
      "loss": 0.1763,
      "step": 4820
    },
    {
      "epoch": 0.14064414493260985,
      "grad_norm": 0.8744938036025505,
      "learning_rate": 9.682482093810366e-06,
      "loss": 0.1974,
      "step": 4821
    },
    {
      "epoch": 0.1406733181632534,
      "grad_norm": 0.9891754775443123,
      "learning_rate": 9.682316401048988e-06,
      "loss": 0.2059,
      "step": 4822
    },
    {
      "epoch": 0.14070249139389696,
      "grad_norm": 0.8074593601049309,
      "learning_rate": 9.682150666485007e-06,
      "loss": 0.1657,
      "step": 4823
    },
    {
      "epoch": 0.1407316646245405,
      "grad_norm": 0.8042886657050788,
      "learning_rate": 9.681984890119903e-06,
      "loss": 0.1729,
      "step": 4824
    },
    {
      "epoch": 0.1407608378551841,
      "grad_norm": 1.025908523614656,
      "learning_rate": 9.681819071955155e-06,
      "loss": 0.2028,
      "step": 4825
    },
    {
      "epoch": 0.14079001108582764,
      "grad_norm": 0.8273014432705452,
      "learning_rate": 9.681653211992244e-06,
      "loss": 0.1789,
      "step": 4826
    },
    {
      "epoch": 0.1408191843164712,
      "grad_norm": 1.0057024319220127,
      "learning_rate": 9.68148731023265e-06,
      "loss": 0.1711,
      "step": 4827
    },
    {
      "epoch": 0.14084835754711478,
      "grad_norm": 1.168127807007021,
      "learning_rate": 9.681321366677858e-06,
      "loss": 0.1922,
      "step": 4828
    },
    {
      "epoch": 0.14087753077775833,
      "grad_norm": 1.0477758856695032,
      "learning_rate": 9.681155381329344e-06,
      "loss": 0.1857,
      "step": 4829
    },
    {
      "epoch": 0.14090670400840188,
      "grad_norm": 0.8195480953049104,
      "learning_rate": 9.680989354188593e-06,
      "loss": 0.1767,
      "step": 4830
    },
    {
      "epoch": 0.14093587723904547,
      "grad_norm": 0.9603507354533929,
      "learning_rate": 9.680823285257087e-06,
      "loss": 0.1824,
      "step": 4831
    },
    {
      "epoch": 0.14096505046968902,
      "grad_norm": 1.1537190976081713,
      "learning_rate": 9.680657174536305e-06,
      "loss": 0.1955,
      "step": 4832
    },
    {
      "epoch": 0.14099422370033257,
      "grad_norm": 0.8363877872012376,
      "learning_rate": 9.680491022027736e-06,
      "loss": 0.1614,
      "step": 4833
    },
    {
      "epoch": 0.14102339693097612,
      "grad_norm": 0.910452640839773,
      "learning_rate": 9.68032482773286e-06,
      "loss": 0.1586,
      "step": 4834
    },
    {
      "epoch": 0.1410525701616197,
      "grad_norm": 1.0384120108467383,
      "learning_rate": 9.680158591653162e-06,
      "loss": 0.1877,
      "step": 4835
    },
    {
      "epoch": 0.14108174339226326,
      "grad_norm": 0.8799418821600672,
      "learning_rate": 9.679992313790123e-06,
      "loss": 0.181,
      "step": 4836
    },
    {
      "epoch": 0.1411109166229068,
      "grad_norm": 1.1052277610346195,
      "learning_rate": 9.679825994145232e-06,
      "loss": 0.1696,
      "step": 4837
    },
    {
      "epoch": 0.1411400898535504,
      "grad_norm": 0.8070947761707465,
      "learning_rate": 9.67965963271997e-06,
      "loss": 0.1689,
      "step": 4838
    },
    {
      "epoch": 0.14116926308419395,
      "grad_norm": 0.8348709118117703,
      "learning_rate": 9.679493229515825e-06,
      "loss": 0.1831,
      "step": 4839
    },
    {
      "epoch": 0.1411984363148375,
      "grad_norm": 0.883498083009703,
      "learning_rate": 9.679326784534283e-06,
      "loss": 0.1851,
      "step": 4840
    },
    {
      "epoch": 0.14122760954548105,
      "grad_norm": 0.8407038467802971,
      "learning_rate": 9.679160297776826e-06,
      "loss": 0.1669,
      "step": 4841
    },
    {
      "epoch": 0.14125678277612463,
      "grad_norm": 0.8112263115764934,
      "learning_rate": 9.678993769244942e-06,
      "loss": 0.185,
      "step": 4842
    },
    {
      "epoch": 0.1412859560067682,
      "grad_norm": 0.8819726568891366,
      "learning_rate": 9.678827198940121e-06,
      "loss": 0.172,
      "step": 4843
    },
    {
      "epoch": 0.14131512923741174,
      "grad_norm": 0.8196877100230318,
      "learning_rate": 9.678660586863847e-06,
      "loss": 0.1838,
      "step": 4844
    },
    {
      "epoch": 0.14134430246805532,
      "grad_norm": 0.8916286389589517,
      "learning_rate": 9.678493933017608e-06,
      "loss": 0.1889,
      "step": 4845
    },
    {
      "epoch": 0.14137347569869887,
      "grad_norm": 0.8081831430889657,
      "learning_rate": 9.678327237402892e-06,
      "loss": 0.1844,
      "step": 4846
    },
    {
      "epoch": 0.14140264892934243,
      "grad_norm": 0.8996943013924475,
      "learning_rate": 9.678160500021188e-06,
      "loss": 0.2088,
      "step": 4847
    },
    {
      "epoch": 0.141431822159986,
      "grad_norm": 1.1877947274995793,
      "learning_rate": 9.677993720873983e-06,
      "loss": 0.1849,
      "step": 4848
    },
    {
      "epoch": 0.14146099539062956,
      "grad_norm": 0.8454475429653244,
      "learning_rate": 9.677826899962767e-06,
      "loss": 0.181,
      "step": 4849
    },
    {
      "epoch": 0.14149016862127312,
      "grad_norm": 0.9777729656039873,
      "learning_rate": 9.677660037289029e-06,
      "loss": 0.2063,
      "step": 4850
    },
    {
      "epoch": 0.14151934185191667,
      "grad_norm": 0.8346924308437704,
      "learning_rate": 9.67749313285426e-06,
      "loss": 0.1797,
      "step": 4851
    },
    {
      "epoch": 0.14154851508256025,
      "grad_norm": 0.8235078319478367,
      "learning_rate": 9.677326186659947e-06,
      "loss": 0.1828,
      "step": 4852
    },
    {
      "epoch": 0.1415776883132038,
      "grad_norm": 0.8134056400223716,
      "learning_rate": 9.677159198707582e-06,
      "loss": 0.1679,
      "step": 4853
    },
    {
      "epoch": 0.14160686154384736,
      "grad_norm": 0.9372396523329549,
      "learning_rate": 9.676992168998657e-06,
      "loss": 0.1782,
      "step": 4854
    },
    {
      "epoch": 0.14163603477449094,
      "grad_norm": 0.75538528348334,
      "learning_rate": 9.676825097534663e-06,
      "loss": 0.1568,
      "step": 4855
    },
    {
      "epoch": 0.1416652080051345,
      "grad_norm": 0.7854605906588132,
      "learning_rate": 9.676657984317092e-06,
      "loss": 0.1759,
      "step": 4856
    },
    {
      "epoch": 0.14169438123577804,
      "grad_norm": 0.9421707495263524,
      "learning_rate": 9.676490829347434e-06,
      "loss": 0.1794,
      "step": 4857
    },
    {
      "epoch": 0.14172355446642163,
      "grad_norm": 0.7743067145071496,
      "learning_rate": 9.67632363262718e-06,
      "loss": 0.1703,
      "step": 4858
    },
    {
      "epoch": 0.14175272769706518,
      "grad_norm": 0.930500697699094,
      "learning_rate": 9.676156394157829e-06,
      "loss": 0.1743,
      "step": 4859
    },
    {
      "epoch": 0.14178190092770873,
      "grad_norm": 0.7814997975949639,
      "learning_rate": 9.675989113940866e-06,
      "loss": 0.1612,
      "step": 4860
    },
    {
      "epoch": 0.14181107415835228,
      "grad_norm": 0.8865724413664395,
      "learning_rate": 9.67582179197779e-06,
      "loss": 0.1699,
      "step": 4861
    },
    {
      "epoch": 0.14184024738899587,
      "grad_norm": 1.1884952321978883,
      "learning_rate": 9.675654428270094e-06,
      "loss": 0.183,
      "step": 4862
    },
    {
      "epoch": 0.14186942061963942,
      "grad_norm": 0.8074043648783656,
      "learning_rate": 9.675487022819273e-06,
      "loss": 0.1713,
      "step": 4863
    },
    {
      "epoch": 0.14189859385028297,
      "grad_norm": 0.8235702417362052,
      "learning_rate": 9.675319575626817e-06,
      "loss": 0.1705,
      "step": 4864
    },
    {
      "epoch": 0.14192776708092655,
      "grad_norm": 1.2325535207631664,
      "learning_rate": 9.675152086694226e-06,
      "loss": 0.1851,
      "step": 4865
    },
    {
      "epoch": 0.1419569403115701,
      "grad_norm": 1.0524729024408745,
      "learning_rate": 9.67498455602299e-06,
      "loss": 0.183,
      "step": 4866
    },
    {
      "epoch": 0.14198611354221366,
      "grad_norm": 1.0442032804423818,
      "learning_rate": 9.674816983614611e-06,
      "loss": 0.194,
      "step": 4867
    },
    {
      "epoch": 0.1420152867728572,
      "grad_norm": 0.958524240553488,
      "learning_rate": 9.67464936947058e-06,
      "loss": 0.1929,
      "step": 4868
    },
    {
      "epoch": 0.1420444600035008,
      "grad_norm": 0.9250214990383446,
      "learning_rate": 9.674481713592398e-06,
      "loss": 0.1904,
      "step": 4869
    },
    {
      "epoch": 0.14207363323414435,
      "grad_norm": 0.8600711976437861,
      "learning_rate": 9.674314015981557e-06,
      "loss": 0.2122,
      "step": 4870
    },
    {
      "epoch": 0.1421028064647879,
      "grad_norm": 0.8270568012751615,
      "learning_rate": 9.674146276639556e-06,
      "loss": 0.1628,
      "step": 4871
    },
    {
      "epoch": 0.14213197969543148,
      "grad_norm": 0.891043897789974,
      "learning_rate": 9.673978495567895e-06,
      "loss": 0.1797,
      "step": 4872
    },
    {
      "epoch": 0.14216115292607504,
      "grad_norm": 0.9007499180467402,
      "learning_rate": 9.673810672768068e-06,
      "loss": 0.1758,
      "step": 4873
    },
    {
      "epoch": 0.1421903261567186,
      "grad_norm": 0.9840940820256868,
      "learning_rate": 9.673642808241574e-06,
      "loss": 0.1674,
      "step": 4874
    },
    {
      "epoch": 0.14221949938736217,
      "grad_norm": 1.0706059805118107,
      "learning_rate": 9.673474901989916e-06,
      "loss": 0.1663,
      "step": 4875
    },
    {
      "epoch": 0.14224867261800572,
      "grad_norm": 0.7539907717704584,
      "learning_rate": 9.673306954014588e-06,
      "loss": 0.1641,
      "step": 4876
    },
    {
      "epoch": 0.14227784584864928,
      "grad_norm": 0.9564622588789834,
      "learning_rate": 9.673138964317091e-06,
      "loss": 0.1917,
      "step": 4877
    },
    {
      "epoch": 0.14230701907929283,
      "grad_norm": 1.072023018231923,
      "learning_rate": 9.672970932898923e-06,
      "loss": 0.1669,
      "step": 4878
    },
    {
      "epoch": 0.1423361923099364,
      "grad_norm": 0.876912322391861,
      "learning_rate": 9.67280285976159e-06,
      "loss": 0.1719,
      "step": 4879
    },
    {
      "epoch": 0.14236536554057996,
      "grad_norm": 1.0984522305127342,
      "learning_rate": 9.672634744906585e-06,
      "loss": 0.1677,
      "step": 4880
    },
    {
      "epoch": 0.14239453877122352,
      "grad_norm": 0.9969050836869188,
      "learning_rate": 9.672466588335414e-06,
      "loss": 0.1843,
      "step": 4881
    },
    {
      "epoch": 0.1424237120018671,
      "grad_norm": 0.8220361004029493,
      "learning_rate": 9.672298390049577e-06,
      "loss": 0.1782,
      "step": 4882
    },
    {
      "epoch": 0.14245288523251065,
      "grad_norm": 1.0229843634298637,
      "learning_rate": 9.672130150050576e-06,
      "loss": 0.1809,
      "step": 4883
    },
    {
      "epoch": 0.1424820584631542,
      "grad_norm": 0.970871248674842,
      "learning_rate": 9.67196186833991e-06,
      "loss": 0.1731,
      "step": 4884
    },
    {
      "epoch": 0.14251123169379776,
      "grad_norm": 1.1818675378503378,
      "learning_rate": 9.671793544919086e-06,
      "loss": 0.1873,
      "step": 4885
    },
    {
      "epoch": 0.14254040492444134,
      "grad_norm": 0.9214167530946143,
      "learning_rate": 9.671625179789603e-06,
      "loss": 0.1758,
      "step": 4886
    },
    {
      "epoch": 0.1425695781550849,
      "grad_norm": 0.807857849013604,
      "learning_rate": 9.671456772952967e-06,
      "loss": 0.1707,
      "step": 4887
    },
    {
      "epoch": 0.14259875138572844,
      "grad_norm": 1.1217031758878113,
      "learning_rate": 9.671288324410678e-06,
      "loss": 0.1761,
      "step": 4888
    },
    {
      "epoch": 0.14262792461637203,
      "grad_norm": 0.7196737452829093,
      "learning_rate": 9.671119834164245e-06,
      "loss": 0.1554,
      "step": 4889
    },
    {
      "epoch": 0.14265709784701558,
      "grad_norm": 0.8343552659400554,
      "learning_rate": 9.670951302215166e-06,
      "loss": 0.2039,
      "step": 4890
    },
    {
      "epoch": 0.14268627107765913,
      "grad_norm": 0.967408695123973,
      "learning_rate": 9.67078272856495e-06,
      "loss": 0.1932,
      "step": 4891
    },
    {
      "epoch": 0.1427154443083027,
      "grad_norm": 0.892280770306024,
      "learning_rate": 9.670614113215102e-06,
      "loss": 0.1958,
      "step": 4892
    },
    {
      "epoch": 0.14274461753894627,
      "grad_norm": 0.8341263048425164,
      "learning_rate": 9.670445456167125e-06,
      "loss": 0.177,
      "step": 4893
    },
    {
      "epoch": 0.14277379076958982,
      "grad_norm": 0.8470140746338892,
      "learning_rate": 9.670276757422525e-06,
      "loss": 0.1557,
      "step": 4894
    },
    {
      "epoch": 0.14280296400023337,
      "grad_norm": 0.7642882308535696,
      "learning_rate": 9.670108016982812e-06,
      "loss": 0.1607,
      "step": 4895
    },
    {
      "epoch": 0.14283213723087695,
      "grad_norm": 0.9228984753411509,
      "learning_rate": 9.669939234849485e-06,
      "loss": 0.1648,
      "step": 4896
    },
    {
      "epoch": 0.1428613104615205,
      "grad_norm": 0.9083247188440113,
      "learning_rate": 9.66977041102406e-06,
      "loss": 0.1886,
      "step": 4897
    },
    {
      "epoch": 0.14289048369216406,
      "grad_norm": 0.9890740184703097,
      "learning_rate": 9.669601545508037e-06,
      "loss": 0.1827,
      "step": 4898
    },
    {
      "epoch": 0.14291965692280764,
      "grad_norm": 0.952680685214979,
      "learning_rate": 9.669432638302926e-06,
      "loss": 0.1732,
      "step": 4899
    },
    {
      "epoch": 0.1429488301534512,
      "grad_norm": 0.9701128725983815,
      "learning_rate": 9.669263689410236e-06,
      "loss": 0.1736,
      "step": 4900
    },
    {
      "epoch": 0.14297800338409475,
      "grad_norm": 0.9325190999545404,
      "learning_rate": 9.669094698831474e-06,
      "loss": 0.1794,
      "step": 4901
    },
    {
      "epoch": 0.14300717661473833,
      "grad_norm": 0.9819126438419686,
      "learning_rate": 9.66892566656815e-06,
      "loss": 0.1813,
      "step": 4902
    },
    {
      "epoch": 0.14303634984538188,
      "grad_norm": 0.7815883657284478,
      "learning_rate": 9.668756592621771e-06,
      "loss": 0.1745,
      "step": 4903
    },
    {
      "epoch": 0.14306552307602544,
      "grad_norm": 1.0241138247997523,
      "learning_rate": 9.668587476993847e-06,
      "loss": 0.1786,
      "step": 4904
    },
    {
      "epoch": 0.143094696306669,
      "grad_norm": 0.8584384943611965,
      "learning_rate": 9.66841831968589e-06,
      "loss": 0.1738,
      "step": 4905
    },
    {
      "epoch": 0.14312386953731257,
      "grad_norm": 0.8130231945414756,
      "learning_rate": 9.668249120699409e-06,
      "loss": 0.1784,
      "step": 4906
    },
    {
      "epoch": 0.14315304276795612,
      "grad_norm": 0.9657054584040975,
      "learning_rate": 9.668079880035911e-06,
      "loss": 0.1666,
      "step": 4907
    },
    {
      "epoch": 0.14318221599859968,
      "grad_norm": 0.7926088134825279,
      "learning_rate": 9.667910597696914e-06,
      "loss": 0.1716,
      "step": 4908
    },
    {
      "epoch": 0.14321138922924326,
      "grad_norm": 0.7750102848427548,
      "learning_rate": 9.667741273683924e-06,
      "loss": 0.172,
      "step": 4909
    },
    {
      "epoch": 0.1432405624598868,
      "grad_norm": 0.945000267560802,
      "learning_rate": 9.667571907998455e-06,
      "loss": 0.2008,
      "step": 4910
    },
    {
      "epoch": 0.14326973569053036,
      "grad_norm": 0.8321070111201656,
      "learning_rate": 9.667402500642017e-06,
      "loss": 0.1951,
      "step": 4911
    },
    {
      "epoch": 0.14329890892117392,
      "grad_norm": 0.7348755939022059,
      "learning_rate": 9.667233051616124e-06,
      "loss": 0.1851,
      "step": 4912
    },
    {
      "epoch": 0.1433280821518175,
      "grad_norm": 0.998817677821531,
      "learning_rate": 9.66706356092229e-06,
      "loss": 0.211,
      "step": 4913
    },
    {
      "epoch": 0.14335725538246105,
      "grad_norm": 1.2869541877033552,
      "learning_rate": 9.666894028562025e-06,
      "loss": 0.1577,
      "step": 4914
    },
    {
      "epoch": 0.1433864286131046,
      "grad_norm": 0.810689609054851,
      "learning_rate": 9.666724454536844e-06,
      "loss": 0.1902,
      "step": 4915
    },
    {
      "epoch": 0.14341560184374819,
      "grad_norm": 0.7782743730720433,
      "learning_rate": 9.666554838848262e-06,
      "loss": 0.1869,
      "step": 4916
    },
    {
      "epoch": 0.14344477507439174,
      "grad_norm": 0.7901143669137367,
      "learning_rate": 9.666385181497793e-06,
      "loss": 0.1727,
      "step": 4917
    },
    {
      "epoch": 0.1434739483050353,
      "grad_norm": 0.8631548557260729,
      "learning_rate": 9.66621548248695e-06,
      "loss": 0.1963,
      "step": 4918
    },
    {
      "epoch": 0.14350312153567887,
      "grad_norm": 0.9935157262006136,
      "learning_rate": 9.666045741817249e-06,
      "loss": 0.2079,
      "step": 4919
    },
    {
      "epoch": 0.14353229476632243,
      "grad_norm": 0.9220926059322345,
      "learning_rate": 9.665875959490205e-06,
      "loss": 0.1619,
      "step": 4920
    },
    {
      "epoch": 0.14356146799696598,
      "grad_norm": 0.8758027143288649,
      "learning_rate": 9.665706135507336e-06,
      "loss": 0.1579,
      "step": 4921
    },
    {
      "epoch": 0.14359064122760953,
      "grad_norm": 1.0016470592695905,
      "learning_rate": 9.665536269870155e-06,
      "loss": 0.171,
      "step": 4922
    },
    {
      "epoch": 0.14361981445825311,
      "grad_norm": 0.8461107158112773,
      "learning_rate": 9.665366362580179e-06,
      "loss": 0.1906,
      "step": 4923
    },
    {
      "epoch": 0.14364898768889667,
      "grad_norm": 0.8362304161899768,
      "learning_rate": 9.665196413638929e-06,
      "loss": 0.1687,
      "step": 4924
    },
    {
      "epoch": 0.14367816091954022,
      "grad_norm": 0.9839200656933649,
      "learning_rate": 9.665026423047916e-06,
      "loss": 0.1795,
      "step": 4925
    },
    {
      "epoch": 0.1437073341501838,
      "grad_norm": 0.7722141779186894,
      "learning_rate": 9.664856390808661e-06,
      "loss": 0.1687,
      "step": 4926
    },
    {
      "epoch": 0.14373650738082736,
      "grad_norm": 0.9790159364229566,
      "learning_rate": 9.664686316922684e-06,
      "loss": 0.2082,
      "step": 4927
    },
    {
      "epoch": 0.1437656806114709,
      "grad_norm": 0.7355735603486906,
      "learning_rate": 9.664516201391501e-06,
      "loss": 0.1859,
      "step": 4928
    },
    {
      "epoch": 0.1437948538421145,
      "grad_norm": 0.7096188215676326,
      "learning_rate": 9.664346044216628e-06,
      "loss": 0.1685,
      "step": 4929
    },
    {
      "epoch": 0.14382402707275804,
      "grad_norm": 0.7527802600599048,
      "learning_rate": 9.66417584539959e-06,
      "loss": 0.1758,
      "step": 4930
    },
    {
      "epoch": 0.1438532003034016,
      "grad_norm": 0.9988017797907917,
      "learning_rate": 9.664005604941901e-06,
      "loss": 0.1843,
      "step": 4931
    },
    {
      "epoch": 0.14388237353404515,
      "grad_norm": 0.8848600437266344,
      "learning_rate": 9.663835322845086e-06,
      "loss": 0.1741,
      "step": 4932
    },
    {
      "epoch": 0.14391154676468873,
      "grad_norm": 0.8896283950204757,
      "learning_rate": 9.66366499911066e-06,
      "loss": 0.1653,
      "step": 4933
    },
    {
      "epoch": 0.14394071999533228,
      "grad_norm": 0.9126068563691186,
      "learning_rate": 9.663494633740148e-06,
      "loss": 0.1836,
      "step": 4934
    },
    {
      "epoch": 0.14396989322597584,
      "grad_norm": 0.9289381424332497,
      "learning_rate": 9.663324226735069e-06,
      "loss": 0.1843,
      "step": 4935
    },
    {
      "epoch": 0.14399906645661942,
      "grad_norm": 0.9131368057191285,
      "learning_rate": 9.663153778096943e-06,
      "loss": 0.1585,
      "step": 4936
    },
    {
      "epoch": 0.14402823968726297,
      "grad_norm": 0.9669098843130987,
      "learning_rate": 9.662983287827295e-06,
      "loss": 0.1739,
      "step": 4937
    },
    {
      "epoch": 0.14405741291790652,
      "grad_norm": 0.9790305801935686,
      "learning_rate": 9.662812755927645e-06,
      "loss": 0.1903,
      "step": 4938
    },
    {
      "epoch": 0.14408658614855008,
      "grad_norm": 0.899433487635994,
      "learning_rate": 9.662642182399514e-06,
      "loss": 0.1882,
      "step": 4939
    },
    {
      "epoch": 0.14411575937919366,
      "grad_norm": 1.0199047256431513,
      "learning_rate": 9.662471567244428e-06,
      "loss": 0.1937,
      "step": 4940
    },
    {
      "epoch": 0.1441449326098372,
      "grad_norm": 0.8457275564071992,
      "learning_rate": 9.662300910463908e-06,
      "loss": 0.1606,
      "step": 4941
    },
    {
      "epoch": 0.14417410584048077,
      "grad_norm": 1.0844181886077786,
      "learning_rate": 9.662130212059481e-06,
      "loss": 0.1799,
      "step": 4942
    },
    {
      "epoch": 0.14420327907112435,
      "grad_norm": 1.225013607112546,
      "learning_rate": 9.661959472032667e-06,
      "loss": 0.1576,
      "step": 4943
    },
    {
      "epoch": 0.1442324523017679,
      "grad_norm": 0.88643665251535,
      "learning_rate": 9.66178869038499e-06,
      "loss": 0.183,
      "step": 4944
    },
    {
      "epoch": 0.14426162553241145,
      "grad_norm": 1.0884878450603226,
      "learning_rate": 9.661617867117978e-06,
      "loss": 0.2036,
      "step": 4945
    },
    {
      "epoch": 0.14429079876305503,
      "grad_norm": 1.194654672655845,
      "learning_rate": 9.661447002233156e-06,
      "loss": 0.1587,
      "step": 4946
    },
    {
      "epoch": 0.1443199719936986,
      "grad_norm": 0.9949113935387659,
      "learning_rate": 9.661276095732046e-06,
      "loss": 0.1819,
      "step": 4947
    },
    {
      "epoch": 0.14434914522434214,
      "grad_norm": 0.9433703823494165,
      "learning_rate": 9.661105147616177e-06,
      "loss": 0.1672,
      "step": 4948
    },
    {
      "epoch": 0.1443783184549857,
      "grad_norm": 1.1335559624318436,
      "learning_rate": 9.660934157887072e-06,
      "loss": 0.1802,
      "step": 4949
    },
    {
      "epoch": 0.14440749168562927,
      "grad_norm": 1.0942795572129844,
      "learning_rate": 9.66076312654626e-06,
      "loss": 0.1969,
      "step": 4950
    },
    {
      "epoch": 0.14443666491627283,
      "grad_norm": 0.7748508902390537,
      "learning_rate": 9.660592053595268e-06,
      "loss": 0.1666,
      "step": 4951
    },
    {
      "epoch": 0.14446583814691638,
      "grad_norm": 1.1553100246260095,
      "learning_rate": 9.660420939035624e-06,
      "loss": 0.1774,
      "step": 4952
    },
    {
      "epoch": 0.14449501137755996,
      "grad_norm": 0.9604119024938216,
      "learning_rate": 9.660249782868853e-06,
      "loss": 0.2019,
      "step": 4953
    },
    {
      "epoch": 0.14452418460820352,
      "grad_norm": 0.9544521640614683,
      "learning_rate": 9.660078585096484e-06,
      "loss": 0.1765,
      "step": 4954
    },
    {
      "epoch": 0.14455335783884707,
      "grad_norm": 1.1864981680288276,
      "learning_rate": 9.659907345720046e-06,
      "loss": 0.176,
      "step": 4955
    },
    {
      "epoch": 0.14458253106949062,
      "grad_norm": 0.8692202165735067,
      "learning_rate": 9.659736064741068e-06,
      "loss": 0.2009,
      "step": 4956
    },
    {
      "epoch": 0.1446117043001342,
      "grad_norm": 0.646865467793404,
      "learning_rate": 9.65956474216108e-06,
      "loss": 0.1424,
      "step": 4957
    },
    {
      "epoch": 0.14464087753077776,
      "grad_norm": 1.0337038399625629,
      "learning_rate": 9.659393377981609e-06,
      "loss": 0.1845,
      "step": 4958
    },
    {
      "epoch": 0.1446700507614213,
      "grad_norm": 1.0443727583431346,
      "learning_rate": 9.659221972204186e-06,
      "loss": 0.1727,
      "step": 4959
    },
    {
      "epoch": 0.1446992239920649,
      "grad_norm": 0.8110179707939673,
      "learning_rate": 9.65905052483034e-06,
      "loss": 0.1673,
      "step": 4960
    },
    {
      "epoch": 0.14472839722270844,
      "grad_norm": 0.7813388231834524,
      "learning_rate": 9.658879035861606e-06,
      "loss": 0.1705,
      "step": 4961
    },
    {
      "epoch": 0.144757570453352,
      "grad_norm": 0.9773009102407664,
      "learning_rate": 9.65870750529951e-06,
      "loss": 0.1749,
      "step": 4962
    },
    {
      "epoch": 0.14478674368399558,
      "grad_norm": 1.0953635856464539,
      "learning_rate": 9.658535933145588e-06,
      "loss": 0.2042,
      "step": 4963
    },
    {
      "epoch": 0.14481591691463913,
      "grad_norm": 0.8310078695169273,
      "learning_rate": 9.658364319401368e-06,
      "loss": 0.1684,
      "step": 4964
    },
    {
      "epoch": 0.14484509014528268,
      "grad_norm": 0.8966136224520148,
      "learning_rate": 9.658192664068382e-06,
      "loss": 0.1942,
      "step": 4965
    },
    {
      "epoch": 0.14487426337592624,
      "grad_norm": 0.9646429390948612,
      "learning_rate": 9.658020967148166e-06,
      "loss": 0.1953,
      "step": 4966
    },
    {
      "epoch": 0.14490343660656982,
      "grad_norm": 0.9060359924917519,
      "learning_rate": 9.65784922864225e-06,
      "loss": 0.188,
      "step": 4967
    },
    {
      "epoch": 0.14493260983721337,
      "grad_norm": 1.21107510751277,
      "learning_rate": 9.657677448552167e-06,
      "loss": 0.1605,
      "step": 4968
    },
    {
      "epoch": 0.14496178306785693,
      "grad_norm": 0.8668860283561532,
      "learning_rate": 9.657505626879452e-06,
      "loss": 0.1642,
      "step": 4969
    },
    {
      "epoch": 0.1449909562985005,
      "grad_norm": 1.0359011983742257,
      "learning_rate": 9.65733376362564e-06,
      "loss": 0.1879,
      "step": 4970
    },
    {
      "epoch": 0.14502012952914406,
      "grad_norm": 0.7821949860670446,
      "learning_rate": 9.657161858792263e-06,
      "loss": 0.1684,
      "step": 4971
    },
    {
      "epoch": 0.1450493027597876,
      "grad_norm": 0.7176589228105676,
      "learning_rate": 9.656989912380857e-06,
      "loss": 0.1677,
      "step": 4972
    },
    {
      "epoch": 0.1450784759904312,
      "grad_norm": 0.8690548931633352,
      "learning_rate": 9.656817924392958e-06,
      "loss": 0.1957,
      "step": 4973
    },
    {
      "epoch": 0.14510764922107475,
      "grad_norm": 0.8615092289222902,
      "learning_rate": 9.656645894830098e-06,
      "loss": 0.2092,
      "step": 4974
    },
    {
      "epoch": 0.1451368224517183,
      "grad_norm": 0.9381894088351447,
      "learning_rate": 9.656473823693814e-06,
      "loss": 0.1725,
      "step": 4975
    },
    {
      "epoch": 0.14516599568236185,
      "grad_norm": 0.7216487457606322,
      "learning_rate": 9.656301710985646e-06,
      "loss": 0.1581,
      "step": 4976
    },
    {
      "epoch": 0.14519516891300543,
      "grad_norm": 0.9951387042927817,
      "learning_rate": 9.656129556707127e-06,
      "loss": 0.1655,
      "step": 4977
    },
    {
      "epoch": 0.145224342143649,
      "grad_norm": 0.6943663219266645,
      "learning_rate": 9.655957360859796e-06,
      "loss": 0.1642,
      "step": 4978
    },
    {
      "epoch": 0.14525351537429254,
      "grad_norm": 0.745824175529699,
      "learning_rate": 9.655785123445186e-06,
      "loss": 0.163,
      "step": 4979
    },
    {
      "epoch": 0.14528268860493612,
      "grad_norm": 0.7571817541508856,
      "learning_rate": 9.65561284446484e-06,
      "loss": 0.1852,
      "step": 4980
    },
    {
      "epoch": 0.14531186183557968,
      "grad_norm": 0.8569921722541516,
      "learning_rate": 9.655440523920295e-06,
      "loss": 0.1658,
      "step": 4981
    },
    {
      "epoch": 0.14534103506622323,
      "grad_norm": 0.7833330263188776,
      "learning_rate": 9.655268161813088e-06,
      "loss": 0.1627,
      "step": 4982
    },
    {
      "epoch": 0.14537020829686678,
      "grad_norm": 0.7938495250166756,
      "learning_rate": 9.655095758144757e-06,
      "loss": 0.1823,
      "step": 4983
    },
    {
      "epoch": 0.14539938152751036,
      "grad_norm": 0.8190841000349872,
      "learning_rate": 9.654923312916842e-06,
      "loss": 0.1646,
      "step": 4984
    },
    {
      "epoch": 0.14542855475815392,
      "grad_norm": 1.2172420550336633,
      "learning_rate": 9.654750826130882e-06,
      "loss": 0.1607,
      "step": 4985
    },
    {
      "epoch": 0.14545772798879747,
      "grad_norm": 0.8853725881410209,
      "learning_rate": 9.654578297788421e-06,
      "loss": 0.161,
      "step": 4986
    },
    {
      "epoch": 0.14548690121944105,
      "grad_norm": 0.83162721813925,
      "learning_rate": 9.654405727890994e-06,
      "loss": 0.1581,
      "step": 4987
    },
    {
      "epoch": 0.1455160744500846,
      "grad_norm": 1.23119552734588,
      "learning_rate": 9.654233116440144e-06,
      "loss": 0.1761,
      "step": 4988
    },
    {
      "epoch": 0.14554524768072816,
      "grad_norm": 0.8811144445160094,
      "learning_rate": 9.654060463437411e-06,
      "loss": 0.176,
      "step": 4989
    },
    {
      "epoch": 0.14557442091137174,
      "grad_norm": 0.7174372214501783,
      "learning_rate": 9.65388776888434e-06,
      "loss": 0.1919,
      "step": 4990
    },
    {
      "epoch": 0.1456035941420153,
      "grad_norm": 0.99449911960273,
      "learning_rate": 9.653715032782467e-06,
      "loss": 0.1632,
      "step": 4991
    },
    {
      "epoch": 0.14563276737265884,
      "grad_norm": 0.8985792692934712,
      "learning_rate": 9.653542255133339e-06,
      "loss": 0.2133,
      "step": 4992
    },
    {
      "epoch": 0.1456619406033024,
      "grad_norm": 0.8439128817625573,
      "learning_rate": 9.653369435938495e-06,
      "loss": 0.1738,
      "step": 4993
    },
    {
      "epoch": 0.14569111383394598,
      "grad_norm": 0.9013739350288998,
      "learning_rate": 9.65319657519948e-06,
      "loss": 0.2177,
      "step": 4994
    },
    {
      "epoch": 0.14572028706458953,
      "grad_norm": 1.1169097052433632,
      "learning_rate": 9.653023672917839e-06,
      "loss": 0.1606,
      "step": 4995
    },
    {
      "epoch": 0.14574946029523309,
      "grad_norm": 0.8952326065505679,
      "learning_rate": 9.65285072909511e-06,
      "loss": 0.1931,
      "step": 4996
    },
    {
      "epoch": 0.14577863352587667,
      "grad_norm": 0.8400266765729321,
      "learning_rate": 9.652677743732843e-06,
      "loss": 0.1632,
      "step": 4997
    },
    {
      "epoch": 0.14580780675652022,
      "grad_norm": 1.0135250732946663,
      "learning_rate": 9.652504716832578e-06,
      "loss": 0.1917,
      "step": 4998
    },
    {
      "epoch": 0.14583697998716377,
      "grad_norm": 0.8595442628576205,
      "learning_rate": 9.652331648395863e-06,
      "loss": 0.1656,
      "step": 4999
    },
    {
      "epoch": 0.14586615321780733,
      "grad_norm": 0.9970121540001996,
      "learning_rate": 9.65215853842424e-06,
      "loss": 0.1969,
      "step": 5000
    },
    {
      "epoch": 0.1458953264484509,
      "grad_norm": 0.9562353762307105,
      "learning_rate": 9.651985386919257e-06,
      "loss": 0.174,
      "step": 5001
    },
    {
      "epoch": 0.14592449967909446,
      "grad_norm": 0.8746215172434272,
      "learning_rate": 9.65181219388246e-06,
      "loss": 0.1831,
      "step": 5002
    },
    {
      "epoch": 0.145953672909738,
      "grad_norm": 0.834023705470222,
      "learning_rate": 9.651638959315392e-06,
      "loss": 0.1906,
      "step": 5003
    },
    {
      "epoch": 0.1459828461403816,
      "grad_norm": 0.8542850057204772,
      "learning_rate": 9.651465683219603e-06,
      "loss": 0.1756,
      "step": 5004
    },
    {
      "epoch": 0.14601201937102515,
      "grad_norm": 0.9872404766398125,
      "learning_rate": 9.65129236559664e-06,
      "loss": 0.1664,
      "step": 5005
    },
    {
      "epoch": 0.1460411926016687,
      "grad_norm": 0.9819310883974036,
      "learning_rate": 9.651119006448047e-06,
      "loss": 0.1721,
      "step": 5006
    },
    {
      "epoch": 0.14607036583231228,
      "grad_norm": 0.7927647989876684,
      "learning_rate": 9.650945605775374e-06,
      "loss": 0.1691,
      "step": 5007
    },
    {
      "epoch": 0.14609953906295584,
      "grad_norm": 0.8871799854751007,
      "learning_rate": 9.650772163580171e-06,
      "loss": 0.1719,
      "step": 5008
    },
    {
      "epoch": 0.1461287122935994,
      "grad_norm": 1.0809928484274853,
      "learning_rate": 9.650598679863983e-06,
      "loss": 0.2095,
      "step": 5009
    },
    {
      "epoch": 0.14615788552424294,
      "grad_norm": 0.7793813813735866,
      "learning_rate": 9.65042515462836e-06,
      "loss": 0.1676,
      "step": 5010
    },
    {
      "epoch": 0.14618705875488652,
      "grad_norm": 1.0455750202298337,
      "learning_rate": 9.65025158787485e-06,
      "loss": 0.1879,
      "step": 5011
    },
    {
      "epoch": 0.14621623198553008,
      "grad_norm": 0.8515921729199322,
      "learning_rate": 9.650077979605008e-06,
      "loss": 0.1956,
      "step": 5012
    },
    {
      "epoch": 0.14624540521617363,
      "grad_norm": 0.9358857516516348,
      "learning_rate": 9.649904329820377e-06,
      "loss": 0.1838,
      "step": 5013
    },
    {
      "epoch": 0.1462745784468172,
      "grad_norm": 1.471933046679515,
      "learning_rate": 9.64973063852251e-06,
      "loss": 0.1612,
      "step": 5014
    },
    {
      "epoch": 0.14630375167746076,
      "grad_norm": 0.7511996901125207,
      "learning_rate": 9.649556905712958e-06,
      "loss": 0.1826,
      "step": 5015
    },
    {
      "epoch": 0.14633292490810432,
      "grad_norm": 1.0837699750013672,
      "learning_rate": 9.649383131393273e-06,
      "loss": 0.1909,
      "step": 5016
    },
    {
      "epoch": 0.1463620981387479,
      "grad_norm": 0.9137538164715912,
      "learning_rate": 9.649209315565005e-06,
      "loss": 0.1615,
      "step": 5017
    },
    {
      "epoch": 0.14639127136939145,
      "grad_norm": 0.9160622542964243,
      "learning_rate": 9.649035458229706e-06,
      "loss": 0.2022,
      "step": 5018
    },
    {
      "epoch": 0.146420444600035,
      "grad_norm": 0.9417295540153825,
      "learning_rate": 9.648861559388927e-06,
      "loss": 0.1823,
      "step": 5019
    },
    {
      "epoch": 0.14644961783067856,
      "grad_norm": 0.9319882259605891,
      "learning_rate": 9.648687619044222e-06,
      "loss": 0.2048,
      "step": 5020
    },
    {
      "epoch": 0.14647879106132214,
      "grad_norm": 0.90146406917979,
      "learning_rate": 9.648513637197145e-06,
      "loss": 0.1749,
      "step": 5021
    },
    {
      "epoch": 0.1465079642919657,
      "grad_norm": 1.0829929748917295,
      "learning_rate": 9.648339613849246e-06,
      "loss": 0.1848,
      "step": 5022
    },
    {
      "epoch": 0.14653713752260925,
      "grad_norm": 1.0302658800200448,
      "learning_rate": 9.648165549002082e-06,
      "loss": 0.19,
      "step": 5023
    },
    {
      "epoch": 0.14656631075325283,
      "grad_norm": 0.911838579645531,
      "learning_rate": 9.647991442657206e-06,
      "loss": 0.1965,
      "step": 5024
    },
    {
      "epoch": 0.14659548398389638,
      "grad_norm": 1.0274456874566988,
      "learning_rate": 9.647817294816171e-06,
      "loss": 0.1928,
      "step": 5025
    },
    {
      "epoch": 0.14662465721453993,
      "grad_norm": 0.9573276296487475,
      "learning_rate": 9.647643105480533e-06,
      "loss": 0.1619,
      "step": 5026
    },
    {
      "epoch": 0.1466538304451835,
      "grad_norm": 0.8000526587833882,
      "learning_rate": 9.647468874651847e-06,
      "loss": 0.1878,
      "step": 5027
    },
    {
      "epoch": 0.14668300367582707,
      "grad_norm": 1.0577708390316358,
      "learning_rate": 9.64729460233167e-06,
      "loss": 0.1555,
      "step": 5028
    },
    {
      "epoch": 0.14671217690647062,
      "grad_norm": 1.000180504334239,
      "learning_rate": 9.647120288521552e-06,
      "loss": 0.1695,
      "step": 5029
    },
    {
      "epoch": 0.14674135013711417,
      "grad_norm": 0.862740318043579,
      "learning_rate": 9.646945933223058e-06,
      "loss": 0.2048,
      "step": 5030
    },
    {
      "epoch": 0.14677052336775775,
      "grad_norm": 0.9669313044036966,
      "learning_rate": 9.646771536437737e-06,
      "loss": 0.1508,
      "step": 5031
    },
    {
      "epoch": 0.1467996965984013,
      "grad_norm": 0.9811796630238051,
      "learning_rate": 9.64659709816715e-06,
      "loss": 0.1808,
      "step": 5032
    },
    {
      "epoch": 0.14682886982904486,
      "grad_norm": 0.7942673025650904,
      "learning_rate": 9.646422618412853e-06,
      "loss": 0.1899,
      "step": 5033
    },
    {
      "epoch": 0.14685804305968844,
      "grad_norm": 0.8135131995389027,
      "learning_rate": 9.646248097176404e-06,
      "loss": 0.1431,
      "step": 5034
    },
    {
      "epoch": 0.146887216290332,
      "grad_norm": 0.7968946027923035,
      "learning_rate": 9.646073534459362e-06,
      "loss": 0.1833,
      "step": 5035
    },
    {
      "epoch": 0.14691638952097555,
      "grad_norm": 0.7283403021107399,
      "learning_rate": 9.645898930263284e-06,
      "loss": 0.1513,
      "step": 5036
    },
    {
      "epoch": 0.1469455627516191,
      "grad_norm": 0.876018090340404,
      "learning_rate": 9.64572428458973e-06,
      "loss": 0.1943,
      "step": 5037
    },
    {
      "epoch": 0.14697473598226268,
      "grad_norm": 0.942450992235393,
      "learning_rate": 9.645549597440258e-06,
      "loss": 0.193,
      "step": 5038
    },
    {
      "epoch": 0.14700390921290624,
      "grad_norm": 1.3212259219432179,
      "learning_rate": 9.645374868816427e-06,
      "loss": 0.1722,
      "step": 5039
    },
    {
      "epoch": 0.1470330824435498,
      "grad_norm": 0.9321174895484561,
      "learning_rate": 9.6452000987198e-06,
      "loss": 0.1932,
      "step": 5040
    },
    {
      "epoch": 0.14706225567419337,
      "grad_norm": 0.8246588295826806,
      "learning_rate": 9.645025287151935e-06,
      "loss": 0.1897,
      "step": 5041
    },
    {
      "epoch": 0.14709142890483692,
      "grad_norm": 1.1071089887580734,
      "learning_rate": 9.644850434114392e-06,
      "loss": 0.1841,
      "step": 5042
    },
    {
      "epoch": 0.14712060213548048,
      "grad_norm": 0.7533652903409431,
      "learning_rate": 9.644675539608735e-06,
      "loss": 0.1802,
      "step": 5043
    },
    {
      "epoch": 0.14714977536612406,
      "grad_norm": 0.7793821199418741,
      "learning_rate": 9.644500603636521e-06,
      "loss": 0.192,
      "step": 5044
    },
    {
      "epoch": 0.1471789485967676,
      "grad_norm": 1.0249558726678674,
      "learning_rate": 9.644325626199315e-06,
      "loss": 0.1674,
      "step": 5045
    },
    {
      "epoch": 0.14720812182741116,
      "grad_norm": 0.9035026720752898,
      "learning_rate": 9.64415060729868e-06,
      "loss": 0.1565,
      "step": 5046
    },
    {
      "epoch": 0.14723729505805472,
      "grad_norm": 0.9098129543276077,
      "learning_rate": 9.643975546936177e-06,
      "loss": 0.2052,
      "step": 5047
    },
    {
      "epoch": 0.1472664682886983,
      "grad_norm": 1.145881702321572,
      "learning_rate": 9.64380044511337e-06,
      "loss": 0.2131,
      "step": 5048
    },
    {
      "epoch": 0.14729564151934185,
      "grad_norm": 1.0054627880130236,
      "learning_rate": 9.643625301831819e-06,
      "loss": 0.1987,
      "step": 5049
    },
    {
      "epoch": 0.1473248147499854,
      "grad_norm": 0.8709340744090021,
      "learning_rate": 9.64345011709309e-06,
      "loss": 0.1869,
      "step": 5050
    },
    {
      "epoch": 0.147353987980629,
      "grad_norm": 0.8013928944623968,
      "learning_rate": 9.643274890898746e-06,
      "loss": 0.1622,
      "step": 5051
    },
    {
      "epoch": 0.14738316121127254,
      "grad_norm": 1.1178847635819615,
      "learning_rate": 9.643099623250354e-06,
      "loss": 0.167,
      "step": 5052
    },
    {
      "epoch": 0.1474123344419161,
      "grad_norm": 0.8633938310892648,
      "learning_rate": 9.642924314149476e-06,
      "loss": 0.1725,
      "step": 5053
    },
    {
      "epoch": 0.14744150767255965,
      "grad_norm": 0.8510786116023561,
      "learning_rate": 9.642748963597679e-06,
      "loss": 0.1859,
      "step": 5054
    },
    {
      "epoch": 0.14747068090320323,
      "grad_norm": 0.8381533064398632,
      "learning_rate": 9.642573571596526e-06,
      "loss": 0.1587,
      "step": 5055
    },
    {
      "epoch": 0.14749985413384678,
      "grad_norm": 0.8260893979067963,
      "learning_rate": 9.642398138147586e-06,
      "loss": 0.1804,
      "step": 5056
    },
    {
      "epoch": 0.14752902736449033,
      "grad_norm": 1.0002854630513247,
      "learning_rate": 9.642222663252423e-06,
      "loss": 0.1834,
      "step": 5057
    },
    {
      "epoch": 0.14755820059513391,
      "grad_norm": 0.7813155893855992,
      "learning_rate": 9.642047146912605e-06,
      "loss": 0.1655,
      "step": 5058
    },
    {
      "epoch": 0.14758737382577747,
      "grad_norm": 1.0238358688806195,
      "learning_rate": 9.641871589129696e-06,
      "loss": 0.1804,
      "step": 5059
    },
    {
      "epoch": 0.14761654705642102,
      "grad_norm": 1.0137480806042538,
      "learning_rate": 9.641695989905268e-06,
      "loss": 0.1947,
      "step": 5060
    },
    {
      "epoch": 0.1476457202870646,
      "grad_norm": 0.8568766580525737,
      "learning_rate": 9.641520349240885e-06,
      "loss": 0.1635,
      "step": 5061
    },
    {
      "epoch": 0.14767489351770816,
      "grad_norm": 1.0123927327613185,
      "learning_rate": 9.641344667138117e-06,
      "loss": 0.1587,
      "step": 5062
    },
    {
      "epoch": 0.1477040667483517,
      "grad_norm": 0.7770446009954005,
      "learning_rate": 9.641168943598531e-06,
      "loss": 0.1828,
      "step": 5063
    },
    {
      "epoch": 0.14773323997899526,
      "grad_norm": 1.0421396794898743,
      "learning_rate": 9.640993178623698e-06,
      "loss": 0.2229,
      "step": 5064
    },
    {
      "epoch": 0.14776241320963884,
      "grad_norm": 0.945971704493877,
      "learning_rate": 9.640817372215184e-06,
      "loss": 0.1589,
      "step": 5065
    },
    {
      "epoch": 0.1477915864402824,
      "grad_norm": 0.8100496290166211,
      "learning_rate": 9.640641524374561e-06,
      "loss": 0.167,
      "step": 5066
    },
    {
      "epoch": 0.14782075967092595,
      "grad_norm": 0.9022934117122732,
      "learning_rate": 9.6404656351034e-06,
      "loss": 0.2038,
      "step": 5067
    },
    {
      "epoch": 0.14784993290156953,
      "grad_norm": 0.940857312978776,
      "learning_rate": 9.640289704403268e-06,
      "loss": 0.1746,
      "step": 5068
    },
    {
      "epoch": 0.14787910613221308,
      "grad_norm": 1.1533203706367532,
      "learning_rate": 9.640113732275736e-06,
      "loss": 0.1631,
      "step": 5069
    },
    {
      "epoch": 0.14790827936285664,
      "grad_norm": 0.726215537350509,
      "learning_rate": 9.639937718722379e-06,
      "loss": 0.1671,
      "step": 5070
    },
    {
      "epoch": 0.1479374525935002,
      "grad_norm": 0.94110480875373,
      "learning_rate": 9.639761663744764e-06,
      "loss": 0.1644,
      "step": 5071
    },
    {
      "epoch": 0.14796662582414377,
      "grad_norm": 0.8060087313547423,
      "learning_rate": 9.639585567344464e-06,
      "loss": 0.1642,
      "step": 5072
    },
    {
      "epoch": 0.14799579905478732,
      "grad_norm": 0.8795655083349191,
      "learning_rate": 9.639409429523053e-06,
      "loss": 0.1669,
      "step": 5073
    },
    {
      "epoch": 0.14802497228543088,
      "grad_norm": 0.8783713990715803,
      "learning_rate": 9.639233250282101e-06,
      "loss": 0.1813,
      "step": 5074
    },
    {
      "epoch": 0.14805414551607446,
      "grad_norm": 0.923042653322279,
      "learning_rate": 9.639057029623183e-06,
      "loss": 0.1919,
      "step": 5075
    },
    {
      "epoch": 0.148083318746718,
      "grad_norm": 0.8036545791712385,
      "learning_rate": 9.63888076754787e-06,
      "loss": 0.1818,
      "step": 5076
    },
    {
      "epoch": 0.14811249197736157,
      "grad_norm": 0.8408184672385143,
      "learning_rate": 9.63870446405774e-06,
      "loss": 0.1742,
      "step": 5077
    },
    {
      "epoch": 0.14814166520800515,
      "grad_norm": 0.8302103753326288,
      "learning_rate": 9.63852811915436e-06,
      "loss": 0.1864,
      "step": 5078
    },
    {
      "epoch": 0.1481708384386487,
      "grad_norm": 0.987029581812354,
      "learning_rate": 9.638351732839311e-06,
      "loss": 0.186,
      "step": 5079
    },
    {
      "epoch": 0.14820001166929225,
      "grad_norm": 0.7762550095069394,
      "learning_rate": 9.638175305114163e-06,
      "loss": 0.189,
      "step": 5080
    },
    {
      "epoch": 0.1482291848999358,
      "grad_norm": 0.948031984001899,
      "learning_rate": 9.637998835980493e-06,
      "loss": 0.1626,
      "step": 5081
    },
    {
      "epoch": 0.1482583581305794,
      "grad_norm": 0.8432231161022153,
      "learning_rate": 9.637822325439878e-06,
      "loss": 0.1575,
      "step": 5082
    },
    {
      "epoch": 0.14828753136122294,
      "grad_norm": 0.8277721859028909,
      "learning_rate": 9.637645773493893e-06,
      "loss": 0.1795,
      "step": 5083
    },
    {
      "epoch": 0.1483167045918665,
      "grad_norm": 0.8421227542506831,
      "learning_rate": 9.637469180144112e-06,
      "loss": 0.1863,
      "step": 5084
    },
    {
      "epoch": 0.14834587782251007,
      "grad_norm": 0.9640066115636375,
      "learning_rate": 9.637292545392114e-06,
      "loss": 0.1518,
      "step": 5085
    },
    {
      "epoch": 0.14837505105315363,
      "grad_norm": 0.7376568881964116,
      "learning_rate": 9.637115869239475e-06,
      "loss": 0.1631,
      "step": 5086
    },
    {
      "epoch": 0.14840422428379718,
      "grad_norm": 0.894572818155894,
      "learning_rate": 9.636939151687772e-06,
      "loss": 0.1839,
      "step": 5087
    },
    {
      "epoch": 0.14843339751444076,
      "grad_norm": 0.8624785688237763,
      "learning_rate": 9.636762392738583e-06,
      "loss": 0.1718,
      "step": 5088
    },
    {
      "epoch": 0.14846257074508432,
      "grad_norm": 0.8963683902542617,
      "learning_rate": 9.636585592393489e-06,
      "loss": 0.1636,
      "step": 5089
    },
    {
      "epoch": 0.14849174397572787,
      "grad_norm": 0.7614672069464431,
      "learning_rate": 9.636408750654062e-06,
      "loss": 0.1761,
      "step": 5090
    },
    {
      "epoch": 0.14852091720637142,
      "grad_norm": 0.8099578606723454,
      "learning_rate": 9.636231867521886e-06,
      "loss": 0.1641,
      "step": 5091
    },
    {
      "epoch": 0.148550090437015,
      "grad_norm": 0.7303341978527088,
      "learning_rate": 9.636054942998538e-06,
      "loss": 0.1641,
      "step": 5092
    },
    {
      "epoch": 0.14857926366765856,
      "grad_norm": 0.9262007731517238,
      "learning_rate": 9.635877977085599e-06,
      "loss": 0.1661,
      "step": 5093
    },
    {
      "epoch": 0.1486084368983021,
      "grad_norm": 0.8515555470352184,
      "learning_rate": 9.635700969784648e-06,
      "loss": 0.1975,
      "step": 5094
    },
    {
      "epoch": 0.1486376101289457,
      "grad_norm": 0.7646387338569562,
      "learning_rate": 9.635523921097265e-06,
      "loss": 0.1685,
      "step": 5095
    },
    {
      "epoch": 0.14866678335958924,
      "grad_norm": 0.9120152936496411,
      "learning_rate": 9.635346831025032e-06,
      "loss": 0.1685,
      "step": 5096
    },
    {
      "epoch": 0.1486959565902328,
      "grad_norm": 0.8963548524790588,
      "learning_rate": 9.635169699569528e-06,
      "loss": 0.1688,
      "step": 5097
    },
    {
      "epoch": 0.14872512982087635,
      "grad_norm": 0.8247082107867015,
      "learning_rate": 9.634992526732336e-06,
      "loss": 0.1815,
      "step": 5098
    },
    {
      "epoch": 0.14875430305151993,
      "grad_norm": 0.894825694058036,
      "learning_rate": 9.634815312515038e-06,
      "loss": 0.1901,
      "step": 5099
    },
    {
      "epoch": 0.14878347628216348,
      "grad_norm": 0.84077347909432,
      "learning_rate": 9.634638056919213e-06,
      "loss": 0.1655,
      "step": 5100
    },
    {
      "epoch": 0.14881264951280704,
      "grad_norm": 0.781795925008661,
      "learning_rate": 9.634460759946449e-06,
      "loss": 0.1801,
      "step": 5101
    },
    {
      "epoch": 0.14884182274345062,
      "grad_norm": 0.9837437794935363,
      "learning_rate": 9.634283421598322e-06,
      "loss": 0.1749,
      "step": 5102
    },
    {
      "epoch": 0.14887099597409417,
      "grad_norm": 0.8140802808114325,
      "learning_rate": 9.63410604187642e-06,
      "loss": 0.156,
      "step": 5103
    },
    {
      "epoch": 0.14890016920473773,
      "grad_norm": 0.9795870482107814,
      "learning_rate": 9.633928620782327e-06,
      "loss": 0.1786,
      "step": 5104
    },
    {
      "epoch": 0.1489293424353813,
      "grad_norm": 0.8467942217568136,
      "learning_rate": 9.633751158317624e-06,
      "loss": 0.1864,
      "step": 5105
    },
    {
      "epoch": 0.14895851566602486,
      "grad_norm": 0.9165020355845057,
      "learning_rate": 9.633573654483898e-06,
      "loss": 0.179,
      "step": 5106
    },
    {
      "epoch": 0.1489876888966684,
      "grad_norm": 0.9626048326471355,
      "learning_rate": 9.633396109282733e-06,
      "loss": 0.1839,
      "step": 5107
    },
    {
      "epoch": 0.14901686212731197,
      "grad_norm": 0.7993374003325108,
      "learning_rate": 9.633218522715713e-06,
      "loss": 0.1642,
      "step": 5108
    },
    {
      "epoch": 0.14904603535795555,
      "grad_norm": 0.8175438779764319,
      "learning_rate": 9.633040894784423e-06,
      "loss": 0.1833,
      "step": 5109
    },
    {
      "epoch": 0.1490752085885991,
      "grad_norm": 0.8280294357075404,
      "learning_rate": 9.63286322549045e-06,
      "loss": 0.1653,
      "step": 5110
    },
    {
      "epoch": 0.14910438181924265,
      "grad_norm": 0.908338783381698,
      "learning_rate": 9.632685514835381e-06,
      "loss": 0.2017,
      "step": 5111
    },
    {
      "epoch": 0.14913355504988624,
      "grad_norm": 1.0516092502100833,
      "learning_rate": 9.632507762820802e-06,
      "loss": 0.1945,
      "step": 5112
    },
    {
      "epoch": 0.1491627282805298,
      "grad_norm": 0.8242778378158175,
      "learning_rate": 9.632329969448297e-06,
      "loss": 0.214,
      "step": 5113
    },
    {
      "epoch": 0.14919190151117334,
      "grad_norm": 0.9240705779461624,
      "learning_rate": 9.63215213471946e-06,
      "loss": 0.188,
      "step": 5114
    },
    {
      "epoch": 0.14922107474181692,
      "grad_norm": 0.9747253445918209,
      "learning_rate": 9.631974258635872e-06,
      "loss": 0.176,
      "step": 5115
    },
    {
      "epoch": 0.14925024797246048,
      "grad_norm": 0.7211727458179493,
      "learning_rate": 9.631796341199122e-06,
      "loss": 0.1661,
      "step": 5116
    },
    {
      "epoch": 0.14927942120310403,
      "grad_norm": 0.8473016855698315,
      "learning_rate": 9.631618382410804e-06,
      "loss": 0.1683,
      "step": 5117
    },
    {
      "epoch": 0.14930859443374758,
      "grad_norm": 0.970757502447642,
      "learning_rate": 9.631440382272498e-06,
      "loss": 0.178,
      "step": 5118
    },
    {
      "epoch": 0.14933776766439116,
      "grad_norm": 0.6904814083838131,
      "learning_rate": 9.631262340785802e-06,
      "loss": 0.1492,
      "step": 5119
    },
    {
      "epoch": 0.14936694089503472,
      "grad_norm": 0.7421442896285018,
      "learning_rate": 9.6310842579523e-06,
      "loss": 0.1924,
      "step": 5120
    },
    {
      "epoch": 0.14939611412567827,
      "grad_norm": 1.0651467930117666,
      "learning_rate": 9.630906133773583e-06,
      "loss": 0.1639,
      "step": 5121
    },
    {
      "epoch": 0.14942528735632185,
      "grad_norm": 0.8390889273605537,
      "learning_rate": 9.63072796825124e-06,
      "loss": 0.1725,
      "step": 5122
    },
    {
      "epoch": 0.1494544605869654,
      "grad_norm": 0.9309394813860895,
      "learning_rate": 9.630549761386865e-06,
      "loss": 0.1823,
      "step": 5123
    },
    {
      "epoch": 0.14948363381760896,
      "grad_norm": 0.8677477915122752,
      "learning_rate": 9.630371513182047e-06,
      "loss": 0.1674,
      "step": 5124
    },
    {
      "epoch": 0.1495128070482525,
      "grad_norm": 0.7952579105541172,
      "learning_rate": 9.630193223638378e-06,
      "loss": 0.1645,
      "step": 5125
    },
    {
      "epoch": 0.1495419802788961,
      "grad_norm": 0.805111346894642,
      "learning_rate": 9.630014892757449e-06,
      "loss": 0.168,
      "step": 5126
    },
    {
      "epoch": 0.14957115350953964,
      "grad_norm": 0.9688313650719023,
      "learning_rate": 9.629836520540851e-06,
      "loss": 0.1874,
      "step": 5127
    },
    {
      "epoch": 0.1496003267401832,
      "grad_norm": 1.100268308538829,
      "learning_rate": 9.629658106990179e-06,
      "loss": 0.1825,
      "step": 5128
    },
    {
      "epoch": 0.14962949997082678,
      "grad_norm": 0.8652801324456252,
      "learning_rate": 9.629479652107024e-06,
      "loss": 0.1855,
      "step": 5129
    },
    {
      "epoch": 0.14965867320147033,
      "grad_norm": 0.8891167955846057,
      "learning_rate": 9.62930115589298e-06,
      "loss": 0.1559,
      "step": 5130
    },
    {
      "epoch": 0.14968784643211389,
      "grad_norm": 0.9620161577897258,
      "learning_rate": 9.62912261834964e-06,
      "loss": 0.198,
      "step": 5131
    },
    {
      "epoch": 0.14971701966275747,
      "grad_norm": 0.9557939480904629,
      "learning_rate": 9.628944039478599e-06,
      "loss": 0.1839,
      "step": 5132
    },
    {
      "epoch": 0.14974619289340102,
      "grad_norm": 0.7887926047280494,
      "learning_rate": 9.628765419281452e-06,
      "loss": 0.1862,
      "step": 5133
    },
    {
      "epoch": 0.14977536612404457,
      "grad_norm": 0.9482297956073317,
      "learning_rate": 9.62858675775979e-06,
      "loss": 0.1729,
      "step": 5134
    },
    {
      "epoch": 0.14980453935468813,
      "grad_norm": 0.8407297247504261,
      "learning_rate": 9.62840805491521e-06,
      "loss": 0.1659,
      "step": 5135
    },
    {
      "epoch": 0.1498337125853317,
      "grad_norm": 0.7981482556755108,
      "learning_rate": 9.62822931074931e-06,
      "loss": 0.1801,
      "step": 5136
    },
    {
      "epoch": 0.14986288581597526,
      "grad_norm": 0.9121878201813708,
      "learning_rate": 9.62805052526368e-06,
      "loss": 0.2001,
      "step": 5137
    },
    {
      "epoch": 0.14989205904661881,
      "grad_norm": 1.113022027779779,
      "learning_rate": 9.627871698459925e-06,
      "loss": 0.1788,
      "step": 5138
    },
    {
      "epoch": 0.1499212322772624,
      "grad_norm": 0.897779033375477,
      "learning_rate": 9.627692830339633e-06,
      "loss": 0.1934,
      "step": 5139
    },
    {
      "epoch": 0.14995040550790595,
      "grad_norm": 0.7520491511565212,
      "learning_rate": 9.627513920904403e-06,
      "loss": 0.1609,
      "step": 5140
    },
    {
      "epoch": 0.1499795787385495,
      "grad_norm": 0.8198308417034144,
      "learning_rate": 9.627334970155837e-06,
      "loss": 0.1887,
      "step": 5141
    },
    {
      "epoch": 0.15000875196919305,
      "grad_norm": 0.7567235699570818,
      "learning_rate": 9.627155978095526e-06,
      "loss": 0.1605,
      "step": 5142
    },
    {
      "epoch": 0.15003792519983664,
      "grad_norm": 0.8121192608338688,
      "learning_rate": 9.626976944725071e-06,
      "loss": 0.155,
      "step": 5143
    },
    {
      "epoch": 0.1500670984304802,
      "grad_norm": 0.8404794822023604,
      "learning_rate": 9.626797870046071e-06,
      "loss": 0.1668,
      "step": 5144
    },
    {
      "epoch": 0.15009627166112374,
      "grad_norm": 0.8568013721923577,
      "learning_rate": 9.626618754060127e-06,
      "loss": 0.1675,
      "step": 5145
    },
    {
      "epoch": 0.15012544489176732,
      "grad_norm": 0.7932146477101287,
      "learning_rate": 9.626439596768831e-06,
      "loss": 0.2013,
      "step": 5146
    },
    {
      "epoch": 0.15015461812241088,
      "grad_norm": 0.9595123753157163,
      "learning_rate": 9.626260398173788e-06,
      "loss": 0.1901,
      "step": 5147
    },
    {
      "epoch": 0.15018379135305443,
      "grad_norm": 0.8542659439878874,
      "learning_rate": 9.626081158276597e-06,
      "loss": 0.1652,
      "step": 5148
    },
    {
      "epoch": 0.150212964583698,
      "grad_norm": 0.9191254664150484,
      "learning_rate": 9.625901877078857e-06,
      "loss": 0.1802,
      "step": 5149
    },
    {
      "epoch": 0.15024213781434156,
      "grad_norm": 0.8044557749570149,
      "learning_rate": 9.625722554582171e-06,
      "loss": 0.1558,
      "step": 5150
    },
    {
      "epoch": 0.15027131104498512,
      "grad_norm": 1.1638830160852909,
      "learning_rate": 9.625543190788138e-06,
      "loss": 0.166,
      "step": 5151
    },
    {
      "epoch": 0.15030048427562867,
      "grad_norm": 0.7792950629208888,
      "learning_rate": 9.625363785698358e-06,
      "loss": 0.1691,
      "step": 5152
    },
    {
      "epoch": 0.15032965750627225,
      "grad_norm": 0.9530255030838118,
      "learning_rate": 9.625184339314435e-06,
      "loss": 0.1545,
      "step": 5153
    },
    {
      "epoch": 0.1503588307369158,
      "grad_norm": 0.7949708710489812,
      "learning_rate": 9.625004851637972e-06,
      "loss": 0.1808,
      "step": 5154
    },
    {
      "epoch": 0.15038800396755936,
      "grad_norm": 0.884317595587077,
      "learning_rate": 9.624825322670567e-06,
      "loss": 0.2024,
      "step": 5155
    },
    {
      "epoch": 0.15041717719820294,
      "grad_norm": 1.021448310381221,
      "learning_rate": 9.624645752413827e-06,
      "loss": 0.1817,
      "step": 5156
    },
    {
      "epoch": 0.1504463504288465,
      "grad_norm": 1.011998252678421,
      "learning_rate": 9.624466140869353e-06,
      "loss": 0.166,
      "step": 5157
    },
    {
      "epoch": 0.15047552365949005,
      "grad_norm": 0.8965535961246643,
      "learning_rate": 9.62428648803875e-06,
      "loss": 0.1771,
      "step": 5158
    },
    {
      "epoch": 0.15050469689013363,
      "grad_norm": 0.8663976474058169,
      "learning_rate": 9.624106793923622e-06,
      "loss": 0.1823,
      "step": 5159
    },
    {
      "epoch": 0.15053387012077718,
      "grad_norm": 0.8580090319773263,
      "learning_rate": 9.62392705852557e-06,
      "loss": 0.1517,
      "step": 5160
    },
    {
      "epoch": 0.15056304335142073,
      "grad_norm": 0.8654448618053974,
      "learning_rate": 9.623747281846203e-06,
      "loss": 0.1695,
      "step": 5161
    },
    {
      "epoch": 0.1505922165820643,
      "grad_norm": 0.7153421101554999,
      "learning_rate": 9.623567463887123e-06,
      "loss": 0.1712,
      "step": 5162
    },
    {
      "epoch": 0.15062138981270787,
      "grad_norm": 0.7000303542349987,
      "learning_rate": 9.623387604649937e-06,
      "loss": 0.1591,
      "step": 5163
    },
    {
      "epoch": 0.15065056304335142,
      "grad_norm": 1.0505270905577375,
      "learning_rate": 9.62320770413625e-06,
      "loss": 0.1674,
      "step": 5164
    },
    {
      "epoch": 0.15067973627399497,
      "grad_norm": 0.7705851421827375,
      "learning_rate": 9.623027762347669e-06,
      "loss": 0.1835,
      "step": 5165
    },
    {
      "epoch": 0.15070890950463856,
      "grad_norm": 1.0398084532315381,
      "learning_rate": 9.622847779285798e-06,
      "loss": 0.1675,
      "step": 5166
    },
    {
      "epoch": 0.1507380827352821,
      "grad_norm": 0.8384019112904402,
      "learning_rate": 9.622667754952246e-06,
      "loss": 0.1654,
      "step": 5167
    },
    {
      "epoch": 0.15076725596592566,
      "grad_norm": 0.9030474618235137,
      "learning_rate": 9.62248768934862e-06,
      "loss": 0.1583,
      "step": 5168
    },
    {
      "epoch": 0.15079642919656921,
      "grad_norm": 0.8620656330514548,
      "learning_rate": 9.62230758247653e-06,
      "loss": 0.1434,
      "step": 5169
    },
    {
      "epoch": 0.1508256024272128,
      "grad_norm": 0.9205909108268115,
      "learning_rate": 9.622127434337578e-06,
      "loss": 0.1841,
      "step": 5170
    },
    {
      "epoch": 0.15085477565785635,
      "grad_norm": 1.0488539207987573,
      "learning_rate": 9.621947244933377e-06,
      "loss": 0.1992,
      "step": 5171
    },
    {
      "epoch": 0.1508839488884999,
      "grad_norm": 0.7687755427598137,
      "learning_rate": 9.621767014265534e-06,
      "loss": 0.1772,
      "step": 5172
    },
    {
      "epoch": 0.15091312211914348,
      "grad_norm": 0.9379189448252973,
      "learning_rate": 9.621586742335658e-06,
      "loss": 0.1617,
      "step": 5173
    },
    {
      "epoch": 0.15094229534978704,
      "grad_norm": 0.9344745331471433,
      "learning_rate": 9.62140642914536e-06,
      "loss": 0.1783,
      "step": 5174
    },
    {
      "epoch": 0.1509714685804306,
      "grad_norm": 0.9544197104113092,
      "learning_rate": 9.621226074696249e-06,
      "loss": 0.1823,
      "step": 5175
    },
    {
      "epoch": 0.15100064181107417,
      "grad_norm": 0.8035634563611824,
      "learning_rate": 9.621045678989933e-06,
      "loss": 0.1848,
      "step": 5176
    },
    {
      "epoch": 0.15102981504171772,
      "grad_norm": 1.2575767680881196,
      "learning_rate": 9.620865242028025e-06,
      "loss": 0.1656,
      "step": 5177
    },
    {
      "epoch": 0.15105898827236128,
      "grad_norm": 0.9709566370302833,
      "learning_rate": 9.620684763812135e-06,
      "loss": 0.1561,
      "step": 5178
    },
    {
      "epoch": 0.15108816150300483,
      "grad_norm": 0.8131796770604164,
      "learning_rate": 9.620504244343875e-06,
      "loss": 0.1895,
      "step": 5179
    },
    {
      "epoch": 0.1511173347336484,
      "grad_norm": 0.9585894437782273,
      "learning_rate": 9.620323683624855e-06,
      "loss": 0.1597,
      "step": 5180
    },
    {
      "epoch": 0.15114650796429197,
      "grad_norm": 0.7724375926022282,
      "learning_rate": 9.62014308165669e-06,
      "loss": 0.1802,
      "step": 5181
    },
    {
      "epoch": 0.15117568119493552,
      "grad_norm": 0.7903951900843166,
      "learning_rate": 9.619962438440988e-06,
      "loss": 0.1554,
      "step": 5182
    },
    {
      "epoch": 0.1512048544255791,
      "grad_norm": 0.8380458263545416,
      "learning_rate": 9.619781753979367e-06,
      "loss": 0.1607,
      "step": 5183
    },
    {
      "epoch": 0.15123402765622265,
      "grad_norm": 1.2674925913842625,
      "learning_rate": 9.619601028273436e-06,
      "loss": 0.148,
      "step": 5184
    },
    {
      "epoch": 0.1512632008868662,
      "grad_norm": 0.6770142237354774,
      "learning_rate": 9.61942026132481e-06,
      "loss": 0.1669,
      "step": 5185
    },
    {
      "epoch": 0.15129237411750976,
      "grad_norm": 1.2654969480912075,
      "learning_rate": 9.619239453135103e-06,
      "loss": 0.1922,
      "step": 5186
    },
    {
      "epoch": 0.15132154734815334,
      "grad_norm": 0.9072409755784046,
      "learning_rate": 9.619058603705927e-06,
      "loss": 0.16,
      "step": 5187
    },
    {
      "epoch": 0.1513507205787969,
      "grad_norm": 0.9758151903911123,
      "learning_rate": 9.6188777130389e-06,
      "loss": 0.1828,
      "step": 5188
    },
    {
      "epoch": 0.15137989380944045,
      "grad_norm": 0.8384463599622914,
      "learning_rate": 9.618696781135635e-06,
      "loss": 0.1837,
      "step": 5189
    },
    {
      "epoch": 0.15140906704008403,
      "grad_norm": 1.0062725527060505,
      "learning_rate": 9.618515807997748e-06,
      "loss": 0.1779,
      "step": 5190
    },
    {
      "epoch": 0.15143824027072758,
      "grad_norm": 0.9554657667561735,
      "learning_rate": 9.618334793626855e-06,
      "loss": 0.2033,
      "step": 5191
    },
    {
      "epoch": 0.15146741350137113,
      "grad_norm": 0.7103028587542359,
      "learning_rate": 9.61815373802457e-06,
      "loss": 0.1588,
      "step": 5192
    },
    {
      "epoch": 0.15149658673201472,
      "grad_norm": 0.9075256364303415,
      "learning_rate": 9.617972641192513e-06,
      "loss": 0.165,
      "step": 5193
    },
    {
      "epoch": 0.15152575996265827,
      "grad_norm": 1.1186651509945547,
      "learning_rate": 9.617791503132297e-06,
      "loss": 0.1742,
      "step": 5194
    },
    {
      "epoch": 0.15155493319330182,
      "grad_norm": 0.916327418833708,
      "learning_rate": 9.617610323845539e-06,
      "loss": 0.223,
      "step": 5195
    },
    {
      "epoch": 0.15158410642394538,
      "grad_norm": 0.874467535661217,
      "learning_rate": 9.617429103333862e-06,
      "loss": 0.1556,
      "step": 5196
    },
    {
      "epoch": 0.15161327965458896,
      "grad_norm": 1.1124542867749956,
      "learning_rate": 9.617247841598877e-06,
      "loss": 0.1789,
      "step": 5197
    },
    {
      "epoch": 0.1516424528852325,
      "grad_norm": 0.9619492305116694,
      "learning_rate": 9.617066538642209e-06,
      "loss": 0.1895,
      "step": 5198
    },
    {
      "epoch": 0.15167162611587606,
      "grad_norm": 1.0891589166378086,
      "learning_rate": 9.616885194465471e-06,
      "loss": 0.1647,
      "step": 5199
    },
    {
      "epoch": 0.15170079934651964,
      "grad_norm": 0.8885211172842047,
      "learning_rate": 9.616703809070283e-06,
      "loss": 0.1664,
      "step": 5200
    },
    {
      "epoch": 0.1517299725771632,
      "grad_norm": 0.8612314005430644,
      "learning_rate": 9.616522382458268e-06,
      "loss": 0.1675,
      "step": 5201
    },
    {
      "epoch": 0.15175914580780675,
      "grad_norm": 0.9704916637924584,
      "learning_rate": 9.616340914631041e-06,
      "loss": 0.1864,
      "step": 5202
    },
    {
      "epoch": 0.15178831903845033,
      "grad_norm": 0.8954603361269353,
      "learning_rate": 9.616159405590226e-06,
      "loss": 0.1916,
      "step": 5203
    },
    {
      "epoch": 0.15181749226909388,
      "grad_norm": 1.0133855686822917,
      "learning_rate": 9.615977855337442e-06,
      "loss": 0.1832,
      "step": 5204
    },
    {
      "epoch": 0.15184666549973744,
      "grad_norm": 0.8147693290478469,
      "learning_rate": 9.615796263874308e-06,
      "loss": 0.1587,
      "step": 5205
    },
    {
      "epoch": 0.151875838730381,
      "grad_norm": 1.0275112228198235,
      "learning_rate": 9.615614631202449e-06,
      "loss": 0.1581,
      "step": 5206
    },
    {
      "epoch": 0.15190501196102457,
      "grad_norm": 1.1285279421671728,
      "learning_rate": 9.615432957323481e-06,
      "loss": 0.1894,
      "step": 5207
    },
    {
      "epoch": 0.15193418519166813,
      "grad_norm": 0.8877871622878019,
      "learning_rate": 9.615251242239033e-06,
      "loss": 0.1739,
      "step": 5208
    },
    {
      "epoch": 0.15196335842231168,
      "grad_norm": 1.050245295509508,
      "learning_rate": 9.61506948595072e-06,
      "loss": 0.1652,
      "step": 5209
    },
    {
      "epoch": 0.15199253165295526,
      "grad_norm": 0.9636502036667239,
      "learning_rate": 9.614887688460171e-06,
      "loss": 0.1741,
      "step": 5210
    },
    {
      "epoch": 0.1520217048835988,
      "grad_norm": 1.020172444862389,
      "learning_rate": 9.614705849769006e-06,
      "loss": 0.1775,
      "step": 5211
    },
    {
      "epoch": 0.15205087811424237,
      "grad_norm": 0.7703176891004924,
      "learning_rate": 9.61452396987885e-06,
      "loss": 0.1719,
      "step": 5212
    },
    {
      "epoch": 0.15208005134488592,
      "grad_norm": 0.7928883634593038,
      "learning_rate": 9.614342048791322e-06,
      "loss": 0.2006,
      "step": 5213
    },
    {
      "epoch": 0.1521092245755295,
      "grad_norm": 0.8622712038997947,
      "learning_rate": 9.614160086508053e-06,
      "loss": 0.1738,
      "step": 5214
    },
    {
      "epoch": 0.15213839780617305,
      "grad_norm": 1.015876504668712,
      "learning_rate": 9.613978083030663e-06,
      "loss": 0.1908,
      "step": 5215
    },
    {
      "epoch": 0.1521675710368166,
      "grad_norm": 0.9547319868253054,
      "learning_rate": 9.613796038360779e-06,
      "loss": 0.1813,
      "step": 5216
    },
    {
      "epoch": 0.1521967442674602,
      "grad_norm": 0.8052457579092854,
      "learning_rate": 9.613613952500024e-06,
      "loss": 0.178,
      "step": 5217
    },
    {
      "epoch": 0.15222591749810374,
      "grad_norm": 0.9372740219522526,
      "learning_rate": 9.613431825450026e-06,
      "loss": 0.1479,
      "step": 5218
    },
    {
      "epoch": 0.1522550907287473,
      "grad_norm": 1.0724994981092462,
      "learning_rate": 9.613249657212408e-06,
      "loss": 0.1575,
      "step": 5219
    },
    {
      "epoch": 0.15228426395939088,
      "grad_norm": 0.8514462977175778,
      "learning_rate": 9.613067447788802e-06,
      "loss": 0.1506,
      "step": 5220
    },
    {
      "epoch": 0.15231343719003443,
      "grad_norm": 1.186589938781719,
      "learning_rate": 9.612885197180828e-06,
      "loss": 0.1783,
      "step": 5221
    },
    {
      "epoch": 0.15234261042067798,
      "grad_norm": 0.9758065586189032,
      "learning_rate": 9.612702905390116e-06,
      "loss": 0.1699,
      "step": 5222
    },
    {
      "epoch": 0.15237178365132154,
      "grad_norm": 0.8549776954726223,
      "learning_rate": 9.612520572418296e-06,
      "loss": 0.1536,
      "step": 5223
    },
    {
      "epoch": 0.15240095688196512,
      "grad_norm": 0.9629543975850083,
      "learning_rate": 9.612338198266993e-06,
      "loss": 0.1806,
      "step": 5224
    },
    {
      "epoch": 0.15243013011260867,
      "grad_norm": 0.944028999761969,
      "learning_rate": 9.612155782937835e-06,
      "loss": 0.1792,
      "step": 5225
    },
    {
      "epoch": 0.15245930334325222,
      "grad_norm": 0.8341628912648352,
      "learning_rate": 9.61197332643245e-06,
      "loss": 0.1704,
      "step": 5226
    },
    {
      "epoch": 0.1524884765738958,
      "grad_norm": 0.8256586584529005,
      "learning_rate": 9.61179082875247e-06,
      "loss": 0.196,
      "step": 5227
    },
    {
      "epoch": 0.15251764980453936,
      "grad_norm": 0.8263066750080229,
      "learning_rate": 9.611608289899521e-06,
      "loss": 0.1666,
      "step": 5228
    },
    {
      "epoch": 0.1525468230351829,
      "grad_norm": 0.9071922848564402,
      "learning_rate": 9.611425709875234e-06,
      "loss": 0.1835,
      "step": 5229
    },
    {
      "epoch": 0.1525759962658265,
      "grad_norm": 0.6553228819023137,
      "learning_rate": 9.611243088681239e-06,
      "loss": 0.1432,
      "step": 5230
    },
    {
      "epoch": 0.15260516949647004,
      "grad_norm": 0.7810850295323523,
      "learning_rate": 9.611060426319168e-06,
      "loss": 0.1773,
      "step": 5231
    },
    {
      "epoch": 0.1526343427271136,
      "grad_norm": 0.9786584587614672,
      "learning_rate": 9.61087772279065e-06,
      "loss": 0.1377,
      "step": 5232
    },
    {
      "epoch": 0.15266351595775715,
      "grad_norm": 0.688277715765858,
      "learning_rate": 9.610694978097314e-06,
      "loss": 0.1647,
      "step": 5233
    },
    {
      "epoch": 0.15269268918840073,
      "grad_norm": 0.9339862118585256,
      "learning_rate": 9.610512192240797e-06,
      "loss": 0.1758,
      "step": 5234
    },
    {
      "epoch": 0.15272186241904429,
      "grad_norm": 0.9559664169701665,
      "learning_rate": 9.610329365222725e-06,
      "loss": 0.1923,
      "step": 5235
    },
    {
      "epoch": 0.15275103564968784,
      "grad_norm": 0.7826416609748704,
      "learning_rate": 9.610146497044736e-06,
      "loss": 0.1703,
      "step": 5236
    },
    {
      "epoch": 0.15278020888033142,
      "grad_norm": 0.9205570412734108,
      "learning_rate": 9.609963587708457e-06,
      "loss": 0.2068,
      "step": 5237
    },
    {
      "epoch": 0.15280938211097497,
      "grad_norm": 0.9580224749521794,
      "learning_rate": 9.609780637215525e-06,
      "loss": 0.1729,
      "step": 5238
    },
    {
      "epoch": 0.15283855534161853,
      "grad_norm": 0.7730007566000654,
      "learning_rate": 9.609597645567572e-06,
      "loss": 0.1668,
      "step": 5239
    },
    {
      "epoch": 0.15286772857226208,
      "grad_norm": 0.8259023240302131,
      "learning_rate": 9.609414612766231e-06,
      "loss": 0.1562,
      "step": 5240
    },
    {
      "epoch": 0.15289690180290566,
      "grad_norm": 0.8524120965477474,
      "learning_rate": 9.609231538813137e-06,
      "loss": 0.1703,
      "step": 5241
    },
    {
      "epoch": 0.1529260750335492,
      "grad_norm": 0.7507180816390783,
      "learning_rate": 9.609048423709923e-06,
      "loss": 0.1637,
      "step": 5242
    },
    {
      "epoch": 0.15295524826419277,
      "grad_norm": 1.003073067269039,
      "learning_rate": 9.608865267458227e-06,
      "loss": 0.1764,
      "step": 5243
    },
    {
      "epoch": 0.15298442149483635,
      "grad_norm": 0.7127185709273672,
      "learning_rate": 9.60868207005968e-06,
      "loss": 0.1496,
      "step": 5244
    },
    {
      "epoch": 0.1530135947254799,
      "grad_norm": 0.772085291176157,
      "learning_rate": 9.608498831515921e-06,
      "loss": 0.2168,
      "step": 5245
    },
    {
      "epoch": 0.15304276795612345,
      "grad_norm": 0.9274081634817349,
      "learning_rate": 9.608315551828584e-06,
      "loss": 0.1735,
      "step": 5246
    },
    {
      "epoch": 0.15307194118676704,
      "grad_norm": 0.7450579897763286,
      "learning_rate": 9.608132230999308e-06,
      "loss": 0.1768,
      "step": 5247
    },
    {
      "epoch": 0.1531011144174106,
      "grad_norm": 0.7313613290022853,
      "learning_rate": 9.607948869029723e-06,
      "loss": 0.1442,
      "step": 5248
    },
    {
      "epoch": 0.15313028764805414,
      "grad_norm": 0.8460874138905079,
      "learning_rate": 9.607765465921475e-06,
      "loss": 0.1954,
      "step": 5249
    },
    {
      "epoch": 0.1531594608786977,
      "grad_norm": 0.9068670462246586,
      "learning_rate": 9.607582021676193e-06,
      "loss": 0.1687,
      "step": 5250
    },
    {
      "epoch": 0.15318863410934128,
      "grad_norm": 0.9946540282040286,
      "learning_rate": 9.607398536295522e-06,
      "loss": 0.1773,
      "step": 5251
    },
    {
      "epoch": 0.15321780733998483,
      "grad_norm": 0.9035621429233804,
      "learning_rate": 9.607215009781094e-06,
      "loss": 0.1719,
      "step": 5252
    },
    {
      "epoch": 0.15324698057062838,
      "grad_norm": 0.9185277929085259,
      "learning_rate": 9.607031442134554e-06,
      "loss": 0.177,
      "step": 5253
    },
    {
      "epoch": 0.15327615380127196,
      "grad_norm": 0.869343631941605,
      "learning_rate": 9.606847833357534e-06,
      "loss": 0.1675,
      "step": 5254
    },
    {
      "epoch": 0.15330532703191552,
      "grad_norm": 0.7384229055781494,
      "learning_rate": 9.606664183451677e-06,
      "loss": 0.1361,
      "step": 5255
    },
    {
      "epoch": 0.15333450026255907,
      "grad_norm": 0.9312720913620172,
      "learning_rate": 9.606480492418622e-06,
      "loss": 0.1833,
      "step": 5256
    },
    {
      "epoch": 0.15336367349320262,
      "grad_norm": 0.9086396553991825,
      "learning_rate": 9.606296760260008e-06,
      "loss": 0.152,
      "step": 5257
    },
    {
      "epoch": 0.1533928467238462,
      "grad_norm": 0.8849816361045499,
      "learning_rate": 9.606112986977477e-06,
      "loss": 0.1819,
      "step": 5258
    },
    {
      "epoch": 0.15342201995448976,
      "grad_norm": 0.9573284683873251,
      "learning_rate": 9.605929172572668e-06,
      "loss": 0.1692,
      "step": 5259
    },
    {
      "epoch": 0.1534511931851333,
      "grad_norm": 0.9932776120380594,
      "learning_rate": 9.605745317047224e-06,
      "loss": 0.1778,
      "step": 5260
    },
    {
      "epoch": 0.1534803664157769,
      "grad_norm": 0.7934079782105875,
      "learning_rate": 9.605561420402786e-06,
      "loss": 0.1704,
      "step": 5261
    },
    {
      "epoch": 0.15350953964642045,
      "grad_norm": 1.0393421979411546,
      "learning_rate": 9.605377482640991e-06,
      "loss": 0.1662,
      "step": 5262
    },
    {
      "epoch": 0.153538712877064,
      "grad_norm": 0.8575229870657547,
      "learning_rate": 9.60519350376349e-06,
      "loss": 0.1714,
      "step": 5263
    },
    {
      "epoch": 0.15356788610770758,
      "grad_norm": 0.968177165950683,
      "learning_rate": 9.605009483771919e-06,
      "loss": 0.1795,
      "step": 5264
    },
    {
      "epoch": 0.15359705933835113,
      "grad_norm": 0.8745136764727854,
      "learning_rate": 9.604825422667921e-06,
      "loss": 0.1902,
      "step": 5265
    },
    {
      "epoch": 0.1536262325689947,
      "grad_norm": 0.7757863453994622,
      "learning_rate": 9.604641320453143e-06,
      "loss": 0.1536,
      "step": 5266
    },
    {
      "epoch": 0.15365540579963824,
      "grad_norm": 0.7955542026906197,
      "learning_rate": 9.604457177129226e-06,
      "loss": 0.1776,
      "step": 5267
    },
    {
      "epoch": 0.15368457903028182,
      "grad_norm": 1.4192027347687417,
      "learning_rate": 9.604272992697814e-06,
      "loss": 0.202,
      "step": 5268
    },
    {
      "epoch": 0.15371375226092537,
      "grad_norm": 1.0294363103726043,
      "learning_rate": 9.604088767160553e-06,
      "loss": 0.1645,
      "step": 5269
    },
    {
      "epoch": 0.15374292549156893,
      "grad_norm": 0.84436034845223,
      "learning_rate": 9.603904500519086e-06,
      "loss": 0.1819,
      "step": 5270
    },
    {
      "epoch": 0.1537720987222125,
      "grad_norm": 1.2536092902006988,
      "learning_rate": 9.603720192775057e-06,
      "loss": 0.1605,
      "step": 5271
    },
    {
      "epoch": 0.15380127195285606,
      "grad_norm": 1.0431662330200993,
      "learning_rate": 9.603535843930116e-06,
      "loss": 0.1796,
      "step": 5272
    },
    {
      "epoch": 0.15383044518349961,
      "grad_norm": 0.916858461356858,
      "learning_rate": 9.603351453985903e-06,
      "loss": 0.1757,
      "step": 5273
    },
    {
      "epoch": 0.1538596184141432,
      "grad_norm": 0.9931652330333578,
      "learning_rate": 9.603167022944069e-06,
      "loss": 0.1658,
      "step": 5274
    },
    {
      "epoch": 0.15388879164478675,
      "grad_norm": 1.0203396642721403,
      "learning_rate": 9.602982550806259e-06,
      "loss": 0.1544,
      "step": 5275
    },
    {
      "epoch": 0.1539179648754303,
      "grad_norm": 0.8700065891494176,
      "learning_rate": 9.602798037574117e-06,
      "loss": 0.153,
      "step": 5276
    },
    {
      "epoch": 0.15394713810607386,
      "grad_norm": 0.8711074668522925,
      "learning_rate": 9.602613483249297e-06,
      "loss": 0.1747,
      "step": 5277
    },
    {
      "epoch": 0.15397631133671744,
      "grad_norm": 0.8864803591663832,
      "learning_rate": 9.60242888783344e-06,
      "loss": 0.1628,
      "step": 5278
    },
    {
      "epoch": 0.154005484567361,
      "grad_norm": 0.8195468392838261,
      "learning_rate": 9.602244251328197e-06,
      "loss": 0.1876,
      "step": 5279
    },
    {
      "epoch": 0.15403465779800454,
      "grad_norm": 0.7120788408881081,
      "learning_rate": 9.602059573735216e-06,
      "loss": 0.1477,
      "step": 5280
    },
    {
      "epoch": 0.15406383102864812,
      "grad_norm": 1.1433381576343686,
      "learning_rate": 9.601874855056144e-06,
      "loss": 0.191,
      "step": 5281
    },
    {
      "epoch": 0.15409300425929168,
      "grad_norm": 1.1968477507990083,
      "learning_rate": 9.601690095292634e-06,
      "loss": 0.1571,
      "step": 5282
    },
    {
      "epoch": 0.15412217748993523,
      "grad_norm": 0.7344208807854702,
      "learning_rate": 9.601505294446333e-06,
      "loss": 0.1788,
      "step": 5283
    },
    {
      "epoch": 0.15415135072057878,
      "grad_norm": 0.9059529903033163,
      "learning_rate": 9.60132045251889e-06,
      "loss": 0.1953,
      "step": 5284
    },
    {
      "epoch": 0.15418052395122236,
      "grad_norm": 1.0109403347908683,
      "learning_rate": 9.60113556951196e-06,
      "loss": 0.1667,
      "step": 5285
    },
    {
      "epoch": 0.15420969718186592,
      "grad_norm": 0.7689079319908673,
      "learning_rate": 9.600950645427185e-06,
      "loss": 0.1509,
      "step": 5286
    },
    {
      "epoch": 0.15423887041250947,
      "grad_norm": 1.00848130810581,
      "learning_rate": 9.600765680266225e-06,
      "loss": 0.1778,
      "step": 5287
    },
    {
      "epoch": 0.15426804364315305,
      "grad_norm": 0.8903844184600506,
      "learning_rate": 9.600580674030724e-06,
      "loss": 0.1809,
      "step": 5288
    },
    {
      "epoch": 0.1542972168737966,
      "grad_norm": 0.9875250119902921,
      "learning_rate": 9.600395626722339e-06,
      "loss": 0.2061,
      "step": 5289
    },
    {
      "epoch": 0.15432639010444016,
      "grad_norm": 1.0680535625688914,
      "learning_rate": 9.60021053834272e-06,
      "loss": 0.1896,
      "step": 5290
    },
    {
      "epoch": 0.15435556333508374,
      "grad_norm": 0.8705081594665709,
      "learning_rate": 9.60002540889352e-06,
      "loss": 0.1605,
      "step": 5291
    },
    {
      "epoch": 0.1543847365657273,
      "grad_norm": 0.9134141055638958,
      "learning_rate": 9.59984023837639e-06,
      "loss": 0.1667,
      "step": 5292
    },
    {
      "epoch": 0.15441390979637085,
      "grad_norm": 0.9746062664402452,
      "learning_rate": 9.599655026792984e-06,
      "loss": 0.2085,
      "step": 5293
    },
    {
      "epoch": 0.1544430830270144,
      "grad_norm": 1.0177194139187573,
      "learning_rate": 9.599469774144958e-06,
      "loss": 0.1848,
      "step": 5294
    },
    {
      "epoch": 0.15447225625765798,
      "grad_norm": 0.7336467370838153,
      "learning_rate": 9.599284480433963e-06,
      "loss": 0.1558,
      "step": 5295
    },
    {
      "epoch": 0.15450142948830153,
      "grad_norm": 0.8934190069714771,
      "learning_rate": 9.599099145661654e-06,
      "loss": 0.1593,
      "step": 5296
    },
    {
      "epoch": 0.1545306027189451,
      "grad_norm": 0.7370426149551272,
      "learning_rate": 9.598913769829685e-06,
      "loss": 0.1686,
      "step": 5297
    },
    {
      "epoch": 0.15455977594958867,
      "grad_norm": 0.8063677727885334,
      "learning_rate": 9.598728352939713e-06,
      "loss": 0.1655,
      "step": 5298
    },
    {
      "epoch": 0.15458894918023222,
      "grad_norm": 0.8655458639912662,
      "learning_rate": 9.59854289499339e-06,
      "loss": 0.2007,
      "step": 5299
    },
    {
      "epoch": 0.15461812241087577,
      "grad_norm": 0.9481333670787759,
      "learning_rate": 9.598357395992375e-06,
      "loss": 0.2051,
      "step": 5300
    },
    {
      "epoch": 0.15464729564151936,
      "grad_norm": 0.9178766672636066,
      "learning_rate": 9.598171855938323e-06,
      "loss": 0.1735,
      "step": 5301
    },
    {
      "epoch": 0.1546764688721629,
      "grad_norm": 0.9383207793954956,
      "learning_rate": 9.597986274832891e-06,
      "loss": 0.1885,
      "step": 5302
    },
    {
      "epoch": 0.15470564210280646,
      "grad_norm": 0.9201225255267969,
      "learning_rate": 9.597800652677734e-06,
      "loss": 0.1754,
      "step": 5303
    },
    {
      "epoch": 0.15473481533345002,
      "grad_norm": 0.8736399135909124,
      "learning_rate": 9.597614989474512e-06,
      "loss": 0.164,
      "step": 5304
    },
    {
      "epoch": 0.1547639885640936,
      "grad_norm": 0.8028640814658534,
      "learning_rate": 9.597429285224879e-06,
      "loss": 0.1654,
      "step": 5305
    },
    {
      "epoch": 0.15479316179473715,
      "grad_norm": 0.8444657613859604,
      "learning_rate": 9.597243539930496e-06,
      "loss": 0.1777,
      "step": 5306
    },
    {
      "epoch": 0.1548223350253807,
      "grad_norm": 0.9995551637483626,
      "learning_rate": 9.597057753593018e-06,
      "loss": 0.176,
      "step": 5307
    },
    {
      "epoch": 0.15485150825602428,
      "grad_norm": 1.0362160814459496,
      "learning_rate": 9.59687192621411e-06,
      "loss": 0.1801,
      "step": 5308
    },
    {
      "epoch": 0.15488068148666784,
      "grad_norm": 0.7848692139608957,
      "learning_rate": 9.596686057795424e-06,
      "loss": 0.1802,
      "step": 5309
    },
    {
      "epoch": 0.1549098547173114,
      "grad_norm": 0.8313163715921467,
      "learning_rate": 9.59650014833862e-06,
      "loss": 0.1841,
      "step": 5310
    },
    {
      "epoch": 0.15493902794795494,
      "grad_norm": 0.9399501200895383,
      "learning_rate": 9.596314197845365e-06,
      "loss": 0.1847,
      "step": 5311
    },
    {
      "epoch": 0.15496820117859852,
      "grad_norm": 1.0063637393463205,
      "learning_rate": 9.59612820631731e-06,
      "loss": 0.1573,
      "step": 5312
    },
    {
      "epoch": 0.15499737440924208,
      "grad_norm": 0.8905078304122748,
      "learning_rate": 9.595942173756121e-06,
      "loss": 0.181,
      "step": 5313
    },
    {
      "epoch": 0.15502654763988563,
      "grad_norm": 0.8347368721986382,
      "learning_rate": 9.595756100163459e-06,
      "loss": 0.1461,
      "step": 5314
    },
    {
      "epoch": 0.1550557208705292,
      "grad_norm": 0.7521674181445719,
      "learning_rate": 9.59556998554098e-06,
      "loss": 0.1756,
      "step": 5315
    },
    {
      "epoch": 0.15508489410117277,
      "grad_norm": 0.6519258185824471,
      "learning_rate": 9.595383829890352e-06,
      "loss": 0.166,
      "step": 5316
    },
    {
      "epoch": 0.15511406733181632,
      "grad_norm": 0.8118094863671605,
      "learning_rate": 9.595197633213233e-06,
      "loss": 0.1663,
      "step": 5317
    },
    {
      "epoch": 0.1551432405624599,
      "grad_norm": 0.8365807746765657,
      "learning_rate": 9.595011395511288e-06,
      "loss": 0.1569,
      "step": 5318
    },
    {
      "epoch": 0.15517241379310345,
      "grad_norm": 0.8117970104724412,
      "learning_rate": 9.594825116786177e-06,
      "loss": 0.1725,
      "step": 5319
    },
    {
      "epoch": 0.155201587023747,
      "grad_norm": 1.0313484257999297,
      "learning_rate": 9.594638797039564e-06,
      "loss": 0.1667,
      "step": 5320
    },
    {
      "epoch": 0.15523076025439056,
      "grad_norm": 0.7814312257479537,
      "learning_rate": 9.594452436273113e-06,
      "loss": 0.1444,
      "step": 5321
    },
    {
      "epoch": 0.15525993348503414,
      "grad_norm": 0.8938854346748851,
      "learning_rate": 9.594266034488487e-06,
      "loss": 0.1819,
      "step": 5322
    },
    {
      "epoch": 0.1552891067156777,
      "grad_norm": 0.8963589527425608,
      "learning_rate": 9.594079591687352e-06,
      "loss": 0.1534,
      "step": 5323
    },
    {
      "epoch": 0.15531827994632125,
      "grad_norm": 1.015163944737604,
      "learning_rate": 9.593893107871371e-06,
      "loss": 0.1572,
      "step": 5324
    },
    {
      "epoch": 0.15534745317696483,
      "grad_norm": 0.93609118151598,
      "learning_rate": 9.593706583042208e-06,
      "loss": 0.1667,
      "step": 5325
    },
    {
      "epoch": 0.15537662640760838,
      "grad_norm": 0.9236317790651747,
      "learning_rate": 9.593520017201528e-06,
      "loss": 0.1812,
      "step": 5326
    },
    {
      "epoch": 0.15540579963825193,
      "grad_norm": 0.8454415638573578,
      "learning_rate": 9.593333410351e-06,
      "loss": 0.184,
      "step": 5327
    },
    {
      "epoch": 0.1554349728688955,
      "grad_norm": 1.3257250309757944,
      "learning_rate": 9.593146762492287e-06,
      "loss": 0.1783,
      "step": 5328
    },
    {
      "epoch": 0.15546414609953907,
      "grad_norm": 1.0337770473486,
      "learning_rate": 9.592960073627055e-06,
      "loss": 0.1603,
      "step": 5329
    },
    {
      "epoch": 0.15549331933018262,
      "grad_norm": 0.9237513937216989,
      "learning_rate": 9.592773343756973e-06,
      "loss": 0.1514,
      "step": 5330
    },
    {
      "epoch": 0.15552249256082618,
      "grad_norm": 1.1175713018548445,
      "learning_rate": 9.592586572883709e-06,
      "loss": 0.1684,
      "step": 5331
    },
    {
      "epoch": 0.15555166579146976,
      "grad_norm": 0.9757367878718678,
      "learning_rate": 9.592399761008925e-06,
      "loss": 0.1635,
      "step": 5332
    },
    {
      "epoch": 0.1555808390221133,
      "grad_norm": 1.0955431275446061,
      "learning_rate": 9.592212908134295e-06,
      "loss": 0.2119,
      "step": 5333
    },
    {
      "epoch": 0.15561001225275686,
      "grad_norm": 0.923702646775755,
      "learning_rate": 9.592026014261482e-06,
      "loss": 0.1719,
      "step": 5334
    },
    {
      "epoch": 0.15563918548340044,
      "grad_norm": 0.8292061516193554,
      "learning_rate": 9.59183907939216e-06,
      "loss": 0.1842,
      "step": 5335
    },
    {
      "epoch": 0.155668358714044,
      "grad_norm": 0.9886712419812845,
      "learning_rate": 9.591652103527992e-06,
      "loss": 0.1537,
      "step": 5336
    },
    {
      "epoch": 0.15569753194468755,
      "grad_norm": 0.7914672609825438,
      "learning_rate": 9.591465086670651e-06,
      "loss": 0.16,
      "step": 5337
    },
    {
      "epoch": 0.1557267051753311,
      "grad_norm": 0.8589726046962344,
      "learning_rate": 9.591278028821806e-06,
      "loss": 0.1973,
      "step": 5338
    },
    {
      "epoch": 0.15575587840597468,
      "grad_norm": 0.8789279455211334,
      "learning_rate": 9.591090929983127e-06,
      "loss": 0.1521,
      "step": 5339
    },
    {
      "epoch": 0.15578505163661824,
      "grad_norm": 0.8498280798734861,
      "learning_rate": 9.590903790156282e-06,
      "loss": 0.1736,
      "step": 5340
    },
    {
      "epoch": 0.1558142248672618,
      "grad_norm": 0.7567362231450246,
      "learning_rate": 9.590716609342947e-06,
      "loss": 0.1713,
      "step": 5341
    },
    {
      "epoch": 0.15584339809790537,
      "grad_norm": 0.9352837267459277,
      "learning_rate": 9.590529387544789e-06,
      "loss": 0.1797,
      "step": 5342
    },
    {
      "epoch": 0.15587257132854893,
      "grad_norm": 0.9945709590177103,
      "learning_rate": 9.59034212476348e-06,
      "loss": 0.1724,
      "step": 5343
    },
    {
      "epoch": 0.15590174455919248,
      "grad_norm": 0.8160437077233418,
      "learning_rate": 9.590154821000692e-06,
      "loss": 0.1771,
      "step": 5344
    },
    {
      "epoch": 0.15593091778983606,
      "grad_norm": 0.9219373694189473,
      "learning_rate": 9.5899674762581e-06,
      "loss": 0.1703,
      "step": 5345
    },
    {
      "epoch": 0.1559600910204796,
      "grad_norm": 0.8541268506205408,
      "learning_rate": 9.589780090537371e-06,
      "loss": 0.157,
      "step": 5346
    },
    {
      "epoch": 0.15598926425112317,
      "grad_norm": 0.8950324678253496,
      "learning_rate": 9.589592663840182e-06,
      "loss": 0.1925,
      "step": 5347
    },
    {
      "epoch": 0.15601843748176672,
      "grad_norm": 0.7035913765548862,
      "learning_rate": 9.589405196168204e-06,
      "loss": 0.1651,
      "step": 5348
    },
    {
      "epoch": 0.1560476107124103,
      "grad_norm": 1.6294715856673787,
      "learning_rate": 9.589217687523114e-06,
      "loss": 0.1657,
      "step": 5349
    },
    {
      "epoch": 0.15607678394305385,
      "grad_norm": 0.8411972847649672,
      "learning_rate": 9.589030137906584e-06,
      "loss": 0.1592,
      "step": 5350
    },
    {
      "epoch": 0.1561059571736974,
      "grad_norm": 0.7574829571818203,
      "learning_rate": 9.588842547320287e-06,
      "loss": 0.182,
      "step": 5351
    },
    {
      "epoch": 0.156135130404341,
      "grad_norm": 0.9529049001866505,
      "learning_rate": 9.588654915765901e-06,
      "loss": 0.1912,
      "step": 5352
    },
    {
      "epoch": 0.15616430363498454,
      "grad_norm": 0.895152485217695,
      "learning_rate": 9.588467243245099e-06,
      "loss": 0.1724,
      "step": 5353
    },
    {
      "epoch": 0.1561934768656281,
      "grad_norm": 0.8419340032565599,
      "learning_rate": 9.588279529759556e-06,
      "loss": 0.1535,
      "step": 5354
    },
    {
      "epoch": 0.15622265009627165,
      "grad_norm": 0.8098932874842786,
      "learning_rate": 9.588091775310948e-06,
      "loss": 0.1682,
      "step": 5355
    },
    {
      "epoch": 0.15625182332691523,
      "grad_norm": 1.0321349408698615,
      "learning_rate": 9.587903979900953e-06,
      "loss": 0.1503,
      "step": 5356
    },
    {
      "epoch": 0.15628099655755878,
      "grad_norm": 0.9084734717859104,
      "learning_rate": 9.587716143531248e-06,
      "loss": 0.1763,
      "step": 5357
    },
    {
      "epoch": 0.15631016978820234,
      "grad_norm": 0.8982855549872955,
      "learning_rate": 9.587528266203505e-06,
      "loss": 0.1778,
      "step": 5358
    },
    {
      "epoch": 0.15633934301884592,
      "grad_norm": 0.8508686200377119,
      "learning_rate": 9.587340347919406e-06,
      "loss": 0.1855,
      "step": 5359
    },
    {
      "epoch": 0.15636851624948947,
      "grad_norm": 1.0722445709322017,
      "learning_rate": 9.587152388680628e-06,
      "loss": 0.1845,
      "step": 5360
    },
    {
      "epoch": 0.15639768948013302,
      "grad_norm": 0.9633100778239042,
      "learning_rate": 9.586964388488849e-06,
      "loss": 0.1822,
      "step": 5361
    },
    {
      "epoch": 0.1564268627107766,
      "grad_norm": 1.1789964108458322,
      "learning_rate": 9.586776347345745e-06,
      "loss": 0.2008,
      "step": 5362
    },
    {
      "epoch": 0.15645603594142016,
      "grad_norm": 0.9854937256141431,
      "learning_rate": 9.586588265252999e-06,
      "loss": 0.1583,
      "step": 5363
    },
    {
      "epoch": 0.1564852091720637,
      "grad_norm": 0.8743567094915753,
      "learning_rate": 9.586400142212287e-06,
      "loss": 0.1722,
      "step": 5364
    },
    {
      "epoch": 0.15651438240270726,
      "grad_norm": 1.2650349148349425,
      "learning_rate": 9.58621197822529e-06,
      "loss": 0.1827,
      "step": 5365
    },
    {
      "epoch": 0.15654355563335084,
      "grad_norm": 0.6444370402660897,
      "learning_rate": 9.586023773293687e-06,
      "loss": 0.1711,
      "step": 5366
    },
    {
      "epoch": 0.1565727288639944,
      "grad_norm": 0.9065177099854337,
      "learning_rate": 9.585835527419157e-06,
      "loss": 0.1852,
      "step": 5367
    },
    {
      "epoch": 0.15660190209463795,
      "grad_norm": 0.9754269080467419,
      "learning_rate": 9.585647240603384e-06,
      "loss": 0.1482,
      "step": 5368
    },
    {
      "epoch": 0.15663107532528153,
      "grad_norm": 0.645413530190737,
      "learning_rate": 9.585458912848048e-06,
      "loss": 0.1816,
      "step": 5369
    },
    {
      "epoch": 0.15666024855592509,
      "grad_norm": 0.7349710144313402,
      "learning_rate": 9.585270544154825e-06,
      "loss": 0.1865,
      "step": 5370
    },
    {
      "epoch": 0.15668942178656864,
      "grad_norm": 0.8156158013215924,
      "learning_rate": 9.585082134525405e-06,
      "loss": 0.1839,
      "step": 5371
    },
    {
      "epoch": 0.1567185950172122,
      "grad_norm": 0.7953523323285103,
      "learning_rate": 9.584893683961464e-06,
      "loss": 0.2123,
      "step": 5372
    },
    {
      "epoch": 0.15674776824785577,
      "grad_norm": 0.7576460182732079,
      "learning_rate": 9.58470519246469e-06,
      "loss": 0.1591,
      "step": 5373
    },
    {
      "epoch": 0.15677694147849933,
      "grad_norm": 0.7287874841201041,
      "learning_rate": 9.58451666003676e-06,
      "loss": 0.174,
      "step": 5374
    },
    {
      "epoch": 0.15680611470914288,
      "grad_norm": 0.880261036992491,
      "learning_rate": 9.58432808667936e-06,
      "loss": 0.1652,
      "step": 5375
    },
    {
      "epoch": 0.15683528793978646,
      "grad_norm": 0.7200438830764648,
      "learning_rate": 9.584139472394173e-06,
      "loss": 0.1956,
      "step": 5376
    },
    {
      "epoch": 0.15686446117043001,
      "grad_norm": 0.8144091771627586,
      "learning_rate": 9.583950817182883e-06,
      "loss": 0.1845,
      "step": 5377
    },
    {
      "epoch": 0.15689363440107357,
      "grad_norm": 1.0797429701934214,
      "learning_rate": 9.583762121047175e-06,
      "loss": 0.1918,
      "step": 5378
    },
    {
      "epoch": 0.15692280763171715,
      "grad_norm": 0.8390675918028163,
      "learning_rate": 9.583573383988733e-06,
      "loss": 0.236,
      "step": 5379
    },
    {
      "epoch": 0.1569519808623607,
      "grad_norm": 0.830419468352586,
      "learning_rate": 9.583384606009243e-06,
      "loss": 0.1648,
      "step": 5380
    },
    {
      "epoch": 0.15698115409300425,
      "grad_norm": 0.7595095834019951,
      "learning_rate": 9.583195787110387e-06,
      "loss": 0.1763,
      "step": 5381
    },
    {
      "epoch": 0.1570103273236478,
      "grad_norm": 0.8040826984867856,
      "learning_rate": 9.583006927293855e-06,
      "loss": 0.1583,
      "step": 5382
    },
    {
      "epoch": 0.1570395005542914,
      "grad_norm": 0.9431321511186578,
      "learning_rate": 9.582818026561332e-06,
      "loss": 0.1714,
      "step": 5383
    },
    {
      "epoch": 0.15706867378493494,
      "grad_norm": 0.976150288729968,
      "learning_rate": 9.5826290849145e-06,
      "loss": 0.1668,
      "step": 5384
    },
    {
      "epoch": 0.1570978470155785,
      "grad_norm": 0.6976835131639112,
      "learning_rate": 9.582440102355052e-06,
      "loss": 0.1677,
      "step": 5385
    },
    {
      "epoch": 0.15712702024622208,
      "grad_norm": 1.0044919521364357,
      "learning_rate": 9.582251078884672e-06,
      "loss": 0.1982,
      "step": 5386
    },
    {
      "epoch": 0.15715619347686563,
      "grad_norm": 0.8706452783430578,
      "learning_rate": 9.58206201450505e-06,
      "loss": 0.1784,
      "step": 5387
    },
    {
      "epoch": 0.15718536670750918,
      "grad_norm": 0.8441239421029116,
      "learning_rate": 9.58187290921787e-06,
      "loss": 0.168,
      "step": 5388
    },
    {
      "epoch": 0.15721453993815276,
      "grad_norm": 1.0036271562359225,
      "learning_rate": 9.581683763024825e-06,
      "loss": 0.1832,
      "step": 5389
    },
    {
      "epoch": 0.15724371316879632,
      "grad_norm": 0.7096517906693629,
      "learning_rate": 9.5814945759276e-06,
      "loss": 0.1699,
      "step": 5390
    },
    {
      "epoch": 0.15727288639943987,
      "grad_norm": 0.9350427729280126,
      "learning_rate": 9.581305347927883e-06,
      "loss": 0.1939,
      "step": 5391
    },
    {
      "epoch": 0.15730205963008342,
      "grad_norm": 0.8232720239413225,
      "learning_rate": 9.581116079027367e-06,
      "loss": 0.1818,
      "step": 5392
    },
    {
      "epoch": 0.157331232860727,
      "grad_norm": 0.8040089296347235,
      "learning_rate": 9.580926769227741e-06,
      "loss": 0.1906,
      "step": 5393
    },
    {
      "epoch": 0.15736040609137056,
      "grad_norm": 0.8168039532541982,
      "learning_rate": 9.580737418530693e-06,
      "loss": 0.1559,
      "step": 5394
    },
    {
      "epoch": 0.1573895793220141,
      "grad_norm": 0.8357454538670241,
      "learning_rate": 9.580548026937915e-06,
      "loss": 0.1698,
      "step": 5395
    },
    {
      "epoch": 0.1574187525526577,
      "grad_norm": 0.8003691384268423,
      "learning_rate": 9.580358594451098e-06,
      "loss": 0.1648,
      "step": 5396
    },
    {
      "epoch": 0.15744792578330125,
      "grad_norm": 0.9345653440989051,
      "learning_rate": 9.580169121071934e-06,
      "loss": 0.182,
      "step": 5397
    },
    {
      "epoch": 0.1574770990139448,
      "grad_norm": 0.86565735878832,
      "learning_rate": 9.579979606802112e-06,
      "loss": 0.1632,
      "step": 5398
    },
    {
      "epoch": 0.15750627224458835,
      "grad_norm": 0.9821047596796536,
      "learning_rate": 9.579790051643325e-06,
      "loss": 0.2039,
      "step": 5399
    },
    {
      "epoch": 0.15753544547523193,
      "grad_norm": 0.9166169957795464,
      "learning_rate": 9.579600455597266e-06,
      "loss": 0.1609,
      "step": 5400
    },
    {
      "epoch": 0.1575646187058755,
      "grad_norm": 0.9635580581460742,
      "learning_rate": 9.579410818665628e-06,
      "loss": 0.1557,
      "step": 5401
    },
    {
      "epoch": 0.15759379193651904,
      "grad_norm": 1.080706368444192,
      "learning_rate": 9.579221140850104e-06,
      "loss": 0.1944,
      "step": 5402
    },
    {
      "epoch": 0.15762296516716262,
      "grad_norm": 0.927293969695493,
      "learning_rate": 9.579031422152387e-06,
      "loss": 0.1764,
      "step": 5403
    },
    {
      "epoch": 0.15765213839780617,
      "grad_norm": 0.9799503631241685,
      "learning_rate": 9.57884166257417e-06,
      "loss": 0.1652,
      "step": 5404
    },
    {
      "epoch": 0.15768131162844973,
      "grad_norm": 1.0106878771654144,
      "learning_rate": 9.578651862117148e-06,
      "loss": 0.188,
      "step": 5405
    },
    {
      "epoch": 0.1577104848590933,
      "grad_norm": 0.7422579840142393,
      "learning_rate": 9.578462020783013e-06,
      "loss": 0.1628,
      "step": 5406
    },
    {
      "epoch": 0.15773965808973686,
      "grad_norm": 1.3446219409699107,
      "learning_rate": 9.578272138573463e-06,
      "loss": 0.1757,
      "step": 5407
    },
    {
      "epoch": 0.15776883132038041,
      "grad_norm": 1.0520443887428077,
      "learning_rate": 9.578082215490194e-06,
      "loss": 0.1895,
      "step": 5408
    },
    {
      "epoch": 0.15779800455102397,
      "grad_norm": 0.8377729339645991,
      "learning_rate": 9.577892251534899e-06,
      "loss": 0.1933,
      "step": 5409
    },
    {
      "epoch": 0.15782717778166755,
      "grad_norm": 0.743670948833949,
      "learning_rate": 9.577702246709275e-06,
      "loss": 0.1654,
      "step": 5410
    },
    {
      "epoch": 0.1578563510123111,
      "grad_norm": 1.030117423630722,
      "learning_rate": 9.577512201015017e-06,
      "loss": 0.1672,
      "step": 5411
    },
    {
      "epoch": 0.15788552424295466,
      "grad_norm": 0.9125179739569904,
      "learning_rate": 9.577322114453823e-06,
      "loss": 0.1775,
      "step": 5412
    },
    {
      "epoch": 0.15791469747359824,
      "grad_norm": 0.9642911522184233,
      "learning_rate": 9.57713198702739e-06,
      "loss": 0.1844,
      "step": 5413
    },
    {
      "epoch": 0.1579438707042418,
      "grad_norm": 0.8439583916956757,
      "learning_rate": 9.576941818737417e-06,
      "loss": 0.159,
      "step": 5414
    },
    {
      "epoch": 0.15797304393488534,
      "grad_norm": 0.7592182479014896,
      "learning_rate": 9.576751609585598e-06,
      "loss": 0.1573,
      "step": 5415
    },
    {
      "epoch": 0.15800221716552892,
      "grad_norm": 0.9401020753462049,
      "learning_rate": 9.576561359573634e-06,
      "loss": 0.1709,
      "step": 5416
    },
    {
      "epoch": 0.15803139039617248,
      "grad_norm": 0.9758425237649551,
      "learning_rate": 9.576371068703223e-06,
      "loss": 0.1919,
      "step": 5417
    },
    {
      "epoch": 0.15806056362681603,
      "grad_norm": 0.7927983301156055,
      "learning_rate": 9.576180736976063e-06,
      "loss": 0.1742,
      "step": 5418
    },
    {
      "epoch": 0.15808973685745958,
      "grad_norm": 0.9042913679129185,
      "learning_rate": 9.575990364393854e-06,
      "loss": 0.1855,
      "step": 5419
    },
    {
      "epoch": 0.15811891008810317,
      "grad_norm": 1.0121199501897107,
      "learning_rate": 9.575799950958296e-06,
      "loss": 0.1931,
      "step": 5420
    },
    {
      "epoch": 0.15814808331874672,
      "grad_norm": 0.7928701851499547,
      "learning_rate": 9.575609496671087e-06,
      "loss": 0.1656,
      "step": 5421
    },
    {
      "epoch": 0.15817725654939027,
      "grad_norm": 0.9124917297475127,
      "learning_rate": 9.57541900153393e-06,
      "loss": 0.1773,
      "step": 5422
    },
    {
      "epoch": 0.15820642978003385,
      "grad_norm": 0.9829388883479042,
      "learning_rate": 9.575228465548523e-06,
      "loss": 0.1692,
      "step": 5423
    },
    {
      "epoch": 0.1582356030106774,
      "grad_norm": 0.7949378524931011,
      "learning_rate": 9.57503788871657e-06,
      "loss": 0.1558,
      "step": 5424
    },
    {
      "epoch": 0.15826477624132096,
      "grad_norm": 0.8954911948894755,
      "learning_rate": 9.57484727103977e-06,
      "loss": 0.1726,
      "step": 5425
    },
    {
      "epoch": 0.1582939494719645,
      "grad_norm": 0.7890650899596373,
      "learning_rate": 9.574656612519826e-06,
      "loss": 0.1642,
      "step": 5426
    },
    {
      "epoch": 0.1583231227026081,
      "grad_norm": 0.8069001450485412,
      "learning_rate": 9.57446591315844e-06,
      "loss": 0.168,
      "step": 5427
    },
    {
      "epoch": 0.15835229593325165,
      "grad_norm": 0.9666639566313857,
      "learning_rate": 9.574275172957312e-06,
      "loss": 0.209,
      "step": 5428
    },
    {
      "epoch": 0.1583814691638952,
      "grad_norm": 0.9358702572827285,
      "learning_rate": 9.57408439191815e-06,
      "loss": 0.1843,
      "step": 5429
    },
    {
      "epoch": 0.15841064239453878,
      "grad_norm": 0.7631703200514474,
      "learning_rate": 9.573893570042654e-06,
      "loss": 0.1591,
      "step": 5430
    },
    {
      "epoch": 0.15843981562518233,
      "grad_norm": 1.1370567339111364,
      "learning_rate": 9.573702707332527e-06,
      "loss": 0.1791,
      "step": 5431
    },
    {
      "epoch": 0.1584689888558259,
      "grad_norm": 0.8459786444647961,
      "learning_rate": 9.573511803789475e-06,
      "loss": 0.1593,
      "step": 5432
    },
    {
      "epoch": 0.15849816208646947,
      "grad_norm": 0.8080793621441061,
      "learning_rate": 9.573320859415202e-06,
      "loss": 0.1971,
      "step": 5433
    },
    {
      "epoch": 0.15852733531711302,
      "grad_norm": 1.074489446362773,
      "learning_rate": 9.573129874211411e-06,
      "loss": 0.1996,
      "step": 5434
    },
    {
      "epoch": 0.15855650854775657,
      "grad_norm": 0.9428179071504762,
      "learning_rate": 9.57293884817981e-06,
      "loss": 0.1816,
      "step": 5435
    },
    {
      "epoch": 0.15858568177840013,
      "grad_norm": 0.8700286428009719,
      "learning_rate": 9.572747781322099e-06,
      "loss": 0.1804,
      "step": 5436
    },
    {
      "epoch": 0.1586148550090437,
      "grad_norm": 0.8717119106477643,
      "learning_rate": 9.57255667363999e-06,
      "loss": 0.1623,
      "step": 5437
    },
    {
      "epoch": 0.15864402823968726,
      "grad_norm": 0.7060402488018908,
      "learning_rate": 9.572365525135185e-06,
      "loss": 0.1646,
      "step": 5438
    },
    {
      "epoch": 0.15867320147033082,
      "grad_norm": 0.8811567443773396,
      "learning_rate": 9.572174335809394e-06,
      "loss": 0.1633,
      "step": 5439
    },
    {
      "epoch": 0.1587023747009744,
      "grad_norm": 0.7270687461858438,
      "learning_rate": 9.571983105664322e-06,
      "loss": 0.1772,
      "step": 5440
    },
    {
      "epoch": 0.15873154793161795,
      "grad_norm": 0.8497674277569558,
      "learning_rate": 9.571791834701675e-06,
      "loss": 0.1702,
      "step": 5441
    },
    {
      "epoch": 0.1587607211622615,
      "grad_norm": 0.8674696912018066,
      "learning_rate": 9.571600522923163e-06,
      "loss": 0.2009,
      "step": 5442
    },
    {
      "epoch": 0.15878989439290506,
      "grad_norm": 0.8656224250311568,
      "learning_rate": 9.571409170330491e-06,
      "loss": 0.1604,
      "step": 5443
    },
    {
      "epoch": 0.15881906762354864,
      "grad_norm": 0.7641537478070192,
      "learning_rate": 9.57121777692537e-06,
      "loss": 0.1809,
      "step": 5444
    },
    {
      "epoch": 0.1588482408541922,
      "grad_norm": 1.1083848267124718,
      "learning_rate": 9.571026342709508e-06,
      "loss": 0.184,
      "step": 5445
    },
    {
      "epoch": 0.15887741408483574,
      "grad_norm": 0.7476783036580589,
      "learning_rate": 9.570834867684615e-06,
      "loss": 0.1665,
      "step": 5446
    },
    {
      "epoch": 0.15890658731547933,
      "grad_norm": 0.6403376710925319,
      "learning_rate": 9.5706433518524e-06,
      "loss": 0.1715,
      "step": 5447
    },
    {
      "epoch": 0.15893576054612288,
      "grad_norm": 0.9263578967867363,
      "learning_rate": 9.57045179521457e-06,
      "loss": 0.1795,
      "step": 5448
    },
    {
      "epoch": 0.15896493377676643,
      "grad_norm": 0.8908157844591182,
      "learning_rate": 9.570260197772838e-06,
      "loss": 0.1744,
      "step": 5449
    },
    {
      "epoch": 0.15899410700741,
      "grad_norm": 0.7682575593731672,
      "learning_rate": 9.570068559528915e-06,
      "loss": 0.1492,
      "step": 5450
    },
    {
      "epoch": 0.15902328023805357,
      "grad_norm": 0.920412489242452,
      "learning_rate": 9.56987688048451e-06,
      "loss": 0.1744,
      "step": 5451
    },
    {
      "epoch": 0.15905245346869712,
      "grad_norm": 0.8094421064257414,
      "learning_rate": 9.569685160641335e-06,
      "loss": 0.1601,
      "step": 5452
    },
    {
      "epoch": 0.15908162669934067,
      "grad_norm": 0.9815816069454216,
      "learning_rate": 9.569493400001102e-06,
      "loss": 0.1831,
      "step": 5453
    },
    {
      "epoch": 0.15911079992998425,
      "grad_norm": 0.8030345602944954,
      "learning_rate": 9.569301598565523e-06,
      "loss": 0.1607,
      "step": 5454
    },
    {
      "epoch": 0.1591399731606278,
      "grad_norm": 0.931746539425742,
      "learning_rate": 9.56910975633631e-06,
      "loss": 0.1863,
      "step": 5455
    },
    {
      "epoch": 0.15916914639127136,
      "grad_norm": 0.9966607132436579,
      "learning_rate": 9.568917873315176e-06,
      "loss": 0.1812,
      "step": 5456
    },
    {
      "epoch": 0.15919831962191494,
      "grad_norm": 0.861422562580085,
      "learning_rate": 9.568725949503834e-06,
      "loss": 0.1611,
      "step": 5457
    },
    {
      "epoch": 0.1592274928525585,
      "grad_norm": 0.7667715833184627,
      "learning_rate": 9.568533984903999e-06,
      "loss": 0.1589,
      "step": 5458
    },
    {
      "epoch": 0.15925666608320205,
      "grad_norm": 0.7941531497924824,
      "learning_rate": 9.568341979517379e-06,
      "loss": 0.1553,
      "step": 5459
    },
    {
      "epoch": 0.15928583931384563,
      "grad_norm": 0.8236868673519583,
      "learning_rate": 9.568149933345696e-06,
      "loss": 0.1503,
      "step": 5460
    },
    {
      "epoch": 0.15931501254448918,
      "grad_norm": 0.9462066861728884,
      "learning_rate": 9.567957846390659e-06,
      "loss": 0.1653,
      "step": 5461
    },
    {
      "epoch": 0.15934418577513274,
      "grad_norm": 0.7394685143027268,
      "learning_rate": 9.567765718653985e-06,
      "loss": 0.148,
      "step": 5462
    },
    {
      "epoch": 0.1593733590057763,
      "grad_norm": 0.8259509489049427,
      "learning_rate": 9.56757355013739e-06,
      "loss": 0.1822,
      "step": 5463
    },
    {
      "epoch": 0.15940253223641987,
      "grad_norm": 0.7205320025845744,
      "learning_rate": 9.567381340842587e-06,
      "loss": 0.1607,
      "step": 5464
    },
    {
      "epoch": 0.15943170546706342,
      "grad_norm": 0.9278171605585844,
      "learning_rate": 9.567189090771297e-06,
      "loss": 0.1819,
      "step": 5465
    },
    {
      "epoch": 0.15946087869770698,
      "grad_norm": 0.7434139517527243,
      "learning_rate": 9.56699679992523e-06,
      "loss": 0.1654,
      "step": 5466
    },
    {
      "epoch": 0.15949005192835056,
      "grad_norm": 0.7608680124730055,
      "learning_rate": 9.566804468306106e-06,
      "loss": 0.1971,
      "step": 5467
    },
    {
      "epoch": 0.1595192251589941,
      "grad_norm": 0.8194086504675923,
      "learning_rate": 9.566612095915645e-06,
      "loss": 0.1558,
      "step": 5468
    },
    {
      "epoch": 0.15954839838963766,
      "grad_norm": 0.8348174586247553,
      "learning_rate": 9.566419682755556e-06,
      "loss": 0.1753,
      "step": 5469
    },
    {
      "epoch": 0.15957757162028122,
      "grad_norm": 0.7991239621964856,
      "learning_rate": 9.566227228827567e-06,
      "loss": 0.1789,
      "step": 5470
    },
    {
      "epoch": 0.1596067448509248,
      "grad_norm": 1.142913604038722,
      "learning_rate": 9.566034734133389e-06,
      "loss": 0.1584,
      "step": 5471
    },
    {
      "epoch": 0.15963591808156835,
      "grad_norm": 1.2896668932997617,
      "learning_rate": 9.565842198674745e-06,
      "loss": 0.1798,
      "step": 5472
    },
    {
      "epoch": 0.1596650913122119,
      "grad_norm": 0.6454306703971007,
      "learning_rate": 9.565649622453348e-06,
      "loss": 0.1493,
      "step": 5473
    },
    {
      "epoch": 0.15969426454285549,
      "grad_norm": 0.815570010582798,
      "learning_rate": 9.565457005470924e-06,
      "loss": 0.1752,
      "step": 5474
    },
    {
      "epoch": 0.15972343777349904,
      "grad_norm": 0.8262825873337042,
      "learning_rate": 9.565264347729188e-06,
      "loss": 0.202,
      "step": 5475
    },
    {
      "epoch": 0.1597526110041426,
      "grad_norm": 0.7935681384870441,
      "learning_rate": 9.565071649229864e-06,
      "loss": 0.1589,
      "step": 5476
    },
    {
      "epoch": 0.15978178423478617,
      "grad_norm": 0.8986351776738177,
      "learning_rate": 9.564878909974668e-06,
      "loss": 0.1599,
      "step": 5477
    },
    {
      "epoch": 0.15981095746542973,
      "grad_norm": 1.1528515935918588,
      "learning_rate": 9.564686129965324e-06,
      "loss": 0.1857,
      "step": 5478
    },
    {
      "epoch": 0.15984013069607328,
      "grad_norm": 0.8541012777506539,
      "learning_rate": 9.56449330920355e-06,
      "loss": 0.1662,
      "step": 5479
    },
    {
      "epoch": 0.15986930392671683,
      "grad_norm": 0.8620862149959613,
      "learning_rate": 9.564300447691073e-06,
      "loss": 0.1712,
      "step": 5480
    },
    {
      "epoch": 0.1598984771573604,
      "grad_norm": 0.8990771412602441,
      "learning_rate": 9.564107545429609e-06,
      "loss": 0.1683,
      "step": 5481
    },
    {
      "epoch": 0.15992765038800397,
      "grad_norm": 0.9941917188876096,
      "learning_rate": 9.563914602420882e-06,
      "loss": 0.1797,
      "step": 5482
    },
    {
      "epoch": 0.15995682361864752,
      "grad_norm": 0.9751589699648194,
      "learning_rate": 9.563721618666616e-06,
      "loss": 0.1647,
      "step": 5483
    },
    {
      "epoch": 0.1599859968492911,
      "grad_norm": 0.7272839451291004,
      "learning_rate": 9.563528594168533e-06,
      "loss": 0.145,
      "step": 5484
    },
    {
      "epoch": 0.16001517007993465,
      "grad_norm": 1.0892100242230787,
      "learning_rate": 9.563335528928355e-06,
      "loss": 0.1563,
      "step": 5485
    },
    {
      "epoch": 0.1600443433105782,
      "grad_norm": 0.9542053547141254,
      "learning_rate": 9.563142422947806e-06,
      "loss": 0.1882,
      "step": 5486
    },
    {
      "epoch": 0.1600735165412218,
      "grad_norm": 0.7216372949107076,
      "learning_rate": 9.562949276228612e-06,
      "loss": 0.1576,
      "step": 5487
    },
    {
      "epoch": 0.16010268977186534,
      "grad_norm": 0.9598284668236613,
      "learning_rate": 9.562756088772496e-06,
      "loss": 0.164,
      "step": 5488
    },
    {
      "epoch": 0.1601318630025089,
      "grad_norm": 0.8168559170918406,
      "learning_rate": 9.562562860581183e-06,
      "loss": 0.1567,
      "step": 5489
    },
    {
      "epoch": 0.16016103623315245,
      "grad_norm": 0.9889863519411258,
      "learning_rate": 9.562369591656397e-06,
      "loss": 0.1995,
      "step": 5490
    },
    {
      "epoch": 0.16019020946379603,
      "grad_norm": 0.9228763922672527,
      "learning_rate": 9.562176281999866e-06,
      "loss": 0.1785,
      "step": 5491
    },
    {
      "epoch": 0.16021938269443958,
      "grad_norm": 1.0300716925012865,
      "learning_rate": 9.561982931613314e-06,
      "loss": 0.1806,
      "step": 5492
    },
    {
      "epoch": 0.16024855592508314,
      "grad_norm": 0.8421692313466544,
      "learning_rate": 9.561789540498466e-06,
      "loss": 0.1564,
      "step": 5493
    },
    {
      "epoch": 0.16027772915572672,
      "grad_norm": 0.7979222576177974,
      "learning_rate": 9.56159610865705e-06,
      "loss": 0.1791,
      "step": 5494
    },
    {
      "epoch": 0.16030690238637027,
      "grad_norm": 0.8987821980671169,
      "learning_rate": 9.561402636090795e-06,
      "loss": 0.1549,
      "step": 5495
    },
    {
      "epoch": 0.16033607561701382,
      "grad_norm": 0.8303855245795769,
      "learning_rate": 9.561209122801424e-06,
      "loss": 0.1734,
      "step": 5496
    },
    {
      "epoch": 0.16036524884765738,
      "grad_norm": 0.8530449356921735,
      "learning_rate": 9.561015568790667e-06,
      "loss": 0.1541,
      "step": 5497
    },
    {
      "epoch": 0.16039442207830096,
      "grad_norm": 1.0519487125777036,
      "learning_rate": 9.560821974060253e-06,
      "loss": 0.1764,
      "step": 5498
    },
    {
      "epoch": 0.1604235953089445,
      "grad_norm": 0.7882587733865246,
      "learning_rate": 9.56062833861191e-06,
      "loss": 0.1941,
      "step": 5499
    },
    {
      "epoch": 0.16045276853958806,
      "grad_norm": 0.7412433977247188,
      "learning_rate": 9.560434662447364e-06,
      "loss": 0.1807,
      "step": 5500
    },
    {
      "epoch": 0.16048194177023165,
      "grad_norm": 0.7990183668196447,
      "learning_rate": 9.560240945568346e-06,
      "loss": 0.1818,
      "step": 5501
    },
    {
      "epoch": 0.1605111150008752,
      "grad_norm": 0.8293011877629302,
      "learning_rate": 9.560047187976586e-06,
      "loss": 0.1754,
      "step": 5502
    },
    {
      "epoch": 0.16054028823151875,
      "grad_norm": 0.8004321008653155,
      "learning_rate": 9.559853389673814e-06,
      "loss": 0.1708,
      "step": 5503
    },
    {
      "epoch": 0.16056946146216233,
      "grad_norm": 0.7603853919729233,
      "learning_rate": 9.559659550661759e-06,
      "loss": 0.1696,
      "step": 5504
    },
    {
      "epoch": 0.16059863469280589,
      "grad_norm": 0.8410948698440288,
      "learning_rate": 9.559465670942151e-06,
      "loss": 0.1771,
      "step": 5505
    },
    {
      "epoch": 0.16062780792344944,
      "grad_norm": 0.784680841322243,
      "learning_rate": 9.559271750516723e-06,
      "loss": 0.168,
      "step": 5506
    },
    {
      "epoch": 0.160656981154093,
      "grad_norm": 0.8670621225688402,
      "learning_rate": 9.559077789387204e-06,
      "loss": 0.1869,
      "step": 5507
    },
    {
      "epoch": 0.16068615438473657,
      "grad_norm": 0.9298948577198906,
      "learning_rate": 9.558883787555328e-06,
      "loss": 0.1893,
      "step": 5508
    },
    {
      "epoch": 0.16071532761538013,
      "grad_norm": 1.0162196307683045,
      "learning_rate": 9.558689745022825e-06,
      "loss": 0.1788,
      "step": 5509
    },
    {
      "epoch": 0.16074450084602368,
      "grad_norm": 1.1083655323504589,
      "learning_rate": 9.558495661791429e-06,
      "loss": 0.1449,
      "step": 5510
    },
    {
      "epoch": 0.16077367407666726,
      "grad_norm": 0.9613057259010386,
      "learning_rate": 9.558301537862873e-06,
      "loss": 0.1608,
      "step": 5511
    },
    {
      "epoch": 0.16080284730731081,
      "grad_norm": 0.8995678877128398,
      "learning_rate": 9.558107373238887e-06,
      "loss": 0.1601,
      "step": 5512
    },
    {
      "epoch": 0.16083202053795437,
      "grad_norm": 1.0690399837000504,
      "learning_rate": 9.557913167921206e-06,
      "loss": 0.1552,
      "step": 5513
    },
    {
      "epoch": 0.16086119376859792,
      "grad_norm": 0.9360316503120157,
      "learning_rate": 9.557718921911567e-06,
      "loss": 0.157,
      "step": 5514
    },
    {
      "epoch": 0.1608903669992415,
      "grad_norm": 0.9575391506624618,
      "learning_rate": 9.5575246352117e-06,
      "loss": 0.1924,
      "step": 5515
    },
    {
      "epoch": 0.16091954022988506,
      "grad_norm": 0.9290381634060488,
      "learning_rate": 9.55733030782334e-06,
      "loss": 0.1749,
      "step": 5516
    },
    {
      "epoch": 0.1609487134605286,
      "grad_norm": 0.7528786554678382,
      "learning_rate": 9.557135939748224e-06,
      "loss": 0.1837,
      "step": 5517
    },
    {
      "epoch": 0.1609778866911722,
      "grad_norm": 1.0485476870540271,
      "learning_rate": 9.556941530988087e-06,
      "loss": 0.1937,
      "step": 5518
    },
    {
      "epoch": 0.16100705992181574,
      "grad_norm": 0.9967608410686682,
      "learning_rate": 9.556747081544663e-06,
      "loss": 0.1733,
      "step": 5519
    },
    {
      "epoch": 0.1610362331524593,
      "grad_norm": 0.7148948430951124,
      "learning_rate": 9.556552591419688e-06,
      "loss": 0.1674,
      "step": 5520
    },
    {
      "epoch": 0.16106540638310288,
      "grad_norm": 1.1189871928418456,
      "learning_rate": 9.556358060614901e-06,
      "loss": 0.1876,
      "step": 5521
    },
    {
      "epoch": 0.16109457961374643,
      "grad_norm": 0.9812964305816959,
      "learning_rate": 9.556163489132036e-06,
      "loss": 0.1999,
      "step": 5522
    },
    {
      "epoch": 0.16112375284438998,
      "grad_norm": 0.8065992828984858,
      "learning_rate": 9.55596887697283e-06,
      "loss": 0.1967,
      "step": 5523
    },
    {
      "epoch": 0.16115292607503354,
      "grad_norm": 1.0285633053461214,
      "learning_rate": 9.555774224139022e-06,
      "loss": 0.1568,
      "step": 5524
    },
    {
      "epoch": 0.16118209930567712,
      "grad_norm": 0.8012421283696906,
      "learning_rate": 9.555579530632351e-06,
      "loss": 0.1449,
      "step": 5525
    },
    {
      "epoch": 0.16121127253632067,
      "grad_norm": 0.8074301832246265,
      "learning_rate": 9.555384796454551e-06,
      "loss": 0.1733,
      "step": 5526
    },
    {
      "epoch": 0.16124044576696422,
      "grad_norm": 0.9412707946278771,
      "learning_rate": 9.555190021607364e-06,
      "loss": 0.1658,
      "step": 5527
    },
    {
      "epoch": 0.1612696189976078,
      "grad_norm": 1.0414637734958772,
      "learning_rate": 9.554995206092527e-06,
      "loss": 0.2106,
      "step": 5528
    },
    {
      "epoch": 0.16129879222825136,
      "grad_norm": 0.8999965230351273,
      "learning_rate": 9.554800349911784e-06,
      "loss": 0.1802,
      "step": 5529
    },
    {
      "epoch": 0.1613279654588949,
      "grad_norm": 0.9708105064062906,
      "learning_rate": 9.554605453066868e-06,
      "loss": 0.2027,
      "step": 5530
    },
    {
      "epoch": 0.1613571386895385,
      "grad_norm": 0.8076048243727747,
      "learning_rate": 9.55441051555952e-06,
      "loss": 0.169,
      "step": 5531
    },
    {
      "epoch": 0.16138631192018205,
      "grad_norm": 0.9693097447828146,
      "learning_rate": 9.554215537391485e-06,
      "loss": 0.1857,
      "step": 5532
    },
    {
      "epoch": 0.1614154851508256,
      "grad_norm": 0.68000558152758,
      "learning_rate": 9.5540205185645e-06,
      "loss": 0.1715,
      "step": 5533
    },
    {
      "epoch": 0.16144465838146915,
      "grad_norm": 0.9131535674772204,
      "learning_rate": 9.553825459080306e-06,
      "loss": 0.1742,
      "step": 5534
    },
    {
      "epoch": 0.16147383161211273,
      "grad_norm": 0.9443091267167332,
      "learning_rate": 9.553630358940647e-06,
      "loss": 0.1684,
      "step": 5535
    },
    {
      "epoch": 0.1615030048427563,
      "grad_norm": 0.8825146081504416,
      "learning_rate": 9.553435218147262e-06,
      "loss": 0.1637,
      "step": 5536
    },
    {
      "epoch": 0.16153217807339984,
      "grad_norm": 0.8556868523999357,
      "learning_rate": 9.553240036701893e-06,
      "loss": 0.181,
      "step": 5537
    },
    {
      "epoch": 0.16156135130404342,
      "grad_norm": 1.0266545781521235,
      "learning_rate": 9.553044814606287e-06,
      "loss": 0.1845,
      "step": 5538
    },
    {
      "epoch": 0.16159052453468697,
      "grad_norm": 0.8386571943758669,
      "learning_rate": 9.552849551862182e-06,
      "loss": 0.1725,
      "step": 5539
    },
    {
      "epoch": 0.16161969776533053,
      "grad_norm": 0.7416334146289594,
      "learning_rate": 9.552654248471323e-06,
      "loss": 0.1419,
      "step": 5540
    },
    {
      "epoch": 0.16164887099597408,
      "grad_norm": 0.7914754642929637,
      "learning_rate": 9.552458904435454e-06,
      "loss": 0.1788,
      "step": 5541
    },
    {
      "epoch": 0.16167804422661766,
      "grad_norm": 0.8697126259385773,
      "learning_rate": 9.55226351975632e-06,
      "loss": 0.1791,
      "step": 5542
    },
    {
      "epoch": 0.16170721745726122,
      "grad_norm": 0.8546440583131948,
      "learning_rate": 9.552068094435663e-06,
      "loss": 0.1811,
      "step": 5543
    },
    {
      "epoch": 0.16173639068790477,
      "grad_norm": 0.7984828349344645,
      "learning_rate": 9.551872628475227e-06,
      "loss": 0.1487,
      "step": 5544
    },
    {
      "epoch": 0.16176556391854835,
      "grad_norm": 0.8557930680296238,
      "learning_rate": 9.551677121876761e-06,
      "loss": 0.1632,
      "step": 5545
    },
    {
      "epoch": 0.1617947371491919,
      "grad_norm": 0.8212788916557553,
      "learning_rate": 9.551481574642008e-06,
      "loss": 0.1918,
      "step": 5546
    },
    {
      "epoch": 0.16182391037983546,
      "grad_norm": 0.893961801426182,
      "learning_rate": 9.551285986772714e-06,
      "loss": 0.1712,
      "step": 5547
    },
    {
      "epoch": 0.16185308361047904,
      "grad_norm": 1.05923268609053,
      "learning_rate": 9.551090358270624e-06,
      "loss": 0.1753,
      "step": 5548
    },
    {
      "epoch": 0.1618822568411226,
      "grad_norm": 0.8934698772171971,
      "learning_rate": 9.550894689137487e-06,
      "loss": 0.1622,
      "step": 5549
    },
    {
      "epoch": 0.16191143007176614,
      "grad_norm": 1.0534821834609347,
      "learning_rate": 9.550698979375046e-06,
      "loss": 0.2013,
      "step": 5550
    },
    {
      "epoch": 0.1619406033024097,
      "grad_norm": 0.8065592466817278,
      "learning_rate": 9.550503228985053e-06,
      "loss": 0.1754,
      "step": 5551
    },
    {
      "epoch": 0.16196977653305328,
      "grad_norm": 1.027763839163784,
      "learning_rate": 9.550307437969254e-06,
      "loss": 0.1603,
      "step": 5552
    },
    {
      "epoch": 0.16199894976369683,
      "grad_norm": 0.7821991728621691,
      "learning_rate": 9.550111606329396e-06,
      "loss": 0.185,
      "step": 5553
    },
    {
      "epoch": 0.16202812299434038,
      "grad_norm": 0.9745333289682548,
      "learning_rate": 9.549915734067229e-06,
      "loss": 0.1697,
      "step": 5554
    },
    {
      "epoch": 0.16205729622498397,
      "grad_norm": 0.959499070010955,
      "learning_rate": 9.549719821184498e-06,
      "loss": 0.1752,
      "step": 5555
    },
    {
      "epoch": 0.16208646945562752,
      "grad_norm": 0.6914850172054944,
      "learning_rate": 9.549523867682955e-06,
      "loss": 0.1589,
      "step": 5556
    },
    {
      "epoch": 0.16211564268627107,
      "grad_norm": 0.8649204270790649,
      "learning_rate": 9.54932787356435e-06,
      "loss": 0.1686,
      "step": 5557
    },
    {
      "epoch": 0.16214481591691463,
      "grad_norm": 0.9069323247739839,
      "learning_rate": 9.54913183883043e-06,
      "loss": 0.1747,
      "step": 5558
    },
    {
      "epoch": 0.1621739891475582,
      "grad_norm": 0.7860750154736431,
      "learning_rate": 9.548935763482949e-06,
      "loss": 0.1626,
      "step": 5559
    },
    {
      "epoch": 0.16220316237820176,
      "grad_norm": 0.7955310104902404,
      "learning_rate": 9.548739647523654e-06,
      "loss": 0.1506,
      "step": 5560
    },
    {
      "epoch": 0.1622323356088453,
      "grad_norm": 1.0055939065865114,
      "learning_rate": 9.548543490954299e-06,
      "loss": 0.1723,
      "step": 5561
    },
    {
      "epoch": 0.1622615088394889,
      "grad_norm": 0.8232828850756518,
      "learning_rate": 9.548347293776632e-06,
      "loss": 0.1868,
      "step": 5562
    },
    {
      "epoch": 0.16229068207013245,
      "grad_norm": 0.8850197389033428,
      "learning_rate": 9.548151055992407e-06,
      "loss": 0.1684,
      "step": 5563
    },
    {
      "epoch": 0.162319855300776,
      "grad_norm": 0.9326340762746111,
      "learning_rate": 9.547954777603374e-06,
      "loss": 0.1787,
      "step": 5564
    },
    {
      "epoch": 0.16234902853141958,
      "grad_norm": 0.80297701862277,
      "learning_rate": 9.547758458611287e-06,
      "loss": 0.1705,
      "step": 5565
    },
    {
      "epoch": 0.16237820176206313,
      "grad_norm": 0.7602025155635146,
      "learning_rate": 9.5475620990179e-06,
      "loss": 0.1827,
      "step": 5566
    },
    {
      "epoch": 0.1624073749927067,
      "grad_norm": 1.3730000806359959,
      "learning_rate": 9.547365698824962e-06,
      "loss": 0.1789,
      "step": 5567
    },
    {
      "epoch": 0.16243654822335024,
      "grad_norm": 0.8780466005884809,
      "learning_rate": 9.547169258034228e-06,
      "loss": 0.16,
      "step": 5568
    },
    {
      "epoch": 0.16246572145399382,
      "grad_norm": 0.7973441690357147,
      "learning_rate": 9.546972776647454e-06,
      "loss": 0.1688,
      "step": 5569
    },
    {
      "epoch": 0.16249489468463738,
      "grad_norm": 0.9387914187545926,
      "learning_rate": 9.546776254666392e-06,
      "loss": 0.1712,
      "step": 5570
    },
    {
      "epoch": 0.16252406791528093,
      "grad_norm": 1.0942556767713258,
      "learning_rate": 9.546579692092797e-06,
      "loss": 0.1728,
      "step": 5571
    },
    {
      "epoch": 0.1625532411459245,
      "grad_norm": 0.8994339052118556,
      "learning_rate": 9.546383088928423e-06,
      "loss": 0.1851,
      "step": 5572
    },
    {
      "epoch": 0.16258241437656806,
      "grad_norm": 0.8631096758899228,
      "learning_rate": 9.546186445175027e-06,
      "loss": 0.1838,
      "step": 5573
    },
    {
      "epoch": 0.16261158760721162,
      "grad_norm": 1.0114924830145064,
      "learning_rate": 9.545989760834365e-06,
      "loss": 0.1857,
      "step": 5574
    },
    {
      "epoch": 0.1626407608378552,
      "grad_norm": 0.8805384329134841,
      "learning_rate": 9.545793035908188e-06,
      "loss": 0.1674,
      "step": 5575
    },
    {
      "epoch": 0.16266993406849875,
      "grad_norm": 0.7961742170072749,
      "learning_rate": 9.545596270398258e-06,
      "loss": 0.1974,
      "step": 5576
    },
    {
      "epoch": 0.1626991072991423,
      "grad_norm": 0.9199810343204181,
      "learning_rate": 9.54539946430633e-06,
      "loss": 0.1811,
      "step": 5577
    },
    {
      "epoch": 0.16272828052978586,
      "grad_norm": 0.8605639682493905,
      "learning_rate": 9.545202617634162e-06,
      "loss": 0.16,
      "step": 5578
    },
    {
      "epoch": 0.16275745376042944,
      "grad_norm": 0.989559048965002,
      "learning_rate": 9.545005730383508e-06,
      "loss": 0.1785,
      "step": 5579
    },
    {
      "epoch": 0.162786626991073,
      "grad_norm": 0.8561875892868223,
      "learning_rate": 9.544808802556129e-06,
      "loss": 0.1726,
      "step": 5580
    },
    {
      "epoch": 0.16281580022171654,
      "grad_norm": 0.8761340504741546,
      "learning_rate": 9.544611834153781e-06,
      "loss": 0.1483,
      "step": 5581
    },
    {
      "epoch": 0.16284497345236013,
      "grad_norm": 0.9637252105261748,
      "learning_rate": 9.544414825178223e-06,
      "loss": 0.1778,
      "step": 5582
    },
    {
      "epoch": 0.16287414668300368,
      "grad_norm": 0.8010618817615577,
      "learning_rate": 9.544217775631215e-06,
      "loss": 0.1713,
      "step": 5583
    },
    {
      "epoch": 0.16290331991364723,
      "grad_norm": 0.8309504926748028,
      "learning_rate": 9.544020685514515e-06,
      "loss": 0.1451,
      "step": 5584
    },
    {
      "epoch": 0.16293249314429079,
      "grad_norm": 0.8439330817438556,
      "learning_rate": 9.543823554829884e-06,
      "loss": 0.1656,
      "step": 5585
    },
    {
      "epoch": 0.16296166637493437,
      "grad_norm": 0.6605145046534995,
      "learning_rate": 9.54362638357908e-06,
      "loss": 0.1534,
      "step": 5586
    },
    {
      "epoch": 0.16299083960557792,
      "grad_norm": 0.7667144900167461,
      "learning_rate": 9.543429171763865e-06,
      "loss": 0.1763,
      "step": 5587
    },
    {
      "epoch": 0.16302001283622147,
      "grad_norm": 0.8680758585249889,
      "learning_rate": 9.543231919385999e-06,
      "loss": 0.1585,
      "step": 5588
    },
    {
      "epoch": 0.16304918606686505,
      "grad_norm": 0.7830878767600102,
      "learning_rate": 9.543034626447243e-06,
      "loss": 0.1641,
      "step": 5589
    },
    {
      "epoch": 0.1630783592975086,
      "grad_norm": 0.863822806015243,
      "learning_rate": 9.542837292949358e-06,
      "loss": 0.1905,
      "step": 5590
    },
    {
      "epoch": 0.16310753252815216,
      "grad_norm": 0.7331609304442888,
      "learning_rate": 9.542639918894105e-06,
      "loss": 0.171,
      "step": 5591
    },
    {
      "epoch": 0.16313670575879574,
      "grad_norm": 0.8984095274978744,
      "learning_rate": 9.542442504283249e-06,
      "loss": 0.1641,
      "step": 5592
    },
    {
      "epoch": 0.1631658789894393,
      "grad_norm": 0.8234488756856335,
      "learning_rate": 9.542245049118551e-06,
      "loss": 0.1696,
      "step": 5593
    },
    {
      "epoch": 0.16319505222008285,
      "grad_norm": 0.7996534533990739,
      "learning_rate": 9.542047553401773e-06,
      "loss": 0.174,
      "step": 5594
    },
    {
      "epoch": 0.1632242254507264,
      "grad_norm": 0.9163249519287838,
      "learning_rate": 9.541850017134678e-06,
      "loss": 0.174,
      "step": 5595
    },
    {
      "epoch": 0.16325339868136998,
      "grad_norm": 0.8898491930885384,
      "learning_rate": 9.54165244031903e-06,
      "loss": 0.191,
      "step": 5596
    },
    {
      "epoch": 0.16328257191201354,
      "grad_norm": 0.7146522455286817,
      "learning_rate": 9.541454822956592e-06,
      "loss": 0.1731,
      "step": 5597
    },
    {
      "epoch": 0.1633117451426571,
      "grad_norm": 0.9071124784826288,
      "learning_rate": 9.541257165049132e-06,
      "loss": 0.1873,
      "step": 5598
    },
    {
      "epoch": 0.16334091837330067,
      "grad_norm": 1.0050295452380662,
      "learning_rate": 9.541059466598413e-06,
      "loss": 0.1836,
      "step": 5599
    },
    {
      "epoch": 0.16337009160394422,
      "grad_norm": 0.960905206118692,
      "learning_rate": 9.540861727606196e-06,
      "loss": 0.1866,
      "step": 5600
    },
    {
      "epoch": 0.16339926483458778,
      "grad_norm": 1.0512020080564881,
      "learning_rate": 9.540663948074251e-06,
      "loss": 0.1609,
      "step": 5601
    },
    {
      "epoch": 0.16342843806523136,
      "grad_norm": 0.9435689980908454,
      "learning_rate": 9.540466128004342e-06,
      "loss": 0.1866,
      "step": 5602
    },
    {
      "epoch": 0.1634576112958749,
      "grad_norm": 1.1703447518077454,
      "learning_rate": 9.540268267398237e-06,
      "loss": 0.1726,
      "step": 5603
    },
    {
      "epoch": 0.16348678452651846,
      "grad_norm": 1.488296337065139,
      "learning_rate": 9.540070366257699e-06,
      "loss": 0.1829,
      "step": 5604
    },
    {
      "epoch": 0.16351595775716202,
      "grad_norm": 0.922847693332177,
      "learning_rate": 9.539872424584496e-06,
      "loss": 0.1969,
      "step": 5605
    },
    {
      "epoch": 0.1635451309878056,
      "grad_norm": 0.9796080261896619,
      "learning_rate": 9.539674442380397e-06,
      "loss": 0.1792,
      "step": 5606
    },
    {
      "epoch": 0.16357430421844915,
      "grad_norm": 1.1559993570626443,
      "learning_rate": 9.539476419647168e-06,
      "loss": 0.1811,
      "step": 5607
    },
    {
      "epoch": 0.1636034774490927,
      "grad_norm": 1.002853243594281,
      "learning_rate": 9.539278356386577e-06,
      "loss": 0.1813,
      "step": 5608
    },
    {
      "epoch": 0.16363265067973629,
      "grad_norm": 1.042737315841317,
      "learning_rate": 9.539080252600392e-06,
      "loss": 0.1737,
      "step": 5609
    },
    {
      "epoch": 0.16366182391037984,
      "grad_norm": 0.8994279842767887,
      "learning_rate": 9.538882108290384e-06,
      "loss": 0.1659,
      "step": 5610
    },
    {
      "epoch": 0.1636909971410234,
      "grad_norm": 1.1451182307194003,
      "learning_rate": 9.538683923458319e-06,
      "loss": 0.1722,
      "step": 5611
    },
    {
      "epoch": 0.16372017037166695,
      "grad_norm": 0.8847074219193041,
      "learning_rate": 9.538485698105965e-06,
      "loss": 0.1794,
      "step": 5612
    },
    {
      "epoch": 0.16374934360231053,
      "grad_norm": 0.8445970893768949,
      "learning_rate": 9.538287432235096e-06,
      "loss": 0.1737,
      "step": 5613
    },
    {
      "epoch": 0.16377851683295408,
      "grad_norm": 0.9592856169076226,
      "learning_rate": 9.53808912584748e-06,
      "loss": 0.1914,
      "step": 5614
    },
    {
      "epoch": 0.16380769006359763,
      "grad_norm": 0.7155258987602527,
      "learning_rate": 9.53789077894489e-06,
      "loss": 0.1689,
      "step": 5615
    },
    {
      "epoch": 0.16383686329424121,
      "grad_norm": 0.7297140127836609,
      "learning_rate": 9.537692391529093e-06,
      "loss": 0.1556,
      "step": 5616
    },
    {
      "epoch": 0.16386603652488477,
      "grad_norm": 0.98604310466573,
      "learning_rate": 9.53749396360186e-06,
      "loss": 0.1589,
      "step": 5617
    },
    {
      "epoch": 0.16389520975552832,
      "grad_norm": 0.7811506326450793,
      "learning_rate": 9.537295495164965e-06,
      "loss": 0.1926,
      "step": 5618
    },
    {
      "epoch": 0.1639243829861719,
      "grad_norm": 0.828578056936563,
      "learning_rate": 9.537096986220177e-06,
      "loss": 0.1641,
      "step": 5619
    },
    {
      "epoch": 0.16395355621681545,
      "grad_norm": 0.9639280325185331,
      "learning_rate": 9.536898436769273e-06,
      "loss": 0.1866,
      "step": 5620
    },
    {
      "epoch": 0.163982729447459,
      "grad_norm": 0.7248176740815467,
      "learning_rate": 9.536699846814023e-06,
      "loss": 0.1743,
      "step": 5621
    },
    {
      "epoch": 0.16401190267810256,
      "grad_norm": 0.8192479076642077,
      "learning_rate": 9.536501216356198e-06,
      "loss": 0.1711,
      "step": 5622
    },
    {
      "epoch": 0.16404107590874614,
      "grad_norm": 0.8799247150203047,
      "learning_rate": 9.536302545397575e-06,
      "loss": 0.2011,
      "step": 5623
    },
    {
      "epoch": 0.1640702491393897,
      "grad_norm": 0.7990696505438856,
      "learning_rate": 9.536103833939924e-06,
      "loss": 0.1714,
      "step": 5624
    },
    {
      "epoch": 0.16409942237003325,
      "grad_norm": 0.8583153072459949,
      "learning_rate": 9.535905081985022e-06,
      "loss": 0.1907,
      "step": 5625
    },
    {
      "epoch": 0.16412859560067683,
      "grad_norm": 0.8126369465430842,
      "learning_rate": 9.53570628953464e-06,
      "loss": 0.149,
      "step": 5626
    },
    {
      "epoch": 0.16415776883132038,
      "grad_norm": 0.8111059379127915,
      "learning_rate": 9.535507456590559e-06,
      "loss": 0.1939,
      "step": 5627
    },
    {
      "epoch": 0.16418694206196394,
      "grad_norm": 0.7953149377586634,
      "learning_rate": 9.535308583154546e-06,
      "loss": 0.1913,
      "step": 5628
    },
    {
      "epoch": 0.1642161152926075,
      "grad_norm": 1.0358783893962589,
      "learning_rate": 9.535109669228383e-06,
      "loss": 0.1657,
      "step": 5629
    },
    {
      "epoch": 0.16424528852325107,
      "grad_norm": 0.9209626254686679,
      "learning_rate": 9.534910714813843e-06,
      "loss": 0.1766,
      "step": 5630
    },
    {
      "epoch": 0.16427446175389462,
      "grad_norm": 0.7619437338287154,
      "learning_rate": 9.534711719912701e-06,
      "loss": 0.171,
      "step": 5631
    },
    {
      "epoch": 0.16430363498453818,
      "grad_norm": 1.101362466756387,
      "learning_rate": 9.534512684526738e-06,
      "loss": 0.1589,
      "step": 5632
    },
    {
      "epoch": 0.16433280821518176,
      "grad_norm": 0.9187218551616347,
      "learning_rate": 9.534313608657728e-06,
      "loss": 0.1726,
      "step": 5633
    },
    {
      "epoch": 0.1643619814458253,
      "grad_norm": 0.6625857365164486,
      "learning_rate": 9.534114492307447e-06,
      "loss": 0.1282,
      "step": 5634
    },
    {
      "epoch": 0.16439115467646886,
      "grad_norm": 1.0032110101859075,
      "learning_rate": 9.533915335477675e-06,
      "loss": 0.2211,
      "step": 5635
    },
    {
      "epoch": 0.16442032790711245,
      "grad_norm": 0.9061869707204406,
      "learning_rate": 9.53371613817019e-06,
      "loss": 0.1721,
      "step": 5636
    },
    {
      "epoch": 0.164449501137756,
      "grad_norm": 0.8174193509889471,
      "learning_rate": 9.533516900386768e-06,
      "loss": 0.1486,
      "step": 5637
    },
    {
      "epoch": 0.16447867436839955,
      "grad_norm": 0.848974270108626,
      "learning_rate": 9.53331762212919e-06,
      "loss": 0.195,
      "step": 5638
    },
    {
      "epoch": 0.1645078475990431,
      "grad_norm": 0.8537514937918482,
      "learning_rate": 9.533118303399234e-06,
      "loss": 0.1512,
      "step": 5639
    },
    {
      "epoch": 0.1645370208296867,
      "grad_norm": 0.7619147168312305,
      "learning_rate": 9.53291894419868e-06,
      "loss": 0.1543,
      "step": 5640
    },
    {
      "epoch": 0.16456619406033024,
      "grad_norm": 0.7312178056896909,
      "learning_rate": 9.53271954452931e-06,
      "loss": 0.1475,
      "step": 5641
    },
    {
      "epoch": 0.1645953672909738,
      "grad_norm": 0.8892306734556975,
      "learning_rate": 9.5325201043929e-06,
      "loss": 0.1967,
      "step": 5642
    },
    {
      "epoch": 0.16462454052161737,
      "grad_norm": 0.8135042187040571,
      "learning_rate": 9.53232062379123e-06,
      "loss": 0.1904,
      "step": 5643
    },
    {
      "epoch": 0.16465371375226093,
      "grad_norm": 0.8037126020990946,
      "learning_rate": 9.532121102726088e-06,
      "loss": 0.1686,
      "step": 5644
    },
    {
      "epoch": 0.16468288698290448,
      "grad_norm": 0.8641625415937917,
      "learning_rate": 9.531921541199249e-06,
      "loss": 0.1906,
      "step": 5645
    },
    {
      "epoch": 0.16471206021354806,
      "grad_norm": 1.0199354918080799,
      "learning_rate": 9.531721939212497e-06,
      "loss": 0.1827,
      "step": 5646
    },
    {
      "epoch": 0.16474123344419161,
      "grad_norm": 0.8532780844787409,
      "learning_rate": 9.53152229676761e-06,
      "loss": 0.1537,
      "step": 5647
    },
    {
      "epoch": 0.16477040667483517,
      "grad_norm": 0.9380098937926005,
      "learning_rate": 9.531322613866378e-06,
      "loss": 0.1719,
      "step": 5648
    },
    {
      "epoch": 0.16479957990547872,
      "grad_norm": 0.7853122808500244,
      "learning_rate": 9.531122890510577e-06,
      "loss": 0.167,
      "step": 5649
    },
    {
      "epoch": 0.1648287531361223,
      "grad_norm": 0.9987451694364654,
      "learning_rate": 9.530923126701994e-06,
      "loss": 0.1777,
      "step": 5650
    },
    {
      "epoch": 0.16485792636676586,
      "grad_norm": 1.13444692949112,
      "learning_rate": 9.530723322442408e-06,
      "loss": 0.1762,
      "step": 5651
    },
    {
      "epoch": 0.1648870995974094,
      "grad_norm": 0.9127974602991187,
      "learning_rate": 9.530523477733608e-06,
      "loss": 0.1584,
      "step": 5652
    },
    {
      "epoch": 0.164916272828053,
      "grad_norm": 0.98414473118197,
      "learning_rate": 9.530323592577376e-06,
      "loss": 0.1649,
      "step": 5653
    },
    {
      "epoch": 0.16494544605869654,
      "grad_norm": 0.9241482100282952,
      "learning_rate": 9.530123666975498e-06,
      "loss": 0.1655,
      "step": 5654
    },
    {
      "epoch": 0.1649746192893401,
      "grad_norm": 0.8354171263977225,
      "learning_rate": 9.529923700929753e-06,
      "loss": 0.1782,
      "step": 5655
    },
    {
      "epoch": 0.16500379251998365,
      "grad_norm": 1.1181879737222529,
      "learning_rate": 9.529723694441935e-06,
      "loss": 0.1907,
      "step": 5656
    },
    {
      "epoch": 0.16503296575062723,
      "grad_norm": 0.7588624883101615,
      "learning_rate": 9.529523647513824e-06,
      "loss": 0.1565,
      "step": 5657
    },
    {
      "epoch": 0.16506213898127078,
      "grad_norm": 0.9046596801968325,
      "learning_rate": 9.529323560147204e-06,
      "loss": 0.1788,
      "step": 5658
    },
    {
      "epoch": 0.16509131221191434,
      "grad_norm": 0.8841903567900284,
      "learning_rate": 9.529123432343868e-06,
      "loss": 0.1992,
      "step": 5659
    },
    {
      "epoch": 0.16512048544255792,
      "grad_norm": 0.8972858405509329,
      "learning_rate": 9.528923264105597e-06,
      "loss": 0.158,
      "step": 5660
    },
    {
      "epoch": 0.16514965867320147,
      "grad_norm": 0.8204773792832021,
      "learning_rate": 9.528723055434182e-06,
      "loss": 0.1641,
      "step": 5661
    },
    {
      "epoch": 0.16517883190384502,
      "grad_norm": 0.8422016494152981,
      "learning_rate": 9.528522806331409e-06,
      "loss": 0.186,
      "step": 5662
    },
    {
      "epoch": 0.1652080051344886,
      "grad_norm": 1.0036955772986154,
      "learning_rate": 9.528322516799064e-06,
      "loss": 0.1788,
      "step": 5663
    },
    {
      "epoch": 0.16523717836513216,
      "grad_norm": 0.8658475774572192,
      "learning_rate": 9.528122186838935e-06,
      "loss": 0.1682,
      "step": 5664
    },
    {
      "epoch": 0.1652663515957757,
      "grad_norm": 0.9343986033765772,
      "learning_rate": 9.527921816452815e-06,
      "loss": 0.196,
      "step": 5665
    },
    {
      "epoch": 0.16529552482641927,
      "grad_norm": 0.9523178297434219,
      "learning_rate": 9.527721405642489e-06,
      "loss": 0.1598,
      "step": 5666
    },
    {
      "epoch": 0.16532469805706285,
      "grad_norm": 0.8053235060679232,
      "learning_rate": 9.527520954409748e-06,
      "loss": 0.1837,
      "step": 5667
    },
    {
      "epoch": 0.1653538712877064,
      "grad_norm": 0.9254610811105273,
      "learning_rate": 9.527320462756379e-06,
      "loss": 0.1632,
      "step": 5668
    },
    {
      "epoch": 0.16538304451834995,
      "grad_norm": 0.8281553470358143,
      "learning_rate": 9.527119930684174e-06,
      "loss": 0.1702,
      "step": 5669
    },
    {
      "epoch": 0.16541221774899353,
      "grad_norm": 0.6681916431569204,
      "learning_rate": 9.526919358194923e-06,
      "loss": 0.1847,
      "step": 5670
    },
    {
      "epoch": 0.1654413909796371,
      "grad_norm": 0.8700015870060569,
      "learning_rate": 9.526718745290418e-06,
      "loss": 0.1643,
      "step": 5671
    },
    {
      "epoch": 0.16547056421028064,
      "grad_norm": 0.9119618151662121,
      "learning_rate": 9.526518091972447e-06,
      "loss": 0.188,
      "step": 5672
    },
    {
      "epoch": 0.16549973744092422,
      "grad_norm": 0.7895264606281064,
      "learning_rate": 9.526317398242803e-06,
      "loss": 0.1717,
      "step": 5673
    },
    {
      "epoch": 0.16552891067156777,
      "grad_norm": 0.8419253020973396,
      "learning_rate": 9.52611666410328e-06,
      "loss": 0.1682,
      "step": 5674
    },
    {
      "epoch": 0.16555808390221133,
      "grad_norm": 0.9489149000767232,
      "learning_rate": 9.525915889555666e-06,
      "loss": 0.1559,
      "step": 5675
    },
    {
      "epoch": 0.16558725713285488,
      "grad_norm": 1.1362243233797817,
      "learning_rate": 9.525715074601756e-06,
      "loss": 0.1551,
      "step": 5676
    },
    {
      "epoch": 0.16561643036349846,
      "grad_norm": 0.8007023063084524,
      "learning_rate": 9.525514219243342e-06,
      "loss": 0.1671,
      "step": 5677
    },
    {
      "epoch": 0.16564560359414202,
      "grad_norm": 0.8760805163055179,
      "learning_rate": 9.525313323482217e-06,
      "loss": 0.1884,
      "step": 5678
    },
    {
      "epoch": 0.16567477682478557,
      "grad_norm": 0.7956839749398408,
      "learning_rate": 9.525112387320177e-06,
      "loss": 0.1659,
      "step": 5679
    },
    {
      "epoch": 0.16570395005542915,
      "grad_norm": 0.970887167937794,
      "learning_rate": 9.524911410759012e-06,
      "loss": 0.1632,
      "step": 5680
    },
    {
      "epoch": 0.1657331232860727,
      "grad_norm": 1.3408322576460419,
      "learning_rate": 9.524710393800518e-06,
      "loss": 0.1861,
      "step": 5681
    },
    {
      "epoch": 0.16576229651671626,
      "grad_norm": 0.9953309589317046,
      "learning_rate": 9.524509336446489e-06,
      "loss": 0.1553,
      "step": 5682
    },
    {
      "epoch": 0.1657914697473598,
      "grad_norm": 1.0691731422030517,
      "learning_rate": 9.524308238698723e-06,
      "loss": 0.1746,
      "step": 5683
    },
    {
      "epoch": 0.1658206429780034,
      "grad_norm": 0.9751236644786784,
      "learning_rate": 9.52410710055901e-06,
      "loss": 0.1469,
      "step": 5684
    },
    {
      "epoch": 0.16584981620864694,
      "grad_norm": 1.376478960450591,
      "learning_rate": 9.52390592202915e-06,
      "loss": 0.1604,
      "step": 5685
    },
    {
      "epoch": 0.1658789894392905,
      "grad_norm": 0.9059184036656142,
      "learning_rate": 9.523704703110939e-06,
      "loss": 0.181,
      "step": 5686
    },
    {
      "epoch": 0.16590816266993408,
      "grad_norm": 0.9531920355015756,
      "learning_rate": 9.523503443806173e-06,
      "loss": 0.1871,
      "step": 5687
    },
    {
      "epoch": 0.16593733590057763,
      "grad_norm": 1.3610865414447926,
      "learning_rate": 9.523302144116647e-06,
      "loss": 0.1468,
      "step": 5688
    },
    {
      "epoch": 0.16596650913122118,
      "grad_norm": 0.9879132279272322,
      "learning_rate": 9.523100804044159e-06,
      "loss": 0.1886,
      "step": 5689
    },
    {
      "epoch": 0.16599568236186477,
      "grad_norm": 0.9880328281509605,
      "learning_rate": 9.522899423590507e-06,
      "loss": 0.17,
      "step": 5690
    },
    {
      "epoch": 0.16602485559250832,
      "grad_norm": 0.8304979236237056,
      "learning_rate": 9.52269800275749e-06,
      "loss": 0.1827,
      "step": 5691
    },
    {
      "epoch": 0.16605402882315187,
      "grad_norm": 0.9552655816701444,
      "learning_rate": 9.522496541546901e-06,
      "loss": 0.1884,
      "step": 5692
    },
    {
      "epoch": 0.16608320205379543,
      "grad_norm": 0.8358397925988778,
      "learning_rate": 9.522295039960544e-06,
      "loss": 0.1643,
      "step": 5693
    },
    {
      "epoch": 0.166112375284439,
      "grad_norm": 0.6558762570348208,
      "learning_rate": 9.522093498000218e-06,
      "loss": 0.1639,
      "step": 5694
    },
    {
      "epoch": 0.16614154851508256,
      "grad_norm": 0.9007862062668994,
      "learning_rate": 9.521891915667722e-06,
      "loss": 0.1665,
      "step": 5695
    },
    {
      "epoch": 0.1661707217457261,
      "grad_norm": 0.8837016889555012,
      "learning_rate": 9.52169029296485e-06,
      "loss": 0.1949,
      "step": 5696
    },
    {
      "epoch": 0.1661998949763697,
      "grad_norm": 0.7108733949655556,
      "learning_rate": 9.521488629893411e-06,
      "loss": 0.1772,
      "step": 5697
    },
    {
      "epoch": 0.16622906820701325,
      "grad_norm": 1.7374926645259825,
      "learning_rate": 9.521286926455198e-06,
      "loss": 0.1789,
      "step": 5698
    },
    {
      "epoch": 0.1662582414376568,
      "grad_norm": 1.0873970809436402,
      "learning_rate": 9.521085182652016e-06,
      "loss": 0.1643,
      "step": 5699
    },
    {
      "epoch": 0.16628741466830035,
      "grad_norm": 0.9235896302946089,
      "learning_rate": 9.520883398485665e-06,
      "loss": 0.1598,
      "step": 5700
    },
    {
      "epoch": 0.16631658789894394,
      "grad_norm": 0.8480519275285644,
      "learning_rate": 9.520681573957944e-06,
      "loss": 0.1547,
      "step": 5701
    },
    {
      "epoch": 0.1663457611295875,
      "grad_norm": 0.8684837851224382,
      "learning_rate": 9.520479709070661e-06,
      "loss": 0.1657,
      "step": 5702
    },
    {
      "epoch": 0.16637493436023104,
      "grad_norm": 0.8678892323263708,
      "learning_rate": 9.52027780382561e-06,
      "loss": 0.209,
      "step": 5703
    },
    {
      "epoch": 0.16640410759087462,
      "grad_norm": 0.7701939936552961,
      "learning_rate": 9.5200758582246e-06,
      "loss": 0.1808,
      "step": 5704
    },
    {
      "epoch": 0.16643328082151818,
      "grad_norm": 0.7743930184201232,
      "learning_rate": 9.519873872269431e-06,
      "loss": 0.1687,
      "step": 5705
    },
    {
      "epoch": 0.16646245405216173,
      "grad_norm": 0.8668160425028448,
      "learning_rate": 9.519671845961908e-06,
      "loss": 0.1751,
      "step": 5706
    },
    {
      "epoch": 0.1664916272828053,
      "grad_norm": 1.0076935631153972,
      "learning_rate": 9.519469779303833e-06,
      "loss": 0.1701,
      "step": 5707
    },
    {
      "epoch": 0.16652080051344886,
      "grad_norm": 0.7363457026736757,
      "learning_rate": 9.519267672297013e-06,
      "loss": 0.1487,
      "step": 5708
    },
    {
      "epoch": 0.16654997374409242,
      "grad_norm": 0.8646562493640063,
      "learning_rate": 9.519065524943247e-06,
      "loss": 0.1532,
      "step": 5709
    },
    {
      "epoch": 0.16657914697473597,
      "grad_norm": 0.9669843846088726,
      "learning_rate": 9.518863337244344e-06,
      "loss": 0.1767,
      "step": 5710
    },
    {
      "epoch": 0.16660832020537955,
      "grad_norm": 0.7631156015200443,
      "learning_rate": 9.518661109202107e-06,
      "loss": 0.149,
      "step": 5711
    },
    {
      "epoch": 0.1666374934360231,
      "grad_norm": 0.9086542199023313,
      "learning_rate": 9.518458840818343e-06,
      "loss": 0.1494,
      "step": 5712
    },
    {
      "epoch": 0.16666666666666666,
      "grad_norm": 0.9339741301798014,
      "learning_rate": 9.518256532094859e-06,
      "loss": 0.1917,
      "step": 5713
    },
    {
      "epoch": 0.16669583989731024,
      "grad_norm": 0.9002561742502497,
      "learning_rate": 9.518054183033456e-06,
      "loss": 0.1496,
      "step": 5714
    },
    {
      "epoch": 0.1667250131279538,
      "grad_norm": 0.8132471465484027,
      "learning_rate": 9.517851793635946e-06,
      "loss": 0.1735,
      "step": 5715
    },
    {
      "epoch": 0.16675418635859734,
      "grad_norm": 0.8527596315922845,
      "learning_rate": 9.517649363904132e-06,
      "loss": 0.1886,
      "step": 5716
    },
    {
      "epoch": 0.16678335958924093,
      "grad_norm": 0.8714980015972134,
      "learning_rate": 9.517446893839824e-06,
      "loss": 0.1692,
      "step": 5717
    },
    {
      "epoch": 0.16681253281988448,
      "grad_norm": 0.8587323457269065,
      "learning_rate": 9.517244383444829e-06,
      "loss": 0.1681,
      "step": 5718
    },
    {
      "epoch": 0.16684170605052803,
      "grad_norm": 0.9271190757581552,
      "learning_rate": 9.517041832720953e-06,
      "loss": 0.1441,
      "step": 5719
    },
    {
      "epoch": 0.16687087928117159,
      "grad_norm": 0.9111026655110478,
      "learning_rate": 9.516839241670006e-06,
      "loss": 0.1813,
      "step": 5720
    },
    {
      "epoch": 0.16690005251181517,
      "grad_norm": 0.8138309657015205,
      "learning_rate": 9.516636610293798e-06,
      "loss": 0.1993,
      "step": 5721
    },
    {
      "epoch": 0.16692922574245872,
      "grad_norm": 0.9979093129062674,
      "learning_rate": 9.516433938594137e-06,
      "loss": 0.1764,
      "step": 5722
    },
    {
      "epoch": 0.16695839897310227,
      "grad_norm": 0.8876233363953347,
      "learning_rate": 9.51623122657283e-06,
      "loss": 0.1888,
      "step": 5723
    },
    {
      "epoch": 0.16698757220374585,
      "grad_norm": 0.7661865290715371,
      "learning_rate": 9.516028474231689e-06,
      "loss": 0.1652,
      "step": 5724
    },
    {
      "epoch": 0.1670167454343894,
      "grad_norm": 0.7970528798299262,
      "learning_rate": 9.515825681572523e-06,
      "loss": 0.1705,
      "step": 5725
    },
    {
      "epoch": 0.16704591866503296,
      "grad_norm": 0.8731151303887534,
      "learning_rate": 9.515622848597145e-06,
      "loss": 0.1685,
      "step": 5726
    },
    {
      "epoch": 0.16707509189567651,
      "grad_norm": 0.8423270951003244,
      "learning_rate": 9.515419975307365e-06,
      "loss": 0.2072,
      "step": 5727
    },
    {
      "epoch": 0.1671042651263201,
      "grad_norm": 0.9563534421355293,
      "learning_rate": 9.515217061704991e-06,
      "loss": 0.1861,
      "step": 5728
    },
    {
      "epoch": 0.16713343835696365,
      "grad_norm": 1.0794263591197344,
      "learning_rate": 9.515014107791839e-06,
      "loss": 0.1862,
      "step": 5729
    },
    {
      "epoch": 0.1671626115876072,
      "grad_norm": 0.8482002265219688,
      "learning_rate": 9.514811113569718e-06,
      "loss": 0.167,
      "step": 5730
    },
    {
      "epoch": 0.16719178481825078,
      "grad_norm": 0.8831824460817326,
      "learning_rate": 9.514608079040441e-06,
      "loss": 0.1649,
      "step": 5731
    },
    {
      "epoch": 0.16722095804889434,
      "grad_norm": 0.8771172281238426,
      "learning_rate": 9.51440500420582e-06,
      "loss": 0.168,
      "step": 5732
    },
    {
      "epoch": 0.1672501312795379,
      "grad_norm": 0.801982788928741,
      "learning_rate": 9.51420188906767e-06,
      "loss": 0.1793,
      "step": 5733
    },
    {
      "epoch": 0.16727930451018147,
      "grad_norm": 0.9045361378829273,
      "learning_rate": 9.513998733627802e-06,
      "loss": 0.179,
      "step": 5734
    },
    {
      "epoch": 0.16730847774082502,
      "grad_norm": 0.9572368123826093,
      "learning_rate": 9.513795537888032e-06,
      "loss": 0.1709,
      "step": 5735
    },
    {
      "epoch": 0.16733765097146858,
      "grad_norm": 0.9885183697072522,
      "learning_rate": 9.513592301850174e-06,
      "loss": 0.1658,
      "step": 5736
    },
    {
      "epoch": 0.16736682420211213,
      "grad_norm": 0.8311614188640513,
      "learning_rate": 9.51338902551604e-06,
      "loss": 0.1757,
      "step": 5737
    },
    {
      "epoch": 0.1673959974327557,
      "grad_norm": 0.9266043594963513,
      "learning_rate": 9.513185708887445e-06,
      "loss": 0.1685,
      "step": 5738
    },
    {
      "epoch": 0.16742517066339926,
      "grad_norm": 0.8195980583339016,
      "learning_rate": 9.512982351966207e-06,
      "loss": 0.1695,
      "step": 5739
    },
    {
      "epoch": 0.16745434389404282,
      "grad_norm": 0.8522298747866902,
      "learning_rate": 9.51277895475414e-06,
      "loss": 0.173,
      "step": 5740
    },
    {
      "epoch": 0.1674835171246864,
      "grad_norm": 0.8191427424235472,
      "learning_rate": 9.51257551725306e-06,
      "loss": 0.1659,
      "step": 5741
    },
    {
      "epoch": 0.16751269035532995,
      "grad_norm": 0.883421807881772,
      "learning_rate": 9.512372039464782e-06,
      "loss": 0.1802,
      "step": 5742
    },
    {
      "epoch": 0.1675418635859735,
      "grad_norm": 0.8813050107409939,
      "learning_rate": 9.512168521391123e-06,
      "loss": 0.1639,
      "step": 5743
    },
    {
      "epoch": 0.16757103681661709,
      "grad_norm": 0.9627359759151015,
      "learning_rate": 9.511964963033902e-06,
      "loss": 0.194,
      "step": 5744
    },
    {
      "epoch": 0.16760021004726064,
      "grad_norm": 0.7925154895715254,
      "learning_rate": 9.511761364394935e-06,
      "loss": 0.1694,
      "step": 5745
    },
    {
      "epoch": 0.1676293832779042,
      "grad_norm": 0.850554932296648,
      "learning_rate": 9.51155772547604e-06,
      "loss": 0.1515,
      "step": 5746
    },
    {
      "epoch": 0.16765855650854775,
      "grad_norm": 0.8708859149242727,
      "learning_rate": 9.511354046279032e-06,
      "loss": 0.1725,
      "step": 5747
    },
    {
      "epoch": 0.16768772973919133,
      "grad_norm": 0.909647459641582,
      "learning_rate": 9.511150326805734e-06,
      "loss": 0.1995,
      "step": 5748
    },
    {
      "epoch": 0.16771690296983488,
      "grad_norm": 0.8624369587185624,
      "learning_rate": 9.510946567057963e-06,
      "loss": 0.1752,
      "step": 5749
    },
    {
      "epoch": 0.16774607620047843,
      "grad_norm": 0.8705414073551789,
      "learning_rate": 9.510742767037538e-06,
      "loss": 0.1655,
      "step": 5750
    },
    {
      "epoch": 0.16777524943112201,
      "grad_norm": 0.8458393316577807,
      "learning_rate": 9.510538926746276e-06,
      "loss": 0.1691,
      "step": 5751
    },
    {
      "epoch": 0.16780442266176557,
      "grad_norm": 0.7781652817375657,
      "learning_rate": 9.510335046186001e-06,
      "loss": 0.1847,
      "step": 5752
    },
    {
      "epoch": 0.16783359589240912,
      "grad_norm": 0.7684798531988254,
      "learning_rate": 9.510131125358532e-06,
      "loss": 0.1997,
      "step": 5753
    },
    {
      "epoch": 0.16786276912305267,
      "grad_norm": 0.7430952463355108,
      "learning_rate": 9.509927164265688e-06,
      "loss": 0.1543,
      "step": 5754
    },
    {
      "epoch": 0.16789194235369626,
      "grad_norm": 0.7541261204553376,
      "learning_rate": 9.509723162909292e-06,
      "loss": 0.1617,
      "step": 5755
    },
    {
      "epoch": 0.1679211155843398,
      "grad_norm": 1.0395113794202016,
      "learning_rate": 9.509519121291164e-06,
      "loss": 0.168,
      "step": 5756
    },
    {
      "epoch": 0.16795028881498336,
      "grad_norm": 0.8457258943799788,
      "learning_rate": 9.509315039413126e-06,
      "loss": 0.1668,
      "step": 5757
    },
    {
      "epoch": 0.16797946204562694,
      "grad_norm": 0.8008591685751763,
      "learning_rate": 9.509110917276997e-06,
      "loss": 0.1658,
      "step": 5758
    },
    {
      "epoch": 0.1680086352762705,
      "grad_norm": 1.0079926273568525,
      "learning_rate": 9.508906754884603e-06,
      "loss": 0.1943,
      "step": 5759
    },
    {
      "epoch": 0.16803780850691405,
      "grad_norm": 0.796659282858451,
      "learning_rate": 9.508702552237768e-06,
      "loss": 0.1741,
      "step": 5760
    },
    {
      "epoch": 0.16806698173755763,
      "grad_norm": 0.9530830112998195,
      "learning_rate": 9.508498309338313e-06,
      "loss": 0.1626,
      "step": 5761
    },
    {
      "epoch": 0.16809615496820118,
      "grad_norm": 0.8356891526275132,
      "learning_rate": 9.50829402618806e-06,
      "loss": 0.2005,
      "step": 5762
    },
    {
      "epoch": 0.16812532819884474,
      "grad_norm": 0.9700998337612351,
      "learning_rate": 9.508089702788835e-06,
      "loss": 0.1966,
      "step": 5763
    },
    {
      "epoch": 0.1681545014294883,
      "grad_norm": 0.7978492663470175,
      "learning_rate": 9.50788533914246e-06,
      "loss": 0.1479,
      "step": 5764
    },
    {
      "epoch": 0.16818367466013187,
      "grad_norm": 0.8288991693189907,
      "learning_rate": 9.507680935250762e-06,
      "loss": 0.1873,
      "step": 5765
    },
    {
      "epoch": 0.16821284789077542,
      "grad_norm": 0.7963707692136316,
      "learning_rate": 9.507476491115564e-06,
      "loss": 0.1828,
      "step": 5766
    },
    {
      "epoch": 0.16824202112141898,
      "grad_norm": 0.8758297637996928,
      "learning_rate": 9.507272006738692e-06,
      "loss": 0.1466,
      "step": 5767
    },
    {
      "epoch": 0.16827119435206256,
      "grad_norm": 0.8754145738446034,
      "learning_rate": 9.50706748212197e-06,
      "loss": 0.1904,
      "step": 5768
    },
    {
      "epoch": 0.1683003675827061,
      "grad_norm": 0.7414681198536065,
      "learning_rate": 9.506862917267228e-06,
      "loss": 0.1713,
      "step": 5769
    },
    {
      "epoch": 0.16832954081334967,
      "grad_norm": 0.7989072402561423,
      "learning_rate": 9.506658312176288e-06,
      "loss": 0.1419,
      "step": 5770
    },
    {
      "epoch": 0.16835871404399322,
      "grad_norm": 0.7659770789703081,
      "learning_rate": 9.506453666850982e-06,
      "loss": 0.1562,
      "step": 5771
    },
    {
      "epoch": 0.1683878872746368,
      "grad_norm": 0.9861733022695788,
      "learning_rate": 9.50624898129313e-06,
      "loss": 0.1646,
      "step": 5772
    },
    {
      "epoch": 0.16841706050528035,
      "grad_norm": 0.9031594025140651,
      "learning_rate": 9.506044255504563e-06,
      "loss": 0.1901,
      "step": 5773
    },
    {
      "epoch": 0.1684462337359239,
      "grad_norm": 0.9649187871318449,
      "learning_rate": 9.50583948948711e-06,
      "loss": 0.1716,
      "step": 5774
    },
    {
      "epoch": 0.1684754069665675,
      "grad_norm": 0.9163837986095745,
      "learning_rate": 9.505634683242595e-06,
      "loss": 0.1537,
      "step": 5775
    },
    {
      "epoch": 0.16850458019721104,
      "grad_norm": 0.7913032036590982,
      "learning_rate": 9.505429836772852e-06,
      "loss": 0.16,
      "step": 5776
    },
    {
      "epoch": 0.1685337534278546,
      "grad_norm": 1.0419250495818513,
      "learning_rate": 9.505224950079705e-06,
      "loss": 0.1684,
      "step": 5777
    },
    {
      "epoch": 0.16856292665849817,
      "grad_norm": 0.8186132470280999,
      "learning_rate": 9.505020023164985e-06,
      "loss": 0.1549,
      "step": 5778
    },
    {
      "epoch": 0.16859209988914173,
      "grad_norm": 0.9145764220434629,
      "learning_rate": 9.504815056030523e-06,
      "loss": 0.1798,
      "step": 5779
    },
    {
      "epoch": 0.16862127311978528,
      "grad_norm": 0.7939721453720862,
      "learning_rate": 9.504610048678148e-06,
      "loss": 0.1757,
      "step": 5780
    },
    {
      "epoch": 0.16865044635042883,
      "grad_norm": 0.9123615104748429,
      "learning_rate": 9.504405001109688e-06,
      "loss": 0.1634,
      "step": 5781
    },
    {
      "epoch": 0.16867961958107242,
      "grad_norm": 0.7837052496776843,
      "learning_rate": 9.504199913326977e-06,
      "loss": 0.1433,
      "step": 5782
    },
    {
      "epoch": 0.16870879281171597,
      "grad_norm": 0.9136821640560577,
      "learning_rate": 9.503994785331845e-06,
      "loss": 0.1827,
      "step": 5783
    },
    {
      "epoch": 0.16873796604235952,
      "grad_norm": 0.8988171773056757,
      "learning_rate": 9.50378961712612e-06,
      "loss": 0.1697,
      "step": 5784
    },
    {
      "epoch": 0.1687671392730031,
      "grad_norm": 0.8454932841127984,
      "learning_rate": 9.50358440871164e-06,
      "loss": 0.1913,
      "step": 5785
    },
    {
      "epoch": 0.16879631250364666,
      "grad_norm": 0.9256260902701139,
      "learning_rate": 9.50337916009023e-06,
      "loss": 0.1785,
      "step": 5786
    },
    {
      "epoch": 0.1688254857342902,
      "grad_norm": 0.8611191499739325,
      "learning_rate": 9.503173871263728e-06,
      "loss": 0.1585,
      "step": 5787
    },
    {
      "epoch": 0.1688546589649338,
      "grad_norm": 1.1466914106383315,
      "learning_rate": 9.502968542233963e-06,
      "loss": 0.188,
      "step": 5788
    },
    {
      "epoch": 0.16888383219557734,
      "grad_norm": 0.855778321669791,
      "learning_rate": 9.502763173002772e-06,
      "loss": 0.1756,
      "step": 5789
    },
    {
      "epoch": 0.1689130054262209,
      "grad_norm": 1.1559620482353958,
      "learning_rate": 9.502557763571984e-06,
      "loss": 0.1736,
      "step": 5790
    },
    {
      "epoch": 0.16894217865686445,
      "grad_norm": 0.8539528088437137,
      "learning_rate": 9.502352313943437e-06,
      "loss": 0.1652,
      "step": 5791
    },
    {
      "epoch": 0.16897135188750803,
      "grad_norm": 1.0127769894227874,
      "learning_rate": 9.502146824118964e-06,
      "loss": 0.1729,
      "step": 5792
    },
    {
      "epoch": 0.16900052511815158,
      "grad_norm": 1.0727713735757451,
      "learning_rate": 9.501941294100397e-06,
      "loss": 0.1648,
      "step": 5793
    },
    {
      "epoch": 0.16902969834879514,
      "grad_norm": 0.9718664482621123,
      "learning_rate": 9.501735723889573e-06,
      "loss": 0.159,
      "step": 5794
    },
    {
      "epoch": 0.16905887157943872,
      "grad_norm": 1.3117615410056271,
      "learning_rate": 9.501530113488326e-06,
      "loss": 0.1744,
      "step": 5795
    },
    {
      "epoch": 0.16908804481008227,
      "grad_norm": 1.0712446014810333,
      "learning_rate": 9.501324462898495e-06,
      "loss": 0.1749,
      "step": 5796
    },
    {
      "epoch": 0.16911721804072583,
      "grad_norm": 0.8634629738870782,
      "learning_rate": 9.501118772121913e-06,
      "loss": 0.1892,
      "step": 5797
    },
    {
      "epoch": 0.16914639127136938,
      "grad_norm": 1.0273353474251192,
      "learning_rate": 9.500913041160417e-06,
      "loss": 0.1707,
      "step": 5798
    },
    {
      "epoch": 0.16917556450201296,
      "grad_norm": 1.0731080401598547,
      "learning_rate": 9.500707270015846e-06,
      "loss": 0.1939,
      "step": 5799
    },
    {
      "epoch": 0.1692047377326565,
      "grad_norm": 1.0145773911709222,
      "learning_rate": 9.500501458690031e-06,
      "loss": 0.186,
      "step": 5800
    },
    {
      "epoch": 0.16923391096330007,
      "grad_norm": 0.8661426553472247,
      "learning_rate": 9.500295607184815e-06,
      "loss": 0.1807,
      "step": 5801
    },
    {
      "epoch": 0.16926308419394365,
      "grad_norm": 0.8685623315834823,
      "learning_rate": 9.500089715502035e-06,
      "loss": 0.166,
      "step": 5802
    },
    {
      "epoch": 0.1692922574245872,
      "grad_norm": 0.8529004373329677,
      "learning_rate": 9.499883783643526e-06,
      "loss": 0.1672,
      "step": 5803
    },
    {
      "epoch": 0.16932143065523075,
      "grad_norm": 0.6636763544981219,
      "learning_rate": 9.499677811611133e-06,
      "loss": 0.162,
      "step": 5804
    },
    {
      "epoch": 0.16935060388587433,
      "grad_norm": 0.7199486842416523,
      "learning_rate": 9.499471799406687e-06,
      "loss": 0.168,
      "step": 5805
    },
    {
      "epoch": 0.1693797771165179,
      "grad_norm": 0.7058273381665765,
      "learning_rate": 9.49926574703203e-06,
      "loss": 0.1727,
      "step": 5806
    },
    {
      "epoch": 0.16940895034716144,
      "grad_norm": 0.7683216964440348,
      "learning_rate": 9.499059654489005e-06,
      "loss": 0.1457,
      "step": 5807
    },
    {
      "epoch": 0.169438123577805,
      "grad_norm": 0.7008463940438889,
      "learning_rate": 9.498853521779449e-06,
      "loss": 0.1513,
      "step": 5808
    },
    {
      "epoch": 0.16946729680844858,
      "grad_norm": 0.77511154420062,
      "learning_rate": 9.498647348905203e-06,
      "loss": 0.186,
      "step": 5809
    },
    {
      "epoch": 0.16949647003909213,
      "grad_norm": 0.8665380179551119,
      "learning_rate": 9.498441135868107e-06,
      "loss": 0.1804,
      "step": 5810
    },
    {
      "epoch": 0.16952564326973568,
      "grad_norm": 0.864757819281507,
      "learning_rate": 9.498234882670003e-06,
      "loss": 0.1512,
      "step": 5811
    },
    {
      "epoch": 0.16955481650037926,
      "grad_norm": 0.670650331227537,
      "learning_rate": 9.49802858931273e-06,
      "loss": 0.1806,
      "step": 5812
    },
    {
      "epoch": 0.16958398973102282,
      "grad_norm": 0.9059467872314915,
      "learning_rate": 9.497822255798132e-06,
      "loss": 0.1798,
      "step": 5813
    },
    {
      "epoch": 0.16961316296166637,
      "grad_norm": 0.936561903060316,
      "learning_rate": 9.497615882128053e-06,
      "loss": 0.1619,
      "step": 5814
    },
    {
      "epoch": 0.16964233619230992,
      "grad_norm": 0.8561048927543008,
      "learning_rate": 9.497409468304331e-06,
      "loss": 0.1629,
      "step": 5815
    },
    {
      "epoch": 0.1696715094229535,
      "grad_norm": 1.0939282699945563,
      "learning_rate": 9.49720301432881e-06,
      "loss": 0.1657,
      "step": 5816
    },
    {
      "epoch": 0.16970068265359706,
      "grad_norm": 1.3073161896135992,
      "learning_rate": 9.496996520203336e-06,
      "loss": 0.1787,
      "step": 5817
    },
    {
      "epoch": 0.1697298558842406,
      "grad_norm": 1.4015824687904928,
      "learning_rate": 9.496789985929749e-06,
      "loss": 0.1585,
      "step": 5818
    },
    {
      "epoch": 0.1697590291148842,
      "grad_norm": 0.9853956862235628,
      "learning_rate": 9.496583411509897e-06,
      "loss": 0.156,
      "step": 5819
    },
    {
      "epoch": 0.16978820234552774,
      "grad_norm": 0.7535386398845096,
      "learning_rate": 9.49637679694562e-06,
      "loss": 0.166,
      "step": 5820
    },
    {
      "epoch": 0.1698173755761713,
      "grad_norm": 1.1852578585965288,
      "learning_rate": 9.496170142238763e-06,
      "loss": 0.2054,
      "step": 5821
    },
    {
      "epoch": 0.16984654880681488,
      "grad_norm": 0.6938151094907697,
      "learning_rate": 9.495963447391174e-06,
      "loss": 0.1493,
      "step": 5822
    },
    {
      "epoch": 0.16987572203745843,
      "grad_norm": 0.825948110000058,
      "learning_rate": 9.495756712404695e-06,
      "loss": 0.1709,
      "step": 5823
    },
    {
      "epoch": 0.16990489526810199,
      "grad_norm": 0.8517842125407908,
      "learning_rate": 9.495549937281177e-06,
      "loss": 0.1946,
      "step": 5824
    },
    {
      "epoch": 0.16993406849874554,
      "grad_norm": 0.8141468243310551,
      "learning_rate": 9.495343122022458e-06,
      "loss": 0.146,
      "step": 5825
    },
    {
      "epoch": 0.16996324172938912,
      "grad_norm": 0.8520375754287067,
      "learning_rate": 9.495136266630392e-06,
      "loss": 0.1805,
      "step": 5826
    },
    {
      "epoch": 0.16999241496003267,
      "grad_norm": 0.8924959092288862,
      "learning_rate": 9.49492937110682e-06,
      "loss": 0.2049,
      "step": 5827
    },
    {
      "epoch": 0.17002158819067623,
      "grad_norm": 0.7921049782065184,
      "learning_rate": 9.494722435453593e-06,
      "loss": 0.1619,
      "step": 5828
    },
    {
      "epoch": 0.1700507614213198,
      "grad_norm": 0.8075579509681048,
      "learning_rate": 9.494515459672557e-06,
      "loss": 0.1754,
      "step": 5829
    },
    {
      "epoch": 0.17007993465196336,
      "grad_norm": 0.924799922280922,
      "learning_rate": 9.49430844376556e-06,
      "loss": 0.169,
      "step": 5830
    },
    {
      "epoch": 0.1701091078826069,
      "grad_norm": 0.7704035544183194,
      "learning_rate": 9.494101387734448e-06,
      "loss": 0.1633,
      "step": 5831
    },
    {
      "epoch": 0.1701382811132505,
      "grad_norm": 0.7869394969098592,
      "learning_rate": 9.493894291581074e-06,
      "loss": 0.1566,
      "step": 5832
    },
    {
      "epoch": 0.17016745434389405,
      "grad_norm": 0.9816539631901909,
      "learning_rate": 9.493687155307285e-06,
      "loss": 0.1649,
      "step": 5833
    },
    {
      "epoch": 0.1701966275745376,
      "grad_norm": 0.8285838160023006,
      "learning_rate": 9.493479978914928e-06,
      "loss": 0.173,
      "step": 5834
    },
    {
      "epoch": 0.17022580080518115,
      "grad_norm": 0.7251819896058284,
      "learning_rate": 9.493272762405856e-06,
      "loss": 0.1633,
      "step": 5835
    },
    {
      "epoch": 0.17025497403582474,
      "grad_norm": 0.8053394908046079,
      "learning_rate": 9.493065505781916e-06,
      "loss": 0.1884,
      "step": 5836
    },
    {
      "epoch": 0.1702841472664683,
      "grad_norm": 0.8806605956908382,
      "learning_rate": 9.49285820904496e-06,
      "loss": 0.1711,
      "step": 5837
    },
    {
      "epoch": 0.17031332049711184,
      "grad_norm": 0.7246739029732067,
      "learning_rate": 9.492650872196839e-06,
      "loss": 0.1613,
      "step": 5838
    },
    {
      "epoch": 0.17034249372775542,
      "grad_norm": 0.8886602726063461,
      "learning_rate": 9.492443495239404e-06,
      "loss": 0.1658,
      "step": 5839
    },
    {
      "epoch": 0.17037166695839898,
      "grad_norm": 0.8578512789545912,
      "learning_rate": 9.492236078174504e-06,
      "loss": 0.1753,
      "step": 5840
    },
    {
      "epoch": 0.17040084018904253,
      "grad_norm": 1.030892003656916,
      "learning_rate": 9.492028621003994e-06,
      "loss": 0.1645,
      "step": 5841
    },
    {
      "epoch": 0.17043001341968608,
      "grad_norm": 1.062780621892427,
      "learning_rate": 9.491821123729725e-06,
      "loss": 0.1726,
      "step": 5842
    },
    {
      "epoch": 0.17045918665032966,
      "grad_norm": 0.944916376038252,
      "learning_rate": 9.49161358635355e-06,
      "loss": 0.1743,
      "step": 5843
    },
    {
      "epoch": 0.17048835988097322,
      "grad_norm": 0.9317109319704673,
      "learning_rate": 9.49140600887732e-06,
      "loss": 0.198,
      "step": 5844
    },
    {
      "epoch": 0.17051753311161677,
      "grad_norm": 0.8489490810089662,
      "learning_rate": 9.49119839130289e-06,
      "loss": 0.1691,
      "step": 5845
    },
    {
      "epoch": 0.17054670634226035,
      "grad_norm": 0.9000019676171469,
      "learning_rate": 9.49099073363211e-06,
      "loss": 0.1644,
      "step": 5846
    },
    {
      "epoch": 0.1705758795729039,
      "grad_norm": 0.6412596210619251,
      "learning_rate": 9.49078303586684e-06,
      "loss": 0.1541,
      "step": 5847
    },
    {
      "epoch": 0.17060505280354746,
      "grad_norm": 0.8121724387267714,
      "learning_rate": 9.49057529800893e-06,
      "loss": 0.1775,
      "step": 5848
    },
    {
      "epoch": 0.17063422603419104,
      "grad_norm": 0.7549442879290451,
      "learning_rate": 9.490367520060236e-06,
      "loss": 0.1587,
      "step": 5849
    },
    {
      "epoch": 0.1706633992648346,
      "grad_norm": 0.8276613389561046,
      "learning_rate": 9.490159702022611e-06,
      "loss": 0.1524,
      "step": 5850
    },
    {
      "epoch": 0.17069257249547815,
      "grad_norm": 0.6336513971427156,
      "learning_rate": 9.489951843897916e-06,
      "loss": 0.1403,
      "step": 5851
    },
    {
      "epoch": 0.1707217457261217,
      "grad_norm": 0.835059243274728,
      "learning_rate": 9.489743945688e-06,
      "loss": 0.1801,
      "step": 5852
    },
    {
      "epoch": 0.17075091895676528,
      "grad_norm": 0.8635128132339861,
      "learning_rate": 9.489536007394721e-06,
      "loss": 0.1545,
      "step": 5853
    },
    {
      "epoch": 0.17078009218740883,
      "grad_norm": 0.937145630045578,
      "learning_rate": 9.489328029019939e-06,
      "loss": 0.1942,
      "step": 5854
    },
    {
      "epoch": 0.1708092654180524,
      "grad_norm": 0.8666681316470812,
      "learning_rate": 9.489120010565506e-06,
      "loss": 0.1652,
      "step": 5855
    },
    {
      "epoch": 0.17083843864869597,
      "grad_norm": 0.9248298409223987,
      "learning_rate": 9.488911952033283e-06,
      "loss": 0.159,
      "step": 5856
    },
    {
      "epoch": 0.17086761187933952,
      "grad_norm": 1.0064340540443737,
      "learning_rate": 9.488703853425125e-06,
      "loss": 0.1645,
      "step": 5857
    },
    {
      "epoch": 0.17089678510998307,
      "grad_norm": 0.9921130219170589,
      "learning_rate": 9.48849571474289e-06,
      "loss": 0.1615,
      "step": 5858
    },
    {
      "epoch": 0.17092595834062665,
      "grad_norm": 0.7842710062827105,
      "learning_rate": 9.488287535988437e-06,
      "loss": 0.1696,
      "step": 5859
    },
    {
      "epoch": 0.1709551315712702,
      "grad_norm": 1.2510016437280238,
      "learning_rate": 9.488079317163624e-06,
      "loss": 0.1512,
      "step": 5860
    },
    {
      "epoch": 0.17098430480191376,
      "grad_norm": 1.1097337672361536,
      "learning_rate": 9.48787105827031e-06,
      "loss": 0.1964,
      "step": 5861
    },
    {
      "epoch": 0.17101347803255731,
      "grad_norm": 0.9184401922265988,
      "learning_rate": 9.487662759310354e-06,
      "loss": 0.188,
      "step": 5862
    },
    {
      "epoch": 0.1710426512632009,
      "grad_norm": 0.8779127180149895,
      "learning_rate": 9.487454420285618e-06,
      "loss": 0.1921,
      "step": 5863
    },
    {
      "epoch": 0.17107182449384445,
      "grad_norm": 1.0451577051652654,
      "learning_rate": 9.48724604119796e-06,
      "loss": 0.1722,
      "step": 5864
    },
    {
      "epoch": 0.171100997724488,
      "grad_norm": 1.0543682482136674,
      "learning_rate": 9.487037622049238e-06,
      "loss": 0.1486,
      "step": 5865
    },
    {
      "epoch": 0.17113017095513158,
      "grad_norm": 0.9540005552983775,
      "learning_rate": 9.486829162841318e-06,
      "loss": 0.161,
      "step": 5866
    },
    {
      "epoch": 0.17115934418577514,
      "grad_norm": 0.9103475364595928,
      "learning_rate": 9.486620663576058e-06,
      "loss": 0.1627,
      "step": 5867
    },
    {
      "epoch": 0.1711885174164187,
      "grad_norm": 0.9717469794001211,
      "learning_rate": 9.486412124255318e-06,
      "loss": 0.1894,
      "step": 5868
    },
    {
      "epoch": 0.17121769064706224,
      "grad_norm": 0.7894868945012453,
      "learning_rate": 9.486203544880963e-06,
      "loss": 0.1707,
      "step": 5869
    },
    {
      "epoch": 0.17124686387770582,
      "grad_norm": 0.9958722679967967,
      "learning_rate": 9.485994925454853e-06,
      "loss": 0.1624,
      "step": 5870
    },
    {
      "epoch": 0.17127603710834938,
      "grad_norm": 0.9983706914619377,
      "learning_rate": 9.485786265978852e-06,
      "loss": 0.1894,
      "step": 5871
    },
    {
      "epoch": 0.17130521033899293,
      "grad_norm": 0.8709152103796894,
      "learning_rate": 9.485577566454822e-06,
      "loss": 0.187,
      "step": 5872
    },
    {
      "epoch": 0.1713343835696365,
      "grad_norm": 0.7704062122345586,
      "learning_rate": 9.485368826884625e-06,
      "loss": 0.1687,
      "step": 5873
    },
    {
      "epoch": 0.17136355680028006,
      "grad_norm": 0.9113342942131873,
      "learning_rate": 9.485160047270128e-06,
      "loss": 0.1797,
      "step": 5874
    },
    {
      "epoch": 0.17139273003092362,
      "grad_norm": 0.8294673446858788,
      "learning_rate": 9.48495122761319e-06,
      "loss": 0.1787,
      "step": 5875
    },
    {
      "epoch": 0.1714219032615672,
      "grad_norm": 0.7857387256772692,
      "learning_rate": 9.48474236791568e-06,
      "loss": 0.1792,
      "step": 5876
    },
    {
      "epoch": 0.17145107649221075,
      "grad_norm": 0.7558495006071084,
      "learning_rate": 9.484533468179461e-06,
      "loss": 0.179,
      "step": 5877
    },
    {
      "epoch": 0.1714802497228543,
      "grad_norm": 0.7936706599984249,
      "learning_rate": 9.484324528406397e-06,
      "loss": 0.1888,
      "step": 5878
    },
    {
      "epoch": 0.17150942295349786,
      "grad_norm": 0.7705994564390533,
      "learning_rate": 9.484115548598353e-06,
      "loss": 0.1719,
      "step": 5879
    },
    {
      "epoch": 0.17153859618414144,
      "grad_norm": 0.7123561091210682,
      "learning_rate": 9.483906528757199e-06,
      "loss": 0.169,
      "step": 5880
    },
    {
      "epoch": 0.171567769414785,
      "grad_norm": 0.7658641762756876,
      "learning_rate": 9.483697468884795e-06,
      "loss": 0.1585,
      "step": 5881
    },
    {
      "epoch": 0.17159694264542855,
      "grad_norm": 0.7731858572594492,
      "learning_rate": 9.483488368983012e-06,
      "loss": 0.1663,
      "step": 5882
    },
    {
      "epoch": 0.17162611587607213,
      "grad_norm": 0.723524529100536,
      "learning_rate": 9.483279229053715e-06,
      "loss": 0.1838,
      "step": 5883
    },
    {
      "epoch": 0.17165528910671568,
      "grad_norm": 0.837544800571379,
      "learning_rate": 9.48307004909877e-06,
      "loss": 0.156,
      "step": 5884
    },
    {
      "epoch": 0.17168446233735923,
      "grad_norm": 0.7445408836660294,
      "learning_rate": 9.482860829120046e-06,
      "loss": 0.1569,
      "step": 5885
    },
    {
      "epoch": 0.1717136355680028,
      "grad_norm": 0.9026515150135537,
      "learning_rate": 9.482651569119412e-06,
      "loss": 0.1529,
      "step": 5886
    },
    {
      "epoch": 0.17174280879864637,
      "grad_norm": 0.8318603556547081,
      "learning_rate": 9.482442269098734e-06,
      "loss": 0.1881,
      "step": 5887
    },
    {
      "epoch": 0.17177198202928992,
      "grad_norm": 0.8781507598297048,
      "learning_rate": 9.482232929059882e-06,
      "loss": 0.1774,
      "step": 5888
    },
    {
      "epoch": 0.17180115525993347,
      "grad_norm": 0.8663416585325672,
      "learning_rate": 9.482023549004725e-06,
      "loss": 0.1579,
      "step": 5889
    },
    {
      "epoch": 0.17183032849057706,
      "grad_norm": 0.9783300054513878,
      "learning_rate": 9.48181412893513e-06,
      "loss": 0.163,
      "step": 5890
    },
    {
      "epoch": 0.1718595017212206,
      "grad_norm": 1.0992628919712968,
      "learning_rate": 9.481604668852969e-06,
      "loss": 0.1858,
      "step": 5891
    },
    {
      "epoch": 0.17188867495186416,
      "grad_norm": 1.474379319261719,
      "learning_rate": 9.48139516876011e-06,
      "loss": 0.1917,
      "step": 5892
    },
    {
      "epoch": 0.17191784818250774,
      "grad_norm": 0.9871554734600607,
      "learning_rate": 9.481185628658427e-06,
      "loss": 0.1567,
      "step": 5893
    },
    {
      "epoch": 0.1719470214131513,
      "grad_norm": 0.8606364597631231,
      "learning_rate": 9.480976048549788e-06,
      "loss": 0.1584,
      "step": 5894
    },
    {
      "epoch": 0.17197619464379485,
      "grad_norm": 1.0110607347591787,
      "learning_rate": 9.480766428436064e-06,
      "loss": 0.1546,
      "step": 5895
    },
    {
      "epoch": 0.1720053678744384,
      "grad_norm": 0.8948162546528683,
      "learning_rate": 9.480556768319127e-06,
      "loss": 0.1724,
      "step": 5896
    },
    {
      "epoch": 0.17203454110508198,
      "grad_norm": 1.041592621819511,
      "learning_rate": 9.480347068200848e-06,
      "loss": 0.1653,
      "step": 5897
    },
    {
      "epoch": 0.17206371433572554,
      "grad_norm": 0.8787071543385294,
      "learning_rate": 9.480137328083102e-06,
      "loss": 0.1618,
      "step": 5898
    },
    {
      "epoch": 0.1720928875663691,
      "grad_norm": 0.9065587370815846,
      "learning_rate": 9.479927547967758e-06,
      "loss": 0.1756,
      "step": 5899
    },
    {
      "epoch": 0.17212206079701267,
      "grad_norm": 1.1462145546323987,
      "learning_rate": 9.47971772785669e-06,
      "loss": 0.1534,
      "step": 5900
    },
    {
      "epoch": 0.17215123402765622,
      "grad_norm": 0.8907073665411045,
      "learning_rate": 9.479507867751772e-06,
      "loss": 0.1838,
      "step": 5901
    },
    {
      "epoch": 0.17218040725829978,
      "grad_norm": 0.8302950690582827,
      "learning_rate": 9.479297967654877e-06,
      "loss": 0.1477,
      "step": 5902
    },
    {
      "epoch": 0.17220958048894336,
      "grad_norm": 1.0954433713833198,
      "learning_rate": 9.479088027567879e-06,
      "loss": 0.1661,
      "step": 5903
    },
    {
      "epoch": 0.1722387537195869,
      "grad_norm": 0.8691451504944712,
      "learning_rate": 9.478878047492653e-06,
      "loss": 0.1754,
      "step": 5904
    },
    {
      "epoch": 0.17226792695023047,
      "grad_norm": 1.0099113901889492,
      "learning_rate": 9.478668027431071e-06,
      "loss": 0.1695,
      "step": 5905
    },
    {
      "epoch": 0.17229710018087402,
      "grad_norm": 0.9850315987586054,
      "learning_rate": 9.478457967385013e-06,
      "loss": 0.1894,
      "step": 5906
    },
    {
      "epoch": 0.1723262734115176,
      "grad_norm": 0.7299713781503557,
      "learning_rate": 9.47824786735635e-06,
      "loss": 0.1422,
      "step": 5907
    },
    {
      "epoch": 0.17235544664216115,
      "grad_norm": 0.8527877793748475,
      "learning_rate": 9.478037727346959e-06,
      "loss": 0.1688,
      "step": 5908
    },
    {
      "epoch": 0.1723846198728047,
      "grad_norm": 0.7605342388667926,
      "learning_rate": 9.477827547358716e-06,
      "loss": 0.1654,
      "step": 5909
    },
    {
      "epoch": 0.1724137931034483,
      "grad_norm": 0.7355909282845711,
      "learning_rate": 9.477617327393496e-06,
      "loss": 0.1578,
      "step": 5910
    },
    {
      "epoch": 0.17244296633409184,
      "grad_norm": 0.895120318100169,
      "learning_rate": 9.47740706745318e-06,
      "loss": 0.1666,
      "step": 5911
    },
    {
      "epoch": 0.1724721395647354,
      "grad_norm": 0.7411542237171944,
      "learning_rate": 9.47719676753964e-06,
      "loss": 0.185,
      "step": 5912
    },
    {
      "epoch": 0.17250131279537895,
      "grad_norm": 0.710531623170141,
      "learning_rate": 9.476986427654759e-06,
      "loss": 0.1589,
      "step": 5913
    },
    {
      "epoch": 0.17253048602602253,
      "grad_norm": 0.7799338090969948,
      "learning_rate": 9.476776047800412e-06,
      "loss": 0.1717,
      "step": 5914
    },
    {
      "epoch": 0.17255965925666608,
      "grad_norm": 0.6765085921456918,
      "learning_rate": 9.476565627978473e-06,
      "loss": 0.1505,
      "step": 5915
    },
    {
      "epoch": 0.17258883248730963,
      "grad_norm": 1.1168412809581576,
      "learning_rate": 9.47635516819083e-06,
      "loss": 0.1527,
      "step": 5916
    },
    {
      "epoch": 0.17261800571795322,
      "grad_norm": 0.776208522254885,
      "learning_rate": 9.476144668439353e-06,
      "loss": 0.1811,
      "step": 5917
    },
    {
      "epoch": 0.17264717894859677,
      "grad_norm": 0.8716613828854245,
      "learning_rate": 9.475934128725926e-06,
      "loss": 0.164,
      "step": 5918
    },
    {
      "epoch": 0.17267635217924032,
      "grad_norm": 0.7464377444385695,
      "learning_rate": 9.475723549052427e-06,
      "loss": 0.1558,
      "step": 5919
    },
    {
      "epoch": 0.1727055254098839,
      "grad_norm": 0.8552805410092627,
      "learning_rate": 9.475512929420739e-06,
      "loss": 0.1651,
      "step": 5920
    },
    {
      "epoch": 0.17273469864052746,
      "grad_norm": 0.8271860599322377,
      "learning_rate": 9.475302269832736e-06,
      "loss": 0.1598,
      "step": 5921
    },
    {
      "epoch": 0.172763871871171,
      "grad_norm": 0.9967403670391106,
      "learning_rate": 9.475091570290306e-06,
      "loss": 0.1584,
      "step": 5922
    },
    {
      "epoch": 0.17279304510181456,
      "grad_norm": 0.9750872399469732,
      "learning_rate": 9.474880830795326e-06,
      "loss": 0.1686,
      "step": 5923
    },
    {
      "epoch": 0.17282221833245814,
      "grad_norm": 0.9847673654342468,
      "learning_rate": 9.474670051349677e-06,
      "loss": 0.1659,
      "step": 5924
    },
    {
      "epoch": 0.1728513915631017,
      "grad_norm": 1.5394662452136487,
      "learning_rate": 9.474459231955243e-06,
      "loss": 0.1882,
      "step": 5925
    },
    {
      "epoch": 0.17288056479374525,
      "grad_norm": 0.8466411139553673,
      "learning_rate": 9.474248372613904e-06,
      "loss": 0.1705,
      "step": 5926
    },
    {
      "epoch": 0.17290973802438883,
      "grad_norm": 0.797873163935156,
      "learning_rate": 9.474037473327546e-06,
      "loss": 0.1929,
      "step": 5927
    },
    {
      "epoch": 0.17293891125503238,
      "grad_norm": 1.0790938186326624,
      "learning_rate": 9.473826534098048e-06,
      "loss": 0.1621,
      "step": 5928
    },
    {
      "epoch": 0.17296808448567594,
      "grad_norm": 0.9646935693715456,
      "learning_rate": 9.473615554927294e-06,
      "loss": 0.1808,
      "step": 5929
    },
    {
      "epoch": 0.17299725771631952,
      "grad_norm": 0.8499971109968554,
      "learning_rate": 9.473404535817168e-06,
      "loss": 0.1757,
      "step": 5930
    },
    {
      "epoch": 0.17302643094696307,
      "grad_norm": 0.8560382926739467,
      "learning_rate": 9.473193476769556e-06,
      "loss": 0.1693,
      "step": 5931
    },
    {
      "epoch": 0.17305560417760663,
      "grad_norm": 1.0118514806139574,
      "learning_rate": 9.47298237778634e-06,
      "loss": 0.1765,
      "step": 5932
    },
    {
      "epoch": 0.17308477740825018,
      "grad_norm": 0.8925696467654138,
      "learning_rate": 9.472771238869404e-06,
      "loss": 0.1825,
      "step": 5933
    },
    {
      "epoch": 0.17311395063889376,
      "grad_norm": 1.1306236346184735,
      "learning_rate": 9.472560060020635e-06,
      "loss": 0.1787,
      "step": 5934
    },
    {
      "epoch": 0.1731431238695373,
      "grad_norm": 1.1127923557527213,
      "learning_rate": 9.472348841241917e-06,
      "loss": 0.1656,
      "step": 5935
    },
    {
      "epoch": 0.17317229710018087,
      "grad_norm": 0.9330077890733786,
      "learning_rate": 9.472137582535137e-06,
      "loss": 0.1771,
      "step": 5936
    },
    {
      "epoch": 0.17320147033082445,
      "grad_norm": 0.8874432218424393,
      "learning_rate": 9.47192628390218e-06,
      "loss": 0.1713,
      "step": 5937
    },
    {
      "epoch": 0.173230643561468,
      "grad_norm": 0.7874904272003118,
      "learning_rate": 9.471714945344932e-06,
      "loss": 0.1793,
      "step": 5938
    },
    {
      "epoch": 0.17325981679211155,
      "grad_norm": 1.0186968902520714,
      "learning_rate": 9.471503566865281e-06,
      "loss": 0.1784,
      "step": 5939
    },
    {
      "epoch": 0.1732889900227551,
      "grad_norm": 0.7667923188504596,
      "learning_rate": 9.471292148465113e-06,
      "loss": 0.1898,
      "step": 5940
    },
    {
      "epoch": 0.1733181632533987,
      "grad_norm": 0.7736002263705937,
      "learning_rate": 9.471080690146316e-06,
      "loss": 0.1546,
      "step": 5941
    },
    {
      "epoch": 0.17334733648404224,
      "grad_norm": 0.8722639941577405,
      "learning_rate": 9.470869191910779e-06,
      "loss": 0.1596,
      "step": 5942
    },
    {
      "epoch": 0.1733765097146858,
      "grad_norm": 0.9742425619725633,
      "learning_rate": 9.47065765376039e-06,
      "loss": 0.1701,
      "step": 5943
    },
    {
      "epoch": 0.17340568294532938,
      "grad_norm": 0.8977959599437407,
      "learning_rate": 9.470446075697033e-06,
      "loss": 0.1874,
      "step": 5944
    },
    {
      "epoch": 0.17343485617597293,
      "grad_norm": 0.8078948420257451,
      "learning_rate": 9.470234457722604e-06,
      "loss": 0.1744,
      "step": 5945
    },
    {
      "epoch": 0.17346402940661648,
      "grad_norm": 0.8299434972604273,
      "learning_rate": 9.470022799838986e-06,
      "loss": 0.1822,
      "step": 5946
    },
    {
      "epoch": 0.17349320263726006,
      "grad_norm": 0.9917905356144868,
      "learning_rate": 9.469811102048074e-06,
      "loss": 0.1774,
      "step": 5947
    },
    {
      "epoch": 0.17352237586790362,
      "grad_norm": 0.7140443255715782,
      "learning_rate": 9.469599364351756e-06,
      "loss": 0.153,
      "step": 5948
    },
    {
      "epoch": 0.17355154909854717,
      "grad_norm": 0.9216111115796793,
      "learning_rate": 9.46938758675192e-06,
      "loss": 0.1676,
      "step": 5949
    },
    {
      "epoch": 0.17358072232919072,
      "grad_norm": 0.8813427707493571,
      "learning_rate": 9.46917576925046e-06,
      "loss": 0.1472,
      "step": 5950
    },
    {
      "epoch": 0.1736098955598343,
      "grad_norm": 0.745755009849987,
      "learning_rate": 9.468963911849264e-06,
      "loss": 0.1761,
      "step": 5951
    },
    {
      "epoch": 0.17363906879047786,
      "grad_norm": 0.9162878895306341,
      "learning_rate": 9.468752014550227e-06,
      "loss": 0.1837,
      "step": 5952
    },
    {
      "epoch": 0.1736682420211214,
      "grad_norm": 0.9514592588014141,
      "learning_rate": 9.468540077355237e-06,
      "loss": 0.1837,
      "step": 5953
    },
    {
      "epoch": 0.173697415251765,
      "grad_norm": 0.8191123760490736,
      "learning_rate": 9.468328100266189e-06,
      "loss": 0.1692,
      "step": 5954
    },
    {
      "epoch": 0.17372658848240854,
      "grad_norm": 0.8295743832141615,
      "learning_rate": 9.468116083284972e-06,
      "loss": 0.1745,
      "step": 5955
    },
    {
      "epoch": 0.1737557617130521,
      "grad_norm": 0.9984093734614587,
      "learning_rate": 9.467904026413485e-06,
      "loss": 0.1653,
      "step": 5956
    },
    {
      "epoch": 0.17378493494369565,
      "grad_norm": 0.8485915407094916,
      "learning_rate": 9.467691929653615e-06,
      "loss": 0.1516,
      "step": 5957
    },
    {
      "epoch": 0.17381410817433923,
      "grad_norm": 0.8760360025884906,
      "learning_rate": 9.46747979300726e-06,
      "loss": 0.1734,
      "step": 5958
    },
    {
      "epoch": 0.17384328140498279,
      "grad_norm": 0.974936331869323,
      "learning_rate": 9.46726761647631e-06,
      "loss": 0.1604,
      "step": 5959
    },
    {
      "epoch": 0.17387245463562634,
      "grad_norm": 0.8487539303292866,
      "learning_rate": 9.467055400062661e-06,
      "loss": 0.1659,
      "step": 5960
    },
    {
      "epoch": 0.17390162786626992,
      "grad_norm": 0.879925549034562,
      "learning_rate": 9.466843143768208e-06,
      "loss": 0.1537,
      "step": 5961
    },
    {
      "epoch": 0.17393080109691347,
      "grad_norm": 1.0531656602568154,
      "learning_rate": 9.466630847594846e-06,
      "loss": 0.1588,
      "step": 5962
    },
    {
      "epoch": 0.17395997432755703,
      "grad_norm": 0.8055294366853316,
      "learning_rate": 9.46641851154447e-06,
      "loss": 0.1529,
      "step": 5963
    },
    {
      "epoch": 0.1739891475582006,
      "grad_norm": 0.8963424247946293,
      "learning_rate": 9.466206135618976e-06,
      "loss": 0.1388,
      "step": 5964
    },
    {
      "epoch": 0.17401832078884416,
      "grad_norm": 0.8520309048234174,
      "learning_rate": 9.46599371982026e-06,
      "loss": 0.1421,
      "step": 5965
    },
    {
      "epoch": 0.17404749401948771,
      "grad_norm": 0.9403170645611213,
      "learning_rate": 9.465781264150218e-06,
      "loss": 0.145,
      "step": 5966
    },
    {
      "epoch": 0.17407666725013127,
      "grad_norm": 0.7752863862495293,
      "learning_rate": 9.465568768610746e-06,
      "loss": 0.1815,
      "step": 5967
    },
    {
      "epoch": 0.17410584048077485,
      "grad_norm": 0.7940347796017446,
      "learning_rate": 9.465356233203744e-06,
      "loss": 0.1653,
      "step": 5968
    },
    {
      "epoch": 0.1741350137114184,
      "grad_norm": 0.9184169748005556,
      "learning_rate": 9.465143657931107e-06,
      "loss": 0.155,
      "step": 5969
    },
    {
      "epoch": 0.17416418694206195,
      "grad_norm": 0.8519137706543816,
      "learning_rate": 9.464931042794732e-06,
      "loss": 0.1706,
      "step": 5970
    },
    {
      "epoch": 0.17419336017270554,
      "grad_norm": 1.0760462073750925,
      "learning_rate": 9.464718387796519e-06,
      "loss": 0.1962,
      "step": 5971
    },
    {
      "epoch": 0.1742225334033491,
      "grad_norm": 0.9306352113149406,
      "learning_rate": 9.464505692938366e-06,
      "loss": 0.1602,
      "step": 5972
    },
    {
      "epoch": 0.17425170663399264,
      "grad_norm": 0.9626143433325164,
      "learning_rate": 9.464292958222173e-06,
      "loss": 0.1799,
      "step": 5973
    },
    {
      "epoch": 0.17428087986463622,
      "grad_norm": 0.8169859941832072,
      "learning_rate": 9.464080183649838e-06,
      "loss": 0.2107,
      "step": 5974
    },
    {
      "epoch": 0.17431005309527978,
      "grad_norm": 0.8663734991313604,
      "learning_rate": 9.46386736922326e-06,
      "loss": 0.1526,
      "step": 5975
    },
    {
      "epoch": 0.17433922632592333,
      "grad_norm": 0.9345566173290707,
      "learning_rate": 9.46365451494434e-06,
      "loss": 0.1616,
      "step": 5976
    },
    {
      "epoch": 0.17436839955656688,
      "grad_norm": 0.768155821852064,
      "learning_rate": 9.463441620814978e-06,
      "loss": 0.1564,
      "step": 5977
    },
    {
      "epoch": 0.17439757278721046,
      "grad_norm": 0.9104263530019562,
      "learning_rate": 9.463228686837073e-06,
      "loss": 0.1618,
      "step": 5978
    },
    {
      "epoch": 0.17442674601785402,
      "grad_norm": 0.7977559775174695,
      "learning_rate": 9.463015713012531e-06,
      "loss": 0.1752,
      "step": 5979
    },
    {
      "epoch": 0.17445591924849757,
      "grad_norm": 0.8462361553626403,
      "learning_rate": 9.462802699343248e-06,
      "loss": 0.1526,
      "step": 5980
    },
    {
      "epoch": 0.17448509247914115,
      "grad_norm": 0.9133740290586382,
      "learning_rate": 9.462589645831128e-06,
      "loss": 0.2182,
      "step": 5981
    },
    {
      "epoch": 0.1745142657097847,
      "grad_norm": 0.7961643263590523,
      "learning_rate": 9.462376552478074e-06,
      "loss": 0.1599,
      "step": 5982
    },
    {
      "epoch": 0.17454343894042826,
      "grad_norm": 0.8578588468885399,
      "learning_rate": 9.462163419285987e-06,
      "loss": 0.1813,
      "step": 5983
    },
    {
      "epoch": 0.1745726121710718,
      "grad_norm": 1.0301119862778012,
      "learning_rate": 9.46195024625677e-06,
      "loss": 0.1771,
      "step": 5984
    },
    {
      "epoch": 0.1746017854017154,
      "grad_norm": 0.9098537210614265,
      "learning_rate": 9.461737033392327e-06,
      "loss": 0.154,
      "step": 5985
    },
    {
      "epoch": 0.17463095863235895,
      "grad_norm": 1.021547156446205,
      "learning_rate": 9.461523780694559e-06,
      "loss": 0.1792,
      "step": 5986
    },
    {
      "epoch": 0.1746601318630025,
      "grad_norm": 1.0632900295887269,
      "learning_rate": 9.461310488165373e-06,
      "loss": 0.1887,
      "step": 5987
    },
    {
      "epoch": 0.17468930509364608,
      "grad_norm": 1.296644032438599,
      "learning_rate": 9.461097155806673e-06,
      "loss": 0.1698,
      "step": 5988
    },
    {
      "epoch": 0.17471847832428963,
      "grad_norm": 0.9021366184339357,
      "learning_rate": 9.46088378362036e-06,
      "loss": 0.1681,
      "step": 5989
    },
    {
      "epoch": 0.1747476515549332,
      "grad_norm": 1.1251424074743566,
      "learning_rate": 9.460670371608345e-06,
      "loss": 0.2058,
      "step": 5990
    },
    {
      "epoch": 0.17477682478557677,
      "grad_norm": 0.9416358799561906,
      "learning_rate": 9.460456919772527e-06,
      "loss": 0.1645,
      "step": 5991
    },
    {
      "epoch": 0.17480599801622032,
      "grad_norm": 1.1801873635148534,
      "learning_rate": 9.460243428114815e-06,
      "loss": 0.1857,
      "step": 5992
    },
    {
      "epoch": 0.17483517124686387,
      "grad_norm": 0.9746959799548469,
      "learning_rate": 9.460029896637115e-06,
      "loss": 0.1506,
      "step": 5993
    },
    {
      "epoch": 0.17486434447750743,
      "grad_norm": 1.036935427850256,
      "learning_rate": 9.459816325341331e-06,
      "loss": 0.1684,
      "step": 5994
    },
    {
      "epoch": 0.174893517708151,
      "grad_norm": 0.7227916773233188,
      "learning_rate": 9.459602714229373e-06,
      "loss": 0.1819,
      "step": 5995
    },
    {
      "epoch": 0.17492269093879456,
      "grad_norm": 0.7485200504083205,
      "learning_rate": 9.459389063303147e-06,
      "loss": 0.1471,
      "step": 5996
    },
    {
      "epoch": 0.17495186416943811,
      "grad_norm": 0.9782731977611967,
      "learning_rate": 9.45917537256456e-06,
      "loss": 0.191,
      "step": 5997
    },
    {
      "epoch": 0.1749810374000817,
      "grad_norm": 0.8880707327511158,
      "learning_rate": 9.458961642015518e-06,
      "loss": 0.1485,
      "step": 5998
    },
    {
      "epoch": 0.17501021063072525,
      "grad_norm": 0.8374546938347205,
      "learning_rate": 9.458747871657931e-06,
      "loss": 0.1749,
      "step": 5999
    },
    {
      "epoch": 0.1750393838613688,
      "grad_norm": 1.3244317301623223,
      "learning_rate": 9.45853406149371e-06,
      "loss": 0.157,
      "step": 6000
    },
    {
      "epoch": 0.17506855709201236,
      "grad_norm": 1.331792773017602,
      "learning_rate": 9.45832021152476e-06,
      "loss": 0.1953,
      "step": 6001
    },
    {
      "epoch": 0.17509773032265594,
      "grad_norm": 0.8670937692733209,
      "learning_rate": 9.458106321752992e-06,
      "loss": 0.1745,
      "step": 6002
    },
    {
      "epoch": 0.1751269035532995,
      "grad_norm": 1.064234831429125,
      "learning_rate": 9.457892392180313e-06,
      "loss": 0.1731,
      "step": 6003
    },
    {
      "epoch": 0.17515607678394304,
      "grad_norm": 0.9256814528488202,
      "learning_rate": 9.457678422808636e-06,
      "loss": 0.1863,
      "step": 6004
    },
    {
      "epoch": 0.17518525001458662,
      "grad_norm": 1.1655334845881768,
      "learning_rate": 9.45746441363987e-06,
      "loss": 0.1429,
      "step": 6005
    },
    {
      "epoch": 0.17521442324523018,
      "grad_norm": 0.9539661582528656,
      "learning_rate": 9.457250364675926e-06,
      "loss": 0.1785,
      "step": 6006
    },
    {
      "epoch": 0.17524359647587373,
      "grad_norm": 0.8417616364845183,
      "learning_rate": 9.457036275918714e-06,
      "loss": 0.1558,
      "step": 6007
    },
    {
      "epoch": 0.1752727697065173,
      "grad_norm": 1.041565807270326,
      "learning_rate": 9.456822147370149e-06,
      "loss": 0.1854,
      "step": 6008
    },
    {
      "epoch": 0.17530194293716087,
      "grad_norm": 0.7819352945132542,
      "learning_rate": 9.456607979032137e-06,
      "loss": 0.1599,
      "step": 6009
    },
    {
      "epoch": 0.17533111616780442,
      "grad_norm": 0.9423586350948675,
      "learning_rate": 9.456393770906594e-06,
      "loss": 0.1703,
      "step": 6010
    },
    {
      "epoch": 0.17536028939844797,
      "grad_norm": 0.8588015736703734,
      "learning_rate": 9.45617952299543e-06,
      "loss": 0.1773,
      "step": 6011
    },
    {
      "epoch": 0.17538946262909155,
      "grad_norm": 0.7507797498074688,
      "learning_rate": 9.455965235300559e-06,
      "loss": 0.1753,
      "step": 6012
    },
    {
      "epoch": 0.1754186358597351,
      "grad_norm": 1.2850760496932208,
      "learning_rate": 9.455750907823895e-06,
      "loss": 0.18,
      "step": 6013
    },
    {
      "epoch": 0.17544780909037866,
      "grad_norm": 0.8619489869393079,
      "learning_rate": 9.45553654056735e-06,
      "loss": 0.1579,
      "step": 6014
    },
    {
      "epoch": 0.17547698232102224,
      "grad_norm": 0.7063174720726706,
      "learning_rate": 9.45532213353284e-06,
      "loss": 0.1606,
      "step": 6015
    },
    {
      "epoch": 0.1755061555516658,
      "grad_norm": 0.7749751225775768,
      "learning_rate": 9.455107686722276e-06,
      "loss": 0.1626,
      "step": 6016
    },
    {
      "epoch": 0.17553532878230935,
      "grad_norm": 0.8938572402846047,
      "learning_rate": 9.454893200137574e-06,
      "loss": 0.1955,
      "step": 6017
    },
    {
      "epoch": 0.17556450201295293,
      "grad_norm": 0.7553586630209235,
      "learning_rate": 9.45467867378065e-06,
      "loss": 0.1532,
      "step": 6018
    },
    {
      "epoch": 0.17559367524359648,
      "grad_norm": 0.8913931513797135,
      "learning_rate": 9.454464107653418e-06,
      "loss": 0.1577,
      "step": 6019
    },
    {
      "epoch": 0.17562284847424003,
      "grad_norm": 0.8958707414497841,
      "learning_rate": 9.454249501757794e-06,
      "loss": 0.1942,
      "step": 6020
    },
    {
      "epoch": 0.1756520217048836,
      "grad_norm": 1.0879702654646908,
      "learning_rate": 9.454034856095693e-06,
      "loss": 0.1688,
      "step": 6021
    },
    {
      "epoch": 0.17568119493552717,
      "grad_norm": 0.7765588735381302,
      "learning_rate": 9.45382017066903e-06,
      "loss": 0.1803,
      "step": 6022
    },
    {
      "epoch": 0.17571036816617072,
      "grad_norm": 0.8144183889369842,
      "learning_rate": 9.453605445479727e-06,
      "loss": 0.1716,
      "step": 6023
    },
    {
      "epoch": 0.17573954139681427,
      "grad_norm": 0.7901507030206922,
      "learning_rate": 9.453390680529696e-06,
      "loss": 0.16,
      "step": 6024
    },
    {
      "epoch": 0.17576871462745786,
      "grad_norm": 0.9581655271792159,
      "learning_rate": 9.453175875820857e-06,
      "loss": 0.181,
      "step": 6025
    },
    {
      "epoch": 0.1757978878581014,
      "grad_norm": 0.7957862568945363,
      "learning_rate": 9.452961031355128e-06,
      "loss": 0.1694,
      "step": 6026
    },
    {
      "epoch": 0.17582706108874496,
      "grad_norm": 0.8253825296684637,
      "learning_rate": 9.452746147134423e-06,
      "loss": 0.1731,
      "step": 6027
    },
    {
      "epoch": 0.17585623431938852,
      "grad_norm": 0.8902491986234154,
      "learning_rate": 9.452531223160665e-06,
      "loss": 0.148,
      "step": 6028
    },
    {
      "epoch": 0.1758854075500321,
      "grad_norm": 0.860239722845713,
      "learning_rate": 9.452316259435771e-06,
      "loss": 0.1761,
      "step": 6029
    },
    {
      "epoch": 0.17591458078067565,
      "grad_norm": 1.2344045246556863,
      "learning_rate": 9.45210125596166e-06,
      "loss": 0.1718,
      "step": 6030
    },
    {
      "epoch": 0.1759437540113192,
      "grad_norm": 0.9847896196009573,
      "learning_rate": 9.451886212740253e-06,
      "loss": 0.1543,
      "step": 6031
    },
    {
      "epoch": 0.17597292724196278,
      "grad_norm": 0.8664705599311466,
      "learning_rate": 9.45167112977347e-06,
      "loss": 0.1601,
      "step": 6032
    },
    {
      "epoch": 0.17600210047260634,
      "grad_norm": 1.0049382101091084,
      "learning_rate": 9.451456007063227e-06,
      "loss": 0.1798,
      "step": 6033
    },
    {
      "epoch": 0.1760312737032499,
      "grad_norm": 0.8925356302001541,
      "learning_rate": 9.451240844611447e-06,
      "loss": 0.167,
      "step": 6034
    },
    {
      "epoch": 0.17606044693389347,
      "grad_norm": 0.7737694120149674,
      "learning_rate": 9.451025642420053e-06,
      "loss": 0.1752,
      "step": 6035
    },
    {
      "epoch": 0.17608962016453703,
      "grad_norm": 0.997220088503365,
      "learning_rate": 9.450810400490964e-06,
      "loss": 0.1857,
      "step": 6036
    },
    {
      "epoch": 0.17611879339518058,
      "grad_norm": 0.5647631873998366,
      "learning_rate": 9.450595118826102e-06,
      "loss": 0.1308,
      "step": 6037
    },
    {
      "epoch": 0.17614796662582413,
      "grad_norm": 0.8906089630960201,
      "learning_rate": 9.450379797427389e-06,
      "loss": 0.1719,
      "step": 6038
    },
    {
      "epoch": 0.1761771398564677,
      "grad_norm": 0.8681627581345246,
      "learning_rate": 9.450164436296749e-06,
      "loss": 0.1563,
      "step": 6039
    },
    {
      "epoch": 0.17620631308711127,
      "grad_norm": 1.049205464938567,
      "learning_rate": 9.449949035436103e-06,
      "loss": 0.1704,
      "step": 6040
    },
    {
      "epoch": 0.17623548631775482,
      "grad_norm": 0.8109953914533022,
      "learning_rate": 9.449733594847372e-06,
      "loss": 0.1629,
      "step": 6041
    },
    {
      "epoch": 0.1762646595483984,
      "grad_norm": 0.8814713554775707,
      "learning_rate": 9.449518114532484e-06,
      "loss": 0.1474,
      "step": 6042
    },
    {
      "epoch": 0.17629383277904195,
      "grad_norm": 0.8461899653006055,
      "learning_rate": 9.449302594493359e-06,
      "loss": 0.1661,
      "step": 6043
    },
    {
      "epoch": 0.1763230060096855,
      "grad_norm": 0.8465339571038941,
      "learning_rate": 9.449087034731924e-06,
      "loss": 0.166,
      "step": 6044
    },
    {
      "epoch": 0.1763521792403291,
      "grad_norm": 0.8806843254973223,
      "learning_rate": 9.448871435250102e-06,
      "loss": 0.1616,
      "step": 6045
    },
    {
      "epoch": 0.17638135247097264,
      "grad_norm": 0.89434729703085,
      "learning_rate": 9.448655796049817e-06,
      "loss": 0.1627,
      "step": 6046
    },
    {
      "epoch": 0.1764105257016162,
      "grad_norm": 0.6465515443049981,
      "learning_rate": 9.448440117132995e-06,
      "loss": 0.1562,
      "step": 6047
    },
    {
      "epoch": 0.17643969893225975,
      "grad_norm": 0.9649083594734409,
      "learning_rate": 9.448224398501562e-06,
      "loss": 0.2078,
      "step": 6048
    },
    {
      "epoch": 0.17646887216290333,
      "grad_norm": 1.0466966296126328,
      "learning_rate": 9.448008640157444e-06,
      "loss": 0.1997,
      "step": 6049
    },
    {
      "epoch": 0.17649804539354688,
      "grad_norm": 0.8800708082618894,
      "learning_rate": 9.447792842102566e-06,
      "loss": 0.1664,
      "step": 6050
    },
    {
      "epoch": 0.17652721862419044,
      "grad_norm": 1.063802355330643,
      "learning_rate": 9.447577004338855e-06,
      "loss": 0.1781,
      "step": 6051
    },
    {
      "epoch": 0.17655639185483402,
      "grad_norm": 0.9923137827728826,
      "learning_rate": 9.447361126868238e-06,
      "loss": 0.1667,
      "step": 6052
    },
    {
      "epoch": 0.17658556508547757,
      "grad_norm": 0.7367424048833521,
      "learning_rate": 9.447145209692643e-06,
      "loss": 0.1577,
      "step": 6053
    },
    {
      "epoch": 0.17661473831612112,
      "grad_norm": 0.8162899202253272,
      "learning_rate": 9.446929252813997e-06,
      "loss": 0.1612,
      "step": 6054
    },
    {
      "epoch": 0.17664391154676468,
      "grad_norm": 0.7287134917054725,
      "learning_rate": 9.446713256234229e-06,
      "loss": 0.1548,
      "step": 6055
    },
    {
      "epoch": 0.17667308477740826,
      "grad_norm": 0.9588223252770661,
      "learning_rate": 9.446497219955266e-06,
      "loss": 0.1878,
      "step": 6056
    },
    {
      "epoch": 0.1767022580080518,
      "grad_norm": 0.7985875635412174,
      "learning_rate": 9.446281143979038e-06,
      "loss": 0.1373,
      "step": 6057
    },
    {
      "epoch": 0.17673143123869536,
      "grad_norm": 0.8723456676721568,
      "learning_rate": 9.446065028307472e-06,
      "loss": 0.1828,
      "step": 6058
    },
    {
      "epoch": 0.17676060446933894,
      "grad_norm": 0.8644260320732804,
      "learning_rate": 9.4458488729425e-06,
      "loss": 0.1608,
      "step": 6059
    },
    {
      "epoch": 0.1767897776999825,
      "grad_norm": 0.8467008843915851,
      "learning_rate": 9.44563267788605e-06,
      "loss": 0.1579,
      "step": 6060
    },
    {
      "epoch": 0.17681895093062605,
      "grad_norm": 0.8381171799356435,
      "learning_rate": 9.445416443140052e-06,
      "loss": 0.176,
      "step": 6061
    },
    {
      "epoch": 0.17684812416126963,
      "grad_norm": 0.6937196730004178,
      "learning_rate": 9.445200168706438e-06,
      "loss": 0.1741,
      "step": 6062
    },
    {
      "epoch": 0.17687729739191319,
      "grad_norm": 1.3537163352995736,
      "learning_rate": 9.444983854587138e-06,
      "loss": 0.1656,
      "step": 6063
    },
    {
      "epoch": 0.17690647062255674,
      "grad_norm": 0.883871849743838,
      "learning_rate": 9.444767500784084e-06,
      "loss": 0.1757,
      "step": 6064
    },
    {
      "epoch": 0.1769356438532003,
      "grad_norm": 0.7363846114493716,
      "learning_rate": 9.444551107299205e-06,
      "loss": 0.1602,
      "step": 6065
    },
    {
      "epoch": 0.17696481708384387,
      "grad_norm": 0.8035527678014244,
      "learning_rate": 9.444334674134437e-06,
      "loss": 0.1573,
      "step": 6066
    },
    {
      "epoch": 0.17699399031448743,
      "grad_norm": 0.9608067326938806,
      "learning_rate": 9.444118201291707e-06,
      "loss": 0.1568,
      "step": 6067
    },
    {
      "epoch": 0.17702316354513098,
      "grad_norm": 0.957312868090363,
      "learning_rate": 9.443901688772953e-06,
      "loss": 0.1816,
      "step": 6068
    },
    {
      "epoch": 0.17705233677577456,
      "grad_norm": 0.8634941496582655,
      "learning_rate": 9.443685136580105e-06,
      "loss": 0.1401,
      "step": 6069
    },
    {
      "epoch": 0.1770815100064181,
      "grad_norm": 0.8787652676909244,
      "learning_rate": 9.443468544715097e-06,
      "loss": 0.1834,
      "step": 6070
    },
    {
      "epoch": 0.17711068323706167,
      "grad_norm": 0.8212803919043005,
      "learning_rate": 9.443251913179862e-06,
      "loss": 0.1573,
      "step": 6071
    },
    {
      "epoch": 0.17713985646770522,
      "grad_norm": 0.73746500851273,
      "learning_rate": 9.443035241976335e-06,
      "loss": 0.1568,
      "step": 6072
    },
    {
      "epoch": 0.1771690296983488,
      "grad_norm": 0.7926757313992104,
      "learning_rate": 9.442818531106451e-06,
      "loss": 0.1791,
      "step": 6073
    },
    {
      "epoch": 0.17719820292899235,
      "grad_norm": 0.9429842205284127,
      "learning_rate": 9.442601780572141e-06,
      "loss": 0.1981,
      "step": 6074
    },
    {
      "epoch": 0.1772273761596359,
      "grad_norm": 0.833712576794287,
      "learning_rate": 9.442384990375344e-06,
      "loss": 0.1437,
      "step": 6075
    },
    {
      "epoch": 0.1772565493902795,
      "grad_norm": 0.7937003278803161,
      "learning_rate": 9.442168160517995e-06,
      "loss": 0.1653,
      "step": 6076
    },
    {
      "epoch": 0.17728572262092304,
      "grad_norm": 0.9252279333681923,
      "learning_rate": 9.44195129100203e-06,
      "loss": 0.1773,
      "step": 6077
    },
    {
      "epoch": 0.1773148958515666,
      "grad_norm": 1.01782313691626,
      "learning_rate": 9.441734381829382e-06,
      "loss": 0.1501,
      "step": 6078
    },
    {
      "epoch": 0.17734406908221018,
      "grad_norm": 0.9169277443816802,
      "learning_rate": 9.441517433001992e-06,
      "loss": 0.1502,
      "step": 6079
    },
    {
      "epoch": 0.17737324231285373,
      "grad_norm": 0.6476156203642134,
      "learning_rate": 9.441300444521792e-06,
      "loss": 0.1448,
      "step": 6080
    },
    {
      "epoch": 0.17740241554349728,
      "grad_norm": 0.7252667035537854,
      "learning_rate": 9.441083416390725e-06,
      "loss": 0.162,
      "step": 6081
    },
    {
      "epoch": 0.17743158877414084,
      "grad_norm": 0.9443395614628517,
      "learning_rate": 9.440866348610723e-06,
      "loss": 0.1607,
      "step": 6082
    },
    {
      "epoch": 0.17746076200478442,
      "grad_norm": 0.7600945171755237,
      "learning_rate": 9.440649241183727e-06,
      "loss": 0.1833,
      "step": 6083
    },
    {
      "epoch": 0.17748993523542797,
      "grad_norm": 0.931479269840474,
      "learning_rate": 9.440432094111675e-06,
      "loss": 0.1689,
      "step": 6084
    },
    {
      "epoch": 0.17751910846607152,
      "grad_norm": 0.7627085732521647,
      "learning_rate": 9.440214907396506e-06,
      "loss": 0.1665,
      "step": 6085
    },
    {
      "epoch": 0.1775482816967151,
      "grad_norm": 0.7402417189600208,
      "learning_rate": 9.439997681040156e-06,
      "loss": 0.1539,
      "step": 6086
    },
    {
      "epoch": 0.17757745492735866,
      "grad_norm": 0.7909033891310475,
      "learning_rate": 9.439780415044568e-06,
      "loss": 0.1491,
      "step": 6087
    },
    {
      "epoch": 0.1776066281580022,
      "grad_norm": 0.9789034030402775,
      "learning_rate": 9.439563109411682e-06,
      "loss": 0.1773,
      "step": 6088
    },
    {
      "epoch": 0.1776358013886458,
      "grad_norm": 1.0910629320412395,
      "learning_rate": 9.439345764143434e-06,
      "loss": 0.1579,
      "step": 6089
    },
    {
      "epoch": 0.17766497461928935,
      "grad_norm": 0.8578377522873836,
      "learning_rate": 9.439128379241767e-06,
      "loss": 0.1447,
      "step": 6090
    },
    {
      "epoch": 0.1776941478499329,
      "grad_norm": 1.120834663212433,
      "learning_rate": 9.438910954708622e-06,
      "loss": 0.1731,
      "step": 6091
    },
    {
      "epoch": 0.17772332108057645,
      "grad_norm": 0.7903505977482612,
      "learning_rate": 9.43869349054594e-06,
      "loss": 0.1675,
      "step": 6092
    },
    {
      "epoch": 0.17775249431122003,
      "grad_norm": 0.8174964146133391,
      "learning_rate": 9.438475986755661e-06,
      "loss": 0.1647,
      "step": 6093
    },
    {
      "epoch": 0.17778166754186359,
      "grad_norm": 0.6848338727131013,
      "learning_rate": 9.438258443339729e-06,
      "loss": 0.1748,
      "step": 6094
    },
    {
      "epoch": 0.17781084077250714,
      "grad_norm": 0.8040857348148285,
      "learning_rate": 9.438040860300085e-06,
      "loss": 0.18,
      "step": 6095
    },
    {
      "epoch": 0.17784001400315072,
      "grad_norm": 0.9080380906748584,
      "learning_rate": 9.437823237638672e-06,
      "loss": 0.1864,
      "step": 6096
    },
    {
      "epoch": 0.17786918723379427,
      "grad_norm": 0.7563824475411975,
      "learning_rate": 9.43760557535743e-06,
      "loss": 0.1682,
      "step": 6097
    },
    {
      "epoch": 0.17789836046443783,
      "grad_norm": 0.7708359204805659,
      "learning_rate": 9.437387873458308e-06,
      "loss": 0.1731,
      "step": 6098
    },
    {
      "epoch": 0.17792753369508138,
      "grad_norm": 0.8988494689525325,
      "learning_rate": 9.437170131943245e-06,
      "loss": 0.1842,
      "step": 6099
    },
    {
      "epoch": 0.17795670692572496,
      "grad_norm": 0.881316888063764,
      "learning_rate": 9.436952350814187e-06,
      "loss": 0.1932,
      "step": 6100
    },
    {
      "epoch": 0.17798588015636851,
      "grad_norm": 0.8412463896369021,
      "learning_rate": 9.436734530073078e-06,
      "loss": 0.1842,
      "step": 6101
    },
    {
      "epoch": 0.17801505338701207,
      "grad_norm": 0.7500046491642948,
      "learning_rate": 9.43651666972186e-06,
      "loss": 0.139,
      "step": 6102
    },
    {
      "epoch": 0.17804422661765565,
      "grad_norm": 1.1570837505681435,
      "learning_rate": 9.436298769762481e-06,
      "loss": 0.1901,
      "step": 6103
    },
    {
      "epoch": 0.1780733998482992,
      "grad_norm": 0.7852084294455182,
      "learning_rate": 9.436080830196888e-06,
      "loss": 0.1489,
      "step": 6104
    },
    {
      "epoch": 0.17810257307894276,
      "grad_norm": 0.7920356544226356,
      "learning_rate": 9.435862851027023e-06,
      "loss": 0.1664,
      "step": 6105
    },
    {
      "epoch": 0.17813174630958634,
      "grad_norm": 0.8037907284038206,
      "learning_rate": 9.435644832254831e-06,
      "loss": 0.1818,
      "step": 6106
    },
    {
      "epoch": 0.1781609195402299,
      "grad_norm": 0.9146267883701465,
      "learning_rate": 9.435426773882264e-06,
      "loss": 0.1751,
      "step": 6107
    },
    {
      "epoch": 0.17819009277087344,
      "grad_norm": 0.8506247433730693,
      "learning_rate": 9.435208675911263e-06,
      "loss": 0.1675,
      "step": 6108
    },
    {
      "epoch": 0.178219266001517,
      "grad_norm": 1.2478055350424464,
      "learning_rate": 9.43499053834378e-06,
      "loss": 0.1766,
      "step": 6109
    },
    {
      "epoch": 0.17824843923216058,
      "grad_norm": 0.7861486485923962,
      "learning_rate": 9.434772361181759e-06,
      "loss": 0.1632,
      "step": 6110
    },
    {
      "epoch": 0.17827761246280413,
      "grad_norm": 0.9131489960481203,
      "learning_rate": 9.434554144427148e-06,
      "loss": 0.1731,
      "step": 6111
    },
    {
      "epoch": 0.17830678569344768,
      "grad_norm": 1.1593414358106822,
      "learning_rate": 9.434335888081898e-06,
      "loss": 0.1839,
      "step": 6112
    },
    {
      "epoch": 0.17833595892409126,
      "grad_norm": 0.847344521852335,
      "learning_rate": 9.434117592147955e-06,
      "loss": 0.1533,
      "step": 6113
    },
    {
      "epoch": 0.17836513215473482,
      "grad_norm": 1.0958624282003797,
      "learning_rate": 9.43389925662727e-06,
      "loss": 0.1768,
      "step": 6114
    },
    {
      "epoch": 0.17839430538537837,
      "grad_norm": 1.2150248763347455,
      "learning_rate": 9.433680881521789e-06,
      "loss": 0.1707,
      "step": 6115
    },
    {
      "epoch": 0.17842347861602195,
      "grad_norm": 0.898320883463539,
      "learning_rate": 9.433462466833462e-06,
      "loss": 0.169,
      "step": 6116
    },
    {
      "epoch": 0.1784526518466655,
      "grad_norm": 1.0818057778123038,
      "learning_rate": 9.433244012564245e-06,
      "loss": 0.1774,
      "step": 6117
    },
    {
      "epoch": 0.17848182507730906,
      "grad_norm": 1.093611499526309,
      "learning_rate": 9.433025518716081e-06,
      "loss": 0.1719,
      "step": 6118
    },
    {
      "epoch": 0.1785109983079526,
      "grad_norm": 1.04629029446073,
      "learning_rate": 9.432806985290924e-06,
      "loss": 0.1773,
      "step": 6119
    },
    {
      "epoch": 0.1785401715385962,
      "grad_norm": 0.9329986990371699,
      "learning_rate": 9.432588412290725e-06,
      "loss": 0.1721,
      "step": 6120
    },
    {
      "epoch": 0.17856934476923975,
      "grad_norm": 0.8700545346597031,
      "learning_rate": 9.432369799717434e-06,
      "loss": 0.1983,
      "step": 6121
    },
    {
      "epoch": 0.1785985179998833,
      "grad_norm": 1.0542415237391032,
      "learning_rate": 9.432151147573003e-06,
      "loss": 0.1828,
      "step": 6122
    },
    {
      "epoch": 0.17862769123052688,
      "grad_norm": 0.838955245856994,
      "learning_rate": 9.431932455859384e-06,
      "loss": 0.1351,
      "step": 6123
    },
    {
      "epoch": 0.17865686446117043,
      "grad_norm": 1.3967139817878313,
      "learning_rate": 9.431713724578531e-06,
      "loss": 0.2086,
      "step": 6124
    },
    {
      "epoch": 0.178686037691814,
      "grad_norm": 0.9763115375116108,
      "learning_rate": 9.431494953732396e-06,
      "loss": 0.1567,
      "step": 6125
    },
    {
      "epoch": 0.17871521092245754,
      "grad_norm": 0.849450274946255,
      "learning_rate": 9.431276143322933e-06,
      "loss": 0.1768,
      "step": 6126
    },
    {
      "epoch": 0.17874438415310112,
      "grad_norm": 1.176015247895556,
      "learning_rate": 9.431057293352093e-06,
      "loss": 0.1791,
      "step": 6127
    },
    {
      "epoch": 0.17877355738374467,
      "grad_norm": 1.2749422903424745,
      "learning_rate": 9.430838403821831e-06,
      "loss": 0.2019,
      "step": 6128
    },
    {
      "epoch": 0.17880273061438823,
      "grad_norm": 0.9438674709566486,
      "learning_rate": 9.430619474734102e-06,
      "loss": 0.1769,
      "step": 6129
    },
    {
      "epoch": 0.1788319038450318,
      "grad_norm": 0.8042485643091488,
      "learning_rate": 9.43040050609086e-06,
      "loss": 0.2025,
      "step": 6130
    },
    {
      "epoch": 0.17886107707567536,
      "grad_norm": 1.0309599888690277,
      "learning_rate": 9.43018149789406e-06,
      "loss": 0.1685,
      "step": 6131
    },
    {
      "epoch": 0.17889025030631892,
      "grad_norm": 0.7531252493687638,
      "learning_rate": 9.429962450145657e-06,
      "loss": 0.1466,
      "step": 6132
    },
    {
      "epoch": 0.1789194235369625,
      "grad_norm": 0.8056546655740853,
      "learning_rate": 9.429743362847608e-06,
      "loss": 0.1838,
      "step": 6133
    },
    {
      "epoch": 0.17894859676760605,
      "grad_norm": 0.9787697867974474,
      "learning_rate": 9.429524236001866e-06,
      "loss": 0.2005,
      "step": 6134
    },
    {
      "epoch": 0.1789777699982496,
      "grad_norm": 0.7721888307938807,
      "learning_rate": 9.429305069610389e-06,
      "loss": 0.1482,
      "step": 6135
    },
    {
      "epoch": 0.17900694322889316,
      "grad_norm": 0.7173122780538939,
      "learning_rate": 9.429085863675135e-06,
      "loss": 0.1672,
      "step": 6136
    },
    {
      "epoch": 0.17903611645953674,
      "grad_norm": 0.7886093796379801,
      "learning_rate": 9.42886661819806e-06,
      "loss": 0.17,
      "step": 6137
    },
    {
      "epoch": 0.1790652896901803,
      "grad_norm": 0.6716444684628302,
      "learning_rate": 9.42864733318112e-06,
      "loss": 0.148,
      "step": 6138
    },
    {
      "epoch": 0.17909446292082384,
      "grad_norm": 0.7653181569944403,
      "learning_rate": 9.428428008626274e-06,
      "loss": 0.1501,
      "step": 6139
    },
    {
      "epoch": 0.17912363615146742,
      "grad_norm": 0.7644070134856724,
      "learning_rate": 9.42820864453548e-06,
      "loss": 0.1726,
      "step": 6140
    },
    {
      "epoch": 0.17915280938211098,
      "grad_norm": 0.9368679198269921,
      "learning_rate": 9.427989240910695e-06,
      "loss": 0.1565,
      "step": 6141
    },
    {
      "epoch": 0.17918198261275453,
      "grad_norm": 0.6316129360047353,
      "learning_rate": 9.42776979775388e-06,
      "loss": 0.1487,
      "step": 6142
    },
    {
      "epoch": 0.17921115584339808,
      "grad_norm": 0.8383230480926974,
      "learning_rate": 9.427550315066994e-06,
      "loss": 0.1758,
      "step": 6143
    },
    {
      "epoch": 0.17924032907404167,
      "grad_norm": 0.9890782465178287,
      "learning_rate": 9.427330792851996e-06,
      "loss": 0.1881,
      "step": 6144
    },
    {
      "epoch": 0.17926950230468522,
      "grad_norm": 0.7080945023913502,
      "learning_rate": 9.427111231110844e-06,
      "loss": 0.1745,
      "step": 6145
    },
    {
      "epoch": 0.17929867553532877,
      "grad_norm": 0.7666605079265063,
      "learning_rate": 9.4268916298455e-06,
      "loss": 0.1557,
      "step": 6146
    },
    {
      "epoch": 0.17932784876597235,
      "grad_norm": 0.8405628010532938,
      "learning_rate": 9.426671989057926e-06,
      "loss": 0.1538,
      "step": 6147
    },
    {
      "epoch": 0.1793570219966159,
      "grad_norm": 0.8001625757953447,
      "learning_rate": 9.42645230875008e-06,
      "loss": 0.1742,
      "step": 6148
    },
    {
      "epoch": 0.17938619522725946,
      "grad_norm": 0.9332369139694799,
      "learning_rate": 9.426232588923925e-06,
      "loss": 0.1515,
      "step": 6149
    },
    {
      "epoch": 0.17941536845790304,
      "grad_norm": 0.8351790934983703,
      "learning_rate": 9.426012829581421e-06,
      "loss": 0.1644,
      "step": 6150
    },
    {
      "epoch": 0.1794445416885466,
      "grad_norm": 0.7994829000612014,
      "learning_rate": 9.42579303072453e-06,
      "loss": 0.182,
      "step": 6151
    },
    {
      "epoch": 0.17947371491919015,
      "grad_norm": 0.959677623640591,
      "learning_rate": 9.425573192355219e-06,
      "loss": 0.1886,
      "step": 6152
    },
    {
      "epoch": 0.1795028881498337,
      "grad_norm": 0.9195168526872746,
      "learning_rate": 9.425353314475445e-06,
      "loss": 0.1647,
      "step": 6153
    },
    {
      "epoch": 0.17953206138047728,
      "grad_norm": 0.711177053303128,
      "learning_rate": 9.425133397087171e-06,
      "loss": 0.158,
      "step": 6154
    },
    {
      "epoch": 0.17956123461112083,
      "grad_norm": 0.6299993751647488,
      "learning_rate": 9.424913440192366e-06,
      "loss": 0.155,
      "step": 6155
    },
    {
      "epoch": 0.1795904078417644,
      "grad_norm": 0.7729505405505678,
      "learning_rate": 9.424693443792988e-06,
      "loss": 0.1545,
      "step": 6156
    },
    {
      "epoch": 0.17961958107240797,
      "grad_norm": 1.0723102235306599,
      "learning_rate": 9.424473407891003e-06,
      "loss": 0.1654,
      "step": 6157
    },
    {
      "epoch": 0.17964875430305152,
      "grad_norm": 0.7710784361163612,
      "learning_rate": 9.424253332488377e-06,
      "loss": 0.1494,
      "step": 6158
    },
    {
      "epoch": 0.17967792753369508,
      "grad_norm": 0.8657758464748335,
      "learning_rate": 9.424033217587072e-06,
      "loss": 0.1604,
      "step": 6159
    },
    {
      "epoch": 0.17970710076433866,
      "grad_norm": 0.9032855261951235,
      "learning_rate": 9.423813063189056e-06,
      "loss": 0.1773,
      "step": 6160
    },
    {
      "epoch": 0.1797362739949822,
      "grad_norm": 0.8345559494797617,
      "learning_rate": 9.423592869296292e-06,
      "loss": 0.1609,
      "step": 6161
    },
    {
      "epoch": 0.17976544722562576,
      "grad_norm": 0.8849683920236567,
      "learning_rate": 9.423372635910748e-06,
      "loss": 0.1889,
      "step": 6162
    },
    {
      "epoch": 0.17979462045626932,
      "grad_norm": 0.9609974502826502,
      "learning_rate": 9.42315236303439e-06,
      "loss": 0.174,
      "step": 6163
    },
    {
      "epoch": 0.1798237936869129,
      "grad_norm": 0.9045587451641919,
      "learning_rate": 9.42293205066918e-06,
      "loss": 0.1553,
      "step": 6164
    },
    {
      "epoch": 0.17985296691755645,
      "grad_norm": 0.9235731750614937,
      "learning_rate": 9.422711698817091e-06,
      "loss": 0.1562,
      "step": 6165
    },
    {
      "epoch": 0.1798821401482,
      "grad_norm": 0.9983722343381372,
      "learning_rate": 9.422491307480085e-06,
      "loss": 0.1538,
      "step": 6166
    },
    {
      "epoch": 0.17991131337884358,
      "grad_norm": 1.040014406683864,
      "learning_rate": 9.422270876660136e-06,
      "loss": 0.1516,
      "step": 6167
    },
    {
      "epoch": 0.17994048660948714,
      "grad_norm": 0.907915871955659,
      "learning_rate": 9.422050406359207e-06,
      "loss": 0.1742,
      "step": 6168
    },
    {
      "epoch": 0.1799696598401307,
      "grad_norm": 0.8433086264631569,
      "learning_rate": 9.421829896579267e-06,
      "loss": 0.1555,
      "step": 6169
    },
    {
      "epoch": 0.17999883307077424,
      "grad_norm": 1.1187874230437564,
      "learning_rate": 9.421609347322285e-06,
      "loss": 0.1832,
      "step": 6170
    },
    {
      "epoch": 0.18002800630141783,
      "grad_norm": 0.8590340789382764,
      "learning_rate": 9.42138875859023e-06,
      "loss": 0.1468,
      "step": 6171
    },
    {
      "epoch": 0.18005717953206138,
      "grad_norm": 0.7310620734230633,
      "learning_rate": 9.421168130385074e-06,
      "loss": 0.1675,
      "step": 6172
    },
    {
      "epoch": 0.18008635276270493,
      "grad_norm": 0.9963718411069702,
      "learning_rate": 9.420947462708783e-06,
      "loss": 0.171,
      "step": 6173
    },
    {
      "epoch": 0.1801155259933485,
      "grad_norm": 0.9020944829065396,
      "learning_rate": 9.420726755563327e-06,
      "loss": 0.1751,
      "step": 6174
    },
    {
      "epoch": 0.18014469922399207,
      "grad_norm": 0.7587580020082834,
      "learning_rate": 9.42050600895068e-06,
      "loss": 0.1562,
      "step": 6175
    },
    {
      "epoch": 0.18017387245463562,
      "grad_norm": 0.922642096759342,
      "learning_rate": 9.42028522287281e-06,
      "loss": 0.1786,
      "step": 6176
    },
    {
      "epoch": 0.1802030456852792,
      "grad_norm": 0.8950283400370009,
      "learning_rate": 9.420064397331688e-06,
      "loss": 0.1998,
      "step": 6177
    },
    {
      "epoch": 0.18023221891592275,
      "grad_norm": 0.7929709305026523,
      "learning_rate": 9.419843532329287e-06,
      "loss": 0.147,
      "step": 6178
    },
    {
      "epoch": 0.1802613921465663,
      "grad_norm": 1.2104020486551266,
      "learning_rate": 9.419622627867577e-06,
      "loss": 0.1641,
      "step": 6179
    },
    {
      "epoch": 0.18029056537720986,
      "grad_norm": 0.8553677422338132,
      "learning_rate": 9.419401683948533e-06,
      "loss": 0.1713,
      "step": 6180
    },
    {
      "epoch": 0.18031973860785344,
      "grad_norm": 0.8737367582203313,
      "learning_rate": 9.419180700574123e-06,
      "loss": 0.186,
      "step": 6181
    },
    {
      "epoch": 0.180348911838497,
      "grad_norm": 0.8067083291331099,
      "learning_rate": 9.418959677746325e-06,
      "loss": 0.1863,
      "step": 6182
    },
    {
      "epoch": 0.18037808506914055,
      "grad_norm": 0.7832841294836334,
      "learning_rate": 9.418738615467108e-06,
      "loss": 0.1524,
      "step": 6183
    },
    {
      "epoch": 0.18040725829978413,
      "grad_norm": 0.9379152920288453,
      "learning_rate": 9.41851751373845e-06,
      "loss": 0.1591,
      "step": 6184
    },
    {
      "epoch": 0.18043643153042768,
      "grad_norm": 0.8878302057374515,
      "learning_rate": 9.41829637256232e-06,
      "loss": 0.173,
      "step": 6185
    },
    {
      "epoch": 0.18046560476107124,
      "grad_norm": 0.9087940608673092,
      "learning_rate": 9.418075191940697e-06,
      "loss": 0.1816,
      "step": 6186
    },
    {
      "epoch": 0.1804947779917148,
      "grad_norm": 0.7582838323836925,
      "learning_rate": 9.417853971875553e-06,
      "loss": 0.1604,
      "step": 6187
    },
    {
      "epoch": 0.18052395122235837,
      "grad_norm": 1.3421748469345602,
      "learning_rate": 9.417632712368861e-06,
      "loss": 0.1652,
      "step": 6188
    },
    {
      "epoch": 0.18055312445300192,
      "grad_norm": 0.818452488353429,
      "learning_rate": 9.417411413422601e-06,
      "loss": 0.1735,
      "step": 6189
    },
    {
      "epoch": 0.18058229768364548,
      "grad_norm": 0.8319082879299589,
      "learning_rate": 9.417190075038745e-06,
      "loss": 0.1565,
      "step": 6190
    },
    {
      "epoch": 0.18061147091428906,
      "grad_norm": 0.9572358486376007,
      "learning_rate": 9.416968697219272e-06,
      "loss": 0.142,
      "step": 6191
    },
    {
      "epoch": 0.1806406441449326,
      "grad_norm": 1.0652700399568353,
      "learning_rate": 9.416747279966155e-06,
      "loss": 0.181,
      "step": 6192
    },
    {
      "epoch": 0.18066981737557616,
      "grad_norm": 0.8093986188290544,
      "learning_rate": 9.416525823281375e-06,
      "loss": 0.1587,
      "step": 6193
    },
    {
      "epoch": 0.18069899060621974,
      "grad_norm": 0.8793735767853701,
      "learning_rate": 9.416304327166905e-06,
      "loss": 0.1844,
      "step": 6194
    },
    {
      "epoch": 0.1807281638368633,
      "grad_norm": 0.9718214223440156,
      "learning_rate": 9.416082791624726e-06,
      "loss": 0.1934,
      "step": 6195
    },
    {
      "epoch": 0.18075733706750685,
      "grad_norm": 0.756008513886222,
      "learning_rate": 9.415861216656812e-06,
      "loss": 0.1361,
      "step": 6196
    },
    {
      "epoch": 0.1807865102981504,
      "grad_norm": 0.9071404958173924,
      "learning_rate": 9.415639602265144e-06,
      "loss": 0.1675,
      "step": 6197
    },
    {
      "epoch": 0.18081568352879399,
      "grad_norm": 0.8786545741210041,
      "learning_rate": 9.4154179484517e-06,
      "loss": 0.1699,
      "step": 6198
    },
    {
      "epoch": 0.18084485675943754,
      "grad_norm": 0.9263575442534925,
      "learning_rate": 9.415196255218457e-06,
      "loss": 0.1499,
      "step": 6199
    },
    {
      "epoch": 0.1808740299900811,
      "grad_norm": 0.7796762701779799,
      "learning_rate": 9.414974522567398e-06,
      "loss": 0.1437,
      "step": 6200
    },
    {
      "epoch": 0.18090320322072467,
      "grad_norm": 0.866025414208799,
      "learning_rate": 9.414752750500499e-06,
      "loss": 0.1977,
      "step": 6201
    },
    {
      "epoch": 0.18093237645136823,
      "grad_norm": 1.186550089311323,
      "learning_rate": 9.414530939019741e-06,
      "loss": 0.1613,
      "step": 6202
    },
    {
      "epoch": 0.18096154968201178,
      "grad_norm": 1.0800366163797837,
      "learning_rate": 9.414309088127105e-06,
      "loss": 0.1958,
      "step": 6203
    },
    {
      "epoch": 0.18099072291265536,
      "grad_norm": 0.9683499345321749,
      "learning_rate": 9.414087197824573e-06,
      "loss": 0.1769,
      "step": 6204
    },
    {
      "epoch": 0.18101989614329891,
      "grad_norm": 1.0385735295901457,
      "learning_rate": 9.413865268114123e-06,
      "loss": 0.165,
      "step": 6205
    },
    {
      "epoch": 0.18104906937394247,
      "grad_norm": 0.929342593065302,
      "learning_rate": 9.413643298997736e-06,
      "loss": 0.1531,
      "step": 6206
    },
    {
      "epoch": 0.18107824260458602,
      "grad_norm": 0.7853337979334546,
      "learning_rate": 9.413421290477397e-06,
      "loss": 0.1556,
      "step": 6207
    },
    {
      "epoch": 0.1811074158352296,
      "grad_norm": 0.8194819974039269,
      "learning_rate": 9.413199242555086e-06,
      "loss": 0.1745,
      "step": 6208
    },
    {
      "epoch": 0.18113658906587315,
      "grad_norm": 0.7724744253414053,
      "learning_rate": 9.412977155232787e-06,
      "loss": 0.154,
      "step": 6209
    },
    {
      "epoch": 0.1811657622965167,
      "grad_norm": 0.8542435209665531,
      "learning_rate": 9.412755028512478e-06,
      "loss": 0.174,
      "step": 6210
    },
    {
      "epoch": 0.1811949355271603,
      "grad_norm": 1.022489064681061,
      "learning_rate": 9.412532862396149e-06,
      "loss": 0.1657,
      "step": 6211
    },
    {
      "epoch": 0.18122410875780384,
      "grad_norm": 0.7578179979500707,
      "learning_rate": 9.412310656885779e-06,
      "loss": 0.2094,
      "step": 6212
    },
    {
      "epoch": 0.1812532819884474,
      "grad_norm": 0.8060204496253309,
      "learning_rate": 9.412088411983352e-06,
      "loss": 0.1478,
      "step": 6213
    },
    {
      "epoch": 0.18128245521909095,
      "grad_norm": 0.9909780923516098,
      "learning_rate": 9.411866127690855e-06,
      "loss": 0.1604,
      "step": 6214
    },
    {
      "epoch": 0.18131162844973453,
      "grad_norm": 0.7047233474518533,
      "learning_rate": 9.411643804010266e-06,
      "loss": 0.1631,
      "step": 6215
    },
    {
      "epoch": 0.18134080168037808,
      "grad_norm": 0.8575426263434166,
      "learning_rate": 9.411421440943577e-06,
      "loss": 0.1676,
      "step": 6216
    },
    {
      "epoch": 0.18136997491102164,
      "grad_norm": 1.0595142798092263,
      "learning_rate": 9.411199038492771e-06,
      "loss": 0.1601,
      "step": 6217
    },
    {
      "epoch": 0.18139914814166522,
      "grad_norm": 0.8622059520119328,
      "learning_rate": 9.410976596659833e-06,
      "loss": 0.1621,
      "step": 6218
    },
    {
      "epoch": 0.18142832137230877,
      "grad_norm": 0.9781827108687683,
      "learning_rate": 9.410754115446747e-06,
      "loss": 0.1605,
      "step": 6219
    },
    {
      "epoch": 0.18145749460295232,
      "grad_norm": 0.8949547567745179,
      "learning_rate": 9.410531594855503e-06,
      "loss": 0.1858,
      "step": 6220
    },
    {
      "epoch": 0.1814866678335959,
      "grad_norm": 0.9421490204150713,
      "learning_rate": 9.410309034888086e-06,
      "loss": 0.1556,
      "step": 6221
    },
    {
      "epoch": 0.18151584106423946,
      "grad_norm": 0.9222360795511418,
      "learning_rate": 9.410086435546481e-06,
      "loss": 0.1776,
      "step": 6222
    },
    {
      "epoch": 0.181545014294883,
      "grad_norm": 0.9042954423969205,
      "learning_rate": 9.409863796832679e-06,
      "loss": 0.1931,
      "step": 6223
    },
    {
      "epoch": 0.18157418752552656,
      "grad_norm": 0.87669143043865,
      "learning_rate": 9.409641118748665e-06,
      "loss": 0.1938,
      "step": 6224
    },
    {
      "epoch": 0.18160336075617015,
      "grad_norm": 0.8911654886162883,
      "learning_rate": 9.409418401296429e-06,
      "loss": 0.1572,
      "step": 6225
    },
    {
      "epoch": 0.1816325339868137,
      "grad_norm": 0.9490587471244761,
      "learning_rate": 9.409195644477955e-06,
      "loss": 0.1601,
      "step": 6226
    },
    {
      "epoch": 0.18166170721745725,
      "grad_norm": 0.7109438871780286,
      "learning_rate": 9.408972848295237e-06,
      "loss": 0.1904,
      "step": 6227
    },
    {
      "epoch": 0.18169088044810083,
      "grad_norm": 0.9835123319739566,
      "learning_rate": 9.408750012750262e-06,
      "loss": 0.1664,
      "step": 6228
    },
    {
      "epoch": 0.1817200536787444,
      "grad_norm": 0.9472216549479102,
      "learning_rate": 9.408527137845019e-06,
      "loss": 0.1545,
      "step": 6229
    },
    {
      "epoch": 0.18174922690938794,
      "grad_norm": 0.6824306815751001,
      "learning_rate": 9.408304223581497e-06,
      "loss": 0.1521,
      "step": 6230
    },
    {
      "epoch": 0.18177840014003152,
      "grad_norm": 0.8559321147756367,
      "learning_rate": 9.40808126996169e-06,
      "loss": 0.2012,
      "step": 6231
    },
    {
      "epoch": 0.18180757337067507,
      "grad_norm": 1.04243711595298,
      "learning_rate": 9.407858276987582e-06,
      "loss": 0.1629,
      "step": 6232
    },
    {
      "epoch": 0.18183674660131863,
      "grad_norm": 0.781091931137516,
      "learning_rate": 9.407635244661171e-06,
      "loss": 0.1714,
      "step": 6233
    },
    {
      "epoch": 0.18186591983196218,
      "grad_norm": 0.7351836000670849,
      "learning_rate": 9.407412172984443e-06,
      "loss": 0.1809,
      "step": 6234
    },
    {
      "epoch": 0.18189509306260576,
      "grad_norm": 0.8888704776542667,
      "learning_rate": 9.407189061959391e-06,
      "loss": 0.1832,
      "step": 6235
    },
    {
      "epoch": 0.18192426629324931,
      "grad_norm": 0.9210496492979364,
      "learning_rate": 9.406965911588009e-06,
      "loss": 0.1396,
      "step": 6236
    },
    {
      "epoch": 0.18195343952389287,
      "grad_norm": 0.7581127058051876,
      "learning_rate": 9.406742721872283e-06,
      "loss": 0.1838,
      "step": 6237
    },
    {
      "epoch": 0.18198261275453645,
      "grad_norm": 0.7802841499323447,
      "learning_rate": 9.406519492814215e-06,
      "loss": 0.1777,
      "step": 6238
    },
    {
      "epoch": 0.18201178598518,
      "grad_norm": 1.0567663023915306,
      "learning_rate": 9.406296224415791e-06,
      "loss": 0.1714,
      "step": 6239
    },
    {
      "epoch": 0.18204095921582356,
      "grad_norm": 1.0572082746664182,
      "learning_rate": 9.406072916679006e-06,
      "loss": 0.1707,
      "step": 6240
    },
    {
      "epoch": 0.1820701324464671,
      "grad_norm": 1.1517742435805653,
      "learning_rate": 9.405849569605853e-06,
      "loss": 0.1631,
      "step": 6241
    },
    {
      "epoch": 0.1820993056771107,
      "grad_norm": 0.9723575965767852,
      "learning_rate": 9.405626183198329e-06,
      "loss": 0.1626,
      "step": 6242
    },
    {
      "epoch": 0.18212847890775424,
      "grad_norm": 0.8955581516573967,
      "learning_rate": 9.405402757458424e-06,
      "loss": 0.1844,
      "step": 6243
    },
    {
      "epoch": 0.1821576521383978,
      "grad_norm": 0.8263967085292345,
      "learning_rate": 9.405179292388135e-06,
      "loss": 0.1528,
      "step": 6244
    },
    {
      "epoch": 0.18218682536904138,
      "grad_norm": 0.8216943605496201,
      "learning_rate": 9.404955787989458e-06,
      "loss": 0.1688,
      "step": 6245
    },
    {
      "epoch": 0.18221599859968493,
      "grad_norm": 1.2210662319131085,
      "learning_rate": 9.404732244264387e-06,
      "loss": 0.1562,
      "step": 6246
    },
    {
      "epoch": 0.18224517183032848,
      "grad_norm": 0.8952438572608528,
      "learning_rate": 9.404508661214918e-06,
      "loss": 0.1441,
      "step": 6247
    },
    {
      "epoch": 0.18227434506097207,
      "grad_norm": 0.809408536103548,
      "learning_rate": 9.404285038843047e-06,
      "loss": 0.1582,
      "step": 6248
    },
    {
      "epoch": 0.18230351829161562,
      "grad_norm": 0.6595809122634859,
      "learning_rate": 9.404061377150771e-06,
      "loss": 0.1508,
      "step": 6249
    },
    {
      "epoch": 0.18233269152225917,
      "grad_norm": 0.7862198516734406,
      "learning_rate": 9.403837676140084e-06,
      "loss": 0.1513,
      "step": 6250
    },
    {
      "epoch": 0.18236186475290272,
      "grad_norm": 0.6850742799073719,
      "learning_rate": 9.403613935812988e-06,
      "loss": 0.1689,
      "step": 6251
    },
    {
      "epoch": 0.1823910379835463,
      "grad_norm": 0.8220992034464922,
      "learning_rate": 9.403390156171477e-06,
      "loss": 0.1385,
      "step": 6252
    },
    {
      "epoch": 0.18242021121418986,
      "grad_norm": 0.9788715151395558,
      "learning_rate": 9.40316633721755e-06,
      "loss": 0.1675,
      "step": 6253
    },
    {
      "epoch": 0.1824493844448334,
      "grad_norm": 0.8508562582919345,
      "learning_rate": 9.402942478953205e-06,
      "loss": 0.207,
      "step": 6254
    },
    {
      "epoch": 0.182478557675477,
      "grad_norm": 0.970477424704942,
      "learning_rate": 9.402718581380442e-06,
      "loss": 0.1678,
      "step": 6255
    },
    {
      "epoch": 0.18250773090612055,
      "grad_norm": 0.8971265181992876,
      "learning_rate": 9.402494644501256e-06,
      "loss": 0.1542,
      "step": 6256
    },
    {
      "epoch": 0.1825369041367641,
      "grad_norm": 0.9824876826399018,
      "learning_rate": 9.402270668317651e-06,
      "loss": 0.1632,
      "step": 6257
    },
    {
      "epoch": 0.18256607736740765,
      "grad_norm": 0.6949440436484277,
      "learning_rate": 9.402046652831623e-06,
      "loss": 0.1619,
      "step": 6258
    },
    {
      "epoch": 0.18259525059805123,
      "grad_norm": 0.8770120753793275,
      "learning_rate": 9.401822598045173e-06,
      "loss": 0.1644,
      "step": 6259
    },
    {
      "epoch": 0.1826244238286948,
      "grad_norm": 0.9068548662885453,
      "learning_rate": 9.401598503960303e-06,
      "loss": 0.1695,
      "step": 6260
    },
    {
      "epoch": 0.18265359705933834,
      "grad_norm": 0.8946411605336688,
      "learning_rate": 9.401374370579013e-06,
      "loss": 0.1571,
      "step": 6261
    },
    {
      "epoch": 0.18268277028998192,
      "grad_norm": 0.8074587967392921,
      "learning_rate": 9.401150197903301e-06,
      "loss": 0.1602,
      "step": 6262
    },
    {
      "epoch": 0.18271194352062547,
      "grad_norm": 2.60264728155018,
      "learning_rate": 9.400925985935172e-06,
      "loss": 0.1817,
      "step": 6263
    },
    {
      "epoch": 0.18274111675126903,
      "grad_norm": 0.9913172170775648,
      "learning_rate": 9.400701734676628e-06,
      "loss": 0.172,
      "step": 6264
    },
    {
      "epoch": 0.1827702899819126,
      "grad_norm": 0.8779597607438039,
      "learning_rate": 9.400477444129667e-06,
      "loss": 0.1919,
      "step": 6265
    },
    {
      "epoch": 0.18279946321255616,
      "grad_norm": 0.8429548045429296,
      "learning_rate": 9.400253114296293e-06,
      "loss": 0.1685,
      "step": 6266
    },
    {
      "epoch": 0.18282863644319972,
      "grad_norm": 0.9852071087522531,
      "learning_rate": 9.400028745178512e-06,
      "loss": 0.1846,
      "step": 6267
    },
    {
      "epoch": 0.18285780967384327,
      "grad_norm": 1.0386223536856265,
      "learning_rate": 9.399804336778325e-06,
      "loss": 0.1751,
      "step": 6268
    },
    {
      "epoch": 0.18288698290448685,
      "grad_norm": 0.7428465970760484,
      "learning_rate": 9.399579889097733e-06,
      "loss": 0.1652,
      "step": 6269
    },
    {
      "epoch": 0.1829161561351304,
      "grad_norm": 0.8416234696604418,
      "learning_rate": 9.399355402138743e-06,
      "loss": 0.1747,
      "step": 6270
    },
    {
      "epoch": 0.18294532936577396,
      "grad_norm": 0.8880831660665703,
      "learning_rate": 9.399130875903357e-06,
      "loss": 0.1621,
      "step": 6271
    },
    {
      "epoch": 0.18297450259641754,
      "grad_norm": 0.8646320623649186,
      "learning_rate": 9.398906310393582e-06,
      "loss": 0.176,
      "step": 6272
    },
    {
      "epoch": 0.1830036758270611,
      "grad_norm": 0.8468516507308228,
      "learning_rate": 9.398681705611423e-06,
      "loss": 0.1634,
      "step": 6273
    },
    {
      "epoch": 0.18303284905770464,
      "grad_norm": 1.0238616595287175,
      "learning_rate": 9.39845706155888e-06,
      "loss": 0.1806,
      "step": 6274
    },
    {
      "epoch": 0.18306202228834823,
      "grad_norm": 0.9580535676433123,
      "learning_rate": 9.398232378237965e-06,
      "loss": 0.1695,
      "step": 6275
    },
    {
      "epoch": 0.18309119551899178,
      "grad_norm": 0.954693563100793,
      "learning_rate": 9.398007655650682e-06,
      "loss": 0.176,
      "step": 6276
    },
    {
      "epoch": 0.18312036874963533,
      "grad_norm": 0.7206433187720213,
      "learning_rate": 9.397782893799036e-06,
      "loss": 0.1505,
      "step": 6277
    },
    {
      "epoch": 0.18314954198027888,
      "grad_norm": 0.8757530542891719,
      "learning_rate": 9.397558092685033e-06,
      "loss": 0.1624,
      "step": 6278
    },
    {
      "epoch": 0.18317871521092247,
      "grad_norm": 1.0543168020175973,
      "learning_rate": 9.397333252310682e-06,
      "loss": 0.1636,
      "step": 6279
    },
    {
      "epoch": 0.18320788844156602,
      "grad_norm": 0.778576249760065,
      "learning_rate": 9.39710837267799e-06,
      "loss": 0.1827,
      "step": 6280
    },
    {
      "epoch": 0.18323706167220957,
      "grad_norm": 1.1641877106989917,
      "learning_rate": 9.396883453788964e-06,
      "loss": 0.2028,
      "step": 6281
    },
    {
      "epoch": 0.18326623490285315,
      "grad_norm": 1.1271654281740786,
      "learning_rate": 9.39665849564561e-06,
      "loss": 0.1523,
      "step": 6282
    },
    {
      "epoch": 0.1832954081334967,
      "grad_norm": 0.8299374316477975,
      "learning_rate": 9.396433498249939e-06,
      "loss": 0.168,
      "step": 6283
    },
    {
      "epoch": 0.18332458136414026,
      "grad_norm": 1.5415467716062783,
      "learning_rate": 9.396208461603962e-06,
      "loss": 0.1718,
      "step": 6284
    },
    {
      "epoch": 0.1833537545947838,
      "grad_norm": 1.0033100132751955,
      "learning_rate": 9.395983385709683e-06,
      "loss": 0.1458,
      "step": 6285
    },
    {
      "epoch": 0.1833829278254274,
      "grad_norm": 0.7665849418163303,
      "learning_rate": 9.395758270569114e-06,
      "loss": 0.165,
      "step": 6286
    },
    {
      "epoch": 0.18341210105607095,
      "grad_norm": 0.9567717284645186,
      "learning_rate": 9.395533116184266e-06,
      "loss": 0.1542,
      "step": 6287
    },
    {
      "epoch": 0.1834412742867145,
      "grad_norm": 0.8993418807931096,
      "learning_rate": 9.395307922557145e-06,
      "loss": 0.1763,
      "step": 6288
    },
    {
      "epoch": 0.18347044751735808,
      "grad_norm": 0.8079546539093717,
      "learning_rate": 9.395082689689765e-06,
      "loss": 0.1565,
      "step": 6289
    },
    {
      "epoch": 0.18349962074800164,
      "grad_norm": 0.9748613091499144,
      "learning_rate": 9.394857417584137e-06,
      "loss": 0.1899,
      "step": 6290
    },
    {
      "epoch": 0.1835287939786452,
      "grad_norm": 0.974800626920447,
      "learning_rate": 9.394632106242271e-06,
      "loss": 0.1431,
      "step": 6291
    },
    {
      "epoch": 0.18355796720928877,
      "grad_norm": 1.2020729522605325,
      "learning_rate": 9.394406755666177e-06,
      "loss": 0.1732,
      "step": 6292
    },
    {
      "epoch": 0.18358714043993232,
      "grad_norm": 1.1276256250163865,
      "learning_rate": 9.39418136585787e-06,
      "loss": 0.1771,
      "step": 6293
    },
    {
      "epoch": 0.18361631367057588,
      "grad_norm": 0.6975061214800776,
      "learning_rate": 9.393955936819362e-06,
      "loss": 0.172,
      "step": 6294
    },
    {
      "epoch": 0.18364548690121943,
      "grad_norm": 0.9135330478235544,
      "learning_rate": 9.393730468552661e-06,
      "loss": 0.1851,
      "step": 6295
    },
    {
      "epoch": 0.183674660131863,
      "grad_norm": 0.9041104249236712,
      "learning_rate": 9.393504961059786e-06,
      "loss": 0.1727,
      "step": 6296
    },
    {
      "epoch": 0.18370383336250656,
      "grad_norm": 0.8974073959120973,
      "learning_rate": 9.393279414342747e-06,
      "loss": 0.1659,
      "step": 6297
    },
    {
      "epoch": 0.18373300659315012,
      "grad_norm": 0.9090586042105718,
      "learning_rate": 9.393053828403558e-06,
      "loss": 0.1657,
      "step": 6298
    },
    {
      "epoch": 0.1837621798237937,
      "grad_norm": 1.0353462341734232,
      "learning_rate": 9.392828203244232e-06,
      "loss": 0.169,
      "step": 6299
    },
    {
      "epoch": 0.18379135305443725,
      "grad_norm": 0.8231975707209924,
      "learning_rate": 9.392602538866785e-06,
      "loss": 0.1686,
      "step": 6300
    },
    {
      "epoch": 0.1838205262850808,
      "grad_norm": 0.7398106825842896,
      "learning_rate": 9.39237683527323e-06,
      "loss": 0.1641,
      "step": 6301
    },
    {
      "epoch": 0.18384969951572439,
      "grad_norm": 0.9598872308409644,
      "learning_rate": 9.392151092465587e-06,
      "loss": 0.1572,
      "step": 6302
    },
    {
      "epoch": 0.18387887274636794,
      "grad_norm": 0.7734106660152992,
      "learning_rate": 9.391925310445863e-06,
      "loss": 0.1482,
      "step": 6303
    },
    {
      "epoch": 0.1839080459770115,
      "grad_norm": 0.7760284564417853,
      "learning_rate": 9.391699489216082e-06,
      "loss": 0.1516,
      "step": 6304
    },
    {
      "epoch": 0.18393721920765504,
      "grad_norm": 0.9199543724306547,
      "learning_rate": 9.391473628778253e-06,
      "loss": 0.1794,
      "step": 6305
    },
    {
      "epoch": 0.18396639243829863,
      "grad_norm": 0.8064664006424641,
      "learning_rate": 9.391247729134399e-06,
      "loss": 0.156,
      "step": 6306
    },
    {
      "epoch": 0.18399556566894218,
      "grad_norm": 0.9541207820094149,
      "learning_rate": 9.391021790286532e-06,
      "loss": 0.1597,
      "step": 6307
    },
    {
      "epoch": 0.18402473889958573,
      "grad_norm": 0.835873642125506,
      "learning_rate": 9.39079581223667e-06,
      "loss": 0.15,
      "step": 6308
    },
    {
      "epoch": 0.1840539121302293,
      "grad_norm": 0.913013134531752,
      "learning_rate": 9.390569794986833e-06,
      "loss": 0.1751,
      "step": 6309
    },
    {
      "epoch": 0.18408308536087287,
      "grad_norm": 0.8186151914454821,
      "learning_rate": 9.390343738539036e-06,
      "loss": 0.1839,
      "step": 6310
    },
    {
      "epoch": 0.18411225859151642,
      "grad_norm": 0.989796088579576,
      "learning_rate": 9.390117642895298e-06,
      "loss": 0.1607,
      "step": 6311
    },
    {
      "epoch": 0.18414143182215997,
      "grad_norm": 0.766298593376058,
      "learning_rate": 9.389891508057638e-06,
      "loss": 0.1621,
      "step": 6312
    },
    {
      "epoch": 0.18417060505280355,
      "grad_norm": 0.9280216493572327,
      "learning_rate": 9.389665334028073e-06,
      "loss": 0.1754,
      "step": 6313
    },
    {
      "epoch": 0.1841997782834471,
      "grad_norm": 0.8973743583496462,
      "learning_rate": 9.389439120808625e-06,
      "loss": 0.1873,
      "step": 6314
    },
    {
      "epoch": 0.18422895151409066,
      "grad_norm": 0.8473033153608642,
      "learning_rate": 9.389212868401313e-06,
      "loss": 0.1678,
      "step": 6315
    },
    {
      "epoch": 0.18425812474473424,
      "grad_norm": 0.9923850099865504,
      "learning_rate": 9.388986576808156e-06,
      "loss": 0.1688,
      "step": 6316
    },
    {
      "epoch": 0.1842872979753778,
      "grad_norm": 1.0566776003326883,
      "learning_rate": 9.388760246031175e-06,
      "loss": 0.1696,
      "step": 6317
    },
    {
      "epoch": 0.18431647120602135,
      "grad_norm": 1.0754251362690752,
      "learning_rate": 9.38853387607239e-06,
      "loss": 0.1494,
      "step": 6318
    },
    {
      "epoch": 0.18434564443666493,
      "grad_norm": 0.7831028392985439,
      "learning_rate": 9.388307466933821e-06,
      "loss": 0.1728,
      "step": 6319
    },
    {
      "epoch": 0.18437481766730848,
      "grad_norm": 0.8504901453125424,
      "learning_rate": 9.388081018617492e-06,
      "loss": 0.1511,
      "step": 6320
    },
    {
      "epoch": 0.18440399089795204,
      "grad_norm": 1.0756459690975153,
      "learning_rate": 9.387854531125421e-06,
      "loss": 0.164,
      "step": 6321
    },
    {
      "epoch": 0.1844331641285956,
      "grad_norm": 0.9622875988951577,
      "learning_rate": 9.387628004459633e-06,
      "loss": 0.1823,
      "step": 6322
    },
    {
      "epoch": 0.18446233735923917,
      "grad_norm": 0.8239741278722938,
      "learning_rate": 9.387401438622151e-06,
      "loss": 0.1958,
      "step": 6323
    },
    {
      "epoch": 0.18449151058988272,
      "grad_norm": 0.8432333835187112,
      "learning_rate": 9.387174833614996e-06,
      "loss": 0.1561,
      "step": 6324
    },
    {
      "epoch": 0.18452068382052628,
      "grad_norm": 0.9037546123012228,
      "learning_rate": 9.38694818944019e-06,
      "loss": 0.1761,
      "step": 6325
    },
    {
      "epoch": 0.18454985705116986,
      "grad_norm": 0.6479765240275243,
      "learning_rate": 9.386721506099759e-06,
      "loss": 0.1706,
      "step": 6326
    },
    {
      "epoch": 0.1845790302818134,
      "grad_norm": 0.8840684744698483,
      "learning_rate": 9.386494783595725e-06,
      "loss": 0.1498,
      "step": 6327
    },
    {
      "epoch": 0.18460820351245696,
      "grad_norm": 4.114698133905036,
      "learning_rate": 9.386268021930114e-06,
      "loss": 0.1699,
      "step": 6328
    },
    {
      "epoch": 0.18463737674310052,
      "grad_norm": 0.8557881661633471,
      "learning_rate": 9.386041221104947e-06,
      "loss": 0.1751,
      "step": 6329
    },
    {
      "epoch": 0.1846665499737441,
      "grad_norm": 0.6900398003115896,
      "learning_rate": 9.385814381122252e-06,
      "loss": 0.1611,
      "step": 6330
    },
    {
      "epoch": 0.18469572320438765,
      "grad_norm": 0.9162916362396664,
      "learning_rate": 9.385587501984056e-06,
      "loss": 0.1589,
      "step": 6331
    },
    {
      "epoch": 0.1847248964350312,
      "grad_norm": 0.8271291333589972,
      "learning_rate": 9.385360583692378e-06,
      "loss": 0.1482,
      "step": 6332
    },
    {
      "epoch": 0.18475406966567479,
      "grad_norm": 0.7268643631329941,
      "learning_rate": 9.385133626249247e-06,
      "loss": 0.1528,
      "step": 6333
    },
    {
      "epoch": 0.18478324289631834,
      "grad_norm": 1.0353747666604984,
      "learning_rate": 9.384906629656692e-06,
      "loss": 0.1775,
      "step": 6334
    },
    {
      "epoch": 0.1848124161269619,
      "grad_norm": 0.9373739253374651,
      "learning_rate": 9.384679593916737e-06,
      "loss": 0.171,
      "step": 6335
    },
    {
      "epoch": 0.18484158935760547,
      "grad_norm": 0.7342795308586916,
      "learning_rate": 9.384452519031409e-06,
      "loss": 0.1529,
      "step": 6336
    },
    {
      "epoch": 0.18487076258824903,
      "grad_norm": 0.9723050522783112,
      "learning_rate": 9.384225405002736e-06,
      "loss": 0.1691,
      "step": 6337
    },
    {
      "epoch": 0.18489993581889258,
      "grad_norm": 0.9092930360712453,
      "learning_rate": 9.383998251832744e-06,
      "loss": 0.1648,
      "step": 6338
    },
    {
      "epoch": 0.18492910904953613,
      "grad_norm": 0.8127884031882446,
      "learning_rate": 9.383771059523464e-06,
      "loss": 0.1938,
      "step": 6339
    },
    {
      "epoch": 0.18495828228017971,
      "grad_norm": 0.8100007733503917,
      "learning_rate": 9.383543828076923e-06,
      "loss": 0.1617,
      "step": 6340
    },
    {
      "epoch": 0.18498745551082327,
      "grad_norm": 1.0994614799445628,
      "learning_rate": 9.383316557495145e-06,
      "loss": 0.1598,
      "step": 6341
    },
    {
      "epoch": 0.18501662874146682,
      "grad_norm": 0.7678430777969951,
      "learning_rate": 9.383089247780168e-06,
      "loss": 0.1669,
      "step": 6342
    },
    {
      "epoch": 0.1850458019721104,
      "grad_norm": 0.8686584777523905,
      "learning_rate": 9.382861898934013e-06,
      "loss": 0.1539,
      "step": 6343
    },
    {
      "epoch": 0.18507497520275396,
      "grad_norm": 1.0690350375174442,
      "learning_rate": 9.382634510958714e-06,
      "loss": 0.1845,
      "step": 6344
    },
    {
      "epoch": 0.1851041484333975,
      "grad_norm": 0.8535316947589281,
      "learning_rate": 9.382407083856302e-06,
      "loss": 0.1728,
      "step": 6345
    },
    {
      "epoch": 0.1851333216640411,
      "grad_norm": 0.9349007679845841,
      "learning_rate": 9.382179617628804e-06,
      "loss": 0.1627,
      "step": 6346
    },
    {
      "epoch": 0.18516249489468464,
      "grad_norm": 0.9874725412467277,
      "learning_rate": 9.381952112278254e-06,
      "loss": 0.1641,
      "step": 6347
    },
    {
      "epoch": 0.1851916681253282,
      "grad_norm": 0.9545658165985447,
      "learning_rate": 9.38172456780668e-06,
      "loss": 0.2031,
      "step": 6348
    },
    {
      "epoch": 0.18522084135597175,
      "grad_norm": 0.859255529423847,
      "learning_rate": 9.381496984216117e-06,
      "loss": 0.1796,
      "step": 6349
    },
    {
      "epoch": 0.18525001458661533,
      "grad_norm": 0.8752842002519265,
      "learning_rate": 9.381269361508593e-06,
      "loss": 0.1616,
      "step": 6350
    },
    {
      "epoch": 0.18527918781725888,
      "grad_norm": 0.7403846602310657,
      "learning_rate": 9.381041699686143e-06,
      "loss": 0.1531,
      "step": 6351
    },
    {
      "epoch": 0.18530836104790244,
      "grad_norm": 0.9101251262560319,
      "learning_rate": 9.380813998750798e-06,
      "loss": 0.1728,
      "step": 6352
    },
    {
      "epoch": 0.18533753427854602,
      "grad_norm": 0.9226859299655494,
      "learning_rate": 9.380586258704592e-06,
      "loss": 0.1902,
      "step": 6353
    },
    {
      "epoch": 0.18536670750918957,
      "grad_norm": 0.735729210753132,
      "learning_rate": 9.380358479549556e-06,
      "loss": 0.1512,
      "step": 6354
    },
    {
      "epoch": 0.18539588073983312,
      "grad_norm": 0.7761692285022256,
      "learning_rate": 9.380130661287728e-06,
      "loss": 0.1516,
      "step": 6355
    },
    {
      "epoch": 0.18542505397047668,
      "grad_norm": 0.9170830195308721,
      "learning_rate": 9.379902803921135e-06,
      "loss": 0.1607,
      "step": 6356
    },
    {
      "epoch": 0.18545422720112026,
      "grad_norm": 0.8316013696108943,
      "learning_rate": 9.379674907451819e-06,
      "loss": 0.164,
      "step": 6357
    },
    {
      "epoch": 0.1854834004317638,
      "grad_norm": 0.8536503482264336,
      "learning_rate": 9.379446971881808e-06,
      "loss": 0.1611,
      "step": 6358
    },
    {
      "epoch": 0.18551257366240737,
      "grad_norm": 0.839788580451009,
      "learning_rate": 9.379218997213143e-06,
      "loss": 0.1661,
      "step": 6359
    },
    {
      "epoch": 0.18554174689305095,
      "grad_norm": 0.8795878200249163,
      "learning_rate": 9.378990983447855e-06,
      "loss": 0.1651,
      "step": 6360
    },
    {
      "epoch": 0.1855709201236945,
      "grad_norm": 0.9092622775474988,
      "learning_rate": 9.37876293058798e-06,
      "loss": 0.1843,
      "step": 6361
    },
    {
      "epoch": 0.18560009335433805,
      "grad_norm": 0.8255685089027337,
      "learning_rate": 9.378534838635556e-06,
      "loss": 0.1548,
      "step": 6362
    },
    {
      "epoch": 0.18562926658498163,
      "grad_norm": 0.9788167032986756,
      "learning_rate": 9.378306707592618e-06,
      "loss": 0.1846,
      "step": 6363
    },
    {
      "epoch": 0.1856584398156252,
      "grad_norm": 0.8886286713130412,
      "learning_rate": 9.378078537461203e-06,
      "loss": 0.1678,
      "step": 6364
    },
    {
      "epoch": 0.18568761304626874,
      "grad_norm": 0.888688783427601,
      "learning_rate": 9.377850328243348e-06,
      "loss": 0.1522,
      "step": 6365
    },
    {
      "epoch": 0.1857167862769123,
      "grad_norm": 0.6460136648863666,
      "learning_rate": 9.377622079941089e-06,
      "loss": 0.1448,
      "step": 6366
    },
    {
      "epoch": 0.18574595950755587,
      "grad_norm": 1.0355527099034294,
      "learning_rate": 9.377393792556466e-06,
      "loss": 0.1849,
      "step": 6367
    },
    {
      "epoch": 0.18577513273819943,
      "grad_norm": 0.9430101147387856,
      "learning_rate": 9.377165466091516e-06,
      "loss": 0.1641,
      "step": 6368
    },
    {
      "epoch": 0.18580430596884298,
      "grad_norm": 0.7912800017061814,
      "learning_rate": 9.376937100548277e-06,
      "loss": 0.1634,
      "step": 6369
    },
    {
      "epoch": 0.18583347919948656,
      "grad_norm": 1.2790306723717684,
      "learning_rate": 9.376708695928791e-06,
      "loss": 0.1537,
      "step": 6370
    },
    {
      "epoch": 0.18586265243013012,
      "grad_norm": 0.9569122567998494,
      "learning_rate": 9.376480252235091e-06,
      "loss": 0.1554,
      "step": 6371
    },
    {
      "epoch": 0.18589182566077367,
      "grad_norm": 0.8385870624766267,
      "learning_rate": 9.376251769469223e-06,
      "loss": 0.1696,
      "step": 6372
    },
    {
      "epoch": 0.18592099889141722,
      "grad_norm": 1.2551188792359957,
      "learning_rate": 9.376023247633224e-06,
      "loss": 0.1739,
      "step": 6373
    },
    {
      "epoch": 0.1859501721220608,
      "grad_norm": 0.9203606008213495,
      "learning_rate": 9.375794686729132e-06,
      "loss": 0.1553,
      "step": 6374
    },
    {
      "epoch": 0.18597934535270436,
      "grad_norm": 0.9664355287797862,
      "learning_rate": 9.37556608675899e-06,
      "loss": 0.1753,
      "step": 6375
    },
    {
      "epoch": 0.1860085185833479,
      "grad_norm": 0.8985595808477231,
      "learning_rate": 9.375337447724839e-06,
      "loss": 0.1992,
      "step": 6376
    },
    {
      "epoch": 0.1860376918139915,
      "grad_norm": 0.8896127544730188,
      "learning_rate": 9.37510876962872e-06,
      "loss": 0.1645,
      "step": 6377
    },
    {
      "epoch": 0.18606686504463504,
      "grad_norm": 0.9140614984575676,
      "learning_rate": 9.374880052472674e-06,
      "loss": 0.1495,
      "step": 6378
    },
    {
      "epoch": 0.1860960382752786,
      "grad_norm": 0.8551668307596201,
      "learning_rate": 9.374651296258743e-06,
      "loss": 0.154,
      "step": 6379
    },
    {
      "epoch": 0.18612521150592218,
      "grad_norm": 0.7345254800454083,
      "learning_rate": 9.374422500988971e-06,
      "loss": 0.1641,
      "step": 6380
    },
    {
      "epoch": 0.18615438473656573,
      "grad_norm": 0.8967007878719371,
      "learning_rate": 9.374193666665397e-06,
      "loss": 0.2001,
      "step": 6381
    },
    {
      "epoch": 0.18618355796720928,
      "grad_norm": 1.0206969373347077,
      "learning_rate": 9.373964793290067e-06,
      "loss": 0.1617,
      "step": 6382
    },
    {
      "epoch": 0.18621273119785284,
      "grad_norm": 0.9382054746335238,
      "learning_rate": 9.373735880865024e-06,
      "loss": 0.1779,
      "step": 6383
    },
    {
      "epoch": 0.18624190442849642,
      "grad_norm": 0.7812344511784418,
      "learning_rate": 9.373506929392311e-06,
      "loss": 0.1626,
      "step": 6384
    },
    {
      "epoch": 0.18627107765913997,
      "grad_norm": 0.9776708510589419,
      "learning_rate": 9.373277938873973e-06,
      "loss": 0.1775,
      "step": 6385
    },
    {
      "epoch": 0.18630025088978353,
      "grad_norm": 0.9224752017283132,
      "learning_rate": 9.373048909312052e-06,
      "loss": 0.1786,
      "step": 6386
    },
    {
      "epoch": 0.1863294241204271,
      "grad_norm": 0.6783865099924923,
      "learning_rate": 9.372819840708594e-06,
      "loss": 0.1732,
      "step": 6387
    },
    {
      "epoch": 0.18635859735107066,
      "grad_norm": 1.3051642832191346,
      "learning_rate": 9.372590733065645e-06,
      "loss": 0.1851,
      "step": 6388
    },
    {
      "epoch": 0.1863877705817142,
      "grad_norm": 1.0003059103132785,
      "learning_rate": 9.37236158638525e-06,
      "loss": 0.1774,
      "step": 6389
    },
    {
      "epoch": 0.1864169438123578,
      "grad_norm": 1.2418311183576725,
      "learning_rate": 9.372132400669456e-06,
      "loss": 0.1906,
      "step": 6390
    },
    {
      "epoch": 0.18644611704300135,
      "grad_norm": 1.1159368494090276,
      "learning_rate": 9.371903175920306e-06,
      "loss": 0.1789,
      "step": 6391
    },
    {
      "epoch": 0.1864752902736449,
      "grad_norm": 0.8045398393384725,
      "learning_rate": 9.371673912139847e-06,
      "loss": 0.1568,
      "step": 6392
    },
    {
      "epoch": 0.18650446350428845,
      "grad_norm": 0.8406425441848276,
      "learning_rate": 9.371444609330129e-06,
      "loss": 0.1633,
      "step": 6393
    },
    {
      "epoch": 0.18653363673493203,
      "grad_norm": 0.7074436767919758,
      "learning_rate": 9.371215267493195e-06,
      "loss": 0.1641,
      "step": 6394
    },
    {
      "epoch": 0.1865628099655756,
      "grad_norm": 0.8501370586374717,
      "learning_rate": 9.370985886631097e-06,
      "loss": 0.1834,
      "step": 6395
    },
    {
      "epoch": 0.18659198319621914,
      "grad_norm": 0.9463208895737822,
      "learning_rate": 9.370756466745879e-06,
      "loss": 0.1542,
      "step": 6396
    },
    {
      "epoch": 0.18662115642686272,
      "grad_norm": 0.8719077905712171,
      "learning_rate": 9.37052700783959e-06,
      "loss": 0.1657,
      "step": 6397
    },
    {
      "epoch": 0.18665032965750628,
      "grad_norm": 0.9821077085761606,
      "learning_rate": 9.37029750991428e-06,
      "loss": 0.1524,
      "step": 6398
    },
    {
      "epoch": 0.18667950288814983,
      "grad_norm": 0.8425957668888753,
      "learning_rate": 9.370067972971998e-06,
      "loss": 0.1602,
      "step": 6399
    },
    {
      "epoch": 0.18670867611879338,
      "grad_norm": 0.9156979950842294,
      "learning_rate": 9.369838397014792e-06,
      "loss": 0.1914,
      "step": 6400
    },
    {
      "epoch": 0.18673784934943696,
      "grad_norm": 0.9284921112275565,
      "learning_rate": 9.36960878204471e-06,
      "loss": 0.1723,
      "step": 6401
    },
    {
      "epoch": 0.18676702258008052,
      "grad_norm": 0.813841492557956,
      "learning_rate": 9.369379128063807e-06,
      "loss": 0.1695,
      "step": 6402
    },
    {
      "epoch": 0.18679619581072407,
      "grad_norm": 1.0530021092744695,
      "learning_rate": 9.369149435074127e-06,
      "loss": 0.1486,
      "step": 6403
    },
    {
      "epoch": 0.18682536904136765,
      "grad_norm": 0.8517192153127459,
      "learning_rate": 9.368919703077726e-06,
      "loss": 0.1685,
      "step": 6404
    },
    {
      "epoch": 0.1868545422720112,
      "grad_norm": 0.9136196497971955,
      "learning_rate": 9.368689932076651e-06,
      "loss": 0.1774,
      "step": 6405
    },
    {
      "epoch": 0.18688371550265476,
      "grad_norm": 0.6810571973131726,
      "learning_rate": 9.368460122072958e-06,
      "loss": 0.1422,
      "step": 6406
    },
    {
      "epoch": 0.18691288873329834,
      "grad_norm": 0.98660818876648,
      "learning_rate": 9.368230273068694e-06,
      "loss": 0.1691,
      "step": 6407
    },
    {
      "epoch": 0.1869420619639419,
      "grad_norm": 0.6836505378334928,
      "learning_rate": 9.368000385065914e-06,
      "loss": 0.1296,
      "step": 6408
    },
    {
      "epoch": 0.18697123519458544,
      "grad_norm": 1.1529083326425609,
      "learning_rate": 9.367770458066668e-06,
      "loss": 0.1753,
      "step": 6409
    },
    {
      "epoch": 0.187000408425229,
      "grad_norm": 0.9984352785617496,
      "learning_rate": 9.36754049207301e-06,
      "loss": 0.188,
      "step": 6410
    },
    {
      "epoch": 0.18702958165587258,
      "grad_norm": 0.8513446484147696,
      "learning_rate": 9.367310487086994e-06,
      "loss": 0.1552,
      "step": 6411
    },
    {
      "epoch": 0.18705875488651613,
      "grad_norm": 0.8994726050458148,
      "learning_rate": 9.367080443110672e-06,
      "loss": 0.1618,
      "step": 6412
    },
    {
      "epoch": 0.18708792811715969,
      "grad_norm": 0.9579227745596848,
      "learning_rate": 9.366850360146098e-06,
      "loss": 0.1932,
      "step": 6413
    },
    {
      "epoch": 0.18711710134780327,
      "grad_norm": 0.836438245202908,
      "learning_rate": 9.366620238195327e-06,
      "loss": 0.1573,
      "step": 6414
    },
    {
      "epoch": 0.18714627457844682,
      "grad_norm": 0.9597197293795949,
      "learning_rate": 9.366390077260413e-06,
      "loss": 0.1408,
      "step": 6415
    },
    {
      "epoch": 0.18717544780909037,
      "grad_norm": 0.878411391925992,
      "learning_rate": 9.366159877343411e-06,
      "loss": 0.1932,
      "step": 6416
    },
    {
      "epoch": 0.18720462103973395,
      "grad_norm": 0.8044571836578218,
      "learning_rate": 9.365929638446375e-06,
      "loss": 0.1526,
      "step": 6417
    },
    {
      "epoch": 0.1872337942703775,
      "grad_norm": 0.8660836589451121,
      "learning_rate": 9.365699360571361e-06,
      "loss": 0.1624,
      "step": 6418
    },
    {
      "epoch": 0.18726296750102106,
      "grad_norm": 0.7468442113023918,
      "learning_rate": 9.365469043720428e-06,
      "loss": 0.17,
      "step": 6419
    },
    {
      "epoch": 0.1872921407316646,
      "grad_norm": 0.9647577885839785,
      "learning_rate": 9.365238687895626e-06,
      "loss": 0.152,
      "step": 6420
    },
    {
      "epoch": 0.1873213139623082,
      "grad_norm": 0.8733550915431748,
      "learning_rate": 9.365008293099017e-06,
      "loss": 0.1507,
      "step": 6421
    },
    {
      "epoch": 0.18735048719295175,
      "grad_norm": 0.833192434914469,
      "learning_rate": 9.364777859332656e-06,
      "loss": 0.1737,
      "step": 6422
    },
    {
      "epoch": 0.1873796604235953,
      "grad_norm": 0.943764908474735,
      "learning_rate": 9.364547386598599e-06,
      "loss": 0.1855,
      "step": 6423
    },
    {
      "epoch": 0.18740883365423888,
      "grad_norm": 0.9674986000953253,
      "learning_rate": 9.364316874898906e-06,
      "loss": 0.1575,
      "step": 6424
    },
    {
      "epoch": 0.18743800688488244,
      "grad_norm": 0.8465644267118999,
      "learning_rate": 9.364086324235634e-06,
      "loss": 0.1818,
      "step": 6425
    },
    {
      "epoch": 0.187467180115526,
      "grad_norm": 3.682477680581137,
      "learning_rate": 9.36385573461084e-06,
      "loss": 0.158,
      "step": 6426
    },
    {
      "epoch": 0.18749635334616954,
      "grad_norm": 1.1389325051338943,
      "learning_rate": 9.363625106026585e-06,
      "loss": 0.1399,
      "step": 6427
    },
    {
      "epoch": 0.18752552657681312,
      "grad_norm": 0.9335206135751585,
      "learning_rate": 9.363394438484926e-06,
      "loss": 0.1548,
      "step": 6428
    },
    {
      "epoch": 0.18755469980745668,
      "grad_norm": 0.9756779180721152,
      "learning_rate": 9.363163731987924e-06,
      "loss": 0.1612,
      "step": 6429
    },
    {
      "epoch": 0.18758387303810023,
      "grad_norm": 0.9415783249737719,
      "learning_rate": 9.362932986537636e-06,
      "loss": 0.1745,
      "step": 6430
    },
    {
      "epoch": 0.1876130462687438,
      "grad_norm": 0.91126906562831,
      "learning_rate": 9.362702202136125e-06,
      "loss": 0.1721,
      "step": 6431
    },
    {
      "epoch": 0.18764221949938736,
      "grad_norm": 0.86616612377215,
      "learning_rate": 9.36247137878545e-06,
      "loss": 0.1499,
      "step": 6432
    },
    {
      "epoch": 0.18767139273003092,
      "grad_norm": 1.0357674865729583,
      "learning_rate": 9.362240516487672e-06,
      "loss": 0.1665,
      "step": 6433
    },
    {
      "epoch": 0.1877005659606745,
      "grad_norm": 0.82598036222361,
      "learning_rate": 9.362009615244852e-06,
      "loss": 0.1454,
      "step": 6434
    },
    {
      "epoch": 0.18772973919131805,
      "grad_norm": 0.8879978864420757,
      "learning_rate": 9.36177867505905e-06,
      "loss": 0.1768,
      "step": 6435
    },
    {
      "epoch": 0.1877589124219616,
      "grad_norm": 0.8781657754241868,
      "learning_rate": 9.36154769593233e-06,
      "loss": 0.1527,
      "step": 6436
    },
    {
      "epoch": 0.18778808565260516,
      "grad_norm": 0.9536049107109157,
      "learning_rate": 9.361316677866756e-06,
      "loss": 0.153,
      "step": 6437
    },
    {
      "epoch": 0.18781725888324874,
      "grad_norm": 0.8754771873434788,
      "learning_rate": 9.361085620864384e-06,
      "loss": 0.1847,
      "step": 6438
    },
    {
      "epoch": 0.1878464321138923,
      "grad_norm": 0.9788522664013113,
      "learning_rate": 9.360854524927283e-06,
      "loss": 0.1598,
      "step": 6439
    },
    {
      "epoch": 0.18787560534453585,
      "grad_norm": 0.8847500326674329,
      "learning_rate": 9.360623390057513e-06,
      "loss": 0.1575,
      "step": 6440
    },
    {
      "epoch": 0.18790477857517943,
      "grad_norm": 0.8954633065214586,
      "learning_rate": 9.36039221625714e-06,
      "loss": 0.1628,
      "step": 6441
    },
    {
      "epoch": 0.18793395180582298,
      "grad_norm": 1.0958594723145838,
      "learning_rate": 9.360161003528225e-06,
      "loss": 0.1855,
      "step": 6442
    },
    {
      "epoch": 0.18796312503646653,
      "grad_norm": 1.0493007576651754,
      "learning_rate": 9.359929751872832e-06,
      "loss": 0.1757,
      "step": 6443
    },
    {
      "epoch": 0.1879922982671101,
      "grad_norm": 0.7734016970896587,
      "learning_rate": 9.359698461293029e-06,
      "loss": 0.1831,
      "step": 6444
    },
    {
      "epoch": 0.18802147149775367,
      "grad_norm": 0.7670175090011172,
      "learning_rate": 9.359467131790878e-06,
      "loss": 0.1845,
      "step": 6445
    },
    {
      "epoch": 0.18805064472839722,
      "grad_norm": 0.837747572703878,
      "learning_rate": 9.359235763368444e-06,
      "loss": 0.1661,
      "step": 6446
    },
    {
      "epoch": 0.18807981795904077,
      "grad_norm": 0.9472609193995318,
      "learning_rate": 9.359004356027796e-06,
      "loss": 0.18,
      "step": 6447
    },
    {
      "epoch": 0.18810899118968435,
      "grad_norm": 0.7727561093733268,
      "learning_rate": 9.358772909770996e-06,
      "loss": 0.1691,
      "step": 6448
    },
    {
      "epoch": 0.1881381644203279,
      "grad_norm": 0.9422799353347984,
      "learning_rate": 9.358541424600112e-06,
      "loss": 0.1788,
      "step": 6449
    },
    {
      "epoch": 0.18816733765097146,
      "grad_norm": 1.0040165688649716,
      "learning_rate": 9.358309900517212e-06,
      "loss": 0.2013,
      "step": 6450
    },
    {
      "epoch": 0.18819651088161504,
      "grad_norm": 0.6921997078968785,
      "learning_rate": 9.358078337524362e-06,
      "loss": 0.1488,
      "step": 6451
    },
    {
      "epoch": 0.1882256841122586,
      "grad_norm": 0.8853309572382455,
      "learning_rate": 9.357846735623627e-06,
      "loss": 0.1971,
      "step": 6452
    },
    {
      "epoch": 0.18825485734290215,
      "grad_norm": 0.9426241570187159,
      "learning_rate": 9.357615094817076e-06,
      "loss": 0.1524,
      "step": 6453
    },
    {
      "epoch": 0.1882840305735457,
      "grad_norm": 0.876801031963035,
      "learning_rate": 9.35738341510678e-06,
      "loss": 0.1897,
      "step": 6454
    },
    {
      "epoch": 0.18831320380418928,
      "grad_norm": 0.8590444765798828,
      "learning_rate": 9.357151696494805e-06,
      "loss": 0.1811,
      "step": 6455
    },
    {
      "epoch": 0.18834237703483284,
      "grad_norm": 1.041073665471181,
      "learning_rate": 9.356919938983217e-06,
      "loss": 0.1648,
      "step": 6456
    },
    {
      "epoch": 0.1883715502654764,
      "grad_norm": 0.7810445562439615,
      "learning_rate": 9.35668814257409e-06,
      "loss": 0.1568,
      "step": 6457
    },
    {
      "epoch": 0.18840072349611997,
      "grad_norm": 0.9808872059364543,
      "learning_rate": 9.356456307269493e-06,
      "loss": 0.149,
      "step": 6458
    },
    {
      "epoch": 0.18842989672676352,
      "grad_norm": 0.7957062531699588,
      "learning_rate": 9.35622443307149e-06,
      "loss": 0.1566,
      "step": 6459
    },
    {
      "epoch": 0.18845906995740708,
      "grad_norm": 0.9969133059553652,
      "learning_rate": 9.355992519982159e-06,
      "loss": 0.1707,
      "step": 6460
    },
    {
      "epoch": 0.18848824318805066,
      "grad_norm": 0.9009970339068955,
      "learning_rate": 9.355760568003564e-06,
      "loss": 0.1634,
      "step": 6461
    },
    {
      "epoch": 0.1885174164186942,
      "grad_norm": 0.9034640625249046,
      "learning_rate": 9.35552857713778e-06,
      "loss": 0.158,
      "step": 6462
    },
    {
      "epoch": 0.18854658964933776,
      "grad_norm": 0.8307151872442313,
      "learning_rate": 9.355296547386876e-06,
      "loss": 0.1796,
      "step": 6463
    },
    {
      "epoch": 0.18857576287998132,
      "grad_norm": 0.7823639156560412,
      "learning_rate": 9.355064478752925e-06,
      "loss": 0.1559,
      "step": 6464
    },
    {
      "epoch": 0.1886049361106249,
      "grad_norm": 1.1364863653210286,
      "learning_rate": 9.354832371237996e-06,
      "loss": 0.1643,
      "step": 6465
    },
    {
      "epoch": 0.18863410934126845,
      "grad_norm": 0.819555976878176,
      "learning_rate": 9.354600224844166e-06,
      "loss": 0.1766,
      "step": 6466
    },
    {
      "epoch": 0.188663282571912,
      "grad_norm": 0.8021540114947207,
      "learning_rate": 9.354368039573502e-06,
      "loss": 0.1578,
      "step": 6467
    },
    {
      "epoch": 0.1886924558025556,
      "grad_norm": 1.0294397190067515,
      "learning_rate": 9.354135815428081e-06,
      "loss": 0.1749,
      "step": 6468
    },
    {
      "epoch": 0.18872162903319914,
      "grad_norm": 0.8596814036866786,
      "learning_rate": 9.353903552409975e-06,
      "loss": 0.1583,
      "step": 6469
    },
    {
      "epoch": 0.1887508022638427,
      "grad_norm": 0.9579756939625307,
      "learning_rate": 9.353671250521257e-06,
      "loss": 0.1912,
      "step": 6470
    },
    {
      "epoch": 0.18877997549448625,
      "grad_norm": 0.8536816187458158,
      "learning_rate": 9.353438909764e-06,
      "loss": 0.1682,
      "step": 6471
    },
    {
      "epoch": 0.18880914872512983,
      "grad_norm": 0.8471358060067807,
      "learning_rate": 9.353206530140282e-06,
      "loss": 0.1575,
      "step": 6472
    },
    {
      "epoch": 0.18883832195577338,
      "grad_norm": 0.7423011039671563,
      "learning_rate": 9.352974111652174e-06,
      "loss": 0.1705,
      "step": 6473
    },
    {
      "epoch": 0.18886749518641693,
      "grad_norm": 0.872510793966477,
      "learning_rate": 9.352741654301752e-06,
      "loss": 0.1919,
      "step": 6474
    },
    {
      "epoch": 0.18889666841706051,
      "grad_norm": 0.8843322687907549,
      "learning_rate": 9.352509158091092e-06,
      "loss": 0.176,
      "step": 6475
    },
    {
      "epoch": 0.18892584164770407,
      "grad_norm": 0.9153013391100571,
      "learning_rate": 9.35227662302227e-06,
      "loss": 0.1737,
      "step": 6476
    },
    {
      "epoch": 0.18895501487834762,
      "grad_norm": 0.8340567504416462,
      "learning_rate": 9.35204404909736e-06,
      "loss": 0.1556,
      "step": 6477
    },
    {
      "epoch": 0.1889841881089912,
      "grad_norm": 0.7850765747011559,
      "learning_rate": 9.35181143631844e-06,
      "loss": 0.147,
      "step": 6478
    },
    {
      "epoch": 0.18901336133963476,
      "grad_norm": 1.04531413559154,
      "learning_rate": 9.351578784687589e-06,
      "loss": 0.1667,
      "step": 6479
    },
    {
      "epoch": 0.1890425345702783,
      "grad_norm": 0.8802548522615019,
      "learning_rate": 9.351346094206878e-06,
      "loss": 0.1461,
      "step": 6480
    },
    {
      "epoch": 0.18907170780092186,
      "grad_norm": 0.8610718168542294,
      "learning_rate": 9.351113364878388e-06,
      "loss": 0.161,
      "step": 6481
    },
    {
      "epoch": 0.18910088103156544,
      "grad_norm": 0.9291714448392332,
      "learning_rate": 9.350880596704199e-06,
      "loss": 0.1791,
      "step": 6482
    },
    {
      "epoch": 0.189130054262209,
      "grad_norm": 1.0332328719886352,
      "learning_rate": 9.350647789686384e-06,
      "loss": 0.1772,
      "step": 6483
    },
    {
      "epoch": 0.18915922749285255,
      "grad_norm": 0.7525244706407437,
      "learning_rate": 9.350414943827027e-06,
      "loss": 0.1466,
      "step": 6484
    },
    {
      "epoch": 0.18918840072349613,
      "grad_norm": 1.052568781939131,
      "learning_rate": 9.350182059128202e-06,
      "loss": 0.1645,
      "step": 6485
    },
    {
      "epoch": 0.18921757395413968,
      "grad_norm": 0.8692515713275414,
      "learning_rate": 9.34994913559199e-06,
      "loss": 0.1478,
      "step": 6486
    },
    {
      "epoch": 0.18924674718478324,
      "grad_norm": 0.7641666212824467,
      "learning_rate": 9.34971617322047e-06,
      "loss": 0.1699,
      "step": 6487
    },
    {
      "epoch": 0.18927592041542682,
      "grad_norm": 1.012211156978612,
      "learning_rate": 9.349483172015723e-06,
      "loss": 0.1503,
      "step": 6488
    },
    {
      "epoch": 0.18930509364607037,
      "grad_norm": 0.8705933276035973,
      "learning_rate": 9.349250131979829e-06,
      "loss": 0.1756,
      "step": 6489
    },
    {
      "epoch": 0.18933426687671392,
      "grad_norm": 0.8260686852987795,
      "learning_rate": 9.349017053114868e-06,
      "loss": 0.1706,
      "step": 6490
    },
    {
      "epoch": 0.18936344010735748,
      "grad_norm": 1.0823426214485259,
      "learning_rate": 9.34878393542292e-06,
      "loss": 0.1625,
      "step": 6491
    },
    {
      "epoch": 0.18939261333800106,
      "grad_norm": 0.838019795588882,
      "learning_rate": 9.348550778906069e-06,
      "loss": 0.1488,
      "step": 6492
    },
    {
      "epoch": 0.1894217865686446,
      "grad_norm": 0.8252509549584727,
      "learning_rate": 9.348317583566393e-06,
      "loss": 0.161,
      "step": 6493
    },
    {
      "epoch": 0.18945095979928817,
      "grad_norm": 0.8667037898451724,
      "learning_rate": 9.348084349405977e-06,
      "loss": 0.1573,
      "step": 6494
    },
    {
      "epoch": 0.18948013302993175,
      "grad_norm": 0.9632097702897997,
      "learning_rate": 9.347851076426902e-06,
      "loss": 0.1616,
      "step": 6495
    },
    {
      "epoch": 0.1895093062605753,
      "grad_norm": 0.9263513470808169,
      "learning_rate": 9.347617764631248e-06,
      "loss": 0.1696,
      "step": 6496
    },
    {
      "epoch": 0.18953847949121885,
      "grad_norm": 0.9603998868666372,
      "learning_rate": 9.347384414021103e-06,
      "loss": 0.1867,
      "step": 6497
    },
    {
      "epoch": 0.1895676527218624,
      "grad_norm": 0.9443097850177389,
      "learning_rate": 9.347151024598547e-06,
      "loss": 0.1646,
      "step": 6498
    },
    {
      "epoch": 0.189596825952506,
      "grad_norm": 0.9662622519989555,
      "learning_rate": 9.346917596365663e-06,
      "loss": 0.191,
      "step": 6499
    },
    {
      "epoch": 0.18962599918314954,
      "grad_norm": 0.7954713778166729,
      "learning_rate": 9.346684129324539e-06,
      "loss": 0.1623,
      "step": 6500
    },
    {
      "epoch": 0.1896551724137931,
      "grad_norm": 0.8878743061399259,
      "learning_rate": 9.346450623477255e-06,
      "loss": 0.1905,
      "step": 6501
    },
    {
      "epoch": 0.18968434564443667,
      "grad_norm": 1.0539982976897557,
      "learning_rate": 9.346217078825898e-06,
      "loss": 0.1586,
      "step": 6502
    },
    {
      "epoch": 0.18971351887508023,
      "grad_norm": 0.796267607966624,
      "learning_rate": 9.345983495372552e-06,
      "loss": 0.1772,
      "step": 6503
    },
    {
      "epoch": 0.18974269210572378,
      "grad_norm": 0.9537548330363622,
      "learning_rate": 9.345749873119304e-06,
      "loss": 0.1952,
      "step": 6504
    },
    {
      "epoch": 0.18977186533636736,
      "grad_norm": 1.1008248733668338,
      "learning_rate": 9.345516212068237e-06,
      "loss": 0.1787,
      "step": 6505
    },
    {
      "epoch": 0.18980103856701092,
      "grad_norm": 0.9523573448915854,
      "learning_rate": 9.34528251222144e-06,
      "loss": 0.1526,
      "step": 6506
    },
    {
      "epoch": 0.18983021179765447,
      "grad_norm": 0.9023001720464379,
      "learning_rate": 9.345048773580995e-06,
      "loss": 0.1711,
      "step": 6507
    },
    {
      "epoch": 0.18985938502829802,
      "grad_norm": 0.9134559758736757,
      "learning_rate": 9.344814996148995e-06,
      "loss": 0.1614,
      "step": 6508
    },
    {
      "epoch": 0.1898885582589416,
      "grad_norm": 0.9209440705402696,
      "learning_rate": 9.344581179927523e-06,
      "loss": 0.1853,
      "step": 6509
    },
    {
      "epoch": 0.18991773148958516,
      "grad_norm": 1.0425855002494953,
      "learning_rate": 9.344347324918667e-06,
      "loss": 0.203,
      "step": 6510
    },
    {
      "epoch": 0.1899469047202287,
      "grad_norm": 0.9322617122236974,
      "learning_rate": 9.344113431124517e-06,
      "loss": 0.1594,
      "step": 6511
    },
    {
      "epoch": 0.1899760779508723,
      "grad_norm": 0.7838747514885989,
      "learning_rate": 9.343879498547157e-06,
      "loss": 0.1669,
      "step": 6512
    },
    {
      "epoch": 0.19000525118151584,
      "grad_norm": 1.0160814798959792,
      "learning_rate": 9.343645527188678e-06,
      "loss": 0.1763,
      "step": 6513
    },
    {
      "epoch": 0.1900344244121594,
      "grad_norm": 0.9135837272844288,
      "learning_rate": 9.34341151705117e-06,
      "loss": 0.159,
      "step": 6514
    },
    {
      "epoch": 0.19006359764280295,
      "grad_norm": 0.7831207396365385,
      "learning_rate": 9.34317746813672e-06,
      "loss": 0.1617,
      "step": 6515
    },
    {
      "epoch": 0.19009277087344653,
      "grad_norm": 0.8770832195483749,
      "learning_rate": 9.342943380447417e-06,
      "loss": 0.1857,
      "step": 6516
    },
    {
      "epoch": 0.19012194410409008,
      "grad_norm": 0.8567349049776124,
      "learning_rate": 9.342709253985356e-06,
      "loss": 0.1663,
      "step": 6517
    },
    {
      "epoch": 0.19015111733473364,
      "grad_norm": 0.6983197993077767,
      "learning_rate": 9.342475088752621e-06,
      "loss": 0.1398,
      "step": 6518
    },
    {
      "epoch": 0.19018029056537722,
      "grad_norm": 0.8021887784195425,
      "learning_rate": 9.342240884751305e-06,
      "loss": 0.1837,
      "step": 6519
    },
    {
      "epoch": 0.19020946379602077,
      "grad_norm": 0.8892876153441276,
      "learning_rate": 9.342006641983499e-06,
      "loss": 0.1656,
      "step": 6520
    },
    {
      "epoch": 0.19023863702666433,
      "grad_norm": 0.8278555770237782,
      "learning_rate": 9.341772360451294e-06,
      "loss": 0.1438,
      "step": 6521
    },
    {
      "epoch": 0.1902678102573079,
      "grad_norm": 0.9707716915155568,
      "learning_rate": 9.341538040156783e-06,
      "loss": 0.1524,
      "step": 6522
    },
    {
      "epoch": 0.19029698348795146,
      "grad_norm": 0.9330398926275478,
      "learning_rate": 9.341303681102056e-06,
      "loss": 0.178,
      "step": 6523
    },
    {
      "epoch": 0.190326156718595,
      "grad_norm": 0.7281296627655914,
      "learning_rate": 9.341069283289207e-06,
      "loss": 0.1646,
      "step": 6524
    },
    {
      "epoch": 0.19035532994923857,
      "grad_norm": 1.0189311532604206,
      "learning_rate": 9.340834846720326e-06,
      "loss": 0.1605,
      "step": 6525
    },
    {
      "epoch": 0.19038450317988215,
      "grad_norm": 0.7388679711664593,
      "learning_rate": 9.340600371397508e-06,
      "loss": 0.1555,
      "step": 6526
    },
    {
      "epoch": 0.1904136764105257,
      "grad_norm": 0.9170058175685646,
      "learning_rate": 9.340365857322846e-06,
      "loss": 0.1565,
      "step": 6527
    },
    {
      "epoch": 0.19044284964116925,
      "grad_norm": 0.7596638777324773,
      "learning_rate": 9.340131304498435e-06,
      "loss": 0.1416,
      "step": 6528
    },
    {
      "epoch": 0.19047202287181283,
      "grad_norm": 0.8463017137810688,
      "learning_rate": 9.339896712926367e-06,
      "loss": 0.187,
      "step": 6529
    },
    {
      "epoch": 0.1905011961024564,
      "grad_norm": 0.7594154273046081,
      "learning_rate": 9.339662082608739e-06,
      "loss": 0.1685,
      "step": 6530
    },
    {
      "epoch": 0.19053036933309994,
      "grad_norm": 0.7411891091664682,
      "learning_rate": 9.33942741354764e-06,
      "loss": 0.16,
      "step": 6531
    },
    {
      "epoch": 0.19055954256374352,
      "grad_norm": 0.9121398575667056,
      "learning_rate": 9.339192705745172e-06,
      "loss": 0.1768,
      "step": 6532
    },
    {
      "epoch": 0.19058871579438708,
      "grad_norm": 0.8243088908698623,
      "learning_rate": 9.338957959203427e-06,
      "loss": 0.1651,
      "step": 6533
    },
    {
      "epoch": 0.19061788902503063,
      "grad_norm": 0.8719208184465156,
      "learning_rate": 9.3387231739245e-06,
      "loss": 0.1877,
      "step": 6534
    },
    {
      "epoch": 0.19064706225567418,
      "grad_norm": 0.752749898025691,
      "learning_rate": 9.338488349910489e-06,
      "loss": 0.1681,
      "step": 6535
    },
    {
      "epoch": 0.19067623548631776,
      "grad_norm": 1.0676027126913896,
      "learning_rate": 9.33825348716349e-06,
      "loss": 0.1636,
      "step": 6536
    },
    {
      "epoch": 0.19070540871696132,
      "grad_norm": 0.745378376347966,
      "learning_rate": 9.338018585685599e-06,
      "loss": 0.1743,
      "step": 6537
    },
    {
      "epoch": 0.19073458194760487,
      "grad_norm": 0.867800714762912,
      "learning_rate": 9.337783645478912e-06,
      "loss": 0.172,
      "step": 6538
    },
    {
      "epoch": 0.19076375517824845,
      "grad_norm": 1.0383533865490961,
      "learning_rate": 9.337548666545532e-06,
      "loss": 0.2088,
      "step": 6539
    },
    {
      "epoch": 0.190792928408892,
      "grad_norm": 0.8019896158769209,
      "learning_rate": 9.33731364888755e-06,
      "loss": 0.1437,
      "step": 6540
    },
    {
      "epoch": 0.19082210163953556,
      "grad_norm": 0.7264352087170607,
      "learning_rate": 9.337078592507069e-06,
      "loss": 0.1537,
      "step": 6541
    },
    {
      "epoch": 0.1908512748701791,
      "grad_norm": 0.9476453646977872,
      "learning_rate": 9.336843497406184e-06,
      "loss": 0.1805,
      "step": 6542
    },
    {
      "epoch": 0.1908804481008227,
      "grad_norm": 0.8229482466231757,
      "learning_rate": 9.336608363586997e-06,
      "loss": 0.1565,
      "step": 6543
    },
    {
      "epoch": 0.19090962133146624,
      "grad_norm": 0.8411750734928479,
      "learning_rate": 9.336373191051604e-06,
      "loss": 0.1693,
      "step": 6544
    },
    {
      "epoch": 0.1909387945621098,
      "grad_norm": 0.752537013957642,
      "learning_rate": 9.336137979802107e-06,
      "loss": 0.1423,
      "step": 6545
    },
    {
      "epoch": 0.19096796779275338,
      "grad_norm": 0.813754702244634,
      "learning_rate": 9.335902729840606e-06,
      "loss": 0.1862,
      "step": 6546
    },
    {
      "epoch": 0.19099714102339693,
      "grad_norm": 0.7726436207337717,
      "learning_rate": 9.3356674411692e-06,
      "loss": 0.1607,
      "step": 6547
    },
    {
      "epoch": 0.19102631425404049,
      "grad_norm": 0.8806303552398813,
      "learning_rate": 9.33543211378999e-06,
      "loss": 0.1869,
      "step": 6548
    },
    {
      "epoch": 0.19105548748468407,
      "grad_norm": 0.7724633385704062,
      "learning_rate": 9.335196747705077e-06,
      "loss": 0.1593,
      "step": 6549
    },
    {
      "epoch": 0.19108466071532762,
      "grad_norm": 1.007481687845384,
      "learning_rate": 9.334961342916563e-06,
      "loss": 0.1327,
      "step": 6550
    },
    {
      "epoch": 0.19111383394597117,
      "grad_norm": 0.7255188714256837,
      "learning_rate": 9.334725899426549e-06,
      "loss": 0.1467,
      "step": 6551
    },
    {
      "epoch": 0.19114300717661473,
      "grad_norm": 0.8636454349993008,
      "learning_rate": 9.334490417237137e-06,
      "loss": 0.1678,
      "step": 6552
    },
    {
      "epoch": 0.1911721804072583,
      "grad_norm": 0.8242913205173432,
      "learning_rate": 9.334254896350428e-06,
      "loss": 0.1508,
      "step": 6553
    },
    {
      "epoch": 0.19120135363790186,
      "grad_norm": 1.1292800022059526,
      "learning_rate": 9.334019336768525e-06,
      "loss": 0.1515,
      "step": 6554
    },
    {
      "epoch": 0.19123052686854541,
      "grad_norm": 0.884273781257435,
      "learning_rate": 9.333783738493534e-06,
      "loss": 0.1525,
      "step": 6555
    },
    {
      "epoch": 0.191259700099189,
      "grad_norm": 0.9425489409246627,
      "learning_rate": 9.333548101527557e-06,
      "loss": 0.1616,
      "step": 6556
    },
    {
      "epoch": 0.19128887332983255,
      "grad_norm": 0.9218937211022523,
      "learning_rate": 9.333312425872696e-06,
      "loss": 0.1548,
      "step": 6557
    },
    {
      "epoch": 0.1913180465604761,
      "grad_norm": 0.8745561746227835,
      "learning_rate": 9.333076711531055e-06,
      "loss": 0.1552,
      "step": 6558
    },
    {
      "epoch": 0.19134721979111965,
      "grad_norm": 0.971277358473962,
      "learning_rate": 9.33284095850474e-06,
      "loss": 0.1627,
      "step": 6559
    },
    {
      "epoch": 0.19137639302176324,
      "grad_norm": 0.729642279874078,
      "learning_rate": 9.332605166795857e-06,
      "loss": 0.1605,
      "step": 6560
    },
    {
      "epoch": 0.1914055662524068,
      "grad_norm": 0.9982954152299268,
      "learning_rate": 9.332369336406508e-06,
      "loss": 0.1486,
      "step": 6561
    },
    {
      "epoch": 0.19143473948305034,
      "grad_norm": 1.000662881706075,
      "learning_rate": 9.332133467338799e-06,
      "loss": 0.1635,
      "step": 6562
    },
    {
      "epoch": 0.19146391271369392,
      "grad_norm": 0.7443616070218757,
      "learning_rate": 9.331897559594839e-06,
      "loss": 0.1439,
      "step": 6563
    },
    {
      "epoch": 0.19149308594433748,
      "grad_norm": 1.0920462291255546,
      "learning_rate": 9.33166161317673e-06,
      "loss": 0.1745,
      "step": 6564
    },
    {
      "epoch": 0.19152225917498103,
      "grad_norm": 0.8604818796068482,
      "learning_rate": 9.33142562808658e-06,
      "loss": 0.1842,
      "step": 6565
    },
    {
      "epoch": 0.1915514324056246,
      "grad_norm": 0.9014934031732198,
      "learning_rate": 9.331189604326498e-06,
      "loss": 0.1855,
      "step": 6566
    },
    {
      "epoch": 0.19158060563626816,
      "grad_norm": 1.158240541171249,
      "learning_rate": 9.330953541898587e-06,
      "loss": 0.1642,
      "step": 6567
    },
    {
      "epoch": 0.19160977886691172,
      "grad_norm": 1.0939223699238565,
      "learning_rate": 9.330717440804957e-06,
      "loss": 0.165,
      "step": 6568
    },
    {
      "epoch": 0.19163895209755527,
      "grad_norm": 0.7959937577769631,
      "learning_rate": 9.330481301047716e-06,
      "loss": 0.1647,
      "step": 6569
    },
    {
      "epoch": 0.19166812532819885,
      "grad_norm": 0.8363215560981384,
      "learning_rate": 9.330245122628972e-06,
      "loss": 0.1833,
      "step": 6570
    },
    {
      "epoch": 0.1916972985588424,
      "grad_norm": 1.0241992590915194,
      "learning_rate": 9.33000890555083e-06,
      "loss": 0.1692,
      "step": 6571
    },
    {
      "epoch": 0.19172647178948596,
      "grad_norm": 0.8207735379074976,
      "learning_rate": 9.329772649815407e-06,
      "loss": 0.1904,
      "step": 6572
    },
    {
      "epoch": 0.19175564502012954,
      "grad_norm": 0.9699830671805612,
      "learning_rate": 9.329536355424804e-06,
      "loss": 0.1896,
      "step": 6573
    },
    {
      "epoch": 0.1917848182507731,
      "grad_norm": 0.8962596024080848,
      "learning_rate": 9.329300022381135e-06,
      "loss": 0.171,
      "step": 6574
    },
    {
      "epoch": 0.19181399148141665,
      "grad_norm": 0.8742934378036707,
      "learning_rate": 9.329063650686511e-06,
      "loss": 0.1658,
      "step": 6575
    },
    {
      "epoch": 0.19184316471206023,
      "grad_norm": 0.8093306750612865,
      "learning_rate": 9.328827240343037e-06,
      "loss": 0.1735,
      "step": 6576
    },
    {
      "epoch": 0.19187233794270378,
      "grad_norm": 0.7606877618978442,
      "learning_rate": 9.328590791352828e-06,
      "loss": 0.1673,
      "step": 6577
    },
    {
      "epoch": 0.19190151117334733,
      "grad_norm": 0.973003170760142,
      "learning_rate": 9.328354303717995e-06,
      "loss": 0.1843,
      "step": 6578
    },
    {
      "epoch": 0.1919306844039909,
      "grad_norm": 0.7516757091467464,
      "learning_rate": 9.328117777440647e-06,
      "loss": 0.1704,
      "step": 6579
    },
    {
      "epoch": 0.19195985763463447,
      "grad_norm": 0.8252779180987707,
      "learning_rate": 9.327881212522896e-06,
      "loss": 0.1511,
      "step": 6580
    },
    {
      "epoch": 0.19198903086527802,
      "grad_norm": 0.898972360986517,
      "learning_rate": 9.327644608966855e-06,
      "loss": 0.1692,
      "step": 6581
    },
    {
      "epoch": 0.19201820409592157,
      "grad_norm": 0.9227271030833828,
      "learning_rate": 9.327407966774635e-06,
      "loss": 0.1602,
      "step": 6582
    },
    {
      "epoch": 0.19204737732656516,
      "grad_norm": 0.9116754764672668,
      "learning_rate": 9.327171285948352e-06,
      "loss": 0.1936,
      "step": 6583
    },
    {
      "epoch": 0.1920765505572087,
      "grad_norm": 0.9654934836715419,
      "learning_rate": 9.326934566490116e-06,
      "loss": 0.1517,
      "step": 6584
    },
    {
      "epoch": 0.19210572378785226,
      "grad_norm": 0.8335776057812551,
      "learning_rate": 9.326697808402041e-06,
      "loss": 0.1479,
      "step": 6585
    },
    {
      "epoch": 0.19213489701849581,
      "grad_norm": 0.8741283696287376,
      "learning_rate": 9.32646101168624e-06,
      "loss": 0.1683,
      "step": 6586
    },
    {
      "epoch": 0.1921640702491394,
      "grad_norm": 0.8851147245285534,
      "learning_rate": 9.326224176344829e-06,
      "loss": 0.1514,
      "step": 6587
    },
    {
      "epoch": 0.19219324347978295,
      "grad_norm": 1.0776747378112042,
      "learning_rate": 9.32598730237992e-06,
      "loss": 0.1776,
      "step": 6588
    },
    {
      "epoch": 0.1922224167104265,
      "grad_norm": 0.9799316250456251,
      "learning_rate": 9.32575038979363e-06,
      "loss": 0.174,
      "step": 6589
    },
    {
      "epoch": 0.19225158994107008,
      "grad_norm": 0.8183463022592024,
      "learning_rate": 9.325513438588073e-06,
      "loss": 0.1701,
      "step": 6590
    },
    {
      "epoch": 0.19228076317171364,
      "grad_norm": 0.8361733681857653,
      "learning_rate": 9.325276448765365e-06,
      "loss": 0.16,
      "step": 6591
    },
    {
      "epoch": 0.1923099364023572,
      "grad_norm": 0.7967788134473966,
      "learning_rate": 9.325039420327621e-06,
      "loss": 0.1581,
      "step": 6592
    },
    {
      "epoch": 0.19233910963300077,
      "grad_norm": 0.894301695688426,
      "learning_rate": 9.324802353276957e-06,
      "loss": 0.1748,
      "step": 6593
    },
    {
      "epoch": 0.19236828286364432,
      "grad_norm": 0.6669028858586441,
      "learning_rate": 9.324565247615491e-06,
      "loss": 0.1728,
      "step": 6594
    },
    {
      "epoch": 0.19239745609428788,
      "grad_norm": 0.8436065735195872,
      "learning_rate": 9.324328103345338e-06,
      "loss": 0.1743,
      "step": 6595
    },
    {
      "epoch": 0.19242662932493143,
      "grad_norm": 0.8692035951921453,
      "learning_rate": 9.324090920468615e-06,
      "loss": 0.184,
      "step": 6596
    },
    {
      "epoch": 0.192455802555575,
      "grad_norm": 0.9045271630083936,
      "learning_rate": 9.323853698987443e-06,
      "loss": 0.1774,
      "step": 6597
    },
    {
      "epoch": 0.19248497578621857,
      "grad_norm": 0.6962124244886452,
      "learning_rate": 9.323616438903937e-06,
      "loss": 0.152,
      "step": 6598
    },
    {
      "epoch": 0.19251414901686212,
      "grad_norm": 0.8647424905362106,
      "learning_rate": 9.323379140220215e-06,
      "loss": 0.1851,
      "step": 6599
    },
    {
      "epoch": 0.1925433222475057,
      "grad_norm": 0.8542025098778888,
      "learning_rate": 9.323141802938395e-06,
      "loss": 0.1748,
      "step": 6600
    },
    {
      "epoch": 0.19257249547814925,
      "grad_norm": 0.8022133668432814,
      "learning_rate": 9.322904427060598e-06,
      "loss": 0.1704,
      "step": 6601
    },
    {
      "epoch": 0.1926016687087928,
      "grad_norm": 0.9583753138003067,
      "learning_rate": 9.322667012588942e-06,
      "loss": 0.1938,
      "step": 6602
    },
    {
      "epoch": 0.1926308419394364,
      "grad_norm": 0.7933999801245497,
      "learning_rate": 9.322429559525548e-06,
      "loss": 0.1499,
      "step": 6603
    },
    {
      "epoch": 0.19266001517007994,
      "grad_norm": 0.8192991203228889,
      "learning_rate": 9.322192067872533e-06,
      "loss": 0.1401,
      "step": 6604
    },
    {
      "epoch": 0.1926891884007235,
      "grad_norm": 0.7750423447305869,
      "learning_rate": 9.321954537632019e-06,
      "loss": 0.167,
      "step": 6605
    },
    {
      "epoch": 0.19271836163136705,
      "grad_norm": 0.8832348182547236,
      "learning_rate": 9.321716968806127e-06,
      "loss": 0.1652,
      "step": 6606
    },
    {
      "epoch": 0.19274753486201063,
      "grad_norm": 0.9731916544546582,
      "learning_rate": 9.32147936139698e-06,
      "loss": 0.1885,
      "step": 6607
    },
    {
      "epoch": 0.19277670809265418,
      "grad_norm": 0.9056726077735234,
      "learning_rate": 9.321241715406694e-06,
      "loss": 0.1893,
      "step": 6608
    },
    {
      "epoch": 0.19280588132329773,
      "grad_norm": 0.9771268289081955,
      "learning_rate": 9.321004030837394e-06,
      "loss": 0.1966,
      "step": 6609
    },
    {
      "epoch": 0.19283505455394132,
      "grad_norm": 0.9250638256199035,
      "learning_rate": 9.320766307691202e-06,
      "loss": 0.1908,
      "step": 6610
    },
    {
      "epoch": 0.19286422778458487,
      "grad_norm": 1.1064866746245714,
      "learning_rate": 9.32052854597024e-06,
      "loss": 0.1897,
      "step": 6611
    },
    {
      "epoch": 0.19289340101522842,
      "grad_norm": 1.2970316922890326,
      "learning_rate": 9.32029074567663e-06,
      "loss": 0.1275,
      "step": 6612
    },
    {
      "epoch": 0.19292257424587198,
      "grad_norm": 0.9121026402023582,
      "learning_rate": 9.320052906812495e-06,
      "loss": 0.1836,
      "step": 6613
    },
    {
      "epoch": 0.19295174747651556,
      "grad_norm": 0.9468523757153974,
      "learning_rate": 9.31981502937996e-06,
      "loss": 0.1575,
      "step": 6614
    },
    {
      "epoch": 0.1929809207071591,
      "grad_norm": 0.8629423786920889,
      "learning_rate": 9.319577113381147e-06,
      "loss": 0.18,
      "step": 6615
    },
    {
      "epoch": 0.19301009393780266,
      "grad_norm": 0.7974688953391694,
      "learning_rate": 9.319339158818182e-06,
      "loss": 0.1722,
      "step": 6616
    },
    {
      "epoch": 0.19303926716844624,
      "grad_norm": 0.8848472276388092,
      "learning_rate": 9.319101165693187e-06,
      "loss": 0.1878,
      "step": 6617
    },
    {
      "epoch": 0.1930684403990898,
      "grad_norm": 0.7751275570666104,
      "learning_rate": 9.318863134008288e-06,
      "loss": 0.1773,
      "step": 6618
    },
    {
      "epoch": 0.19309761362973335,
      "grad_norm": 0.8620195648849519,
      "learning_rate": 9.31862506376561e-06,
      "loss": 0.1602,
      "step": 6619
    },
    {
      "epoch": 0.19312678686037693,
      "grad_norm": 0.6632416890148962,
      "learning_rate": 9.318386954967278e-06,
      "loss": 0.1572,
      "step": 6620
    },
    {
      "epoch": 0.19315596009102048,
      "grad_norm": 0.9913204114730793,
      "learning_rate": 9.318148807615418e-06,
      "loss": 0.1557,
      "step": 6621
    },
    {
      "epoch": 0.19318513332166404,
      "grad_norm": 0.8709380792413741,
      "learning_rate": 9.317910621712156e-06,
      "loss": 0.1736,
      "step": 6622
    },
    {
      "epoch": 0.1932143065523076,
      "grad_norm": 0.8025506641134886,
      "learning_rate": 9.31767239725962e-06,
      "loss": 0.172,
      "step": 6623
    },
    {
      "epoch": 0.19324347978295117,
      "grad_norm": 0.6836281445296057,
      "learning_rate": 9.317434134259934e-06,
      "loss": 0.1639,
      "step": 6624
    },
    {
      "epoch": 0.19327265301359473,
      "grad_norm": 1.0082377785568835,
      "learning_rate": 9.317195832715228e-06,
      "loss": 0.1513,
      "step": 6625
    },
    {
      "epoch": 0.19330182624423828,
      "grad_norm": 0.9049552447867992,
      "learning_rate": 9.31695749262763e-06,
      "loss": 0.1743,
      "step": 6626
    },
    {
      "epoch": 0.19333099947488186,
      "grad_norm": 0.6619899587092459,
      "learning_rate": 9.316719113999263e-06,
      "loss": 0.1335,
      "step": 6627
    },
    {
      "epoch": 0.1933601727055254,
      "grad_norm": 0.7845890178964112,
      "learning_rate": 9.316480696832259e-06,
      "loss": 0.1442,
      "step": 6628
    },
    {
      "epoch": 0.19338934593616897,
      "grad_norm": 0.8581096565647223,
      "learning_rate": 9.316242241128746e-06,
      "loss": 0.16,
      "step": 6629
    },
    {
      "epoch": 0.19341851916681252,
      "grad_norm": 0.8871473185494342,
      "learning_rate": 9.316003746890854e-06,
      "loss": 0.1667,
      "step": 6630
    },
    {
      "epoch": 0.1934476923974561,
      "grad_norm": 0.72978329018481,
      "learning_rate": 9.315765214120709e-06,
      "loss": 0.171,
      "step": 6631
    },
    {
      "epoch": 0.19347686562809965,
      "grad_norm": 0.9565276574491248,
      "learning_rate": 9.315526642820443e-06,
      "loss": 0.162,
      "step": 6632
    },
    {
      "epoch": 0.1935060388587432,
      "grad_norm": 0.8492242129049904,
      "learning_rate": 9.315288032992185e-06,
      "loss": 0.1533,
      "step": 6633
    },
    {
      "epoch": 0.1935352120893868,
      "grad_norm": 0.8202558106143849,
      "learning_rate": 9.315049384638065e-06,
      "loss": 0.1306,
      "step": 6634
    },
    {
      "epoch": 0.19356438532003034,
      "grad_norm": 0.9556762645442757,
      "learning_rate": 9.314810697760214e-06,
      "loss": 0.164,
      "step": 6635
    },
    {
      "epoch": 0.1935935585506739,
      "grad_norm": 0.970958578848858,
      "learning_rate": 9.314571972360765e-06,
      "loss": 0.2004,
      "step": 6636
    },
    {
      "epoch": 0.19362273178131748,
      "grad_norm": 1.076134050832174,
      "learning_rate": 9.314333208441847e-06,
      "loss": 0.1925,
      "step": 6637
    },
    {
      "epoch": 0.19365190501196103,
      "grad_norm": 0.7572545491179042,
      "learning_rate": 9.314094406005592e-06,
      "loss": 0.1581,
      "step": 6638
    },
    {
      "epoch": 0.19368107824260458,
      "grad_norm": 0.7839825173434335,
      "learning_rate": 9.31385556505413e-06,
      "loss": 0.1512,
      "step": 6639
    },
    {
      "epoch": 0.19371025147324814,
      "grad_norm": 0.8591181566493377,
      "learning_rate": 9.313616685589596e-06,
      "loss": 0.18,
      "step": 6640
    },
    {
      "epoch": 0.19373942470389172,
      "grad_norm": 0.956879252265813,
      "learning_rate": 9.313377767614125e-06,
      "loss": 0.1809,
      "step": 6641
    },
    {
      "epoch": 0.19376859793453527,
      "grad_norm": 0.9351799785382513,
      "learning_rate": 9.313138811129844e-06,
      "loss": 0.1644,
      "step": 6642
    },
    {
      "epoch": 0.19379777116517882,
      "grad_norm": 0.9192455379752127,
      "learning_rate": 9.31289981613889e-06,
      "loss": 0.1649,
      "step": 6643
    },
    {
      "epoch": 0.1938269443958224,
      "grad_norm": 0.931037002354566,
      "learning_rate": 9.312660782643397e-06,
      "loss": 0.1714,
      "step": 6644
    },
    {
      "epoch": 0.19385611762646596,
      "grad_norm": 0.9645718792541462,
      "learning_rate": 9.312421710645496e-06,
      "loss": 0.1797,
      "step": 6645
    },
    {
      "epoch": 0.1938852908571095,
      "grad_norm": 0.891172809150048,
      "learning_rate": 9.312182600147325e-06,
      "loss": 0.1747,
      "step": 6646
    },
    {
      "epoch": 0.1939144640877531,
      "grad_norm": 0.912154300639407,
      "learning_rate": 9.311943451151017e-06,
      "loss": 0.171,
      "step": 6647
    },
    {
      "epoch": 0.19394363731839664,
      "grad_norm": 0.965505005244721,
      "learning_rate": 9.311704263658707e-06,
      "loss": 0.1668,
      "step": 6648
    },
    {
      "epoch": 0.1939728105490402,
      "grad_norm": 0.8875713134156251,
      "learning_rate": 9.311465037672532e-06,
      "loss": 0.1679,
      "step": 6649
    },
    {
      "epoch": 0.19400198377968375,
      "grad_norm": 1.018908109861898,
      "learning_rate": 9.311225773194624e-06,
      "loss": 0.165,
      "step": 6650
    },
    {
      "epoch": 0.19403115701032733,
      "grad_norm": 1.0440928186899154,
      "learning_rate": 9.310986470227121e-06,
      "loss": 0.1462,
      "step": 6651
    },
    {
      "epoch": 0.19406033024097089,
      "grad_norm": 0.9394903747176593,
      "learning_rate": 9.310747128772162e-06,
      "loss": 0.1764,
      "step": 6652
    },
    {
      "epoch": 0.19408950347161444,
      "grad_norm": 0.8371496225635469,
      "learning_rate": 9.31050774883188e-06,
      "loss": 0.1659,
      "step": 6653
    },
    {
      "epoch": 0.19411867670225802,
      "grad_norm": 0.8341934815383878,
      "learning_rate": 9.310268330408417e-06,
      "loss": 0.1525,
      "step": 6654
    },
    {
      "epoch": 0.19414784993290157,
      "grad_norm": 1.1010564884598677,
      "learning_rate": 9.310028873503905e-06,
      "loss": 0.1597,
      "step": 6655
    },
    {
      "epoch": 0.19417702316354513,
      "grad_norm": 0.7642353216518087,
      "learning_rate": 9.309789378120483e-06,
      "loss": 0.1514,
      "step": 6656
    },
    {
      "epoch": 0.19420619639418868,
      "grad_norm": 0.9972370208399856,
      "learning_rate": 9.309549844260292e-06,
      "loss": 0.1936,
      "step": 6657
    },
    {
      "epoch": 0.19423536962483226,
      "grad_norm": 1.077879262495552,
      "learning_rate": 9.309310271925469e-06,
      "loss": 0.1709,
      "step": 6658
    },
    {
      "epoch": 0.1942645428554758,
      "grad_norm": 0.9146424859094537,
      "learning_rate": 9.309070661118151e-06,
      "loss": 0.1722,
      "step": 6659
    },
    {
      "epoch": 0.19429371608611937,
      "grad_norm": 0.959592205810473,
      "learning_rate": 9.30883101184048e-06,
      "loss": 0.1599,
      "step": 6660
    },
    {
      "epoch": 0.19432288931676295,
      "grad_norm": 0.7038250088223772,
      "learning_rate": 9.308591324094594e-06,
      "loss": 0.1405,
      "step": 6661
    },
    {
      "epoch": 0.1943520625474065,
      "grad_norm": 0.9424024148519394,
      "learning_rate": 9.308351597882632e-06,
      "loss": 0.152,
      "step": 6662
    },
    {
      "epoch": 0.19438123577805005,
      "grad_norm": 0.8046587631286805,
      "learning_rate": 9.308111833206737e-06,
      "loss": 0.1676,
      "step": 6663
    },
    {
      "epoch": 0.19441040900869364,
      "grad_norm": 0.9187185870845379,
      "learning_rate": 9.307872030069049e-06,
      "loss": 0.1734,
      "step": 6664
    },
    {
      "epoch": 0.1944395822393372,
      "grad_norm": 0.779844975332377,
      "learning_rate": 9.307632188471707e-06,
      "loss": 0.1574,
      "step": 6665
    },
    {
      "epoch": 0.19446875546998074,
      "grad_norm": 0.945219663686819,
      "learning_rate": 9.30739230841685e-06,
      "loss": 0.1611,
      "step": 6666
    },
    {
      "epoch": 0.1944979287006243,
      "grad_norm": 0.8842010436067574,
      "learning_rate": 9.307152389906626e-06,
      "loss": 0.1833,
      "step": 6667
    },
    {
      "epoch": 0.19452710193126788,
      "grad_norm": 0.9778749000964558,
      "learning_rate": 9.306912432943173e-06,
      "loss": 0.1665,
      "step": 6668
    },
    {
      "epoch": 0.19455627516191143,
      "grad_norm": 0.793338859736364,
      "learning_rate": 9.306672437528635e-06,
      "loss": 0.169,
      "step": 6669
    },
    {
      "epoch": 0.19458544839255498,
      "grad_norm": 0.7503759724553432,
      "learning_rate": 9.306432403665152e-06,
      "loss": 0.154,
      "step": 6670
    },
    {
      "epoch": 0.19461462162319856,
      "grad_norm": 0.7981291064907011,
      "learning_rate": 9.30619233135487e-06,
      "loss": 0.1485,
      "step": 6671
    },
    {
      "epoch": 0.19464379485384212,
      "grad_norm": 0.8824631369306233,
      "learning_rate": 9.30595222059993e-06,
      "loss": 0.1587,
      "step": 6672
    },
    {
      "epoch": 0.19467296808448567,
      "grad_norm": 0.9097674315717833,
      "learning_rate": 9.305712071402474e-06,
      "loss": 0.1897,
      "step": 6673
    },
    {
      "epoch": 0.19470214131512925,
      "grad_norm": 0.9764717255707349,
      "learning_rate": 9.305471883764651e-06,
      "loss": 0.1516,
      "step": 6674
    },
    {
      "epoch": 0.1947313145457728,
      "grad_norm": 0.857674852944612,
      "learning_rate": 9.305231657688605e-06,
      "loss": 0.1533,
      "step": 6675
    },
    {
      "epoch": 0.19476048777641636,
      "grad_norm": 0.7699677407374467,
      "learning_rate": 9.304991393176475e-06,
      "loss": 0.1458,
      "step": 6676
    },
    {
      "epoch": 0.1947896610070599,
      "grad_norm": 0.8300309033541458,
      "learning_rate": 9.30475109023041e-06,
      "loss": 0.1924,
      "step": 6677
    },
    {
      "epoch": 0.1948188342377035,
      "grad_norm": 0.8280787865320262,
      "learning_rate": 9.304510748852558e-06,
      "loss": 0.1483,
      "step": 6678
    },
    {
      "epoch": 0.19484800746834705,
      "grad_norm": 0.8621778447224153,
      "learning_rate": 9.304270369045058e-06,
      "loss": 0.1768,
      "step": 6679
    },
    {
      "epoch": 0.1948771806989906,
      "grad_norm": 0.8403947736527188,
      "learning_rate": 9.30402995081006e-06,
      "loss": 0.1647,
      "step": 6680
    },
    {
      "epoch": 0.19490635392963418,
      "grad_norm": 1.1243189869466546,
      "learning_rate": 9.303789494149711e-06,
      "loss": 0.173,
      "step": 6681
    },
    {
      "epoch": 0.19493552716027773,
      "grad_norm": 0.7889869801713401,
      "learning_rate": 9.303548999066157e-06,
      "loss": 0.1583,
      "step": 6682
    },
    {
      "epoch": 0.19496470039092129,
      "grad_norm": 0.9309794684973345,
      "learning_rate": 9.303308465561544e-06,
      "loss": 0.1637,
      "step": 6683
    },
    {
      "epoch": 0.19499387362156484,
      "grad_norm": 0.877032678167129,
      "learning_rate": 9.303067893638022e-06,
      "loss": 0.1983,
      "step": 6684
    },
    {
      "epoch": 0.19502304685220842,
      "grad_norm": 0.7429751270285164,
      "learning_rate": 9.302827283297736e-06,
      "loss": 0.1406,
      "step": 6685
    },
    {
      "epoch": 0.19505222008285197,
      "grad_norm": 1.1250560779172676,
      "learning_rate": 9.302586634542835e-06,
      "loss": 0.1559,
      "step": 6686
    },
    {
      "epoch": 0.19508139331349553,
      "grad_norm": 0.6895416128858841,
      "learning_rate": 9.302345947375469e-06,
      "loss": 0.1703,
      "step": 6687
    },
    {
      "epoch": 0.1951105665441391,
      "grad_norm": 0.7863778257084978,
      "learning_rate": 9.302105221797784e-06,
      "loss": 0.1504,
      "step": 6688
    },
    {
      "epoch": 0.19513973977478266,
      "grad_norm": 1.0091930647466942,
      "learning_rate": 9.30186445781193e-06,
      "loss": 0.1656,
      "step": 6689
    },
    {
      "epoch": 0.19516891300542621,
      "grad_norm": 0.7497217945700104,
      "learning_rate": 9.301623655420058e-06,
      "loss": 0.1483,
      "step": 6690
    },
    {
      "epoch": 0.1951980862360698,
      "grad_norm": 0.7333323914970696,
      "learning_rate": 9.301382814624318e-06,
      "loss": 0.1734,
      "step": 6691
    },
    {
      "epoch": 0.19522725946671335,
      "grad_norm": 1.0619694790705123,
      "learning_rate": 9.301141935426856e-06,
      "loss": 0.1787,
      "step": 6692
    },
    {
      "epoch": 0.1952564326973569,
      "grad_norm": 1.1363839954284976,
      "learning_rate": 9.300901017829827e-06,
      "loss": 0.1586,
      "step": 6693
    },
    {
      "epoch": 0.19528560592800046,
      "grad_norm": 0.9425364829644454,
      "learning_rate": 9.300660061835382e-06,
      "loss": 0.1578,
      "step": 6694
    },
    {
      "epoch": 0.19531477915864404,
      "grad_norm": 0.8252125484495676,
      "learning_rate": 9.30041906744567e-06,
      "loss": 0.1702,
      "step": 6695
    },
    {
      "epoch": 0.1953439523892876,
      "grad_norm": 0.9493848757992757,
      "learning_rate": 9.30017803466284e-06,
      "loss": 0.1566,
      "step": 6696
    },
    {
      "epoch": 0.19537312561993114,
      "grad_norm": 1.0631928542107913,
      "learning_rate": 9.299936963489051e-06,
      "loss": 0.1722,
      "step": 6697
    },
    {
      "epoch": 0.19540229885057472,
      "grad_norm": 0.9191586911053998,
      "learning_rate": 9.29969585392645e-06,
      "loss": 0.1579,
      "step": 6698
    },
    {
      "epoch": 0.19543147208121828,
      "grad_norm": 0.9179787981754665,
      "learning_rate": 9.299454705977191e-06,
      "loss": 0.172,
      "step": 6699
    },
    {
      "epoch": 0.19546064531186183,
      "grad_norm": 0.8691153998650513,
      "learning_rate": 9.299213519643427e-06,
      "loss": 0.157,
      "step": 6700
    },
    {
      "epoch": 0.19548981854250538,
      "grad_norm": 0.9333121570377566,
      "learning_rate": 9.298972294927308e-06,
      "loss": 0.1592,
      "step": 6701
    },
    {
      "epoch": 0.19551899177314896,
      "grad_norm": 0.736950193272381,
      "learning_rate": 9.298731031830994e-06,
      "loss": 0.1698,
      "step": 6702
    },
    {
      "epoch": 0.19554816500379252,
      "grad_norm": 0.9342355920214414,
      "learning_rate": 9.298489730356635e-06,
      "loss": 0.1426,
      "step": 6703
    },
    {
      "epoch": 0.19557733823443607,
      "grad_norm": 0.8999560289479994,
      "learning_rate": 9.298248390506387e-06,
      "loss": 0.1654,
      "step": 6704
    },
    {
      "epoch": 0.19560651146507965,
      "grad_norm": 0.7436024642026525,
      "learning_rate": 9.2980070122824e-06,
      "loss": 0.158,
      "step": 6705
    },
    {
      "epoch": 0.1956356846957232,
      "grad_norm": 0.836181561382301,
      "learning_rate": 9.297765595686834e-06,
      "loss": 0.1698,
      "step": 6706
    },
    {
      "epoch": 0.19566485792636676,
      "grad_norm": 0.8837979522452214,
      "learning_rate": 9.297524140721843e-06,
      "loss": 0.1443,
      "step": 6707
    },
    {
      "epoch": 0.19569403115701034,
      "grad_norm": 0.8967727658671764,
      "learning_rate": 9.297282647389583e-06,
      "loss": 0.1788,
      "step": 6708
    },
    {
      "epoch": 0.1957232043876539,
      "grad_norm": 0.8503853960943315,
      "learning_rate": 9.297041115692208e-06,
      "loss": 0.1827,
      "step": 6709
    },
    {
      "epoch": 0.19575237761829745,
      "grad_norm": 1.06943952060511,
      "learning_rate": 9.296799545631876e-06,
      "loss": 0.1689,
      "step": 6710
    },
    {
      "epoch": 0.195781550848941,
      "grad_norm": 0.7823124754458123,
      "learning_rate": 9.296557937210745e-06,
      "loss": 0.1471,
      "step": 6711
    },
    {
      "epoch": 0.19581072407958458,
      "grad_norm": 0.8053578511032365,
      "learning_rate": 9.296316290430969e-06,
      "loss": 0.1509,
      "step": 6712
    },
    {
      "epoch": 0.19583989731022813,
      "grad_norm": 0.8349161213608354,
      "learning_rate": 9.296074605294707e-06,
      "loss": 0.1734,
      "step": 6713
    },
    {
      "epoch": 0.1958690705408717,
      "grad_norm": 0.8981238701740619,
      "learning_rate": 9.295832881804116e-06,
      "loss": 0.1694,
      "step": 6714
    },
    {
      "epoch": 0.19589824377151527,
      "grad_norm": 1.0839142407035054,
      "learning_rate": 9.295591119961356e-06,
      "loss": 0.1537,
      "step": 6715
    },
    {
      "epoch": 0.19592741700215882,
      "grad_norm": 0.8004759947306443,
      "learning_rate": 9.295349319768583e-06,
      "loss": 0.1895,
      "step": 6716
    },
    {
      "epoch": 0.19595659023280237,
      "grad_norm": 1.0750397339890871,
      "learning_rate": 9.295107481227957e-06,
      "loss": 0.1678,
      "step": 6717
    },
    {
      "epoch": 0.19598576346344596,
      "grad_norm": 1.0142931560315016,
      "learning_rate": 9.294865604341635e-06,
      "loss": 0.1782,
      "step": 6718
    },
    {
      "epoch": 0.1960149366940895,
      "grad_norm": 0.7769502369432942,
      "learning_rate": 9.29462368911178e-06,
      "loss": 0.1819,
      "step": 6719
    },
    {
      "epoch": 0.19604410992473306,
      "grad_norm": 0.8453196871331251,
      "learning_rate": 9.29438173554055e-06,
      "loss": 0.1722,
      "step": 6720
    },
    {
      "epoch": 0.19607328315537662,
      "grad_norm": 0.8773114967078133,
      "learning_rate": 9.294139743630104e-06,
      "loss": 0.1702,
      "step": 6721
    },
    {
      "epoch": 0.1961024563860202,
      "grad_norm": 0.8901471248144578,
      "learning_rate": 9.293897713382603e-06,
      "loss": 0.1367,
      "step": 6722
    },
    {
      "epoch": 0.19613162961666375,
      "grad_norm": 0.7341032379100687,
      "learning_rate": 9.29365564480021e-06,
      "loss": 0.1786,
      "step": 6723
    },
    {
      "epoch": 0.1961608028473073,
      "grad_norm": 0.8538138627001078,
      "learning_rate": 9.293413537885083e-06,
      "loss": 0.1861,
      "step": 6724
    },
    {
      "epoch": 0.19618997607795088,
      "grad_norm": 0.9065472952693894,
      "learning_rate": 9.293171392639385e-06,
      "loss": 0.1492,
      "step": 6725
    },
    {
      "epoch": 0.19621914930859444,
      "grad_norm": 0.9563289558009515,
      "learning_rate": 9.292929209065278e-06,
      "loss": 0.1601,
      "step": 6726
    },
    {
      "epoch": 0.196248322539238,
      "grad_norm": 0.8153129262051362,
      "learning_rate": 9.292686987164924e-06,
      "loss": 0.19,
      "step": 6727
    },
    {
      "epoch": 0.19627749576988154,
      "grad_norm": 0.9845470112599826,
      "learning_rate": 9.292444726940485e-06,
      "loss": 0.1473,
      "step": 6728
    },
    {
      "epoch": 0.19630666900052512,
      "grad_norm": 0.8607922807974953,
      "learning_rate": 9.292202428394124e-06,
      "loss": 0.1435,
      "step": 6729
    },
    {
      "epoch": 0.19633584223116868,
      "grad_norm": 0.7267509290603898,
      "learning_rate": 9.291960091528004e-06,
      "loss": 0.1621,
      "step": 6730
    },
    {
      "epoch": 0.19636501546181223,
      "grad_norm": 0.8054969037959844,
      "learning_rate": 9.29171771634429e-06,
      "loss": 0.1583,
      "step": 6731
    },
    {
      "epoch": 0.1963941886924558,
      "grad_norm": 1.136024857434458,
      "learning_rate": 9.291475302845145e-06,
      "loss": 0.185,
      "step": 6732
    },
    {
      "epoch": 0.19642336192309937,
      "grad_norm": 0.8106671548634049,
      "learning_rate": 9.291232851032733e-06,
      "loss": 0.1788,
      "step": 6733
    },
    {
      "epoch": 0.19645253515374292,
      "grad_norm": 1.0138559362243085,
      "learning_rate": 9.290990360909218e-06,
      "loss": 0.1515,
      "step": 6734
    },
    {
      "epoch": 0.1964817083843865,
      "grad_norm": 0.9685956393538463,
      "learning_rate": 9.290747832476765e-06,
      "loss": 0.1487,
      "step": 6735
    },
    {
      "epoch": 0.19651088161503005,
      "grad_norm": 1.0979140692203073,
      "learning_rate": 9.29050526573754e-06,
      "loss": 0.1553,
      "step": 6736
    },
    {
      "epoch": 0.1965400548456736,
      "grad_norm": 0.9914402686672533,
      "learning_rate": 9.290262660693708e-06,
      "loss": 0.176,
      "step": 6737
    },
    {
      "epoch": 0.19656922807631716,
      "grad_norm": 0.8714868605471512,
      "learning_rate": 9.290020017347434e-06,
      "loss": 0.2007,
      "step": 6738
    },
    {
      "epoch": 0.19659840130696074,
      "grad_norm": 0.962420922455051,
      "learning_rate": 9.289777335700888e-06,
      "loss": 0.1886,
      "step": 6739
    },
    {
      "epoch": 0.1966275745376043,
      "grad_norm": 0.8631583971632991,
      "learning_rate": 9.289534615756231e-06,
      "loss": 0.1545,
      "step": 6740
    },
    {
      "epoch": 0.19665674776824785,
      "grad_norm": 0.7641851756402492,
      "learning_rate": 9.289291857515634e-06,
      "loss": 0.1394,
      "step": 6741
    },
    {
      "epoch": 0.19668592099889143,
      "grad_norm": 0.8290259161847129,
      "learning_rate": 9.289049060981264e-06,
      "loss": 0.1712,
      "step": 6742
    },
    {
      "epoch": 0.19671509422953498,
      "grad_norm": 0.7554586638487255,
      "learning_rate": 9.288806226155288e-06,
      "loss": 0.1477,
      "step": 6743
    },
    {
      "epoch": 0.19674426746017853,
      "grad_norm": 0.811628315137839,
      "learning_rate": 9.288563353039873e-06,
      "loss": 0.1939,
      "step": 6744
    },
    {
      "epoch": 0.1967734406908221,
      "grad_norm": 0.8483973942457574,
      "learning_rate": 9.288320441637189e-06,
      "loss": 0.1664,
      "step": 6745
    },
    {
      "epoch": 0.19680261392146567,
      "grad_norm": 0.8667848777043733,
      "learning_rate": 9.288077491949403e-06,
      "loss": 0.1835,
      "step": 6746
    },
    {
      "epoch": 0.19683178715210922,
      "grad_norm": 0.871493082736332,
      "learning_rate": 9.287834503978685e-06,
      "loss": 0.191,
      "step": 6747
    },
    {
      "epoch": 0.19686096038275278,
      "grad_norm": 0.7598035949504574,
      "learning_rate": 9.287591477727205e-06,
      "loss": 0.1839,
      "step": 6748
    },
    {
      "epoch": 0.19689013361339636,
      "grad_norm": 0.8706855653558321,
      "learning_rate": 9.28734841319713e-06,
      "loss": 0.1614,
      "step": 6749
    },
    {
      "epoch": 0.1969193068440399,
      "grad_norm": 0.7810263676089523,
      "learning_rate": 9.287105310390634e-06,
      "loss": 0.1579,
      "step": 6750
    },
    {
      "epoch": 0.19694848007468346,
      "grad_norm": 0.9410537912137862,
      "learning_rate": 9.286862169309886e-06,
      "loss": 0.1645,
      "step": 6751
    },
    {
      "epoch": 0.19697765330532704,
      "grad_norm": 1.301653554428064,
      "learning_rate": 9.286618989957053e-06,
      "loss": 0.1611,
      "step": 6752
    },
    {
      "epoch": 0.1970068265359706,
      "grad_norm": 1.0884767791477747,
      "learning_rate": 9.286375772334309e-06,
      "loss": 0.1556,
      "step": 6753
    },
    {
      "epoch": 0.19703599976661415,
      "grad_norm": 0.6899878328774506,
      "learning_rate": 9.286132516443826e-06,
      "loss": 0.1581,
      "step": 6754
    },
    {
      "epoch": 0.1970651729972577,
      "grad_norm": 0.8429315060403528,
      "learning_rate": 9.285889222287776e-06,
      "loss": 0.1603,
      "step": 6755
    },
    {
      "epoch": 0.19709434622790128,
      "grad_norm": 0.9048267404546941,
      "learning_rate": 9.28564588986833e-06,
      "loss": 0.2059,
      "step": 6756
    },
    {
      "epoch": 0.19712351945854484,
      "grad_norm": 0.7372171872163484,
      "learning_rate": 9.285402519187659e-06,
      "loss": 0.1671,
      "step": 6757
    },
    {
      "epoch": 0.1971526926891884,
      "grad_norm": 0.7491177044705533,
      "learning_rate": 9.285159110247938e-06,
      "loss": 0.1521,
      "step": 6758
    },
    {
      "epoch": 0.19718186591983197,
      "grad_norm": 0.9188720580649349,
      "learning_rate": 9.28491566305134e-06,
      "loss": 0.1657,
      "step": 6759
    },
    {
      "epoch": 0.19721103915047553,
      "grad_norm": 0.766881910798833,
      "learning_rate": 9.284672177600039e-06,
      "loss": 0.1511,
      "step": 6760
    },
    {
      "epoch": 0.19724021238111908,
      "grad_norm": 0.8821957120476288,
      "learning_rate": 9.284428653896207e-06,
      "loss": 0.149,
      "step": 6761
    },
    {
      "epoch": 0.19726938561176266,
      "grad_norm": 0.8651784938194986,
      "learning_rate": 9.284185091942017e-06,
      "loss": 0.1632,
      "step": 6762
    },
    {
      "epoch": 0.1972985588424062,
      "grad_norm": 0.7624688335799735,
      "learning_rate": 9.283941491739648e-06,
      "loss": 0.1706,
      "step": 6763
    },
    {
      "epoch": 0.19732773207304977,
      "grad_norm": 1.0473505300517565,
      "learning_rate": 9.28369785329127e-06,
      "loss": 0.1788,
      "step": 6764
    },
    {
      "epoch": 0.19735690530369332,
      "grad_norm": 0.8783393953173931,
      "learning_rate": 9.283454176599059e-06,
      "loss": 0.1759,
      "step": 6765
    },
    {
      "epoch": 0.1973860785343369,
      "grad_norm": 1.090863210412366,
      "learning_rate": 9.283210461665195e-06,
      "loss": 0.1809,
      "step": 6766
    },
    {
      "epoch": 0.19741525176498045,
      "grad_norm": 0.8816239928570382,
      "learning_rate": 9.282966708491848e-06,
      "loss": 0.1513,
      "step": 6767
    },
    {
      "epoch": 0.197444424995624,
      "grad_norm": 0.906288892074761,
      "learning_rate": 9.282722917081197e-06,
      "loss": 0.1764,
      "step": 6768
    },
    {
      "epoch": 0.1974735982262676,
      "grad_norm": 0.8922413127003459,
      "learning_rate": 9.282479087435419e-06,
      "loss": 0.1577,
      "step": 6769
    },
    {
      "epoch": 0.19750277145691114,
      "grad_norm": 0.8980575146322631,
      "learning_rate": 9.28223521955669e-06,
      "loss": 0.176,
      "step": 6770
    },
    {
      "epoch": 0.1975319446875547,
      "grad_norm": 0.8317359960658007,
      "learning_rate": 9.281991313447185e-06,
      "loss": 0.1475,
      "step": 6771
    },
    {
      "epoch": 0.19756111791819825,
      "grad_norm": 0.9007402706647583,
      "learning_rate": 9.281747369109086e-06,
      "loss": 0.1707,
      "step": 6772
    },
    {
      "epoch": 0.19759029114884183,
      "grad_norm": 1.072492056741432,
      "learning_rate": 9.281503386544569e-06,
      "loss": 0.1502,
      "step": 6773
    },
    {
      "epoch": 0.19761946437948538,
      "grad_norm": 0.8813739309462332,
      "learning_rate": 9.281259365755811e-06,
      "loss": 0.1737,
      "step": 6774
    },
    {
      "epoch": 0.19764863761012894,
      "grad_norm": 0.8453346845434963,
      "learning_rate": 9.28101530674499e-06,
      "loss": 0.1608,
      "step": 6775
    },
    {
      "epoch": 0.19767781084077252,
      "grad_norm": 0.8030093121854524,
      "learning_rate": 9.280771209514287e-06,
      "loss": 0.1457,
      "step": 6776
    },
    {
      "epoch": 0.19770698407141607,
      "grad_norm": 0.9315923295272862,
      "learning_rate": 9.280527074065881e-06,
      "loss": 0.174,
      "step": 6777
    },
    {
      "epoch": 0.19773615730205962,
      "grad_norm": 1.0099377696711123,
      "learning_rate": 9.280282900401953e-06,
      "loss": 0.155,
      "step": 6778
    },
    {
      "epoch": 0.1977653305327032,
      "grad_norm": 0.876679878840367,
      "learning_rate": 9.280038688524678e-06,
      "loss": 0.1823,
      "step": 6779
    },
    {
      "epoch": 0.19779450376334676,
      "grad_norm": 1.1406207717128962,
      "learning_rate": 9.279794438436241e-06,
      "loss": 0.175,
      "step": 6780
    },
    {
      "epoch": 0.1978236769939903,
      "grad_norm": 0.8593591024214584,
      "learning_rate": 9.279550150138821e-06,
      "loss": 0.1669,
      "step": 6781
    },
    {
      "epoch": 0.19785285022463386,
      "grad_norm": 1.119386990221924,
      "learning_rate": 9.279305823634599e-06,
      "loss": 0.1493,
      "step": 6782
    },
    {
      "epoch": 0.19788202345527744,
      "grad_norm": 0.8571760555090274,
      "learning_rate": 9.279061458925755e-06,
      "loss": 0.1497,
      "step": 6783
    },
    {
      "epoch": 0.197911196685921,
      "grad_norm": 1.1002761353387693,
      "learning_rate": 9.278817056014473e-06,
      "loss": 0.1583,
      "step": 6784
    },
    {
      "epoch": 0.19794036991656455,
      "grad_norm": 1.0908781638352976,
      "learning_rate": 9.278572614902932e-06,
      "loss": 0.2065,
      "step": 6785
    },
    {
      "epoch": 0.19796954314720813,
      "grad_norm": 0.851372087478325,
      "learning_rate": 9.278328135593318e-06,
      "loss": 0.1693,
      "step": 6786
    },
    {
      "epoch": 0.19799871637785169,
      "grad_norm": 0.8643394174725874,
      "learning_rate": 9.278083618087811e-06,
      "loss": 0.1552,
      "step": 6787
    },
    {
      "epoch": 0.19802788960849524,
      "grad_norm": 0.7977493439466831,
      "learning_rate": 9.277839062388594e-06,
      "loss": 0.1732,
      "step": 6788
    },
    {
      "epoch": 0.19805706283913882,
      "grad_norm": 0.7624833243387342,
      "learning_rate": 9.277594468497853e-06,
      "loss": 0.1775,
      "step": 6789
    },
    {
      "epoch": 0.19808623606978237,
      "grad_norm": 0.8444387072539988,
      "learning_rate": 9.277349836417769e-06,
      "loss": 0.1724,
      "step": 6790
    },
    {
      "epoch": 0.19811540930042593,
      "grad_norm": 0.8496209699680165,
      "learning_rate": 9.277105166150525e-06,
      "loss": 0.1716,
      "step": 6791
    },
    {
      "epoch": 0.19814458253106948,
      "grad_norm": 0.9393124415834895,
      "learning_rate": 9.276860457698308e-06,
      "loss": 0.1821,
      "step": 6792
    },
    {
      "epoch": 0.19817375576171306,
      "grad_norm": 0.8848031449719974,
      "learning_rate": 9.276615711063303e-06,
      "loss": 0.1688,
      "step": 6793
    },
    {
      "epoch": 0.19820292899235661,
      "grad_norm": 0.9007435531593639,
      "learning_rate": 9.276370926247693e-06,
      "loss": 0.1739,
      "step": 6794
    },
    {
      "epoch": 0.19823210222300017,
      "grad_norm": 0.9604942915988172,
      "learning_rate": 9.276126103253664e-06,
      "loss": 0.1599,
      "step": 6795
    },
    {
      "epoch": 0.19826127545364375,
      "grad_norm": 0.8461491811556062,
      "learning_rate": 9.275881242083402e-06,
      "loss": 0.1715,
      "step": 6796
    },
    {
      "epoch": 0.1982904486842873,
      "grad_norm": 1.0300730779632639,
      "learning_rate": 9.275636342739094e-06,
      "loss": 0.1992,
      "step": 6797
    },
    {
      "epoch": 0.19831962191493085,
      "grad_norm": 0.8445314481632383,
      "learning_rate": 9.275391405222923e-06,
      "loss": 0.1727,
      "step": 6798
    },
    {
      "epoch": 0.1983487951455744,
      "grad_norm": 0.7359376197202013,
      "learning_rate": 9.27514642953708e-06,
      "loss": 0.1654,
      "step": 6799
    },
    {
      "epoch": 0.198377968376218,
      "grad_norm": 0.8443801360351817,
      "learning_rate": 9.274901415683751e-06,
      "loss": 0.158,
      "step": 6800
    },
    {
      "epoch": 0.19840714160686154,
      "grad_norm": 0.8307639478949574,
      "learning_rate": 9.27465636366512e-06,
      "loss": 0.1787,
      "step": 6801
    },
    {
      "epoch": 0.1984363148375051,
      "grad_norm": 0.9858139091472421,
      "learning_rate": 9.27441127348338e-06,
      "loss": 0.1918,
      "step": 6802
    },
    {
      "epoch": 0.19846548806814868,
      "grad_norm": 0.8464730211325557,
      "learning_rate": 9.274166145140715e-06,
      "loss": 0.1699,
      "step": 6803
    },
    {
      "epoch": 0.19849466129879223,
      "grad_norm": 0.7180746493841713,
      "learning_rate": 9.273920978639315e-06,
      "loss": 0.1559,
      "step": 6804
    },
    {
      "epoch": 0.19852383452943578,
      "grad_norm": 0.799114963785012,
      "learning_rate": 9.27367577398137e-06,
      "loss": 0.1622,
      "step": 6805
    },
    {
      "epoch": 0.19855300776007936,
      "grad_norm": 0.6447383763126631,
      "learning_rate": 9.273430531169068e-06,
      "loss": 0.1439,
      "step": 6806
    },
    {
      "epoch": 0.19858218099072292,
      "grad_norm": 0.7725223467058463,
      "learning_rate": 9.273185250204597e-06,
      "loss": 0.1633,
      "step": 6807
    },
    {
      "epoch": 0.19861135422136647,
      "grad_norm": 0.7575513821876597,
      "learning_rate": 9.272939931090148e-06,
      "loss": 0.1544,
      "step": 6808
    },
    {
      "epoch": 0.19864052745201002,
      "grad_norm": 0.7586924842351225,
      "learning_rate": 9.272694573827914e-06,
      "loss": 0.1687,
      "step": 6809
    },
    {
      "epoch": 0.1986697006826536,
      "grad_norm": 0.9272205458155205,
      "learning_rate": 9.272449178420079e-06,
      "loss": 0.1749,
      "step": 6810
    },
    {
      "epoch": 0.19869887391329716,
      "grad_norm": 0.7176069199236317,
      "learning_rate": 9.27220374486884e-06,
      "loss": 0.1564,
      "step": 6811
    },
    {
      "epoch": 0.1987280471439407,
      "grad_norm": 0.7930359692764781,
      "learning_rate": 9.271958273176385e-06,
      "loss": 0.1696,
      "step": 6812
    },
    {
      "epoch": 0.1987572203745843,
      "grad_norm": 0.9127337094725932,
      "learning_rate": 9.271712763344907e-06,
      "loss": 0.1754,
      "step": 6813
    },
    {
      "epoch": 0.19878639360522785,
      "grad_norm": 0.8029291901645138,
      "learning_rate": 9.271467215376598e-06,
      "loss": 0.1817,
      "step": 6814
    },
    {
      "epoch": 0.1988155668358714,
      "grad_norm": 2.336362348610908,
      "learning_rate": 9.271221629273647e-06,
      "loss": 0.1618,
      "step": 6815
    },
    {
      "epoch": 0.19884474006651495,
      "grad_norm": 0.986410040723693,
      "learning_rate": 9.27097600503825e-06,
      "loss": 0.1604,
      "step": 6816
    },
    {
      "epoch": 0.19887391329715853,
      "grad_norm": 0.9145616410938826,
      "learning_rate": 9.2707303426726e-06,
      "loss": 0.1528,
      "step": 6817
    },
    {
      "epoch": 0.1989030865278021,
      "grad_norm": 0.8020578464674823,
      "learning_rate": 9.270484642178888e-06,
      "loss": 0.1506,
      "step": 6818
    },
    {
      "epoch": 0.19893225975844564,
      "grad_norm": 0.8017613401330342,
      "learning_rate": 9.270238903559307e-06,
      "loss": 0.1644,
      "step": 6819
    },
    {
      "epoch": 0.19896143298908922,
      "grad_norm": 0.9630723985747422,
      "learning_rate": 9.269993126816055e-06,
      "loss": 0.1661,
      "step": 6820
    },
    {
      "epoch": 0.19899060621973277,
      "grad_norm": 1.071793350005467,
      "learning_rate": 9.269747311951322e-06,
      "loss": 0.1656,
      "step": 6821
    },
    {
      "epoch": 0.19901977945037633,
      "grad_norm": 0.7592639567566531,
      "learning_rate": 9.269501458967306e-06,
      "loss": 0.167,
      "step": 6822
    },
    {
      "epoch": 0.1990489526810199,
      "grad_norm": 0.9551099958261103,
      "learning_rate": 9.269255567866199e-06,
      "loss": 0.1683,
      "step": 6823
    },
    {
      "epoch": 0.19907812591166346,
      "grad_norm": 0.8111207539993681,
      "learning_rate": 9.269009638650198e-06,
      "loss": 0.1573,
      "step": 6824
    },
    {
      "epoch": 0.19910729914230701,
      "grad_norm": 0.9348714473789744,
      "learning_rate": 9.268763671321497e-06,
      "loss": 0.1663,
      "step": 6825
    },
    {
      "epoch": 0.19913647237295057,
      "grad_norm": 0.9722626260327435,
      "learning_rate": 9.268517665882294e-06,
      "loss": 0.1808,
      "step": 6826
    },
    {
      "epoch": 0.19916564560359415,
      "grad_norm": 0.9170527580958073,
      "learning_rate": 9.268271622334784e-06,
      "loss": 0.1769,
      "step": 6827
    },
    {
      "epoch": 0.1991948188342377,
      "grad_norm": 1.014970958407438,
      "learning_rate": 9.268025540681163e-06,
      "loss": 0.1879,
      "step": 6828
    },
    {
      "epoch": 0.19922399206488126,
      "grad_norm": 0.8491689365116207,
      "learning_rate": 9.26777942092363e-06,
      "loss": 0.1487,
      "step": 6829
    },
    {
      "epoch": 0.19925316529552484,
      "grad_norm": 0.8266477970103245,
      "learning_rate": 9.26753326306438e-06,
      "loss": 0.1828,
      "step": 6830
    },
    {
      "epoch": 0.1992823385261684,
      "grad_norm": 0.9012240304171691,
      "learning_rate": 9.267287067105612e-06,
      "loss": 0.1635,
      "step": 6831
    },
    {
      "epoch": 0.19931151175681194,
      "grad_norm": 0.8504390585172055,
      "learning_rate": 9.267040833049525e-06,
      "loss": 0.1647,
      "step": 6832
    },
    {
      "epoch": 0.19934068498745552,
      "grad_norm": 0.7854031137197991,
      "learning_rate": 9.266794560898315e-06,
      "loss": 0.1581,
      "step": 6833
    },
    {
      "epoch": 0.19936985821809908,
      "grad_norm": 0.74083613402986,
      "learning_rate": 9.266548250654183e-06,
      "loss": 0.1446,
      "step": 6834
    },
    {
      "epoch": 0.19939903144874263,
      "grad_norm": 0.8604736649988246,
      "learning_rate": 9.266301902319326e-06,
      "loss": 0.1766,
      "step": 6835
    },
    {
      "epoch": 0.19942820467938618,
      "grad_norm": 0.7926394245167371,
      "learning_rate": 9.266055515895945e-06,
      "loss": 0.1608,
      "step": 6836
    },
    {
      "epoch": 0.19945737791002977,
      "grad_norm": 0.616829938563071,
      "learning_rate": 9.265809091386236e-06,
      "loss": 0.1459,
      "step": 6837
    },
    {
      "epoch": 0.19948655114067332,
      "grad_norm": 0.8577998647176115,
      "learning_rate": 9.265562628792402e-06,
      "loss": 0.1735,
      "step": 6838
    },
    {
      "epoch": 0.19951572437131687,
      "grad_norm": 0.9264624005572271,
      "learning_rate": 9.265316128116647e-06,
      "loss": 0.18,
      "step": 6839
    },
    {
      "epoch": 0.19954489760196045,
      "grad_norm": 0.829671413022701,
      "learning_rate": 9.265069589361165e-06,
      "loss": 0.1875,
      "step": 6840
    },
    {
      "epoch": 0.199574070832604,
      "grad_norm": 0.7875201566196365,
      "learning_rate": 9.264823012528159e-06,
      "loss": 0.1673,
      "step": 6841
    },
    {
      "epoch": 0.19960324406324756,
      "grad_norm": 0.7874558435542366,
      "learning_rate": 9.264576397619832e-06,
      "loss": 0.1629,
      "step": 6842
    },
    {
      "epoch": 0.1996324172938911,
      "grad_norm": 0.7724462576193868,
      "learning_rate": 9.264329744638385e-06,
      "loss": 0.1466,
      "step": 6843
    },
    {
      "epoch": 0.1996615905245347,
      "grad_norm": 0.7668766098109513,
      "learning_rate": 9.264083053586022e-06,
      "loss": 0.1862,
      "step": 6844
    },
    {
      "epoch": 0.19969076375517825,
      "grad_norm": 0.773502420006225,
      "learning_rate": 9.263836324464942e-06,
      "loss": 0.1803,
      "step": 6845
    },
    {
      "epoch": 0.1997199369858218,
      "grad_norm": 0.7835549402281338,
      "learning_rate": 9.263589557277349e-06,
      "loss": 0.1441,
      "step": 6846
    },
    {
      "epoch": 0.19974911021646538,
      "grad_norm": 0.8700013437396111,
      "learning_rate": 9.263342752025446e-06,
      "loss": 0.1921,
      "step": 6847
    },
    {
      "epoch": 0.19977828344710893,
      "grad_norm": 0.8383673342033546,
      "learning_rate": 9.263095908711436e-06,
      "loss": 0.1758,
      "step": 6848
    },
    {
      "epoch": 0.1998074566777525,
      "grad_norm": 0.9235433147483293,
      "learning_rate": 9.262849027337524e-06,
      "loss": 0.1747,
      "step": 6849
    },
    {
      "epoch": 0.19983662990839607,
      "grad_norm": 0.8038146197528632,
      "learning_rate": 9.262602107905913e-06,
      "loss": 0.1463,
      "step": 6850
    },
    {
      "epoch": 0.19986580313903962,
      "grad_norm": 0.752886841622253,
      "learning_rate": 9.26235515041881e-06,
      "loss": 0.1313,
      "step": 6851
    },
    {
      "epoch": 0.19989497636968317,
      "grad_norm": 0.9008267139088517,
      "learning_rate": 9.262108154878415e-06,
      "loss": 0.1682,
      "step": 6852
    },
    {
      "epoch": 0.19992414960032673,
      "grad_norm": 0.9045358933870375,
      "learning_rate": 9.261861121286938e-06,
      "loss": 0.1679,
      "step": 6853
    },
    {
      "epoch": 0.1999533228309703,
      "grad_norm": 1.1205794896514105,
      "learning_rate": 9.261614049646581e-06,
      "loss": 0.158,
      "step": 6854
    },
    {
      "epoch": 0.19998249606161386,
      "grad_norm": 0.9444648051004103,
      "learning_rate": 9.261366939959552e-06,
      "loss": 0.1611,
      "step": 6855
    },
    {
      "epoch": 0.20001166929225742,
      "grad_norm": 0.8900281994126368,
      "learning_rate": 9.261119792228056e-06,
      "loss": 0.1622,
      "step": 6856
    },
    {
      "epoch": 0.200040842522901,
      "grad_norm": 0.9793048129383088,
      "learning_rate": 9.260872606454299e-06,
      "loss": 0.1554,
      "step": 6857
    },
    {
      "epoch": 0.20007001575354455,
      "grad_norm": 0.7866144396388639,
      "learning_rate": 9.260625382640489e-06,
      "loss": 0.1402,
      "step": 6858
    },
    {
      "epoch": 0.2000991889841881,
      "grad_norm": 1.1020344557217077,
      "learning_rate": 9.260378120788833e-06,
      "loss": 0.1975,
      "step": 6859
    },
    {
      "epoch": 0.20012836221483168,
      "grad_norm": 0.9935447255693488,
      "learning_rate": 9.260130820901539e-06,
      "loss": 0.1812,
      "step": 6860
    },
    {
      "epoch": 0.20015753544547524,
      "grad_norm": 0.92842363614548,
      "learning_rate": 9.259883482980812e-06,
      "loss": 0.1681,
      "step": 6861
    },
    {
      "epoch": 0.2001867086761188,
      "grad_norm": 0.8056007956260339,
      "learning_rate": 9.259636107028863e-06,
      "loss": 0.1464,
      "step": 6862
    },
    {
      "epoch": 0.20021588190676234,
      "grad_norm": 0.8201984804203223,
      "learning_rate": 9.2593886930479e-06,
      "loss": 0.1628,
      "step": 6863
    },
    {
      "epoch": 0.20024505513740593,
      "grad_norm": 0.9561164466426443,
      "learning_rate": 9.259141241040132e-06,
      "loss": 0.1631,
      "step": 6864
    },
    {
      "epoch": 0.20027422836804948,
      "grad_norm": 0.7606171656600601,
      "learning_rate": 9.258893751007768e-06,
      "loss": 0.1916,
      "step": 6865
    },
    {
      "epoch": 0.20030340159869303,
      "grad_norm": 0.9893724394233364,
      "learning_rate": 9.258646222953014e-06,
      "loss": 0.1785,
      "step": 6866
    },
    {
      "epoch": 0.2003325748293366,
      "grad_norm": 1.0492910901475423,
      "learning_rate": 9.258398656878086e-06,
      "loss": 0.1832,
      "step": 6867
    },
    {
      "epoch": 0.20036174805998017,
      "grad_norm": 0.8239922216935243,
      "learning_rate": 9.25815105278519e-06,
      "loss": 0.1447,
      "step": 6868
    },
    {
      "epoch": 0.20039092129062372,
      "grad_norm": 0.9857818821904483,
      "learning_rate": 9.257903410676542e-06,
      "loss": 0.156,
      "step": 6869
    },
    {
      "epoch": 0.20042009452126727,
      "grad_norm": 1.1030047783364014,
      "learning_rate": 9.257655730554343e-06,
      "loss": 0.1626,
      "step": 6870
    },
    {
      "epoch": 0.20044926775191085,
      "grad_norm": 0.7547724311474497,
      "learning_rate": 9.257408012420814e-06,
      "loss": 0.1441,
      "step": 6871
    },
    {
      "epoch": 0.2004784409825544,
      "grad_norm": 0.8284604181580133,
      "learning_rate": 9.25716025627816e-06,
      "loss": 0.1588,
      "step": 6872
    },
    {
      "epoch": 0.20050761421319796,
      "grad_norm": 0.8664436845875464,
      "learning_rate": 9.256912462128598e-06,
      "loss": 0.1478,
      "step": 6873
    },
    {
      "epoch": 0.20053678744384154,
      "grad_norm": 0.7916567029228911,
      "learning_rate": 9.256664629974336e-06,
      "loss": 0.1738,
      "step": 6874
    },
    {
      "epoch": 0.2005659606744851,
      "grad_norm": 0.876323632801491,
      "learning_rate": 9.256416759817589e-06,
      "loss": 0.1574,
      "step": 6875
    },
    {
      "epoch": 0.20059513390512865,
      "grad_norm": 0.8757684449739846,
      "learning_rate": 9.256168851660568e-06,
      "loss": 0.1627,
      "step": 6876
    },
    {
      "epoch": 0.20062430713577223,
      "grad_norm": 0.7331586042635607,
      "learning_rate": 9.255920905505489e-06,
      "loss": 0.166,
      "step": 6877
    },
    {
      "epoch": 0.20065348036641578,
      "grad_norm": 0.884088409347574,
      "learning_rate": 9.255672921354564e-06,
      "loss": 0.1572,
      "step": 6878
    },
    {
      "epoch": 0.20068265359705934,
      "grad_norm": 0.9423527980206594,
      "learning_rate": 9.255424899210006e-06,
      "loss": 0.1738,
      "step": 6879
    },
    {
      "epoch": 0.2007118268277029,
      "grad_norm": 0.7542517853422843,
      "learning_rate": 9.255176839074031e-06,
      "loss": 0.1736,
      "step": 6880
    },
    {
      "epoch": 0.20074100005834647,
      "grad_norm": 0.9210894675280078,
      "learning_rate": 9.254928740948854e-06,
      "loss": 0.1571,
      "step": 6881
    },
    {
      "epoch": 0.20077017328899002,
      "grad_norm": 0.9075573167169664,
      "learning_rate": 9.254680604836688e-06,
      "loss": 0.1654,
      "step": 6882
    },
    {
      "epoch": 0.20079934651963358,
      "grad_norm": 0.7357175761969743,
      "learning_rate": 9.254432430739749e-06,
      "loss": 0.1576,
      "step": 6883
    },
    {
      "epoch": 0.20082851975027716,
      "grad_norm": 0.9293557786696476,
      "learning_rate": 9.254184218660252e-06,
      "loss": 0.1757,
      "step": 6884
    },
    {
      "epoch": 0.2008576929809207,
      "grad_norm": 1.2702381815754837,
      "learning_rate": 9.253935968600416e-06,
      "loss": 0.1973,
      "step": 6885
    },
    {
      "epoch": 0.20088686621156426,
      "grad_norm": 0.8137709259145418,
      "learning_rate": 9.253687680562454e-06,
      "loss": 0.1494,
      "step": 6886
    },
    {
      "epoch": 0.20091603944220782,
      "grad_norm": 0.7591368911767536,
      "learning_rate": 9.253439354548583e-06,
      "loss": 0.1455,
      "step": 6887
    },
    {
      "epoch": 0.2009452126728514,
      "grad_norm": 0.8431311808072077,
      "learning_rate": 9.253190990561022e-06,
      "loss": 0.177,
      "step": 6888
    },
    {
      "epoch": 0.20097438590349495,
      "grad_norm": 0.7439925566693707,
      "learning_rate": 9.252942588601988e-06,
      "loss": 0.1629,
      "step": 6889
    },
    {
      "epoch": 0.2010035591341385,
      "grad_norm": 0.8463428058909843,
      "learning_rate": 9.252694148673695e-06,
      "loss": 0.1783,
      "step": 6890
    },
    {
      "epoch": 0.20103273236478209,
      "grad_norm": 0.8698538347710973,
      "learning_rate": 9.252445670778367e-06,
      "loss": 0.17,
      "step": 6891
    },
    {
      "epoch": 0.20106190559542564,
      "grad_norm": 1.0189241222151606,
      "learning_rate": 9.252197154918217e-06,
      "loss": 0.1683,
      "step": 6892
    },
    {
      "epoch": 0.2010910788260692,
      "grad_norm": 0.7984973411853468,
      "learning_rate": 9.251948601095466e-06,
      "loss": 0.1963,
      "step": 6893
    },
    {
      "epoch": 0.20112025205671277,
      "grad_norm": 1.1090263605234885,
      "learning_rate": 9.251700009312334e-06,
      "loss": 0.1655,
      "step": 6894
    },
    {
      "epoch": 0.20114942528735633,
      "grad_norm": 0.7521774243577772,
      "learning_rate": 9.25145137957104e-06,
      "loss": 0.166,
      "step": 6895
    },
    {
      "epoch": 0.20117859851799988,
      "grad_norm": 0.8901929590357675,
      "learning_rate": 9.251202711873802e-06,
      "loss": 0.163,
      "step": 6896
    },
    {
      "epoch": 0.20120777174864343,
      "grad_norm": 0.8387379421239792,
      "learning_rate": 9.25095400622284e-06,
      "loss": 0.1647,
      "step": 6897
    },
    {
      "epoch": 0.201236944979287,
      "grad_norm": 0.7271202499149118,
      "learning_rate": 9.250705262620376e-06,
      "loss": 0.169,
      "step": 6898
    },
    {
      "epoch": 0.20126611820993057,
      "grad_norm": 0.7917441056416124,
      "learning_rate": 9.25045648106863e-06,
      "loss": 0.1608,
      "step": 6899
    },
    {
      "epoch": 0.20129529144057412,
      "grad_norm": 1.0166959242851203,
      "learning_rate": 9.250207661569824e-06,
      "loss": 0.1534,
      "step": 6900
    },
    {
      "epoch": 0.2013244646712177,
      "grad_norm": 0.7654424818295686,
      "learning_rate": 9.249958804126178e-06,
      "loss": 0.1824,
      "step": 6901
    },
    {
      "epoch": 0.20135363790186125,
      "grad_norm": 0.9651451193769214,
      "learning_rate": 9.249709908739914e-06,
      "loss": 0.1843,
      "step": 6902
    },
    {
      "epoch": 0.2013828111325048,
      "grad_norm": 0.8894528565331724,
      "learning_rate": 9.249460975413256e-06,
      "loss": 0.1569,
      "step": 6903
    },
    {
      "epoch": 0.2014119843631484,
      "grad_norm": 0.6423539728835258,
      "learning_rate": 9.249212004148424e-06,
      "loss": 0.16,
      "step": 6904
    },
    {
      "epoch": 0.20144115759379194,
      "grad_norm": 0.8063827987347563,
      "learning_rate": 9.248962994947641e-06,
      "loss": 0.1559,
      "step": 6905
    },
    {
      "epoch": 0.2014703308244355,
      "grad_norm": 0.8624478514379726,
      "learning_rate": 9.248713947813131e-06,
      "loss": 0.1608,
      "step": 6906
    },
    {
      "epoch": 0.20149950405507905,
      "grad_norm": 0.8950892712865031,
      "learning_rate": 9.248464862747117e-06,
      "loss": 0.1734,
      "step": 6907
    },
    {
      "epoch": 0.20152867728572263,
      "grad_norm": 0.8905745182797603,
      "learning_rate": 9.248215739751825e-06,
      "loss": 0.1794,
      "step": 6908
    },
    {
      "epoch": 0.20155785051636618,
      "grad_norm": 0.9989720835810473,
      "learning_rate": 9.247966578829476e-06,
      "loss": 0.1573,
      "step": 6909
    },
    {
      "epoch": 0.20158702374700974,
      "grad_norm": 0.8291663064917253,
      "learning_rate": 9.247717379982293e-06,
      "loss": 0.1749,
      "step": 6910
    },
    {
      "epoch": 0.20161619697765332,
      "grad_norm": 0.9065232306006418,
      "learning_rate": 9.247468143212505e-06,
      "loss": 0.1698,
      "step": 6911
    },
    {
      "epoch": 0.20164537020829687,
      "grad_norm": 0.7636604866786924,
      "learning_rate": 9.247218868522335e-06,
      "loss": 0.1598,
      "step": 6912
    },
    {
      "epoch": 0.20167454343894042,
      "grad_norm": 0.7669056755864361,
      "learning_rate": 9.24696955591401e-06,
      "loss": 0.1608,
      "step": 6913
    },
    {
      "epoch": 0.20170371666958398,
      "grad_norm": 0.8165590403692065,
      "learning_rate": 9.246720205389752e-06,
      "loss": 0.164,
      "step": 6914
    },
    {
      "epoch": 0.20173288990022756,
      "grad_norm": 0.6512869425367765,
      "learning_rate": 9.246470816951792e-06,
      "loss": 0.169,
      "step": 6915
    },
    {
      "epoch": 0.2017620631308711,
      "grad_norm": 0.851582519614119,
      "learning_rate": 9.246221390602353e-06,
      "loss": 0.1714,
      "step": 6916
    },
    {
      "epoch": 0.20179123636151466,
      "grad_norm": 0.8819487941044123,
      "learning_rate": 9.245971926343664e-06,
      "loss": 0.1591,
      "step": 6917
    },
    {
      "epoch": 0.20182040959215825,
      "grad_norm": 0.9313966169414754,
      "learning_rate": 9.245722424177953e-06,
      "loss": 0.1553,
      "step": 6918
    },
    {
      "epoch": 0.2018495828228018,
      "grad_norm": 0.691431593294132,
      "learning_rate": 9.245472884107442e-06,
      "loss": 0.157,
      "step": 6919
    },
    {
      "epoch": 0.20187875605344535,
      "grad_norm": 1.0897206048393633,
      "learning_rate": 9.245223306134364e-06,
      "loss": 0.1683,
      "step": 6920
    },
    {
      "epoch": 0.20190792928408893,
      "grad_norm": 0.7985922287025494,
      "learning_rate": 9.244973690260947e-06,
      "loss": 0.1312,
      "step": 6921
    },
    {
      "epoch": 0.20193710251473249,
      "grad_norm": 1.1762938233012288,
      "learning_rate": 9.244724036489416e-06,
      "loss": 0.1776,
      "step": 6922
    },
    {
      "epoch": 0.20196627574537604,
      "grad_norm": 0.6709814809299784,
      "learning_rate": 9.244474344822003e-06,
      "loss": 0.1612,
      "step": 6923
    },
    {
      "epoch": 0.2019954489760196,
      "grad_norm": 0.8355430771473132,
      "learning_rate": 9.244224615260939e-06,
      "loss": 0.1623,
      "step": 6924
    },
    {
      "epoch": 0.20202462220666317,
      "grad_norm": 0.841972104666194,
      "learning_rate": 9.243974847808447e-06,
      "loss": 0.1451,
      "step": 6925
    },
    {
      "epoch": 0.20205379543730673,
      "grad_norm": 0.9060411147309869,
      "learning_rate": 9.243725042466762e-06,
      "loss": 0.1629,
      "step": 6926
    },
    {
      "epoch": 0.20208296866795028,
      "grad_norm": 1.0164509121189214,
      "learning_rate": 9.243475199238115e-06,
      "loss": 0.1508,
      "step": 6927
    },
    {
      "epoch": 0.20211214189859386,
      "grad_norm": 1.2165006713459436,
      "learning_rate": 9.243225318124731e-06,
      "loss": 0.1521,
      "step": 6928
    },
    {
      "epoch": 0.20214131512923741,
      "grad_norm": 0.9375090116468068,
      "learning_rate": 9.242975399128846e-06,
      "loss": 0.1364,
      "step": 6929
    },
    {
      "epoch": 0.20217048835988097,
      "grad_norm": 0.7444053922033216,
      "learning_rate": 9.242725442252689e-06,
      "loss": 0.1709,
      "step": 6930
    },
    {
      "epoch": 0.20219966159052452,
      "grad_norm": 0.8938477042452997,
      "learning_rate": 9.242475447498494e-06,
      "loss": 0.1975,
      "step": 6931
    },
    {
      "epoch": 0.2022288348211681,
      "grad_norm": 0.9444066284799371,
      "learning_rate": 9.242225414868489e-06,
      "loss": 0.175,
      "step": 6932
    },
    {
      "epoch": 0.20225800805181166,
      "grad_norm": 0.7975877128495011,
      "learning_rate": 9.241975344364908e-06,
      "loss": 0.1668,
      "step": 6933
    },
    {
      "epoch": 0.2022871812824552,
      "grad_norm": 0.9344549468260859,
      "learning_rate": 9.241725235989984e-06,
      "loss": 0.1775,
      "step": 6934
    },
    {
      "epoch": 0.2023163545130988,
      "grad_norm": 0.9183123396344244,
      "learning_rate": 9.24147508974595e-06,
      "loss": 0.2053,
      "step": 6935
    },
    {
      "epoch": 0.20234552774374234,
      "grad_norm": 0.812772993081817,
      "learning_rate": 9.24122490563504e-06,
      "loss": 0.1748,
      "step": 6936
    },
    {
      "epoch": 0.2023747009743859,
      "grad_norm": 1.2402670599204466,
      "learning_rate": 9.240974683659484e-06,
      "loss": 0.1757,
      "step": 6937
    },
    {
      "epoch": 0.20240387420502948,
      "grad_norm": 0.9170751278495316,
      "learning_rate": 9.24072442382152e-06,
      "loss": 0.1974,
      "step": 6938
    },
    {
      "epoch": 0.20243304743567303,
      "grad_norm": 1.0772371356975892,
      "learning_rate": 9.240474126123382e-06,
      "loss": 0.1603,
      "step": 6939
    },
    {
      "epoch": 0.20246222066631658,
      "grad_norm": 0.8619979857339938,
      "learning_rate": 9.240223790567301e-06,
      "loss": 0.1687,
      "step": 6940
    },
    {
      "epoch": 0.20249139389696014,
      "grad_norm": 0.7450528065605044,
      "learning_rate": 9.239973417155514e-06,
      "loss": 0.1663,
      "step": 6941
    },
    {
      "epoch": 0.20252056712760372,
      "grad_norm": 0.929227600853273,
      "learning_rate": 9.239723005890259e-06,
      "loss": 0.1615,
      "step": 6942
    },
    {
      "epoch": 0.20254974035824727,
      "grad_norm": 0.612880897375697,
      "learning_rate": 9.239472556773767e-06,
      "loss": 0.1539,
      "step": 6943
    },
    {
      "epoch": 0.20257891358889082,
      "grad_norm": 0.797816385622781,
      "learning_rate": 9.239222069808278e-06,
      "loss": 0.1586,
      "step": 6944
    },
    {
      "epoch": 0.2026080868195344,
      "grad_norm": 0.8512931410273533,
      "learning_rate": 9.238971544996024e-06,
      "loss": 0.1911,
      "step": 6945
    },
    {
      "epoch": 0.20263726005017796,
      "grad_norm": 0.7940492984333843,
      "learning_rate": 9.238720982339244e-06,
      "loss": 0.1887,
      "step": 6946
    },
    {
      "epoch": 0.2026664332808215,
      "grad_norm": 0.9662901286894028,
      "learning_rate": 9.238470381840177e-06,
      "loss": 0.1605,
      "step": 6947
    },
    {
      "epoch": 0.2026956065114651,
      "grad_norm": 0.7812194229483999,
      "learning_rate": 9.238219743501056e-06,
      "loss": 0.1469,
      "step": 6948
    },
    {
      "epoch": 0.20272477974210865,
      "grad_norm": 0.9627478070169841,
      "learning_rate": 9.237969067324122e-06,
      "loss": 0.1756,
      "step": 6949
    },
    {
      "epoch": 0.2027539529727522,
      "grad_norm": 0.9180610650279988,
      "learning_rate": 9.237718353311614e-06,
      "loss": 0.1743,
      "step": 6950
    },
    {
      "epoch": 0.20278312620339575,
      "grad_norm": 0.8145939137932461,
      "learning_rate": 9.237467601465765e-06,
      "loss": 0.1642,
      "step": 6951
    },
    {
      "epoch": 0.20281229943403933,
      "grad_norm": 0.9541162411630298,
      "learning_rate": 9.237216811788818e-06,
      "loss": 0.1877,
      "step": 6952
    },
    {
      "epoch": 0.2028414726646829,
      "grad_norm": 0.7818593113436227,
      "learning_rate": 9.23696598428301e-06,
      "loss": 0.1525,
      "step": 6953
    },
    {
      "epoch": 0.20287064589532644,
      "grad_norm": 0.8353926491125307,
      "learning_rate": 9.236715118950584e-06,
      "loss": 0.1879,
      "step": 6954
    },
    {
      "epoch": 0.20289981912597002,
      "grad_norm": 0.8569013518121356,
      "learning_rate": 9.236464215793773e-06,
      "loss": 0.1641,
      "step": 6955
    },
    {
      "epoch": 0.20292899235661357,
      "grad_norm": 0.7441027716221941,
      "learning_rate": 9.236213274814822e-06,
      "loss": 0.1595,
      "step": 6956
    },
    {
      "epoch": 0.20295816558725713,
      "grad_norm": 0.6849361560051208,
      "learning_rate": 9.23596229601597e-06,
      "loss": 0.1535,
      "step": 6957
    },
    {
      "epoch": 0.20298733881790068,
      "grad_norm": 0.7500331412109894,
      "learning_rate": 9.23571127939946e-06,
      "loss": 0.1598,
      "step": 6958
    },
    {
      "epoch": 0.20301651204854426,
      "grad_norm": 0.8118918095758154,
      "learning_rate": 9.23546022496753e-06,
      "loss": 0.1946,
      "step": 6959
    },
    {
      "epoch": 0.20304568527918782,
      "grad_norm": 0.8545793980969125,
      "learning_rate": 9.23520913272242e-06,
      "loss": 0.1733,
      "step": 6960
    },
    {
      "epoch": 0.20307485850983137,
      "grad_norm": 0.816879846775412,
      "learning_rate": 9.234958002666377e-06,
      "loss": 0.1637,
      "step": 6961
    },
    {
      "epoch": 0.20310403174047495,
      "grad_norm": 0.8082137698355487,
      "learning_rate": 9.234706834801637e-06,
      "loss": 0.1909,
      "step": 6962
    },
    {
      "epoch": 0.2031332049711185,
      "grad_norm": 0.886717342632263,
      "learning_rate": 9.234455629130447e-06,
      "loss": 0.1437,
      "step": 6963
    },
    {
      "epoch": 0.20316237820176206,
      "grad_norm": 0.7627804041244418,
      "learning_rate": 9.234204385655048e-06,
      "loss": 0.1669,
      "step": 6964
    },
    {
      "epoch": 0.20319155143240564,
      "grad_norm": 0.9082853257968745,
      "learning_rate": 9.233953104377683e-06,
      "loss": 0.155,
      "step": 6965
    },
    {
      "epoch": 0.2032207246630492,
      "grad_norm": 0.7534748533281638,
      "learning_rate": 9.233701785300594e-06,
      "loss": 0.1533,
      "step": 6966
    },
    {
      "epoch": 0.20324989789369274,
      "grad_norm": 0.8623488199709958,
      "learning_rate": 9.233450428426028e-06,
      "loss": 0.1481,
      "step": 6967
    },
    {
      "epoch": 0.2032790711243363,
      "grad_norm": 0.9881855600799769,
      "learning_rate": 9.233199033756225e-06,
      "loss": 0.1712,
      "step": 6968
    },
    {
      "epoch": 0.20330824435497988,
      "grad_norm": 0.844767954923981,
      "learning_rate": 9.232947601293434e-06,
      "loss": 0.1582,
      "step": 6969
    },
    {
      "epoch": 0.20333741758562343,
      "grad_norm": 0.8276002682189507,
      "learning_rate": 9.232696131039896e-06,
      "loss": 0.1791,
      "step": 6970
    },
    {
      "epoch": 0.20336659081626698,
      "grad_norm": 0.8617800403501159,
      "learning_rate": 9.232444622997856e-06,
      "loss": 0.1616,
      "step": 6971
    },
    {
      "epoch": 0.20339576404691057,
      "grad_norm": 0.8659663059040512,
      "learning_rate": 9.232193077169564e-06,
      "loss": 0.1654,
      "step": 6972
    },
    {
      "epoch": 0.20342493727755412,
      "grad_norm": 0.779554960350323,
      "learning_rate": 9.23194149355726e-06,
      "loss": 0.151,
      "step": 6973
    },
    {
      "epoch": 0.20345411050819767,
      "grad_norm": 0.8464240910273081,
      "learning_rate": 9.231689872163193e-06,
      "loss": 0.1754,
      "step": 6974
    },
    {
      "epoch": 0.20348328373884125,
      "grad_norm": 0.8346599537505763,
      "learning_rate": 9.23143821298961e-06,
      "loss": 0.1564,
      "step": 6975
    },
    {
      "epoch": 0.2035124569694848,
      "grad_norm": 0.7887283247170853,
      "learning_rate": 9.231186516038756e-06,
      "loss": 0.169,
      "step": 6976
    },
    {
      "epoch": 0.20354163020012836,
      "grad_norm": 0.8313811211179252,
      "learning_rate": 9.230934781312879e-06,
      "loss": 0.1657,
      "step": 6977
    },
    {
      "epoch": 0.2035708034307719,
      "grad_norm": 0.6988999318097441,
      "learning_rate": 9.230683008814226e-06,
      "loss": 0.1501,
      "step": 6978
    },
    {
      "epoch": 0.2035999766614155,
      "grad_norm": 0.9094972559651215,
      "learning_rate": 9.230431198545045e-06,
      "loss": 0.1589,
      "step": 6979
    },
    {
      "epoch": 0.20362914989205905,
      "grad_norm": 0.8904694721866652,
      "learning_rate": 9.230179350507584e-06,
      "loss": 0.1537,
      "step": 6980
    },
    {
      "epoch": 0.2036583231227026,
      "grad_norm": 0.9547194753711763,
      "learning_rate": 9.229927464704094e-06,
      "loss": 0.1606,
      "step": 6981
    },
    {
      "epoch": 0.20368749635334618,
      "grad_norm": 0.9622437302424728,
      "learning_rate": 9.22967554113682e-06,
      "loss": 0.1509,
      "step": 6982
    },
    {
      "epoch": 0.20371666958398973,
      "grad_norm": 0.8020621072325536,
      "learning_rate": 9.22942357980801e-06,
      "loss": 0.1726,
      "step": 6983
    },
    {
      "epoch": 0.2037458428146333,
      "grad_norm": 0.8312982184894929,
      "learning_rate": 9.229171580719917e-06,
      "loss": 0.1567,
      "step": 6984
    },
    {
      "epoch": 0.20377501604527684,
      "grad_norm": 0.9668106852012995,
      "learning_rate": 9.228919543874793e-06,
      "loss": 0.1985,
      "step": 6985
    },
    {
      "epoch": 0.20380418927592042,
      "grad_norm": 0.8657568338629704,
      "learning_rate": 9.22866746927488e-06,
      "loss": 0.1732,
      "step": 6986
    },
    {
      "epoch": 0.20383336250656398,
      "grad_norm": 0.8623396974875376,
      "learning_rate": 9.228415356922437e-06,
      "loss": 0.1663,
      "step": 6987
    },
    {
      "epoch": 0.20386253573720753,
      "grad_norm": 0.8453525214965825,
      "learning_rate": 9.228163206819709e-06,
      "loss": 0.198,
      "step": 6988
    },
    {
      "epoch": 0.2038917089678511,
      "grad_norm": 0.9125294274115585,
      "learning_rate": 9.22791101896895e-06,
      "loss": 0.1661,
      "step": 6989
    },
    {
      "epoch": 0.20392088219849466,
      "grad_norm": 0.8576353726170916,
      "learning_rate": 9.227658793372412e-06,
      "loss": 0.1639,
      "step": 6990
    },
    {
      "epoch": 0.20395005542913822,
      "grad_norm": 0.8394250192109743,
      "learning_rate": 9.227406530032343e-06,
      "loss": 0.2013,
      "step": 6991
    },
    {
      "epoch": 0.2039792286597818,
      "grad_norm": 0.8698217646400507,
      "learning_rate": 9.227154228951e-06,
      "loss": 0.1581,
      "step": 6992
    },
    {
      "epoch": 0.20400840189042535,
      "grad_norm": 0.7342443622768361,
      "learning_rate": 9.226901890130632e-06,
      "loss": 0.1646,
      "step": 6993
    },
    {
      "epoch": 0.2040375751210689,
      "grad_norm": 0.7905139694958258,
      "learning_rate": 9.226649513573494e-06,
      "loss": 0.1632,
      "step": 6994
    },
    {
      "epoch": 0.20406674835171246,
      "grad_norm": 0.9604884114024702,
      "learning_rate": 9.226397099281837e-06,
      "loss": 0.1966,
      "step": 6995
    },
    {
      "epoch": 0.20409592158235604,
      "grad_norm": 0.8188683052092606,
      "learning_rate": 9.226144647257916e-06,
      "loss": 0.1489,
      "step": 6996
    },
    {
      "epoch": 0.2041250948129996,
      "grad_norm": 1.0209871621250723,
      "learning_rate": 9.225892157503983e-06,
      "loss": 0.16,
      "step": 6997
    },
    {
      "epoch": 0.20415426804364314,
      "grad_norm": 0.9416078696474081,
      "learning_rate": 9.225639630022295e-06,
      "loss": 0.1827,
      "step": 6998
    },
    {
      "epoch": 0.20418344127428673,
      "grad_norm": 0.9224229764513623,
      "learning_rate": 9.225387064815106e-06,
      "loss": 0.175,
      "step": 6999
    },
    {
      "epoch": 0.20421261450493028,
      "grad_norm": 1.0303395643999491,
      "learning_rate": 9.225134461884668e-06,
      "loss": 0.1511,
      "step": 7000
    },
    {
      "epoch": 0.20424178773557383,
      "grad_norm": 0.82603145282564,
      "learning_rate": 9.224881821233239e-06,
      "loss": 0.1639,
      "step": 7001
    },
    {
      "epoch": 0.20427096096621739,
      "grad_norm": 0.9880245574139768,
      "learning_rate": 9.224629142863075e-06,
      "loss": 0.1844,
      "step": 7002
    },
    {
      "epoch": 0.20430013419686097,
      "grad_norm": 1.2395735111046278,
      "learning_rate": 9.224376426776428e-06,
      "loss": 0.1764,
      "step": 7003
    },
    {
      "epoch": 0.20432930742750452,
      "grad_norm": 0.7913835154235777,
      "learning_rate": 9.224123672975557e-06,
      "loss": 0.1789,
      "step": 7004
    },
    {
      "epoch": 0.20435848065814807,
      "grad_norm": 1.1300233207007448,
      "learning_rate": 9.22387088146272e-06,
      "loss": 0.1538,
      "step": 7005
    },
    {
      "epoch": 0.20438765388879165,
      "grad_norm": 1.1307792259323688,
      "learning_rate": 9.223618052240171e-06,
      "loss": 0.1743,
      "step": 7006
    },
    {
      "epoch": 0.2044168271194352,
      "grad_norm": 0.8735797992845247,
      "learning_rate": 9.22336518531017e-06,
      "loss": 0.1587,
      "step": 7007
    },
    {
      "epoch": 0.20444600035007876,
      "grad_norm": 0.796213194462923,
      "learning_rate": 9.223112280674971e-06,
      "loss": 0.1574,
      "step": 7008
    },
    {
      "epoch": 0.20447517358072234,
      "grad_norm": 0.9041885368326816,
      "learning_rate": 9.222859338336834e-06,
      "loss": 0.171,
      "step": 7009
    },
    {
      "epoch": 0.2045043468113659,
      "grad_norm": 0.7192505392225091,
      "learning_rate": 9.222606358298017e-06,
      "loss": 0.1529,
      "step": 7010
    },
    {
      "epoch": 0.20453352004200945,
      "grad_norm": 0.8520647621195595,
      "learning_rate": 9.222353340560779e-06,
      "loss": 0.1694,
      "step": 7011
    },
    {
      "epoch": 0.204562693272653,
      "grad_norm": 0.7528888103249979,
      "learning_rate": 9.222100285127376e-06,
      "loss": 0.1527,
      "step": 7012
    },
    {
      "epoch": 0.20459186650329658,
      "grad_norm": 0.8277757246979951,
      "learning_rate": 9.221847192000072e-06,
      "loss": 0.1966,
      "step": 7013
    },
    {
      "epoch": 0.20462103973394014,
      "grad_norm": 0.762915048514747,
      "learning_rate": 9.221594061181122e-06,
      "loss": 0.1638,
      "step": 7014
    },
    {
      "epoch": 0.2046502129645837,
      "grad_norm": 0.9475856252908226,
      "learning_rate": 9.22134089267279e-06,
      "loss": 0.182,
      "step": 7015
    },
    {
      "epoch": 0.20467938619522727,
      "grad_norm": 0.8304613080203108,
      "learning_rate": 9.221087686477335e-06,
      "loss": 0.144,
      "step": 7016
    },
    {
      "epoch": 0.20470855942587082,
      "grad_norm": 0.9407708063085051,
      "learning_rate": 9.220834442597015e-06,
      "loss": 0.1406,
      "step": 7017
    },
    {
      "epoch": 0.20473773265651438,
      "grad_norm": 0.9594131857982843,
      "learning_rate": 9.220581161034093e-06,
      "loss": 0.1919,
      "step": 7018
    },
    {
      "epoch": 0.20476690588715796,
      "grad_norm": 0.8012598194062235,
      "learning_rate": 9.22032784179083e-06,
      "loss": 0.1613,
      "step": 7019
    },
    {
      "epoch": 0.2047960791178015,
      "grad_norm": 0.783222446437833,
      "learning_rate": 9.220074484869488e-06,
      "loss": 0.1524,
      "step": 7020
    },
    {
      "epoch": 0.20482525234844506,
      "grad_norm": 1.0735242632621904,
      "learning_rate": 9.219821090272326e-06,
      "loss": 0.1649,
      "step": 7021
    },
    {
      "epoch": 0.20485442557908862,
      "grad_norm": 0.8837234931799185,
      "learning_rate": 9.219567658001613e-06,
      "loss": 0.1657,
      "step": 7022
    },
    {
      "epoch": 0.2048835988097322,
      "grad_norm": 0.8152130609318874,
      "learning_rate": 9.219314188059605e-06,
      "loss": 0.1556,
      "step": 7023
    },
    {
      "epoch": 0.20491277204037575,
      "grad_norm": 0.8369414123535003,
      "learning_rate": 9.219060680448567e-06,
      "loss": 0.1939,
      "step": 7024
    },
    {
      "epoch": 0.2049419452710193,
      "grad_norm": 1.0024558097070586,
      "learning_rate": 9.218807135170763e-06,
      "loss": 0.1925,
      "step": 7025
    },
    {
      "epoch": 0.20497111850166289,
      "grad_norm": 0.7609945916463682,
      "learning_rate": 9.218553552228454e-06,
      "loss": 0.1798,
      "step": 7026
    },
    {
      "epoch": 0.20500029173230644,
      "grad_norm": 0.8195889686366729,
      "learning_rate": 9.218299931623907e-06,
      "loss": 0.1609,
      "step": 7027
    },
    {
      "epoch": 0.20502946496295,
      "grad_norm": 0.8855074580997848,
      "learning_rate": 9.218046273359385e-06,
      "loss": 0.1663,
      "step": 7028
    },
    {
      "epoch": 0.20505863819359355,
      "grad_norm": 0.8058363475589431,
      "learning_rate": 9.217792577437154e-06,
      "loss": 0.1615,
      "step": 7029
    },
    {
      "epoch": 0.20508781142423713,
      "grad_norm": 0.7965888061691665,
      "learning_rate": 9.217538843859477e-06,
      "loss": 0.184,
      "step": 7030
    },
    {
      "epoch": 0.20511698465488068,
      "grad_norm": 0.9233542755276904,
      "learning_rate": 9.217285072628621e-06,
      "loss": 0.1978,
      "step": 7031
    },
    {
      "epoch": 0.20514615788552423,
      "grad_norm": 0.7417683680594019,
      "learning_rate": 9.217031263746849e-06,
      "loss": 0.1549,
      "step": 7032
    },
    {
      "epoch": 0.20517533111616781,
      "grad_norm": 0.7508384502098701,
      "learning_rate": 9.216777417216429e-06,
      "loss": 0.1561,
      "step": 7033
    },
    {
      "epoch": 0.20520450434681137,
      "grad_norm": 0.7734620855738245,
      "learning_rate": 9.216523533039628e-06,
      "loss": 0.1619,
      "step": 7034
    },
    {
      "epoch": 0.20523367757745492,
      "grad_norm": 0.8909058400944175,
      "learning_rate": 9.21626961121871e-06,
      "loss": 0.1733,
      "step": 7035
    },
    {
      "epoch": 0.2052628508080985,
      "grad_norm": 0.8887479590938959,
      "learning_rate": 9.216015651755944e-06,
      "loss": 0.1708,
      "step": 7036
    },
    {
      "epoch": 0.20529202403874205,
      "grad_norm": 0.7810672670879556,
      "learning_rate": 9.215761654653597e-06,
      "loss": 0.1761,
      "step": 7037
    },
    {
      "epoch": 0.2053211972693856,
      "grad_norm": 0.825915603229344,
      "learning_rate": 9.215507619913937e-06,
      "loss": 0.1416,
      "step": 7038
    },
    {
      "epoch": 0.20535037050002916,
      "grad_norm": 0.9494067374597331,
      "learning_rate": 9.215253547539229e-06,
      "loss": 0.1604,
      "step": 7039
    },
    {
      "epoch": 0.20537954373067274,
      "grad_norm": 0.849334261808857,
      "learning_rate": 9.214999437531746e-06,
      "loss": 0.1665,
      "step": 7040
    },
    {
      "epoch": 0.2054087169613163,
      "grad_norm": 0.9562283780863475,
      "learning_rate": 9.214745289893753e-06,
      "loss": 0.168,
      "step": 7041
    },
    {
      "epoch": 0.20543789019195985,
      "grad_norm": 0.8395458214040364,
      "learning_rate": 9.21449110462752e-06,
      "loss": 0.1501,
      "step": 7042
    },
    {
      "epoch": 0.20546706342260343,
      "grad_norm": 0.8971796442230011,
      "learning_rate": 9.214236881735317e-06,
      "loss": 0.134,
      "step": 7043
    },
    {
      "epoch": 0.20549623665324698,
      "grad_norm": 1.0300220366056503,
      "learning_rate": 9.213982621219413e-06,
      "loss": 0.1744,
      "step": 7044
    },
    {
      "epoch": 0.20552540988389054,
      "grad_norm": 1.1743529491957736,
      "learning_rate": 9.213728323082079e-06,
      "loss": 0.1748,
      "step": 7045
    },
    {
      "epoch": 0.20555458311453412,
      "grad_norm": 1.0193126074031889,
      "learning_rate": 9.213473987325583e-06,
      "loss": 0.1632,
      "step": 7046
    },
    {
      "epoch": 0.20558375634517767,
      "grad_norm": 0.8004089414962124,
      "learning_rate": 9.213219613952198e-06,
      "loss": 0.1524,
      "step": 7047
    },
    {
      "epoch": 0.20561292957582122,
      "grad_norm": 1.0382022058331408,
      "learning_rate": 9.212965202964192e-06,
      "loss": 0.172,
      "step": 7048
    },
    {
      "epoch": 0.20564210280646478,
      "grad_norm": 0.8307365765560831,
      "learning_rate": 9.212710754363841e-06,
      "loss": 0.1494,
      "step": 7049
    },
    {
      "epoch": 0.20567127603710836,
      "grad_norm": 0.89444786051651,
      "learning_rate": 9.212456268153414e-06,
      "loss": 0.1748,
      "step": 7050
    },
    {
      "epoch": 0.2057004492677519,
      "grad_norm": 0.7110872416229941,
      "learning_rate": 9.212201744335182e-06,
      "loss": 0.16,
      "step": 7051
    },
    {
      "epoch": 0.20572962249839546,
      "grad_norm": 0.9553424011884314,
      "learning_rate": 9.211947182911418e-06,
      "loss": 0.1777,
      "step": 7052
    },
    {
      "epoch": 0.20575879572903905,
      "grad_norm": 0.8427944379759794,
      "learning_rate": 9.211692583884395e-06,
      "loss": 0.1692,
      "step": 7053
    },
    {
      "epoch": 0.2057879689596826,
      "grad_norm": 0.8912967009010678,
      "learning_rate": 9.211437947256387e-06,
      "loss": 0.1734,
      "step": 7054
    },
    {
      "epoch": 0.20581714219032615,
      "grad_norm": 1.0440606064314335,
      "learning_rate": 9.211183273029667e-06,
      "loss": 0.1605,
      "step": 7055
    },
    {
      "epoch": 0.2058463154209697,
      "grad_norm": 0.8608823932440356,
      "learning_rate": 9.210928561206507e-06,
      "loss": 0.174,
      "step": 7056
    },
    {
      "epoch": 0.2058754886516133,
      "grad_norm": 0.8798085720541676,
      "learning_rate": 9.210673811789181e-06,
      "loss": 0.1549,
      "step": 7057
    },
    {
      "epoch": 0.20590466188225684,
      "grad_norm": 1.0416204542312937,
      "learning_rate": 9.210419024779967e-06,
      "loss": 0.1979,
      "step": 7058
    },
    {
      "epoch": 0.2059338351129004,
      "grad_norm": 0.8238960482510895,
      "learning_rate": 9.210164200181133e-06,
      "loss": 0.164,
      "step": 7059
    },
    {
      "epoch": 0.20596300834354397,
      "grad_norm": 0.9591263439518445,
      "learning_rate": 9.209909337994963e-06,
      "loss": 0.1715,
      "step": 7060
    },
    {
      "epoch": 0.20599218157418753,
      "grad_norm": 0.8637896079605812,
      "learning_rate": 9.209654438223724e-06,
      "loss": 0.1736,
      "step": 7061
    },
    {
      "epoch": 0.20602135480483108,
      "grad_norm": 0.8204201986572738,
      "learning_rate": 9.209399500869695e-06,
      "loss": 0.1771,
      "step": 7062
    },
    {
      "epoch": 0.20605052803547466,
      "grad_norm": 0.8331051108982461,
      "learning_rate": 9.209144525935154e-06,
      "loss": 0.1635,
      "step": 7063
    },
    {
      "epoch": 0.20607970126611821,
      "grad_norm": 0.8698195907117675,
      "learning_rate": 9.208889513422374e-06,
      "loss": 0.1653,
      "step": 7064
    },
    {
      "epoch": 0.20610887449676177,
      "grad_norm": 0.7944483701447789,
      "learning_rate": 9.208634463333634e-06,
      "loss": 0.1462,
      "step": 7065
    },
    {
      "epoch": 0.20613804772740532,
      "grad_norm": 0.9980133372433863,
      "learning_rate": 9.20837937567121e-06,
      "loss": 0.1849,
      "step": 7066
    },
    {
      "epoch": 0.2061672209580489,
      "grad_norm": 0.8137617243493083,
      "learning_rate": 9.20812425043738e-06,
      "loss": 0.1949,
      "step": 7067
    },
    {
      "epoch": 0.20619639418869246,
      "grad_norm": 0.9273455101518907,
      "learning_rate": 9.20786908763442e-06,
      "loss": 0.1379,
      "step": 7068
    },
    {
      "epoch": 0.206225567419336,
      "grad_norm": 0.8629397425055924,
      "learning_rate": 9.20761388726461e-06,
      "loss": 0.1676,
      "step": 7069
    },
    {
      "epoch": 0.2062547406499796,
      "grad_norm": 0.6833022223183531,
      "learning_rate": 9.207358649330229e-06,
      "loss": 0.1484,
      "step": 7070
    },
    {
      "epoch": 0.20628391388062314,
      "grad_norm": 0.8137189089894815,
      "learning_rate": 9.207103373833553e-06,
      "loss": 0.1598,
      "step": 7071
    },
    {
      "epoch": 0.2063130871112667,
      "grad_norm": 0.9811256991406814,
      "learning_rate": 9.206848060776861e-06,
      "loss": 0.1588,
      "step": 7072
    },
    {
      "epoch": 0.20634226034191025,
      "grad_norm": 0.8532945645803387,
      "learning_rate": 9.206592710162436e-06,
      "loss": 0.1549,
      "step": 7073
    },
    {
      "epoch": 0.20637143357255383,
      "grad_norm": 0.8748392553037043,
      "learning_rate": 9.206337321992554e-06,
      "loss": 0.1729,
      "step": 7074
    },
    {
      "epoch": 0.20640060680319738,
      "grad_norm": 0.7947230153235141,
      "learning_rate": 9.206081896269498e-06,
      "loss": 0.1427,
      "step": 7075
    },
    {
      "epoch": 0.20642978003384094,
      "grad_norm": 0.8735131842080504,
      "learning_rate": 9.205826432995547e-06,
      "loss": 0.1874,
      "step": 7076
    },
    {
      "epoch": 0.20645895326448452,
      "grad_norm": 0.904135571112561,
      "learning_rate": 9.20557093217298e-06,
      "loss": 0.1574,
      "step": 7077
    },
    {
      "epoch": 0.20648812649512807,
      "grad_norm": 1.1024578813330324,
      "learning_rate": 9.20531539380408e-06,
      "loss": 0.152,
      "step": 7078
    },
    {
      "epoch": 0.20651729972577162,
      "grad_norm": 0.8570271618694294,
      "learning_rate": 9.205059817891128e-06,
      "loss": 0.1478,
      "step": 7079
    },
    {
      "epoch": 0.2065464729564152,
      "grad_norm": 0.7449770153439637,
      "learning_rate": 9.204804204436406e-06,
      "loss": 0.1632,
      "step": 7080
    },
    {
      "epoch": 0.20657564618705876,
      "grad_norm": 0.9955024876594831,
      "learning_rate": 9.204548553442196e-06,
      "loss": 0.2087,
      "step": 7081
    },
    {
      "epoch": 0.2066048194177023,
      "grad_norm": 0.8777825750110577,
      "learning_rate": 9.204292864910781e-06,
      "loss": 0.1488,
      "step": 7082
    },
    {
      "epoch": 0.20663399264834587,
      "grad_norm": 0.8349816278410988,
      "learning_rate": 9.204037138844441e-06,
      "loss": 0.1598,
      "step": 7083
    },
    {
      "epoch": 0.20666316587898945,
      "grad_norm": 1.2555677252554858,
      "learning_rate": 9.203781375245465e-06,
      "loss": 0.182,
      "step": 7084
    },
    {
      "epoch": 0.206692339109633,
      "grad_norm": 0.8331678704155605,
      "learning_rate": 9.203525574116127e-06,
      "loss": 0.1719,
      "step": 7085
    },
    {
      "epoch": 0.20672151234027655,
      "grad_norm": 0.7213427739702032,
      "learning_rate": 9.20326973545872e-06,
      "loss": 0.1465,
      "step": 7086
    },
    {
      "epoch": 0.20675068557092013,
      "grad_norm": 0.7682660254920037,
      "learning_rate": 9.203013859275523e-06,
      "loss": 0.1567,
      "step": 7087
    },
    {
      "epoch": 0.2067798588015637,
      "grad_norm": 0.8731578594256073,
      "learning_rate": 9.202757945568822e-06,
      "loss": 0.1708,
      "step": 7088
    },
    {
      "epoch": 0.20680903203220724,
      "grad_norm": 0.8333449796435296,
      "learning_rate": 9.2025019943409e-06,
      "loss": 0.145,
      "step": 7089
    },
    {
      "epoch": 0.20683820526285082,
      "grad_norm": 0.8760915723100529,
      "learning_rate": 9.202246005594045e-06,
      "loss": 0.1705,
      "step": 7090
    },
    {
      "epoch": 0.20686737849349437,
      "grad_norm": 1.0430655009673637,
      "learning_rate": 9.20198997933054e-06,
      "loss": 0.1764,
      "step": 7091
    },
    {
      "epoch": 0.20689655172413793,
      "grad_norm": 0.8727107465988335,
      "learning_rate": 9.201733915552672e-06,
      "loss": 0.1528,
      "step": 7092
    },
    {
      "epoch": 0.20692572495478148,
      "grad_norm": 0.9070862177074983,
      "learning_rate": 9.201477814262727e-06,
      "loss": 0.1685,
      "step": 7093
    },
    {
      "epoch": 0.20695489818542506,
      "grad_norm": 0.9758127645458515,
      "learning_rate": 9.20122167546299e-06,
      "loss": 0.1574,
      "step": 7094
    },
    {
      "epoch": 0.20698407141606862,
      "grad_norm": 0.8160619992259059,
      "learning_rate": 9.20096549915575e-06,
      "loss": 0.1737,
      "step": 7095
    },
    {
      "epoch": 0.20701324464671217,
      "grad_norm": 0.7248935104136316,
      "learning_rate": 9.200709285343292e-06,
      "loss": 0.1451,
      "step": 7096
    },
    {
      "epoch": 0.20704241787735575,
      "grad_norm": 0.7895891594620774,
      "learning_rate": 9.200453034027903e-06,
      "loss": 0.1441,
      "step": 7097
    },
    {
      "epoch": 0.2070715911079993,
      "grad_norm": 0.9970660714634246,
      "learning_rate": 9.200196745211873e-06,
      "loss": 0.1796,
      "step": 7098
    },
    {
      "epoch": 0.20710076433864286,
      "grad_norm": 0.7827229750158712,
      "learning_rate": 9.19994041889749e-06,
      "loss": 0.1676,
      "step": 7099
    },
    {
      "epoch": 0.2071299375692864,
      "grad_norm": 0.7112071647046179,
      "learning_rate": 9.19968405508704e-06,
      "loss": 0.1454,
      "step": 7100
    },
    {
      "epoch": 0.20715911079993,
      "grad_norm": 0.6660471009508432,
      "learning_rate": 9.199427653782815e-06,
      "loss": 0.168,
      "step": 7101
    },
    {
      "epoch": 0.20718828403057354,
      "grad_norm": 0.8299462389483362,
      "learning_rate": 9.199171214987103e-06,
      "loss": 0.1601,
      "step": 7102
    },
    {
      "epoch": 0.2072174572612171,
      "grad_norm": 0.8344605365563602,
      "learning_rate": 9.198914738702191e-06,
      "loss": 0.148,
      "step": 7103
    },
    {
      "epoch": 0.20724663049186068,
      "grad_norm": 0.7109615234153235,
      "learning_rate": 9.19865822493037e-06,
      "loss": 0.1748,
      "step": 7104
    },
    {
      "epoch": 0.20727580372250423,
      "grad_norm": 0.9412419045345006,
      "learning_rate": 9.198401673673934e-06,
      "loss": 0.1813,
      "step": 7105
    },
    {
      "epoch": 0.20730497695314778,
      "grad_norm": 1.1276383291887195,
      "learning_rate": 9.198145084935167e-06,
      "loss": 0.1892,
      "step": 7106
    },
    {
      "epoch": 0.20733415018379137,
      "grad_norm": 0.9710707860135948,
      "learning_rate": 9.197888458716364e-06,
      "loss": 0.1624,
      "step": 7107
    },
    {
      "epoch": 0.20736332341443492,
      "grad_norm": 1.019701360818089,
      "learning_rate": 9.197631795019815e-06,
      "loss": 0.1566,
      "step": 7108
    },
    {
      "epoch": 0.20739249664507847,
      "grad_norm": 0.74507133497317,
      "learning_rate": 9.197375093847811e-06,
      "loss": 0.1758,
      "step": 7109
    },
    {
      "epoch": 0.20742166987572203,
      "grad_norm": 1.0144998879685472,
      "learning_rate": 9.197118355202644e-06,
      "loss": 0.1707,
      "step": 7110
    },
    {
      "epoch": 0.2074508431063656,
      "grad_norm": 0.8741559351263685,
      "learning_rate": 9.196861579086607e-06,
      "loss": 0.1722,
      "step": 7111
    },
    {
      "epoch": 0.20748001633700916,
      "grad_norm": 0.6842181467468504,
      "learning_rate": 9.196604765501991e-06,
      "loss": 0.1478,
      "step": 7112
    },
    {
      "epoch": 0.2075091895676527,
      "grad_norm": 0.9035682230126743,
      "learning_rate": 9.196347914451089e-06,
      "loss": 0.1753,
      "step": 7113
    },
    {
      "epoch": 0.2075383627982963,
      "grad_norm": 0.802764908227016,
      "learning_rate": 9.196091025936195e-06,
      "loss": 0.1662,
      "step": 7114
    },
    {
      "epoch": 0.20756753602893985,
      "grad_norm": 0.8312720985285272,
      "learning_rate": 9.195834099959604e-06,
      "loss": 0.155,
      "step": 7115
    },
    {
      "epoch": 0.2075967092595834,
      "grad_norm": 0.7328392867043356,
      "learning_rate": 9.195577136523606e-06,
      "loss": 0.171,
      "step": 7116
    },
    {
      "epoch": 0.20762588249022695,
      "grad_norm": 0.7986201166657328,
      "learning_rate": 9.195320135630496e-06,
      "loss": 0.1784,
      "step": 7117
    },
    {
      "epoch": 0.20765505572087053,
      "grad_norm": 0.8296984793649945,
      "learning_rate": 9.19506309728257e-06,
      "loss": 0.1426,
      "step": 7118
    },
    {
      "epoch": 0.2076842289515141,
      "grad_norm": 0.6494697354782675,
      "learning_rate": 9.194806021482123e-06,
      "loss": 0.1379,
      "step": 7119
    },
    {
      "epoch": 0.20771340218215764,
      "grad_norm": 1.2721754865246282,
      "learning_rate": 9.194548908231448e-06,
      "loss": 0.1491,
      "step": 7120
    },
    {
      "epoch": 0.20774257541280122,
      "grad_norm": 1.114672137703868,
      "learning_rate": 9.194291757532842e-06,
      "loss": 0.1481,
      "step": 7121
    },
    {
      "epoch": 0.20777174864344478,
      "grad_norm": 0.8637457423044791,
      "learning_rate": 9.194034569388602e-06,
      "loss": 0.1589,
      "step": 7122
    },
    {
      "epoch": 0.20780092187408833,
      "grad_norm": 0.7333022679873099,
      "learning_rate": 9.193777343801021e-06,
      "loss": 0.1765,
      "step": 7123
    },
    {
      "epoch": 0.2078300951047319,
      "grad_norm": 0.9906453369889608,
      "learning_rate": 9.193520080772398e-06,
      "loss": 0.1891,
      "step": 7124
    },
    {
      "epoch": 0.20785926833537546,
      "grad_norm": 0.9500924370608755,
      "learning_rate": 9.193262780305028e-06,
      "loss": 0.2019,
      "step": 7125
    },
    {
      "epoch": 0.20788844156601902,
      "grad_norm": 1.052800775250883,
      "learning_rate": 9.193005442401209e-06,
      "loss": 0.1505,
      "step": 7126
    },
    {
      "epoch": 0.20791761479666257,
      "grad_norm": 0.7881281559280178,
      "learning_rate": 9.192748067063238e-06,
      "loss": 0.1727,
      "step": 7127
    },
    {
      "epoch": 0.20794678802730615,
      "grad_norm": 0.9240133278018553,
      "learning_rate": 9.192490654293414e-06,
      "loss": 0.1557,
      "step": 7128
    },
    {
      "epoch": 0.2079759612579497,
      "grad_norm": 0.9479760475904528,
      "learning_rate": 9.192233204094034e-06,
      "loss": 0.1691,
      "step": 7129
    },
    {
      "epoch": 0.20800513448859326,
      "grad_norm": 0.9927279076327339,
      "learning_rate": 9.191975716467397e-06,
      "loss": 0.1541,
      "step": 7130
    },
    {
      "epoch": 0.20803430771923684,
      "grad_norm": 0.9893259101543747,
      "learning_rate": 9.1917181914158e-06,
      "loss": 0.1408,
      "step": 7131
    },
    {
      "epoch": 0.2080634809498804,
      "grad_norm": 0.895924776365021,
      "learning_rate": 9.191460628941544e-06,
      "loss": 0.1874,
      "step": 7132
    },
    {
      "epoch": 0.20809265418052394,
      "grad_norm": 0.9570681155974422,
      "learning_rate": 9.191203029046929e-06,
      "loss": 0.1894,
      "step": 7133
    },
    {
      "epoch": 0.20812182741116753,
      "grad_norm": 1.0782237602145626,
      "learning_rate": 9.190945391734254e-06,
      "loss": 0.1959,
      "step": 7134
    },
    {
      "epoch": 0.20815100064181108,
      "grad_norm": 0.879667010937371,
      "learning_rate": 9.190687717005818e-06,
      "loss": 0.1554,
      "step": 7135
    },
    {
      "epoch": 0.20818017387245463,
      "grad_norm": 1.7162003841975093,
      "learning_rate": 9.190430004863924e-06,
      "loss": 0.173,
      "step": 7136
    },
    {
      "epoch": 0.20820934710309819,
      "grad_norm": 1.114298507950001,
      "learning_rate": 9.190172255310869e-06,
      "loss": 0.1772,
      "step": 7137
    },
    {
      "epoch": 0.20823852033374177,
      "grad_norm": 0.9187882525918382,
      "learning_rate": 9.18991446834896e-06,
      "loss": 0.1714,
      "step": 7138
    },
    {
      "epoch": 0.20826769356438532,
      "grad_norm": 0.8934628611463499,
      "learning_rate": 9.189656643980492e-06,
      "loss": 0.1503,
      "step": 7139
    },
    {
      "epoch": 0.20829686679502887,
      "grad_norm": 0.8729740494879913,
      "learning_rate": 9.189398782207771e-06,
      "loss": 0.1561,
      "step": 7140
    },
    {
      "epoch": 0.20832604002567245,
      "grad_norm": 0.8532934030012483,
      "learning_rate": 9.189140883033097e-06,
      "loss": 0.1615,
      "step": 7141
    },
    {
      "epoch": 0.208355213256316,
      "grad_norm": 0.8884195549436464,
      "learning_rate": 9.188882946458773e-06,
      "loss": 0.175,
      "step": 7142
    },
    {
      "epoch": 0.20838438648695956,
      "grad_norm": 0.9929578120059851,
      "learning_rate": 9.188624972487101e-06,
      "loss": 0.1552,
      "step": 7143
    },
    {
      "epoch": 0.20841355971760311,
      "grad_norm": 0.8440531149573288,
      "learning_rate": 9.188366961120386e-06,
      "loss": 0.1798,
      "step": 7144
    },
    {
      "epoch": 0.2084427329482467,
      "grad_norm": 0.7885896114796663,
      "learning_rate": 9.188108912360932e-06,
      "loss": 0.1861,
      "step": 7145
    },
    {
      "epoch": 0.20847190617889025,
      "grad_norm": 1.0741397692528705,
      "learning_rate": 9.18785082621104e-06,
      "loss": 0.177,
      "step": 7146
    },
    {
      "epoch": 0.2085010794095338,
      "grad_norm": 0.7888149571309065,
      "learning_rate": 9.187592702673017e-06,
      "loss": 0.1751,
      "step": 7147
    },
    {
      "epoch": 0.20853025264017738,
      "grad_norm": 0.8882939061613188,
      "learning_rate": 9.187334541749165e-06,
      "loss": 0.167,
      "step": 7148
    },
    {
      "epoch": 0.20855942587082094,
      "grad_norm": 1.1154958535430273,
      "learning_rate": 9.187076343441787e-06,
      "loss": 0.1871,
      "step": 7149
    },
    {
      "epoch": 0.2085885991014645,
      "grad_norm": 0.8528943675779154,
      "learning_rate": 9.186818107753195e-06,
      "loss": 0.1558,
      "step": 7150
    },
    {
      "epoch": 0.20861777233210807,
      "grad_norm": 0.9105786245507598,
      "learning_rate": 9.18655983468569e-06,
      "loss": 0.1457,
      "step": 7151
    },
    {
      "epoch": 0.20864694556275162,
      "grad_norm": 0.8758734781424876,
      "learning_rate": 9.186301524241576e-06,
      "loss": 0.2017,
      "step": 7152
    },
    {
      "epoch": 0.20867611879339518,
      "grad_norm": 0.9515679452598051,
      "learning_rate": 9.186043176423162e-06,
      "loss": 0.1895,
      "step": 7153
    },
    {
      "epoch": 0.20870529202403873,
      "grad_norm": 0.9761232825843177,
      "learning_rate": 9.185784791232755e-06,
      "loss": 0.1914,
      "step": 7154
    },
    {
      "epoch": 0.2087344652546823,
      "grad_norm": 0.7893318345979233,
      "learning_rate": 9.185526368672662e-06,
      "loss": 0.1627,
      "step": 7155
    },
    {
      "epoch": 0.20876363848532586,
      "grad_norm": 0.9538821863437764,
      "learning_rate": 9.185267908745186e-06,
      "loss": 0.1758,
      "step": 7156
    },
    {
      "epoch": 0.20879281171596942,
      "grad_norm": 0.9263212016878108,
      "learning_rate": 9.185009411452638e-06,
      "loss": 0.1539,
      "step": 7157
    },
    {
      "epoch": 0.208821984946613,
      "grad_norm": 0.7255824406567902,
      "learning_rate": 9.184750876797325e-06,
      "loss": 0.1606,
      "step": 7158
    },
    {
      "epoch": 0.20885115817725655,
      "grad_norm": 0.8900029933857889,
      "learning_rate": 9.184492304781555e-06,
      "loss": 0.153,
      "step": 7159
    },
    {
      "epoch": 0.2088803314079001,
      "grad_norm": 0.8014889704446233,
      "learning_rate": 9.184233695407635e-06,
      "loss": 0.1592,
      "step": 7160
    },
    {
      "epoch": 0.20890950463854369,
      "grad_norm": 0.8585024131393774,
      "learning_rate": 9.18397504867788e-06,
      "loss": 0.1887,
      "step": 7161
    },
    {
      "epoch": 0.20893867786918724,
      "grad_norm": 0.6927086297948429,
      "learning_rate": 9.18371636459459e-06,
      "loss": 0.1509,
      "step": 7162
    },
    {
      "epoch": 0.2089678510998308,
      "grad_norm": 0.8285743828056754,
      "learning_rate": 9.183457643160082e-06,
      "loss": 0.1887,
      "step": 7163
    },
    {
      "epoch": 0.20899702433047435,
      "grad_norm": 0.861001703495805,
      "learning_rate": 9.183198884376661e-06,
      "loss": 0.1712,
      "step": 7164
    },
    {
      "epoch": 0.20902619756111793,
      "grad_norm": 0.7323659507296036,
      "learning_rate": 9.18294008824664e-06,
      "loss": 0.1697,
      "step": 7165
    },
    {
      "epoch": 0.20905537079176148,
      "grad_norm": 0.7159030178356711,
      "learning_rate": 9.182681254772327e-06,
      "loss": 0.168,
      "step": 7166
    },
    {
      "epoch": 0.20908454402240503,
      "grad_norm": 0.7549284021070887,
      "learning_rate": 9.182422383956036e-06,
      "loss": 0.1709,
      "step": 7167
    },
    {
      "epoch": 0.20911371725304861,
      "grad_norm": 0.94434557018474,
      "learning_rate": 9.182163475800077e-06,
      "loss": 0.1554,
      "step": 7168
    },
    {
      "epoch": 0.20914289048369217,
      "grad_norm": 0.8347875207314545,
      "learning_rate": 9.181904530306757e-06,
      "loss": 0.1729,
      "step": 7169
    },
    {
      "epoch": 0.20917206371433572,
      "grad_norm": 0.6783003229252659,
      "learning_rate": 9.181645547478395e-06,
      "loss": 0.1366,
      "step": 7170
    },
    {
      "epoch": 0.20920123694497927,
      "grad_norm": 1.1050779346558968,
      "learning_rate": 9.1813865273173e-06,
      "loss": 0.165,
      "step": 7171
    },
    {
      "epoch": 0.20923041017562286,
      "grad_norm": 0.8185826575869648,
      "learning_rate": 9.181127469825784e-06,
      "loss": 0.1742,
      "step": 7172
    },
    {
      "epoch": 0.2092595834062664,
      "grad_norm": 1.0439897072743844,
      "learning_rate": 9.180868375006158e-06,
      "loss": 0.1655,
      "step": 7173
    },
    {
      "epoch": 0.20928875663690996,
      "grad_norm": 1.0412342674908153,
      "learning_rate": 9.180609242860739e-06,
      "loss": 0.1684,
      "step": 7174
    },
    {
      "epoch": 0.20931792986755354,
      "grad_norm": 0.8897105234040097,
      "learning_rate": 9.180350073391838e-06,
      "loss": 0.1593,
      "step": 7175
    },
    {
      "epoch": 0.2093471030981971,
      "grad_norm": 0.8714788467791885,
      "learning_rate": 9.18009086660177e-06,
      "loss": 0.1723,
      "step": 7176
    },
    {
      "epoch": 0.20937627632884065,
      "grad_norm": 0.7005511708820562,
      "learning_rate": 9.179831622492847e-06,
      "loss": 0.135,
      "step": 7177
    },
    {
      "epoch": 0.20940544955948423,
      "grad_norm": 1.2540627711913974,
      "learning_rate": 9.179572341067387e-06,
      "loss": 0.149,
      "step": 7178
    },
    {
      "epoch": 0.20943462279012778,
      "grad_norm": 0.7243322612171238,
      "learning_rate": 9.179313022327703e-06,
      "loss": 0.1518,
      "step": 7179
    },
    {
      "epoch": 0.20946379602077134,
      "grad_norm": 0.8735805655729467,
      "learning_rate": 9.17905366627611e-06,
      "loss": 0.1772,
      "step": 7180
    },
    {
      "epoch": 0.2094929692514149,
      "grad_norm": 1.2367216747115835,
      "learning_rate": 9.178794272914921e-06,
      "loss": 0.16,
      "step": 7181
    },
    {
      "epoch": 0.20952214248205847,
      "grad_norm": 0.7919243540922308,
      "learning_rate": 9.178534842246457e-06,
      "loss": 0.1558,
      "step": 7182
    },
    {
      "epoch": 0.20955131571270202,
      "grad_norm": 0.6764322676637853,
      "learning_rate": 9.17827537427303e-06,
      "loss": 0.1633,
      "step": 7183
    },
    {
      "epoch": 0.20958048894334558,
      "grad_norm": 0.8948139087747934,
      "learning_rate": 9.178015868996959e-06,
      "loss": 0.169,
      "step": 7184
    },
    {
      "epoch": 0.20960966217398916,
      "grad_norm": 0.9265613587274324,
      "learning_rate": 9.17775632642056e-06,
      "loss": 0.1812,
      "step": 7185
    },
    {
      "epoch": 0.2096388354046327,
      "grad_norm": 0.8292242281181695,
      "learning_rate": 9.177496746546148e-06,
      "loss": 0.1568,
      "step": 7186
    },
    {
      "epoch": 0.20966800863527627,
      "grad_norm": 0.7472986987443127,
      "learning_rate": 9.177237129376043e-06,
      "loss": 0.1551,
      "step": 7187
    },
    {
      "epoch": 0.20969718186591982,
      "grad_norm": 0.9179470707908803,
      "learning_rate": 9.176977474912563e-06,
      "loss": 0.181,
      "step": 7188
    },
    {
      "epoch": 0.2097263550965634,
      "grad_norm": 0.9507366317154728,
      "learning_rate": 9.176717783158023e-06,
      "loss": 0.1752,
      "step": 7189
    },
    {
      "epoch": 0.20975552832720695,
      "grad_norm": 0.7908172868930405,
      "learning_rate": 9.176458054114746e-06,
      "loss": 0.1425,
      "step": 7190
    },
    {
      "epoch": 0.2097847015578505,
      "grad_norm": 0.9594138180980587,
      "learning_rate": 9.176198287785048e-06,
      "loss": 0.1714,
      "step": 7191
    },
    {
      "epoch": 0.2098138747884941,
      "grad_norm": 0.868901899928297,
      "learning_rate": 9.175938484171248e-06,
      "loss": 0.1654,
      "step": 7192
    },
    {
      "epoch": 0.20984304801913764,
      "grad_norm": 0.8398311088406083,
      "learning_rate": 9.175678643275668e-06,
      "loss": 0.1818,
      "step": 7193
    },
    {
      "epoch": 0.2098722212497812,
      "grad_norm": 0.8050506810692823,
      "learning_rate": 9.175418765100624e-06,
      "loss": 0.1681,
      "step": 7194
    },
    {
      "epoch": 0.20990139448042477,
      "grad_norm": 0.8963428993314191,
      "learning_rate": 9.175158849648438e-06,
      "loss": 0.1586,
      "step": 7195
    },
    {
      "epoch": 0.20993056771106833,
      "grad_norm": 0.8480606598967553,
      "learning_rate": 9.17489889692143e-06,
      "loss": 0.1618,
      "step": 7196
    },
    {
      "epoch": 0.20995974094171188,
      "grad_norm": 0.7181614838963151,
      "learning_rate": 9.174638906921921e-06,
      "loss": 0.1765,
      "step": 7197
    },
    {
      "epoch": 0.20998891417235543,
      "grad_norm": 0.850916230145168,
      "learning_rate": 9.174378879652235e-06,
      "loss": 0.1534,
      "step": 7198
    },
    {
      "epoch": 0.21001808740299902,
      "grad_norm": 0.9370392528764342,
      "learning_rate": 9.17411881511469e-06,
      "loss": 0.1849,
      "step": 7199
    },
    {
      "epoch": 0.21004726063364257,
      "grad_norm": 0.9624771284835438,
      "learning_rate": 9.173858713311606e-06,
      "loss": 0.1609,
      "step": 7200
    },
    {
      "epoch": 0.21007643386428612,
      "grad_norm": 0.7370014896462503,
      "learning_rate": 9.17359857424531e-06,
      "loss": 0.1477,
      "step": 7201
    },
    {
      "epoch": 0.2101056070949297,
      "grad_norm": 1.030148428794689,
      "learning_rate": 9.173338397918123e-06,
      "loss": 0.1738,
      "step": 7202
    },
    {
      "epoch": 0.21013478032557326,
      "grad_norm": 0.9207997100627249,
      "learning_rate": 9.173078184332366e-06,
      "loss": 0.1721,
      "step": 7203
    },
    {
      "epoch": 0.2101639535562168,
      "grad_norm": 0.7433046817468101,
      "learning_rate": 9.172817933490364e-06,
      "loss": 0.1439,
      "step": 7204
    },
    {
      "epoch": 0.2101931267868604,
      "grad_norm": 1.0273490729952162,
      "learning_rate": 9.172557645394438e-06,
      "loss": 0.1634,
      "step": 7205
    },
    {
      "epoch": 0.21022230001750394,
      "grad_norm": 0.9397161896137732,
      "learning_rate": 9.172297320046915e-06,
      "loss": 0.1654,
      "step": 7206
    },
    {
      "epoch": 0.2102514732481475,
      "grad_norm": 0.8099268542749183,
      "learning_rate": 9.172036957450116e-06,
      "loss": 0.1482,
      "step": 7207
    },
    {
      "epoch": 0.21028064647879105,
      "grad_norm": 0.8430220199734931,
      "learning_rate": 9.171776557606368e-06,
      "loss": 0.1539,
      "step": 7208
    },
    {
      "epoch": 0.21030981970943463,
      "grad_norm": 1.2139852278920045,
      "learning_rate": 9.171516120517993e-06,
      "loss": 0.2128,
      "step": 7209
    },
    {
      "epoch": 0.21033899294007818,
      "grad_norm": 0.9566087572947806,
      "learning_rate": 9.17125564618732e-06,
      "loss": 0.1805,
      "step": 7210
    },
    {
      "epoch": 0.21036816617072174,
      "grad_norm": 0.7991018451759162,
      "learning_rate": 9.170995134616673e-06,
      "loss": 0.1394,
      "step": 7211
    },
    {
      "epoch": 0.21039733940136532,
      "grad_norm": 0.9455057662208112,
      "learning_rate": 9.170734585808376e-06,
      "loss": 0.1541,
      "step": 7212
    },
    {
      "epoch": 0.21042651263200887,
      "grad_norm": 0.8655907379066675,
      "learning_rate": 9.170473999764755e-06,
      "loss": 0.184,
      "step": 7213
    },
    {
      "epoch": 0.21045568586265243,
      "grad_norm": 0.8199372371606958,
      "learning_rate": 9.17021337648814e-06,
      "loss": 0.1639,
      "step": 7214
    },
    {
      "epoch": 0.21048485909329598,
      "grad_norm": 0.6649741533327053,
      "learning_rate": 9.169952715980854e-06,
      "loss": 0.143,
      "step": 7215
    },
    {
      "epoch": 0.21051403232393956,
      "grad_norm": 0.9405125419912356,
      "learning_rate": 9.169692018245226e-06,
      "loss": 0.191,
      "step": 7216
    },
    {
      "epoch": 0.2105432055545831,
      "grad_norm": 0.8292947727476876,
      "learning_rate": 9.169431283283583e-06,
      "loss": 0.1738,
      "step": 7217
    },
    {
      "epoch": 0.21057237878522667,
      "grad_norm": 0.7121011638614227,
      "learning_rate": 9.169170511098254e-06,
      "loss": 0.1563,
      "step": 7218
    },
    {
      "epoch": 0.21060155201587025,
      "grad_norm": 0.846206531882245,
      "learning_rate": 9.168909701691564e-06,
      "loss": 0.1494,
      "step": 7219
    },
    {
      "epoch": 0.2106307252465138,
      "grad_norm": 0.9946974153910175,
      "learning_rate": 9.168648855065844e-06,
      "loss": 0.1695,
      "step": 7220
    },
    {
      "epoch": 0.21065989847715735,
      "grad_norm": 0.7830084892918774,
      "learning_rate": 9.168387971223422e-06,
      "loss": 0.1745,
      "step": 7221
    },
    {
      "epoch": 0.21068907170780093,
      "grad_norm": 0.8729232446653769,
      "learning_rate": 9.16812705016663e-06,
      "loss": 0.1511,
      "step": 7222
    },
    {
      "epoch": 0.2107182449384445,
      "grad_norm": 1.106811527660954,
      "learning_rate": 9.16786609189779e-06,
      "loss": 0.1821,
      "step": 7223
    },
    {
      "epoch": 0.21074741816908804,
      "grad_norm": 0.8419729335873963,
      "learning_rate": 9.167605096419238e-06,
      "loss": 0.1542,
      "step": 7224
    },
    {
      "epoch": 0.2107765913997316,
      "grad_norm": 0.9634351687740887,
      "learning_rate": 9.167344063733305e-06,
      "loss": 0.1516,
      "step": 7225
    },
    {
      "epoch": 0.21080576463037518,
      "grad_norm": 0.7983317844393885,
      "learning_rate": 9.167082993842317e-06,
      "loss": 0.1505,
      "step": 7226
    },
    {
      "epoch": 0.21083493786101873,
      "grad_norm": 0.7538471682521021,
      "learning_rate": 9.166821886748607e-06,
      "loss": 0.1335,
      "step": 7227
    },
    {
      "epoch": 0.21086411109166228,
      "grad_norm": 0.9316829504194767,
      "learning_rate": 9.166560742454507e-06,
      "loss": 0.1605,
      "step": 7228
    },
    {
      "epoch": 0.21089328432230586,
      "grad_norm": 0.8741634697097257,
      "learning_rate": 9.166299560962346e-06,
      "loss": 0.19,
      "step": 7229
    },
    {
      "epoch": 0.21092245755294942,
      "grad_norm": 0.8083318365286651,
      "learning_rate": 9.166038342274458e-06,
      "loss": 0.1578,
      "step": 7230
    },
    {
      "epoch": 0.21095163078359297,
      "grad_norm": 0.8987026413554663,
      "learning_rate": 9.165777086393173e-06,
      "loss": 0.1791,
      "step": 7231
    },
    {
      "epoch": 0.21098080401423655,
      "grad_norm": 0.9635897845510202,
      "learning_rate": 9.165515793320824e-06,
      "loss": 0.1622,
      "step": 7232
    },
    {
      "epoch": 0.2110099772448801,
      "grad_norm": 0.9937506673765454,
      "learning_rate": 9.165254463059747e-06,
      "loss": 0.1745,
      "step": 7233
    },
    {
      "epoch": 0.21103915047552366,
      "grad_norm": 0.8573155118927874,
      "learning_rate": 9.164993095612271e-06,
      "loss": 0.17,
      "step": 7234
    },
    {
      "epoch": 0.2110683237061672,
      "grad_norm": 1.0406524258434822,
      "learning_rate": 9.164731690980732e-06,
      "loss": 0.1857,
      "step": 7235
    },
    {
      "epoch": 0.2110974969368108,
      "grad_norm": 0.8987827523359867,
      "learning_rate": 9.16447024916746e-06,
      "loss": 0.1587,
      "step": 7236
    },
    {
      "epoch": 0.21112667016745434,
      "grad_norm": 0.6673609456296926,
      "learning_rate": 9.164208770174795e-06,
      "loss": 0.1444,
      "step": 7237
    },
    {
      "epoch": 0.2111558433980979,
      "grad_norm": 0.859594804297381,
      "learning_rate": 9.163947254005066e-06,
      "loss": 0.1545,
      "step": 7238
    },
    {
      "epoch": 0.21118501662874148,
      "grad_norm": 0.7358956790046004,
      "learning_rate": 9.163685700660611e-06,
      "loss": 0.15,
      "step": 7239
    },
    {
      "epoch": 0.21121418985938503,
      "grad_norm": 0.7973373779002741,
      "learning_rate": 9.163424110143763e-06,
      "loss": 0.1729,
      "step": 7240
    },
    {
      "epoch": 0.21124336309002859,
      "grad_norm": 0.7203175182166265,
      "learning_rate": 9.16316248245686e-06,
      "loss": 0.1332,
      "step": 7241
    },
    {
      "epoch": 0.21127253632067214,
      "grad_norm": 0.8740975631117702,
      "learning_rate": 9.162900817602235e-06,
      "loss": 0.1525,
      "step": 7242
    },
    {
      "epoch": 0.21130170955131572,
      "grad_norm": 0.7848806097179016,
      "learning_rate": 9.162639115582226e-06,
      "loss": 0.1545,
      "step": 7243
    },
    {
      "epoch": 0.21133088278195927,
      "grad_norm": 0.8666497284228482,
      "learning_rate": 9.16237737639917e-06,
      "loss": 0.1618,
      "step": 7244
    },
    {
      "epoch": 0.21136005601260283,
      "grad_norm": 0.7492361144512795,
      "learning_rate": 9.162115600055398e-06,
      "loss": 0.1736,
      "step": 7245
    },
    {
      "epoch": 0.2113892292432464,
      "grad_norm": 0.7576215749565726,
      "learning_rate": 9.161853786553256e-06,
      "loss": 0.1597,
      "step": 7246
    },
    {
      "epoch": 0.21141840247388996,
      "grad_norm": 0.8438264160498878,
      "learning_rate": 9.161591935895073e-06,
      "loss": 0.1486,
      "step": 7247
    },
    {
      "epoch": 0.2114475757045335,
      "grad_norm": 0.7609494232054379,
      "learning_rate": 9.161330048083194e-06,
      "loss": 0.1666,
      "step": 7248
    },
    {
      "epoch": 0.2114767489351771,
      "grad_norm": 0.8909393589085635,
      "learning_rate": 9.161068123119953e-06,
      "loss": 0.1615,
      "step": 7249
    },
    {
      "epoch": 0.21150592216582065,
      "grad_norm": 0.8393390508080685,
      "learning_rate": 9.160806161007687e-06,
      "loss": 0.1446,
      "step": 7250
    },
    {
      "epoch": 0.2115350953964642,
      "grad_norm": 0.7310636219773928,
      "learning_rate": 9.16054416174874e-06,
      "loss": 0.1617,
      "step": 7251
    },
    {
      "epoch": 0.21156426862710775,
      "grad_norm": 0.8136839895612361,
      "learning_rate": 9.160282125345445e-06,
      "loss": 0.1734,
      "step": 7252
    },
    {
      "epoch": 0.21159344185775134,
      "grad_norm": 0.8147171789853841,
      "learning_rate": 9.160020051800148e-06,
      "loss": 0.1512,
      "step": 7253
    },
    {
      "epoch": 0.2116226150883949,
      "grad_norm": 0.9635672195950745,
      "learning_rate": 9.159757941115181e-06,
      "loss": 0.1528,
      "step": 7254
    },
    {
      "epoch": 0.21165178831903844,
      "grad_norm": 0.7038737130414471,
      "learning_rate": 9.15949579329289e-06,
      "loss": 0.1742,
      "step": 7255
    },
    {
      "epoch": 0.21168096154968202,
      "grad_norm": 0.8915309327020401,
      "learning_rate": 9.159233608335614e-06,
      "loss": 0.1661,
      "step": 7256
    },
    {
      "epoch": 0.21171013478032558,
      "grad_norm": 0.7964819668145793,
      "learning_rate": 9.158971386245691e-06,
      "loss": 0.1285,
      "step": 7257
    },
    {
      "epoch": 0.21173930801096913,
      "grad_norm": 0.8556872958705937,
      "learning_rate": 9.158709127025468e-06,
      "loss": 0.1775,
      "step": 7258
    },
    {
      "epoch": 0.21176848124161268,
      "grad_norm": 0.7919850483184171,
      "learning_rate": 9.15844683067728e-06,
      "loss": 0.1612,
      "step": 7259
    },
    {
      "epoch": 0.21179765447225626,
      "grad_norm": 0.8689093484287964,
      "learning_rate": 9.15818449720347e-06,
      "loss": 0.1723,
      "step": 7260
    },
    {
      "epoch": 0.21182682770289982,
      "grad_norm": 1.0878408856516633,
      "learning_rate": 9.157922126606385e-06,
      "loss": 0.1561,
      "step": 7261
    },
    {
      "epoch": 0.21185600093354337,
      "grad_norm": 0.7054973680891888,
      "learning_rate": 9.157659718888362e-06,
      "loss": 0.1507,
      "step": 7262
    },
    {
      "epoch": 0.21188517416418695,
      "grad_norm": 0.7655988458983077,
      "learning_rate": 9.157397274051745e-06,
      "loss": 0.1776,
      "step": 7263
    },
    {
      "epoch": 0.2119143473948305,
      "grad_norm": 0.8851712451319848,
      "learning_rate": 9.157134792098878e-06,
      "loss": 0.2037,
      "step": 7264
    },
    {
      "epoch": 0.21194352062547406,
      "grad_norm": 0.9438857408147793,
      "learning_rate": 9.156872273032104e-06,
      "loss": 0.1795,
      "step": 7265
    },
    {
      "epoch": 0.21197269385611764,
      "grad_norm": 0.9911627622933965,
      "learning_rate": 9.156609716853768e-06,
      "loss": 0.1696,
      "step": 7266
    },
    {
      "epoch": 0.2120018670867612,
      "grad_norm": 0.8586388532504238,
      "learning_rate": 9.156347123566211e-06,
      "loss": 0.1657,
      "step": 7267
    },
    {
      "epoch": 0.21203104031740475,
      "grad_norm": 0.9447965772581178,
      "learning_rate": 9.15608449317178e-06,
      "loss": 0.1542,
      "step": 7268
    },
    {
      "epoch": 0.2120602135480483,
      "grad_norm": 1.0570953122206987,
      "learning_rate": 9.15582182567282e-06,
      "loss": 0.179,
      "step": 7269
    },
    {
      "epoch": 0.21208938677869188,
      "grad_norm": 0.9138467528532017,
      "learning_rate": 9.155559121071673e-06,
      "loss": 0.1824,
      "step": 7270
    },
    {
      "epoch": 0.21211856000933543,
      "grad_norm": 0.7993348248640859,
      "learning_rate": 9.155296379370686e-06,
      "loss": 0.1544,
      "step": 7271
    },
    {
      "epoch": 0.212147733239979,
      "grad_norm": 0.9384783663292866,
      "learning_rate": 9.155033600572206e-06,
      "loss": 0.1529,
      "step": 7272
    },
    {
      "epoch": 0.21217690647062257,
      "grad_norm": 0.9040254271351434,
      "learning_rate": 9.154770784678577e-06,
      "loss": 0.1603,
      "step": 7273
    },
    {
      "epoch": 0.21220607970126612,
      "grad_norm": 0.8428159003755334,
      "learning_rate": 9.154507931692146e-06,
      "loss": 0.1622,
      "step": 7274
    },
    {
      "epoch": 0.21223525293190967,
      "grad_norm": 0.932883922513483,
      "learning_rate": 9.154245041615262e-06,
      "loss": 0.1672,
      "step": 7275
    },
    {
      "epoch": 0.21226442616255325,
      "grad_norm": 0.6227746897426399,
      "learning_rate": 9.153982114450268e-06,
      "loss": 0.1557,
      "step": 7276
    },
    {
      "epoch": 0.2122935993931968,
      "grad_norm": 0.9073164938194024,
      "learning_rate": 9.153719150199513e-06,
      "loss": 0.1764,
      "step": 7277
    },
    {
      "epoch": 0.21232277262384036,
      "grad_norm": 1.0442867199960852,
      "learning_rate": 9.153456148865347e-06,
      "loss": 0.1545,
      "step": 7278
    },
    {
      "epoch": 0.21235194585448391,
      "grad_norm": 0.6690553638554411,
      "learning_rate": 9.153193110450115e-06,
      "loss": 0.1575,
      "step": 7279
    },
    {
      "epoch": 0.2123811190851275,
      "grad_norm": 0.8853732465828301,
      "learning_rate": 9.152930034956166e-06,
      "loss": 0.1572,
      "step": 7280
    },
    {
      "epoch": 0.21241029231577105,
      "grad_norm": 0.8426211020092063,
      "learning_rate": 9.152666922385849e-06,
      "loss": 0.1563,
      "step": 7281
    },
    {
      "epoch": 0.2124394655464146,
      "grad_norm": 0.7788186702083829,
      "learning_rate": 9.152403772741514e-06,
      "loss": 0.1511,
      "step": 7282
    },
    {
      "epoch": 0.21246863877705818,
      "grad_norm": 0.822945340131927,
      "learning_rate": 9.152140586025509e-06,
      "loss": 0.1744,
      "step": 7283
    },
    {
      "epoch": 0.21249781200770174,
      "grad_norm": 0.9471169841068209,
      "learning_rate": 9.151877362240182e-06,
      "loss": 0.1634,
      "step": 7284
    },
    {
      "epoch": 0.2125269852383453,
      "grad_norm": 0.8465634649866716,
      "learning_rate": 9.151614101387886e-06,
      "loss": 0.1814,
      "step": 7285
    },
    {
      "epoch": 0.21255615846898884,
      "grad_norm": 1.2962259187476264,
      "learning_rate": 9.151350803470971e-06,
      "loss": 0.1372,
      "step": 7286
    },
    {
      "epoch": 0.21258533169963242,
      "grad_norm": 0.9542451308170714,
      "learning_rate": 9.151087468491788e-06,
      "loss": 0.1836,
      "step": 7287
    },
    {
      "epoch": 0.21261450493027598,
      "grad_norm": 1.0098810004640373,
      "learning_rate": 9.150824096452686e-06,
      "loss": 0.1786,
      "step": 7288
    },
    {
      "epoch": 0.21264367816091953,
      "grad_norm": 1.0573907358946364,
      "learning_rate": 9.150560687356018e-06,
      "loss": 0.1796,
      "step": 7289
    },
    {
      "epoch": 0.2126728513915631,
      "grad_norm": 1.3555653612269074,
      "learning_rate": 9.150297241204134e-06,
      "loss": 0.1736,
      "step": 7290
    },
    {
      "epoch": 0.21270202462220666,
      "grad_norm": 0.9332835706812773,
      "learning_rate": 9.150033757999389e-06,
      "loss": 0.1935,
      "step": 7291
    },
    {
      "epoch": 0.21273119785285022,
      "grad_norm": 0.9551991891457929,
      "learning_rate": 9.149770237744132e-06,
      "loss": 0.1537,
      "step": 7292
    },
    {
      "epoch": 0.2127603710834938,
      "grad_norm": 1.1512656443502813,
      "learning_rate": 9.149506680440715e-06,
      "loss": 0.1569,
      "step": 7293
    },
    {
      "epoch": 0.21278954431413735,
      "grad_norm": 0.893945134974164,
      "learning_rate": 9.149243086091495e-06,
      "loss": 0.1679,
      "step": 7294
    },
    {
      "epoch": 0.2128187175447809,
      "grad_norm": 0.9385221012246392,
      "learning_rate": 9.148979454698824e-06,
      "loss": 0.1753,
      "step": 7295
    },
    {
      "epoch": 0.21284789077542446,
      "grad_norm": 0.8300356824294304,
      "learning_rate": 9.148715786265054e-06,
      "loss": 0.1542,
      "step": 7296
    },
    {
      "epoch": 0.21287706400606804,
      "grad_norm": 0.857791008708655,
      "learning_rate": 9.148452080792538e-06,
      "loss": 0.1686,
      "step": 7297
    },
    {
      "epoch": 0.2129062372367116,
      "grad_norm": 0.7634129287782235,
      "learning_rate": 9.148188338283635e-06,
      "loss": 0.1893,
      "step": 7298
    },
    {
      "epoch": 0.21293541046735515,
      "grad_norm": 0.7691221227717636,
      "learning_rate": 9.147924558740694e-06,
      "loss": 0.1527,
      "step": 7299
    },
    {
      "epoch": 0.21296458369799873,
      "grad_norm": 0.8425605026794849,
      "learning_rate": 9.147660742166075e-06,
      "loss": 0.1605,
      "step": 7300
    },
    {
      "epoch": 0.21299375692864228,
      "grad_norm": 0.9963815151871954,
      "learning_rate": 9.14739688856213e-06,
      "loss": 0.1768,
      "step": 7301
    },
    {
      "epoch": 0.21302293015928583,
      "grad_norm": 0.8846975435104927,
      "learning_rate": 9.147132997931216e-06,
      "loss": 0.1563,
      "step": 7302
    },
    {
      "epoch": 0.2130521033899294,
      "grad_norm": 0.8220305453751654,
      "learning_rate": 9.146869070275688e-06,
      "loss": 0.169,
      "step": 7303
    },
    {
      "epoch": 0.21308127662057297,
      "grad_norm": 0.7534101736131481,
      "learning_rate": 9.146605105597904e-06,
      "loss": 0.1514,
      "step": 7304
    },
    {
      "epoch": 0.21311044985121652,
      "grad_norm": 1.0260436057186861,
      "learning_rate": 9.146341103900219e-06,
      "loss": 0.1675,
      "step": 7305
    },
    {
      "epoch": 0.21313962308186007,
      "grad_norm": 0.7428550466098834,
      "learning_rate": 9.14607706518499e-06,
      "loss": 0.1613,
      "step": 7306
    },
    {
      "epoch": 0.21316879631250366,
      "grad_norm": 0.8292004776562788,
      "learning_rate": 9.145812989454576e-06,
      "loss": 0.169,
      "step": 7307
    },
    {
      "epoch": 0.2131979695431472,
      "grad_norm": 0.8611035287697096,
      "learning_rate": 9.145548876711332e-06,
      "loss": 0.1321,
      "step": 7308
    },
    {
      "epoch": 0.21322714277379076,
      "grad_norm": 0.7735265669200234,
      "learning_rate": 9.145284726957618e-06,
      "loss": 0.1811,
      "step": 7309
    },
    {
      "epoch": 0.21325631600443434,
      "grad_norm": 0.757016431699202,
      "learning_rate": 9.14502054019579e-06,
      "loss": 0.1535,
      "step": 7310
    },
    {
      "epoch": 0.2132854892350779,
      "grad_norm": 0.7863331618232147,
      "learning_rate": 9.14475631642821e-06,
      "loss": 0.167,
      "step": 7311
    },
    {
      "epoch": 0.21331466246572145,
      "grad_norm": 0.8302171844504787,
      "learning_rate": 9.144492055657234e-06,
      "loss": 0.1575,
      "step": 7312
    },
    {
      "epoch": 0.213343835696365,
      "grad_norm": 0.9145860360312988,
      "learning_rate": 9.144227757885222e-06,
      "loss": 0.1636,
      "step": 7313
    },
    {
      "epoch": 0.21337300892700858,
      "grad_norm": 1.0497092239261154,
      "learning_rate": 9.143963423114534e-06,
      "loss": 0.1355,
      "step": 7314
    },
    {
      "epoch": 0.21340218215765214,
      "grad_norm": 0.924946311542116,
      "learning_rate": 9.143699051347533e-06,
      "loss": 0.1707,
      "step": 7315
    },
    {
      "epoch": 0.2134313553882957,
      "grad_norm": 0.8193654544468313,
      "learning_rate": 9.14343464258657e-06,
      "loss": 0.1656,
      "step": 7316
    },
    {
      "epoch": 0.21346052861893927,
      "grad_norm": 0.8506878470010961,
      "learning_rate": 9.143170196834016e-06,
      "loss": 0.1508,
      "step": 7317
    },
    {
      "epoch": 0.21348970184958282,
      "grad_norm": 0.7402661330754645,
      "learning_rate": 9.142905714092228e-06,
      "loss": 0.1316,
      "step": 7318
    },
    {
      "epoch": 0.21351887508022638,
      "grad_norm": 0.8419823467258659,
      "learning_rate": 9.142641194363565e-06,
      "loss": 0.17,
      "step": 7319
    },
    {
      "epoch": 0.21354804831086996,
      "grad_norm": 0.9193798138031342,
      "learning_rate": 9.142376637650389e-06,
      "loss": 0.1707,
      "step": 7320
    },
    {
      "epoch": 0.2135772215415135,
      "grad_norm": 0.7789488088692962,
      "learning_rate": 9.142112043955065e-06,
      "loss": 0.1492,
      "step": 7321
    },
    {
      "epoch": 0.21360639477215707,
      "grad_norm": 0.8795139713047154,
      "learning_rate": 9.141847413279955e-06,
      "loss": 0.1623,
      "step": 7322
    },
    {
      "epoch": 0.21363556800280062,
      "grad_norm": 0.848520747485592,
      "learning_rate": 9.141582745627418e-06,
      "loss": 0.1824,
      "step": 7323
    },
    {
      "epoch": 0.2136647412334442,
      "grad_norm": 0.8085062744672866,
      "learning_rate": 9.141318040999818e-06,
      "loss": 0.2026,
      "step": 7324
    },
    {
      "epoch": 0.21369391446408775,
      "grad_norm": 1.1710800232932104,
      "learning_rate": 9.14105329939952e-06,
      "loss": 0.1367,
      "step": 7325
    },
    {
      "epoch": 0.2137230876947313,
      "grad_norm": 0.8742825967810263,
      "learning_rate": 9.140788520828887e-06,
      "loss": 0.1695,
      "step": 7326
    },
    {
      "epoch": 0.2137522609253749,
      "grad_norm": 0.9654315392518187,
      "learning_rate": 9.140523705290284e-06,
      "loss": 0.159,
      "step": 7327
    },
    {
      "epoch": 0.21378143415601844,
      "grad_norm": 0.756589814868092,
      "learning_rate": 9.140258852786073e-06,
      "loss": 0.1342,
      "step": 7328
    },
    {
      "epoch": 0.213810607386662,
      "grad_norm": 0.838268804707351,
      "learning_rate": 9.139993963318619e-06,
      "loss": 0.1719,
      "step": 7329
    },
    {
      "epoch": 0.21383978061730555,
      "grad_norm": 0.7408867817376356,
      "learning_rate": 9.139729036890286e-06,
      "loss": 0.1564,
      "step": 7330
    },
    {
      "epoch": 0.21386895384794913,
      "grad_norm": 0.7836876194083487,
      "learning_rate": 9.139464073503442e-06,
      "loss": 0.1464,
      "step": 7331
    },
    {
      "epoch": 0.21389812707859268,
      "grad_norm": 0.8026603789293204,
      "learning_rate": 9.13919907316045e-06,
      "loss": 0.168,
      "step": 7332
    },
    {
      "epoch": 0.21392730030923623,
      "grad_norm": 1.1129692994111364,
      "learning_rate": 9.138934035863676e-06,
      "loss": 0.1767,
      "step": 7333
    },
    {
      "epoch": 0.21395647353987982,
      "grad_norm": 0.9043989670577581,
      "learning_rate": 9.138668961615489e-06,
      "loss": 0.1605,
      "step": 7334
    },
    {
      "epoch": 0.21398564677052337,
      "grad_norm": 0.761803105172563,
      "learning_rate": 9.138403850418252e-06,
      "loss": 0.1579,
      "step": 7335
    },
    {
      "epoch": 0.21401482000116692,
      "grad_norm": 0.9006980494791728,
      "learning_rate": 9.138138702274334e-06,
      "loss": 0.1919,
      "step": 7336
    },
    {
      "epoch": 0.2140439932318105,
      "grad_norm": 1.0927969594980038,
      "learning_rate": 9.137873517186102e-06,
      "loss": 0.1497,
      "step": 7337
    },
    {
      "epoch": 0.21407316646245406,
      "grad_norm": 0.7630838897546133,
      "learning_rate": 9.137608295155922e-06,
      "loss": 0.1471,
      "step": 7338
    },
    {
      "epoch": 0.2141023396930976,
      "grad_norm": 0.7360159065532706,
      "learning_rate": 9.137343036186163e-06,
      "loss": 0.1565,
      "step": 7339
    },
    {
      "epoch": 0.21413151292374116,
      "grad_norm": 0.8045461629263935,
      "learning_rate": 9.137077740279193e-06,
      "loss": 0.1567,
      "step": 7340
    },
    {
      "epoch": 0.21416068615438474,
      "grad_norm": 0.8868403361881234,
      "learning_rate": 9.13681240743738e-06,
      "loss": 0.2215,
      "step": 7341
    },
    {
      "epoch": 0.2141898593850283,
      "grad_norm": 0.7991777175910662,
      "learning_rate": 9.136547037663095e-06,
      "loss": 0.1478,
      "step": 7342
    },
    {
      "epoch": 0.21421903261567185,
      "grad_norm": 0.8704840435846436,
      "learning_rate": 9.136281630958706e-06,
      "loss": 0.1701,
      "step": 7343
    },
    {
      "epoch": 0.21424820584631543,
      "grad_norm": 0.7646456628845033,
      "learning_rate": 9.13601618732658e-06,
      "loss": 0.161,
      "step": 7344
    },
    {
      "epoch": 0.21427737907695898,
      "grad_norm": 0.8046625131504545,
      "learning_rate": 9.135750706769089e-06,
      "loss": 0.1771,
      "step": 7345
    },
    {
      "epoch": 0.21430655230760254,
      "grad_norm": 0.8329365527287422,
      "learning_rate": 9.135485189288604e-06,
      "loss": 0.1893,
      "step": 7346
    },
    {
      "epoch": 0.21433572553824612,
      "grad_norm": 0.8606383080312506,
      "learning_rate": 9.135219634887493e-06,
      "loss": 0.1723,
      "step": 7347
    },
    {
      "epoch": 0.21436489876888967,
      "grad_norm": 0.8321246251854761,
      "learning_rate": 9.134954043568131e-06,
      "loss": 0.1775,
      "step": 7348
    },
    {
      "epoch": 0.21439407199953323,
      "grad_norm": 0.9277947646560082,
      "learning_rate": 9.134688415332885e-06,
      "loss": 0.156,
      "step": 7349
    },
    {
      "epoch": 0.21442324523017678,
      "grad_norm": 0.8517468199609406,
      "learning_rate": 9.134422750184127e-06,
      "loss": 0.1367,
      "step": 7350
    },
    {
      "epoch": 0.21445241846082036,
      "grad_norm": 1.2604531790201514,
      "learning_rate": 9.13415704812423e-06,
      "loss": 0.1661,
      "step": 7351
    },
    {
      "epoch": 0.2144815916914639,
      "grad_norm": 0.787396494273231,
      "learning_rate": 9.133891309155565e-06,
      "loss": 0.1515,
      "step": 7352
    },
    {
      "epoch": 0.21451076492210747,
      "grad_norm": 1.0771416045998476,
      "learning_rate": 9.133625533280505e-06,
      "loss": 0.1761,
      "step": 7353
    },
    {
      "epoch": 0.21453993815275105,
      "grad_norm": 0.8482116276404471,
      "learning_rate": 9.133359720501425e-06,
      "loss": 0.1564,
      "step": 7354
    },
    {
      "epoch": 0.2145691113833946,
      "grad_norm": 0.9357043607950191,
      "learning_rate": 9.133093870820695e-06,
      "loss": 0.1711,
      "step": 7355
    },
    {
      "epoch": 0.21459828461403815,
      "grad_norm": 0.8300983990280097,
      "learning_rate": 9.132827984240691e-06,
      "loss": 0.1593,
      "step": 7356
    },
    {
      "epoch": 0.2146274578446817,
      "grad_norm": 0.725622974870402,
      "learning_rate": 9.132562060763784e-06,
      "loss": 0.1616,
      "step": 7357
    },
    {
      "epoch": 0.2146566310753253,
      "grad_norm": 0.9195976435212265,
      "learning_rate": 9.13229610039235e-06,
      "loss": 0.184,
      "step": 7358
    },
    {
      "epoch": 0.21468580430596884,
      "grad_norm": 0.8605882375424254,
      "learning_rate": 9.132030103128762e-06,
      "loss": 0.2024,
      "step": 7359
    },
    {
      "epoch": 0.2147149775366124,
      "grad_norm": 0.907525777273798,
      "learning_rate": 9.131764068975397e-06,
      "loss": 0.1897,
      "step": 7360
    },
    {
      "epoch": 0.21474415076725598,
      "grad_norm": 0.9433606626723489,
      "learning_rate": 9.131497997934627e-06,
      "loss": 0.1466,
      "step": 7361
    },
    {
      "epoch": 0.21477332399789953,
      "grad_norm": 0.8210241521435729,
      "learning_rate": 9.13123189000883e-06,
      "loss": 0.1554,
      "step": 7362
    },
    {
      "epoch": 0.21480249722854308,
      "grad_norm": 0.8116618100076691,
      "learning_rate": 9.130965745200382e-06,
      "loss": 0.1662,
      "step": 7363
    },
    {
      "epoch": 0.21483167045918666,
      "grad_norm": 1.2190663889972422,
      "learning_rate": 9.130699563511656e-06,
      "loss": 0.1887,
      "step": 7364
    },
    {
      "epoch": 0.21486084368983022,
      "grad_norm": 0.8565025067762257,
      "learning_rate": 9.130433344945032e-06,
      "loss": 0.1604,
      "step": 7365
    },
    {
      "epoch": 0.21489001692047377,
      "grad_norm": 0.8834188656468229,
      "learning_rate": 9.130167089502884e-06,
      "loss": 0.1591,
      "step": 7366
    },
    {
      "epoch": 0.21491919015111732,
      "grad_norm": 1.143879753765153,
      "learning_rate": 9.12990079718759e-06,
      "loss": 0.1381,
      "step": 7367
    },
    {
      "epoch": 0.2149483633817609,
      "grad_norm": 0.8172458099029768,
      "learning_rate": 9.129634468001529e-06,
      "loss": 0.155,
      "step": 7368
    },
    {
      "epoch": 0.21497753661240446,
      "grad_norm": 0.8025807980795531,
      "learning_rate": 9.129368101947076e-06,
      "loss": 0.154,
      "step": 7369
    },
    {
      "epoch": 0.215006709843048,
      "grad_norm": 0.974295012210877,
      "learning_rate": 9.12910169902661e-06,
      "loss": 0.1945,
      "step": 7370
    },
    {
      "epoch": 0.2150358830736916,
      "grad_norm": 0.8587266874156988,
      "learning_rate": 9.128835259242511e-06,
      "loss": 0.1682,
      "step": 7371
    },
    {
      "epoch": 0.21506505630433514,
      "grad_norm": 0.8151983296692307,
      "learning_rate": 9.128568782597155e-06,
      "loss": 0.1767,
      "step": 7372
    },
    {
      "epoch": 0.2150942295349787,
      "grad_norm": 0.7577545340507429,
      "learning_rate": 9.128302269092925e-06,
      "loss": 0.1704,
      "step": 7373
    },
    {
      "epoch": 0.21512340276562225,
      "grad_norm": 0.8399926028136064,
      "learning_rate": 9.128035718732196e-06,
      "loss": 0.1669,
      "step": 7374
    },
    {
      "epoch": 0.21515257599626583,
      "grad_norm": 0.9750137504931491,
      "learning_rate": 9.12776913151735e-06,
      "loss": 0.1634,
      "step": 7375
    },
    {
      "epoch": 0.21518174922690939,
      "grad_norm": 0.8003817450825019,
      "learning_rate": 9.127502507450765e-06,
      "loss": 0.157,
      "step": 7376
    },
    {
      "epoch": 0.21521092245755294,
      "grad_norm": 0.8293215185825179,
      "learning_rate": 9.127235846534826e-06,
      "loss": 0.1671,
      "step": 7377
    },
    {
      "epoch": 0.21524009568819652,
      "grad_norm": 1.05160228048288,
      "learning_rate": 9.126969148771907e-06,
      "loss": 0.1662,
      "step": 7378
    },
    {
      "epoch": 0.21526926891884007,
      "grad_norm": 0.7691951627365682,
      "learning_rate": 9.126702414164395e-06,
      "loss": 0.1695,
      "step": 7379
    },
    {
      "epoch": 0.21529844214948363,
      "grad_norm": 0.8939838121579913,
      "learning_rate": 9.126435642714669e-06,
      "loss": 0.1517,
      "step": 7380
    },
    {
      "epoch": 0.2153276153801272,
      "grad_norm": 0.8513543627829114,
      "learning_rate": 9.12616883442511e-06,
      "loss": 0.1707,
      "step": 7381
    },
    {
      "epoch": 0.21535678861077076,
      "grad_norm": 0.9092042406280318,
      "learning_rate": 9.1259019892981e-06,
      "loss": 0.1994,
      "step": 7382
    },
    {
      "epoch": 0.21538596184141431,
      "grad_norm": 0.9000988792296255,
      "learning_rate": 9.125635107336024e-06,
      "loss": 0.1679,
      "step": 7383
    },
    {
      "epoch": 0.21541513507205787,
      "grad_norm": 0.908123743882174,
      "learning_rate": 9.125368188541262e-06,
      "loss": 0.1758,
      "step": 7384
    },
    {
      "epoch": 0.21544430830270145,
      "grad_norm": 0.8767753044865089,
      "learning_rate": 9.125101232916196e-06,
      "loss": 0.176,
      "step": 7385
    },
    {
      "epoch": 0.215473481533345,
      "grad_norm": 0.9149316587210717,
      "learning_rate": 9.124834240463212e-06,
      "loss": 0.1538,
      "step": 7386
    },
    {
      "epoch": 0.21550265476398855,
      "grad_norm": 0.8840923678120068,
      "learning_rate": 9.124567211184693e-06,
      "loss": 0.1675,
      "step": 7387
    },
    {
      "epoch": 0.21553182799463214,
      "grad_norm": 0.8313796305740393,
      "learning_rate": 9.124300145083022e-06,
      "loss": 0.2106,
      "step": 7388
    },
    {
      "epoch": 0.2155610012252757,
      "grad_norm": 1.081647456332479,
      "learning_rate": 9.124033042160583e-06,
      "loss": 0.1649,
      "step": 7389
    },
    {
      "epoch": 0.21559017445591924,
      "grad_norm": 0.7099284823286249,
      "learning_rate": 9.123765902419764e-06,
      "loss": 0.1427,
      "step": 7390
    },
    {
      "epoch": 0.21561934768656282,
      "grad_norm": 0.8659710311845469,
      "learning_rate": 9.123498725862946e-06,
      "loss": 0.1486,
      "step": 7391
    },
    {
      "epoch": 0.21564852091720638,
      "grad_norm": 0.8675062325411816,
      "learning_rate": 9.123231512492513e-06,
      "loss": 0.1584,
      "step": 7392
    },
    {
      "epoch": 0.21567769414784993,
      "grad_norm": 0.8166116693541626,
      "learning_rate": 9.122964262310858e-06,
      "loss": 0.1762,
      "step": 7393
    },
    {
      "epoch": 0.21570686737849348,
      "grad_norm": 0.8307253049564872,
      "learning_rate": 9.12269697532036e-06,
      "loss": 0.1694,
      "step": 7394
    },
    {
      "epoch": 0.21573604060913706,
      "grad_norm": 0.835149841674723,
      "learning_rate": 9.122429651523408e-06,
      "loss": 0.1825,
      "step": 7395
    },
    {
      "epoch": 0.21576521383978062,
      "grad_norm": 1.039023270171752,
      "learning_rate": 9.122162290922387e-06,
      "loss": 0.1541,
      "step": 7396
    },
    {
      "epoch": 0.21579438707042417,
      "grad_norm": 1.2286048092311068,
      "learning_rate": 9.121894893519688e-06,
      "loss": 0.1706,
      "step": 7397
    },
    {
      "epoch": 0.21582356030106775,
      "grad_norm": 0.8429100222118212,
      "learning_rate": 9.121627459317693e-06,
      "loss": 0.1796,
      "step": 7398
    },
    {
      "epoch": 0.2158527335317113,
      "grad_norm": 0.911287718055003,
      "learning_rate": 9.121359988318792e-06,
      "loss": 0.1771,
      "step": 7399
    },
    {
      "epoch": 0.21588190676235486,
      "grad_norm": 0.9175234071784473,
      "learning_rate": 9.121092480525374e-06,
      "loss": 0.1553,
      "step": 7400
    },
    {
      "epoch": 0.2159110799929984,
      "grad_norm": 0.9441039377984193,
      "learning_rate": 9.120824935939824e-06,
      "loss": 0.1682,
      "step": 7401
    },
    {
      "epoch": 0.215940253223642,
      "grad_norm": 0.7742096126268675,
      "learning_rate": 9.120557354564534e-06,
      "loss": 0.1871,
      "step": 7402
    },
    {
      "epoch": 0.21596942645428555,
      "grad_norm": 0.8067408667463365,
      "learning_rate": 9.120289736401892e-06,
      "loss": 0.1873,
      "step": 7403
    },
    {
      "epoch": 0.2159985996849291,
      "grad_norm": 0.7300165533170915,
      "learning_rate": 9.120022081454286e-06,
      "loss": 0.1776,
      "step": 7404
    },
    {
      "epoch": 0.21602777291557268,
      "grad_norm": 0.7454749963610728,
      "learning_rate": 9.119754389724107e-06,
      "loss": 0.1571,
      "step": 7405
    },
    {
      "epoch": 0.21605694614621623,
      "grad_norm": 0.7374077050525888,
      "learning_rate": 9.119486661213744e-06,
      "loss": 0.1824,
      "step": 7406
    },
    {
      "epoch": 0.2160861193768598,
      "grad_norm": 0.8220961724986665,
      "learning_rate": 9.119218895925588e-06,
      "loss": 0.1446,
      "step": 7407
    },
    {
      "epoch": 0.21611529260750337,
      "grad_norm": 0.7783355121923526,
      "learning_rate": 9.118951093862028e-06,
      "loss": 0.195,
      "step": 7408
    },
    {
      "epoch": 0.21614446583814692,
      "grad_norm": 0.7202058289041149,
      "learning_rate": 9.118683255025457e-06,
      "loss": 0.1704,
      "step": 7409
    },
    {
      "epoch": 0.21617363906879047,
      "grad_norm": 0.8610059331317936,
      "learning_rate": 9.118415379418265e-06,
      "loss": 0.1622,
      "step": 7410
    },
    {
      "epoch": 0.21620281229943403,
      "grad_norm": 0.7001027600457649,
      "learning_rate": 9.118147467042844e-06,
      "loss": 0.1421,
      "step": 7411
    },
    {
      "epoch": 0.2162319855300776,
      "grad_norm": 0.8022121923333063,
      "learning_rate": 9.117879517901584e-06,
      "loss": 0.1784,
      "step": 7412
    },
    {
      "epoch": 0.21626115876072116,
      "grad_norm": 0.9036733519026036,
      "learning_rate": 9.11761153199688e-06,
      "loss": 0.1584,
      "step": 7413
    },
    {
      "epoch": 0.21629033199136471,
      "grad_norm": 0.7732901311394017,
      "learning_rate": 9.117343509331122e-06,
      "loss": 0.1569,
      "step": 7414
    },
    {
      "epoch": 0.2163195052220083,
      "grad_norm": 0.8897167987673777,
      "learning_rate": 9.117075449906704e-06,
      "loss": 0.1618,
      "step": 7415
    },
    {
      "epoch": 0.21634867845265185,
      "grad_norm": 0.9203266233694063,
      "learning_rate": 9.11680735372602e-06,
      "loss": 0.1841,
      "step": 7416
    },
    {
      "epoch": 0.2163778516832954,
      "grad_norm": 0.9771782649620638,
      "learning_rate": 9.116539220791464e-06,
      "loss": 0.167,
      "step": 7417
    },
    {
      "epoch": 0.21640702491393898,
      "grad_norm": 0.7923600665984015,
      "learning_rate": 9.116271051105428e-06,
      "loss": 0.1596,
      "step": 7418
    },
    {
      "epoch": 0.21643619814458254,
      "grad_norm": 0.9232057168625178,
      "learning_rate": 9.116002844670304e-06,
      "loss": 0.1747,
      "step": 7419
    },
    {
      "epoch": 0.2164653713752261,
      "grad_norm": 0.7897568343124842,
      "learning_rate": 9.115734601488492e-06,
      "loss": 0.1449,
      "step": 7420
    },
    {
      "epoch": 0.21649454460586964,
      "grad_norm": 0.9717566598530014,
      "learning_rate": 9.115466321562384e-06,
      "loss": 0.1414,
      "step": 7421
    },
    {
      "epoch": 0.21652371783651322,
      "grad_norm": 0.7384852427970718,
      "learning_rate": 9.115198004894371e-06,
      "loss": 0.1689,
      "step": 7422
    },
    {
      "epoch": 0.21655289106715678,
      "grad_norm": 0.9349622934838513,
      "learning_rate": 9.114929651486857e-06,
      "loss": 0.1844,
      "step": 7423
    },
    {
      "epoch": 0.21658206429780033,
      "grad_norm": 1.1331587925570314,
      "learning_rate": 9.114661261342232e-06,
      "loss": 0.153,
      "step": 7424
    },
    {
      "epoch": 0.2166112375284439,
      "grad_norm": 0.8569517521357359,
      "learning_rate": 9.114392834462895e-06,
      "loss": 0.1398,
      "step": 7425
    },
    {
      "epoch": 0.21664041075908747,
      "grad_norm": 1.0594142407397262,
      "learning_rate": 9.114124370851238e-06,
      "loss": 0.1609,
      "step": 7426
    },
    {
      "epoch": 0.21666958398973102,
      "grad_norm": 0.9872499379799294,
      "learning_rate": 9.113855870509664e-06,
      "loss": 0.1944,
      "step": 7427
    },
    {
      "epoch": 0.21669875722037457,
      "grad_norm": 0.9348282425197145,
      "learning_rate": 9.113587333440566e-06,
      "loss": 0.16,
      "step": 7428
    },
    {
      "epoch": 0.21672793045101815,
      "grad_norm": 0.8385280054899792,
      "learning_rate": 9.11331875964634e-06,
      "loss": 0.1529,
      "step": 7429
    },
    {
      "epoch": 0.2167571036816617,
      "grad_norm": 0.8559787857518748,
      "learning_rate": 9.113050149129387e-06,
      "loss": 0.1605,
      "step": 7430
    },
    {
      "epoch": 0.21678627691230526,
      "grad_norm": 0.8517007332578688,
      "learning_rate": 9.112781501892105e-06,
      "loss": 0.161,
      "step": 7431
    },
    {
      "epoch": 0.21681545014294884,
      "grad_norm": 1.1456108258528437,
      "learning_rate": 9.112512817936892e-06,
      "loss": 0.1422,
      "step": 7432
    },
    {
      "epoch": 0.2168446233735924,
      "grad_norm": 0.9161175606348492,
      "learning_rate": 9.112244097266144e-06,
      "loss": 0.1636,
      "step": 7433
    },
    {
      "epoch": 0.21687379660423595,
      "grad_norm": 1.0118412085103796,
      "learning_rate": 9.111975339882265e-06,
      "loss": 0.1416,
      "step": 7434
    },
    {
      "epoch": 0.21690296983487953,
      "grad_norm": 0.7363456828612303,
      "learning_rate": 9.11170654578765e-06,
      "loss": 0.156,
      "step": 7435
    },
    {
      "epoch": 0.21693214306552308,
      "grad_norm": 0.7820229076664653,
      "learning_rate": 9.1114377149847e-06,
      "loss": 0.1583,
      "step": 7436
    },
    {
      "epoch": 0.21696131629616663,
      "grad_norm": 1.0942678168691158,
      "learning_rate": 9.11116884747582e-06,
      "loss": 0.1769,
      "step": 7437
    },
    {
      "epoch": 0.2169904895268102,
      "grad_norm": 0.9447698383398426,
      "learning_rate": 9.1108999432634e-06,
      "loss": 0.1871,
      "step": 7438
    },
    {
      "epoch": 0.21701966275745377,
      "grad_norm": 0.8157624572363903,
      "learning_rate": 9.11063100234985e-06,
      "loss": 0.1936,
      "step": 7439
    },
    {
      "epoch": 0.21704883598809732,
      "grad_norm": 0.9421469363701053,
      "learning_rate": 9.110362024737566e-06,
      "loss": 0.1633,
      "step": 7440
    },
    {
      "epoch": 0.21707800921874087,
      "grad_norm": 0.9607965558693335,
      "learning_rate": 9.110093010428953e-06,
      "loss": 0.1627,
      "step": 7441
    },
    {
      "epoch": 0.21710718244938446,
      "grad_norm": 0.8354674897137242,
      "learning_rate": 9.10982395942641e-06,
      "loss": 0.1966,
      "step": 7442
    },
    {
      "epoch": 0.217136355680028,
      "grad_norm": 0.906809921024923,
      "learning_rate": 9.10955487173234e-06,
      "loss": 0.1484,
      "step": 7443
    },
    {
      "epoch": 0.21716552891067156,
      "grad_norm": 0.980781509983675,
      "learning_rate": 9.109285747349145e-06,
      "loss": 0.1425,
      "step": 7444
    },
    {
      "epoch": 0.21719470214131512,
      "grad_norm": 0.8914872052533028,
      "learning_rate": 9.109016586279227e-06,
      "loss": 0.1559,
      "step": 7445
    },
    {
      "epoch": 0.2172238753719587,
      "grad_norm": 0.7714414266749681,
      "learning_rate": 9.10874738852499e-06,
      "loss": 0.1585,
      "step": 7446
    },
    {
      "epoch": 0.21725304860260225,
      "grad_norm": 1.0786465918729728,
      "learning_rate": 9.108478154088838e-06,
      "loss": 0.1649,
      "step": 7447
    },
    {
      "epoch": 0.2172822218332458,
      "grad_norm": 0.8838986337349987,
      "learning_rate": 9.108208882973172e-06,
      "loss": 0.1651,
      "step": 7448
    },
    {
      "epoch": 0.21731139506388938,
      "grad_norm": 0.7998682490861097,
      "learning_rate": 9.1079395751804e-06,
      "loss": 0.1799,
      "step": 7449
    },
    {
      "epoch": 0.21734056829453294,
      "grad_norm": 1.121077534441,
      "learning_rate": 9.107670230712924e-06,
      "loss": 0.1488,
      "step": 7450
    },
    {
      "epoch": 0.2173697415251765,
      "grad_norm": 0.8781274845047359,
      "learning_rate": 9.107400849573148e-06,
      "loss": 0.1773,
      "step": 7451
    },
    {
      "epoch": 0.21739891475582007,
      "grad_norm": 0.9108741953398324,
      "learning_rate": 9.107131431763479e-06,
      "loss": 0.1399,
      "step": 7452
    },
    {
      "epoch": 0.21742808798646363,
      "grad_norm": 1.4747423636955517,
      "learning_rate": 9.106861977286319e-06,
      "loss": 0.1823,
      "step": 7453
    },
    {
      "epoch": 0.21745726121710718,
      "grad_norm": 0.9139258343487516,
      "learning_rate": 9.106592486144077e-06,
      "loss": 0.1478,
      "step": 7454
    },
    {
      "epoch": 0.21748643444775073,
      "grad_norm": 0.9833107826241818,
      "learning_rate": 9.106322958339156e-06,
      "loss": 0.175,
      "step": 7455
    },
    {
      "epoch": 0.2175156076783943,
      "grad_norm": 0.9696490842829171,
      "learning_rate": 9.106053393873965e-06,
      "loss": 0.1738,
      "step": 7456
    },
    {
      "epoch": 0.21754478090903787,
      "grad_norm": 1.2310078909985334,
      "learning_rate": 9.105783792750909e-06,
      "loss": 0.1756,
      "step": 7457
    },
    {
      "epoch": 0.21757395413968142,
      "grad_norm": 0.7618525861111929,
      "learning_rate": 9.105514154972397e-06,
      "loss": 0.1577,
      "step": 7458
    },
    {
      "epoch": 0.217603127370325,
      "grad_norm": 0.9952546015463827,
      "learning_rate": 9.105244480540833e-06,
      "loss": 0.1793,
      "step": 7459
    },
    {
      "epoch": 0.21763230060096855,
      "grad_norm": 0.9626963435260152,
      "learning_rate": 9.104974769458626e-06,
      "loss": 0.1626,
      "step": 7460
    },
    {
      "epoch": 0.2176614738316121,
      "grad_norm": 0.9606094758968395,
      "learning_rate": 9.104705021728185e-06,
      "loss": 0.1722,
      "step": 7461
    },
    {
      "epoch": 0.2176906470622557,
      "grad_norm": 0.9611724485693799,
      "learning_rate": 9.104435237351918e-06,
      "loss": 0.1693,
      "step": 7462
    },
    {
      "epoch": 0.21771982029289924,
      "grad_norm": 0.994146211050947,
      "learning_rate": 9.104165416332232e-06,
      "loss": 0.1714,
      "step": 7463
    },
    {
      "epoch": 0.2177489935235428,
      "grad_norm": 0.7237656428923633,
      "learning_rate": 9.103895558671538e-06,
      "loss": 0.1474,
      "step": 7464
    },
    {
      "epoch": 0.21777816675418635,
      "grad_norm": 0.853741497086034,
      "learning_rate": 9.103625664372244e-06,
      "loss": 0.1638,
      "step": 7465
    },
    {
      "epoch": 0.21780733998482993,
      "grad_norm": 0.9604956953178158,
      "learning_rate": 9.10335573343676e-06,
      "loss": 0.1592,
      "step": 7466
    },
    {
      "epoch": 0.21783651321547348,
      "grad_norm": 0.7211730382503233,
      "learning_rate": 9.103085765867494e-06,
      "loss": 0.1483,
      "step": 7467
    },
    {
      "epoch": 0.21786568644611704,
      "grad_norm": 0.9003289263920882,
      "learning_rate": 9.102815761666857e-06,
      "loss": 0.1737,
      "step": 7468
    },
    {
      "epoch": 0.21789485967676062,
      "grad_norm": 1.063103587280332,
      "learning_rate": 9.102545720837264e-06,
      "loss": 0.1767,
      "step": 7469
    },
    {
      "epoch": 0.21792403290740417,
      "grad_norm": 0.9651305024078388,
      "learning_rate": 9.102275643381118e-06,
      "loss": 0.1749,
      "step": 7470
    },
    {
      "epoch": 0.21795320613804772,
      "grad_norm": 0.7664733929527952,
      "learning_rate": 9.102005529300837e-06,
      "loss": 0.1778,
      "step": 7471
    },
    {
      "epoch": 0.21798237936869128,
      "grad_norm": 0.7476178142610409,
      "learning_rate": 9.10173537859883e-06,
      "loss": 0.1598,
      "step": 7472
    },
    {
      "epoch": 0.21801155259933486,
      "grad_norm": 0.788871726907985,
      "learning_rate": 9.101465191277507e-06,
      "loss": 0.1597,
      "step": 7473
    },
    {
      "epoch": 0.2180407258299784,
      "grad_norm": 0.8464570107689131,
      "learning_rate": 9.101194967339284e-06,
      "loss": 0.1542,
      "step": 7474
    },
    {
      "epoch": 0.21806989906062196,
      "grad_norm": 0.9205452933907281,
      "learning_rate": 9.100924706786568e-06,
      "loss": 0.1511,
      "step": 7475
    },
    {
      "epoch": 0.21809907229126554,
      "grad_norm": 0.7740632248814412,
      "learning_rate": 9.100654409621779e-06,
      "loss": 0.1639,
      "step": 7476
    },
    {
      "epoch": 0.2181282455219091,
      "grad_norm": 0.6725205946707393,
      "learning_rate": 9.100384075847324e-06,
      "loss": 0.1458,
      "step": 7477
    },
    {
      "epoch": 0.21815741875255265,
      "grad_norm": 0.8691746230253524,
      "learning_rate": 9.10011370546562e-06,
      "loss": 0.1436,
      "step": 7478
    },
    {
      "epoch": 0.21818659198319623,
      "grad_norm": 0.8966021726089877,
      "learning_rate": 9.099843298479079e-06,
      "loss": 0.2026,
      "step": 7479
    },
    {
      "epoch": 0.21821576521383979,
      "grad_norm": 1.525537760543073,
      "learning_rate": 9.099572854890115e-06,
      "loss": 0.1721,
      "step": 7480
    },
    {
      "epoch": 0.21824493844448334,
      "grad_norm": 1.0278026389305943,
      "learning_rate": 9.099302374701145e-06,
      "loss": 0.1698,
      "step": 7481
    },
    {
      "epoch": 0.2182741116751269,
      "grad_norm": 0.810566816600142,
      "learning_rate": 9.09903185791458e-06,
      "loss": 0.1575,
      "step": 7482
    },
    {
      "epoch": 0.21830328490577047,
      "grad_norm": 0.9454530453508962,
      "learning_rate": 9.098761304532839e-06,
      "loss": 0.1806,
      "step": 7483
    },
    {
      "epoch": 0.21833245813641403,
      "grad_norm": 0.6411644748981236,
      "learning_rate": 9.098490714558335e-06,
      "loss": 0.136,
      "step": 7484
    },
    {
      "epoch": 0.21836163136705758,
      "grad_norm": 0.9555150607957652,
      "learning_rate": 9.098220087993484e-06,
      "loss": 0.1502,
      "step": 7485
    },
    {
      "epoch": 0.21839080459770116,
      "grad_norm": 0.7725450469261882,
      "learning_rate": 9.0979494248407e-06,
      "loss": 0.1755,
      "step": 7486
    },
    {
      "epoch": 0.2184199778283447,
      "grad_norm": 0.7376261609439652,
      "learning_rate": 9.097678725102406e-06,
      "loss": 0.1487,
      "step": 7487
    },
    {
      "epoch": 0.21844915105898827,
      "grad_norm": 0.9009890967373166,
      "learning_rate": 9.097407988781012e-06,
      "loss": 0.158,
      "step": 7488
    },
    {
      "epoch": 0.21847832428963182,
      "grad_norm": 0.9162151433377764,
      "learning_rate": 9.097137215878938e-06,
      "loss": 0.1578,
      "step": 7489
    },
    {
      "epoch": 0.2185074975202754,
      "grad_norm": 0.8071439964557588,
      "learning_rate": 9.096866406398601e-06,
      "loss": 0.1824,
      "step": 7490
    },
    {
      "epoch": 0.21853667075091895,
      "grad_norm": 0.8452596310026971,
      "learning_rate": 9.096595560342418e-06,
      "loss": 0.175,
      "step": 7491
    },
    {
      "epoch": 0.2185658439815625,
      "grad_norm": 0.9880919817564738,
      "learning_rate": 9.09632467771281e-06,
      "loss": 0.1785,
      "step": 7492
    },
    {
      "epoch": 0.2185950172122061,
      "grad_norm": 0.8639885120516009,
      "learning_rate": 9.096053758512193e-06,
      "loss": 0.1407,
      "step": 7493
    },
    {
      "epoch": 0.21862419044284964,
      "grad_norm": 0.9298884048629106,
      "learning_rate": 9.095782802742983e-06,
      "loss": 0.1597,
      "step": 7494
    },
    {
      "epoch": 0.2186533636734932,
      "grad_norm": 0.8963426486967087,
      "learning_rate": 9.095511810407605e-06,
      "loss": 0.1693,
      "step": 7495
    },
    {
      "epoch": 0.21868253690413678,
      "grad_norm": 0.779138913716008,
      "learning_rate": 9.095240781508472e-06,
      "loss": 0.152,
      "step": 7496
    },
    {
      "epoch": 0.21871171013478033,
      "grad_norm": 0.98885334805735,
      "learning_rate": 9.09496971604801e-06,
      "loss": 0.1658,
      "step": 7497
    },
    {
      "epoch": 0.21874088336542388,
      "grad_norm": 1.0373383670255278,
      "learning_rate": 9.094698614028635e-06,
      "loss": 0.171,
      "step": 7498
    },
    {
      "epoch": 0.21877005659606744,
      "grad_norm": 0.9342057049525488,
      "learning_rate": 9.094427475452767e-06,
      "loss": 0.1718,
      "step": 7499
    },
    {
      "epoch": 0.21879922982671102,
      "grad_norm": 0.9690013882216534,
      "learning_rate": 9.09415630032283e-06,
      "loss": 0.1728,
      "step": 7500
    },
    {
      "epoch": 0.21882840305735457,
      "grad_norm": 0.8017953998969738,
      "learning_rate": 9.09388508864124e-06,
      "loss": 0.1509,
      "step": 7501
    },
    {
      "epoch": 0.21885757628799812,
      "grad_norm": 1.2085718131544307,
      "learning_rate": 9.093613840410423e-06,
      "loss": 0.1743,
      "step": 7502
    },
    {
      "epoch": 0.2188867495186417,
      "grad_norm": 0.9052288263657564,
      "learning_rate": 9.0933425556328e-06,
      "loss": 0.1228,
      "step": 7503
    },
    {
      "epoch": 0.21891592274928526,
      "grad_norm": 0.7426645505106202,
      "learning_rate": 9.09307123431079e-06,
      "loss": 0.1555,
      "step": 7504
    },
    {
      "epoch": 0.2189450959799288,
      "grad_norm": 0.8284735612852144,
      "learning_rate": 9.092799876446818e-06,
      "loss": 0.1653,
      "step": 7505
    },
    {
      "epoch": 0.2189742692105724,
      "grad_norm": 1.1483666041753051,
      "learning_rate": 9.092528482043306e-06,
      "loss": 0.1621,
      "step": 7506
    },
    {
      "epoch": 0.21900344244121595,
      "grad_norm": 0.8718060940875548,
      "learning_rate": 9.092257051102675e-06,
      "loss": 0.156,
      "step": 7507
    },
    {
      "epoch": 0.2190326156718595,
      "grad_norm": 0.9244857952426913,
      "learning_rate": 9.091985583627352e-06,
      "loss": 0.1579,
      "step": 7508
    },
    {
      "epoch": 0.21906178890250305,
      "grad_norm": 0.9375404157081058,
      "learning_rate": 9.091714079619758e-06,
      "loss": 0.159,
      "step": 7509
    },
    {
      "epoch": 0.21909096213314663,
      "grad_norm": 0.9444245221277218,
      "learning_rate": 9.091442539082317e-06,
      "loss": 0.1669,
      "step": 7510
    },
    {
      "epoch": 0.21912013536379019,
      "grad_norm": 0.8695707828850077,
      "learning_rate": 9.091170962017453e-06,
      "loss": 0.1575,
      "step": 7511
    },
    {
      "epoch": 0.21914930859443374,
      "grad_norm": 0.9718117313019489,
      "learning_rate": 9.090899348427593e-06,
      "loss": 0.1587,
      "step": 7512
    },
    {
      "epoch": 0.21917848182507732,
      "grad_norm": 0.9249537069936109,
      "learning_rate": 9.090627698315159e-06,
      "loss": 0.1736,
      "step": 7513
    },
    {
      "epoch": 0.21920765505572087,
      "grad_norm": 1.0613644382339964,
      "learning_rate": 9.090356011682578e-06,
      "loss": 0.1854,
      "step": 7514
    },
    {
      "epoch": 0.21923682828636443,
      "grad_norm": 0.7595003707986981,
      "learning_rate": 9.090084288532276e-06,
      "loss": 0.1618,
      "step": 7515
    },
    {
      "epoch": 0.21926600151700798,
      "grad_norm": 0.8861136072560637,
      "learning_rate": 9.089812528866674e-06,
      "loss": 0.1818,
      "step": 7516
    },
    {
      "epoch": 0.21929517474765156,
      "grad_norm": 1.1073248286947113,
      "learning_rate": 9.089540732688205e-06,
      "loss": 0.1688,
      "step": 7517
    },
    {
      "epoch": 0.21932434797829511,
      "grad_norm": 0.8579089096518323,
      "learning_rate": 9.089268899999293e-06,
      "loss": 0.1597,
      "step": 7518
    },
    {
      "epoch": 0.21935352120893867,
      "grad_norm": 0.8120764675150153,
      "learning_rate": 9.088997030802364e-06,
      "loss": 0.1766,
      "step": 7519
    },
    {
      "epoch": 0.21938269443958225,
      "grad_norm": 1.160647013834858,
      "learning_rate": 9.088725125099844e-06,
      "loss": 0.1622,
      "step": 7520
    },
    {
      "epoch": 0.2194118676702258,
      "grad_norm": 0.9285213051213117,
      "learning_rate": 9.088453182894165e-06,
      "loss": 0.1598,
      "step": 7521
    },
    {
      "epoch": 0.21944104090086936,
      "grad_norm": 0.8613692267922717,
      "learning_rate": 9.08818120418775e-06,
      "loss": 0.1688,
      "step": 7522
    },
    {
      "epoch": 0.21947021413151294,
      "grad_norm": 0.8943126674686942,
      "learning_rate": 9.08790918898303e-06,
      "loss": 0.1662,
      "step": 7523
    },
    {
      "epoch": 0.2194993873621565,
      "grad_norm": 0.7885484357016502,
      "learning_rate": 9.087637137282432e-06,
      "loss": 0.1621,
      "step": 7524
    },
    {
      "epoch": 0.21952856059280004,
      "grad_norm": 0.7457779815187887,
      "learning_rate": 9.087365049088386e-06,
      "loss": 0.1645,
      "step": 7525
    },
    {
      "epoch": 0.2195577338234436,
      "grad_norm": 0.9917753671200845,
      "learning_rate": 9.08709292440332e-06,
      "loss": 0.1432,
      "step": 7526
    },
    {
      "epoch": 0.21958690705408718,
      "grad_norm": 1.0063350040424661,
      "learning_rate": 9.086820763229665e-06,
      "loss": 0.1587,
      "step": 7527
    },
    {
      "epoch": 0.21961608028473073,
      "grad_norm": 1.1737765979425956,
      "learning_rate": 9.086548565569848e-06,
      "loss": 0.1676,
      "step": 7528
    },
    {
      "epoch": 0.21964525351537428,
      "grad_norm": 0.9962712694939887,
      "learning_rate": 9.086276331426302e-06,
      "loss": 0.1493,
      "step": 7529
    },
    {
      "epoch": 0.21967442674601786,
      "grad_norm": 1.0197100473107363,
      "learning_rate": 9.086004060801456e-06,
      "loss": 0.1781,
      "step": 7530
    },
    {
      "epoch": 0.21970359997666142,
      "grad_norm": 0.8887979084517902,
      "learning_rate": 9.085731753697741e-06,
      "loss": 0.1709,
      "step": 7531
    },
    {
      "epoch": 0.21973277320730497,
      "grad_norm": 1.0573885465579034,
      "learning_rate": 9.085459410117589e-06,
      "loss": 0.1477,
      "step": 7532
    },
    {
      "epoch": 0.21976194643794855,
      "grad_norm": 0.8718632363111908,
      "learning_rate": 9.085187030063432e-06,
      "loss": 0.1606,
      "step": 7533
    },
    {
      "epoch": 0.2197911196685921,
      "grad_norm": 0.7976871165360285,
      "learning_rate": 9.084914613537699e-06,
      "loss": 0.1665,
      "step": 7534
    },
    {
      "epoch": 0.21982029289923566,
      "grad_norm": 0.8654040256073582,
      "learning_rate": 9.084642160542823e-06,
      "loss": 0.1699,
      "step": 7535
    },
    {
      "epoch": 0.2198494661298792,
      "grad_norm": 0.9982965024961783,
      "learning_rate": 9.084369671081237e-06,
      "loss": 0.1834,
      "step": 7536
    },
    {
      "epoch": 0.2198786393605228,
      "grad_norm": 0.7991190223835759,
      "learning_rate": 9.084097145155372e-06,
      "loss": 0.168,
      "step": 7537
    },
    {
      "epoch": 0.21990781259116635,
      "grad_norm": 0.8384227842961963,
      "learning_rate": 9.083824582767667e-06,
      "loss": 0.1568,
      "step": 7538
    },
    {
      "epoch": 0.2199369858218099,
      "grad_norm": 0.8639148551820021,
      "learning_rate": 9.083551983920546e-06,
      "loss": 0.1895,
      "step": 7539
    },
    {
      "epoch": 0.21996615905245348,
      "grad_norm": 0.8776760204043199,
      "learning_rate": 9.083279348616451e-06,
      "loss": 0.16,
      "step": 7540
    },
    {
      "epoch": 0.21999533228309703,
      "grad_norm": 0.8397931986688365,
      "learning_rate": 9.083006676857813e-06,
      "loss": 0.1624,
      "step": 7541
    },
    {
      "epoch": 0.2200245055137406,
      "grad_norm": 0.9393977801830081,
      "learning_rate": 9.082733968647064e-06,
      "loss": 0.1431,
      "step": 7542
    },
    {
      "epoch": 0.22005367874438414,
      "grad_norm": 1.2585098990210004,
      "learning_rate": 9.082461223986643e-06,
      "loss": 0.1519,
      "step": 7543
    },
    {
      "epoch": 0.22008285197502772,
      "grad_norm": 0.8099130895397219,
      "learning_rate": 9.08218844287898e-06,
      "loss": 0.1834,
      "step": 7544
    },
    {
      "epoch": 0.22011202520567127,
      "grad_norm": 0.8935366257897372,
      "learning_rate": 9.081915625326516e-06,
      "loss": 0.1707,
      "step": 7545
    },
    {
      "epoch": 0.22014119843631483,
      "grad_norm": 0.9456191841995787,
      "learning_rate": 9.081642771331681e-06,
      "loss": 0.1765,
      "step": 7546
    },
    {
      "epoch": 0.2201703716669584,
      "grad_norm": 1.0163265319686992,
      "learning_rate": 9.081369880896916e-06,
      "loss": 0.1672,
      "step": 7547
    },
    {
      "epoch": 0.22019954489760196,
      "grad_norm": 1.0068121967005164,
      "learning_rate": 9.081096954024653e-06,
      "loss": 0.167,
      "step": 7548
    },
    {
      "epoch": 0.22022871812824552,
      "grad_norm": 0.8631234713501891,
      "learning_rate": 9.080823990717332e-06,
      "loss": 0.1774,
      "step": 7549
    },
    {
      "epoch": 0.2202578913588891,
      "grad_norm": 0.938971277102497,
      "learning_rate": 9.080550990977388e-06,
      "loss": 0.1621,
      "step": 7550
    },
    {
      "epoch": 0.22028706458953265,
      "grad_norm": 0.9393416019374851,
      "learning_rate": 9.08027795480726e-06,
      "loss": 0.1563,
      "step": 7551
    },
    {
      "epoch": 0.2203162378201762,
      "grad_norm": 0.9137967490648851,
      "learning_rate": 9.080004882209384e-06,
      "loss": 0.1699,
      "step": 7552
    },
    {
      "epoch": 0.22034541105081976,
      "grad_norm": 0.7496392812503637,
      "learning_rate": 9.079731773186196e-06,
      "loss": 0.1408,
      "step": 7553
    },
    {
      "epoch": 0.22037458428146334,
      "grad_norm": 0.9376169250918007,
      "learning_rate": 9.079458627740139e-06,
      "loss": 0.1679,
      "step": 7554
    },
    {
      "epoch": 0.2204037575121069,
      "grad_norm": 0.7795422893143354,
      "learning_rate": 9.079185445873649e-06,
      "loss": 0.1712,
      "step": 7555
    },
    {
      "epoch": 0.22043293074275044,
      "grad_norm": 0.8337107539120889,
      "learning_rate": 9.078912227589166e-06,
      "loss": 0.1687,
      "step": 7556
    },
    {
      "epoch": 0.22046210397339402,
      "grad_norm": 0.8084468214276382,
      "learning_rate": 9.078638972889126e-06,
      "loss": 0.1454,
      "step": 7557
    },
    {
      "epoch": 0.22049127720403758,
      "grad_norm": 0.7236410382389844,
      "learning_rate": 9.078365681775974e-06,
      "loss": 0.1607,
      "step": 7558
    },
    {
      "epoch": 0.22052045043468113,
      "grad_norm": 0.8600214654896388,
      "learning_rate": 9.078092354252143e-06,
      "loss": 0.1388,
      "step": 7559
    },
    {
      "epoch": 0.22054962366532468,
      "grad_norm": 1.0482883562274232,
      "learning_rate": 9.07781899032008e-06,
      "loss": 0.175,
      "step": 7560
    },
    {
      "epoch": 0.22057879689596827,
      "grad_norm": 0.7623423869872636,
      "learning_rate": 9.077545589982221e-06,
      "loss": 0.1719,
      "step": 7561
    },
    {
      "epoch": 0.22060797012661182,
      "grad_norm": 0.8428118785734672,
      "learning_rate": 9.077272153241008e-06,
      "loss": 0.1391,
      "step": 7562
    },
    {
      "epoch": 0.22063714335725537,
      "grad_norm": 0.8976367068085978,
      "learning_rate": 9.076998680098883e-06,
      "loss": 0.1574,
      "step": 7563
    },
    {
      "epoch": 0.22066631658789895,
      "grad_norm": 0.7699393351335566,
      "learning_rate": 9.076725170558289e-06,
      "loss": 0.1437,
      "step": 7564
    },
    {
      "epoch": 0.2206954898185425,
      "grad_norm": 0.8454398342400627,
      "learning_rate": 9.076451624621665e-06,
      "loss": 0.1804,
      "step": 7565
    },
    {
      "epoch": 0.22072466304918606,
      "grad_norm": 0.838918798589249,
      "learning_rate": 9.076178042291453e-06,
      "loss": 0.1415,
      "step": 7566
    },
    {
      "epoch": 0.22075383627982964,
      "grad_norm": 0.8760292330861484,
      "learning_rate": 9.075904423570096e-06,
      "loss": 0.1506,
      "step": 7567
    },
    {
      "epoch": 0.2207830095104732,
      "grad_norm": 0.8392243222768452,
      "learning_rate": 9.075630768460037e-06,
      "loss": 0.1394,
      "step": 7568
    },
    {
      "epoch": 0.22081218274111675,
      "grad_norm": 0.7150461773229624,
      "learning_rate": 9.075357076963723e-06,
      "loss": 0.1623,
      "step": 7569
    },
    {
      "epoch": 0.2208413559717603,
      "grad_norm": 1.021782821192606,
      "learning_rate": 9.07508334908359e-06,
      "loss": 0.1661,
      "step": 7570
    },
    {
      "epoch": 0.22087052920240388,
      "grad_norm": 0.7182875658508563,
      "learning_rate": 9.074809584822087e-06,
      "loss": 0.1565,
      "step": 7571
    },
    {
      "epoch": 0.22089970243304743,
      "grad_norm": 0.7924182059793997,
      "learning_rate": 9.074535784181658e-06,
      "loss": 0.1697,
      "step": 7572
    },
    {
      "epoch": 0.220928875663691,
      "grad_norm": 0.7716242050220606,
      "learning_rate": 9.074261947164744e-06,
      "loss": 0.1507,
      "step": 7573
    },
    {
      "epoch": 0.22095804889433457,
      "grad_norm": 0.8161076577769065,
      "learning_rate": 9.073988073773792e-06,
      "loss": 0.1559,
      "step": 7574
    },
    {
      "epoch": 0.22098722212497812,
      "grad_norm": 0.9744163088086125,
      "learning_rate": 9.07371416401125e-06,
      "loss": 0.1756,
      "step": 7575
    },
    {
      "epoch": 0.22101639535562168,
      "grad_norm": 0.9554848713519216,
      "learning_rate": 9.073440217879557e-06,
      "loss": 0.1575,
      "step": 7576
    },
    {
      "epoch": 0.22104556858626526,
      "grad_norm": 0.7954637106628107,
      "learning_rate": 9.073166235381163e-06,
      "loss": 0.141,
      "step": 7577
    },
    {
      "epoch": 0.2210747418169088,
      "grad_norm": 0.977347338980649,
      "learning_rate": 9.072892216518513e-06,
      "loss": 0.1734,
      "step": 7578
    },
    {
      "epoch": 0.22110391504755236,
      "grad_norm": 0.8007874726654638,
      "learning_rate": 9.072618161294056e-06,
      "loss": 0.1746,
      "step": 7579
    },
    {
      "epoch": 0.22113308827819592,
      "grad_norm": 0.8751881298397141,
      "learning_rate": 9.072344069710234e-06,
      "loss": 0.1724,
      "step": 7580
    },
    {
      "epoch": 0.2211622615088395,
      "grad_norm": 0.8967593436275193,
      "learning_rate": 9.072069941769497e-06,
      "loss": 0.1496,
      "step": 7581
    },
    {
      "epoch": 0.22119143473948305,
      "grad_norm": 0.8527490070747805,
      "learning_rate": 9.071795777474291e-06,
      "loss": 0.1673,
      "step": 7582
    },
    {
      "epoch": 0.2212206079701266,
      "grad_norm": 0.7119002806549058,
      "learning_rate": 9.071521576827066e-06,
      "loss": 0.1588,
      "step": 7583
    },
    {
      "epoch": 0.22124978120077018,
      "grad_norm": 0.7191548340724161,
      "learning_rate": 9.071247339830266e-06,
      "loss": 0.1735,
      "step": 7584
    },
    {
      "epoch": 0.22127895443141374,
      "grad_norm": 0.7932341849874444,
      "learning_rate": 9.070973066486343e-06,
      "loss": 0.1472,
      "step": 7585
    },
    {
      "epoch": 0.2213081276620573,
      "grad_norm": 0.9788378489903121,
      "learning_rate": 9.070698756797744e-06,
      "loss": 0.1782,
      "step": 7586
    },
    {
      "epoch": 0.22133730089270084,
      "grad_norm": 0.670224463952022,
      "learning_rate": 9.070424410766918e-06,
      "loss": 0.1493,
      "step": 7587
    },
    {
      "epoch": 0.22136647412334443,
      "grad_norm": 1.100490778659628,
      "learning_rate": 9.070150028396315e-06,
      "loss": 0.1666,
      "step": 7588
    },
    {
      "epoch": 0.22139564735398798,
      "grad_norm": 0.9397402986306163,
      "learning_rate": 9.069875609688384e-06,
      "loss": 0.1372,
      "step": 7589
    },
    {
      "epoch": 0.22142482058463153,
      "grad_norm": 0.8819686288122192,
      "learning_rate": 9.069601154645575e-06,
      "loss": 0.1703,
      "step": 7590
    },
    {
      "epoch": 0.2214539938152751,
      "grad_norm": 1.0019783126541315,
      "learning_rate": 9.06932666327034e-06,
      "loss": 0.1793,
      "step": 7591
    },
    {
      "epoch": 0.22148316704591867,
      "grad_norm": 0.8283424701988963,
      "learning_rate": 9.069052135565126e-06,
      "loss": 0.1506,
      "step": 7592
    },
    {
      "epoch": 0.22151234027656222,
      "grad_norm": 1.0498749053308172,
      "learning_rate": 9.068777571532385e-06,
      "loss": 0.1609,
      "step": 7593
    },
    {
      "epoch": 0.2215415135072058,
      "grad_norm": 0.845997786865884,
      "learning_rate": 9.06850297117457e-06,
      "loss": 0.1591,
      "step": 7594
    },
    {
      "epoch": 0.22157068673784935,
      "grad_norm": 0.9371159908291806,
      "learning_rate": 9.068228334494133e-06,
      "loss": 0.1713,
      "step": 7595
    },
    {
      "epoch": 0.2215998599684929,
      "grad_norm": 1.0625213095938626,
      "learning_rate": 9.067953661493524e-06,
      "loss": 0.1835,
      "step": 7596
    },
    {
      "epoch": 0.22162903319913646,
      "grad_norm": 0.9265621786408575,
      "learning_rate": 9.067678952175196e-06,
      "loss": 0.1658,
      "step": 7597
    },
    {
      "epoch": 0.22165820642978004,
      "grad_norm": 0.8570825544727586,
      "learning_rate": 9.067404206541601e-06,
      "loss": 0.1519,
      "step": 7598
    },
    {
      "epoch": 0.2216873796604236,
      "grad_norm": 0.8283111861076646,
      "learning_rate": 9.067129424595191e-06,
      "loss": 0.1553,
      "step": 7599
    },
    {
      "epoch": 0.22171655289106715,
      "grad_norm": 0.7113587454962451,
      "learning_rate": 9.066854606338422e-06,
      "loss": 0.162,
      "step": 7600
    },
    {
      "epoch": 0.22174572612171073,
      "grad_norm": 0.7252270028494985,
      "learning_rate": 9.066579751773745e-06,
      "loss": 0.1607,
      "step": 7601
    },
    {
      "epoch": 0.22177489935235428,
      "grad_norm": 0.734893412520523,
      "learning_rate": 9.066304860903616e-06,
      "loss": 0.1618,
      "step": 7602
    },
    {
      "epoch": 0.22180407258299784,
      "grad_norm": 0.887711150723681,
      "learning_rate": 9.066029933730486e-06,
      "loss": 0.1661,
      "step": 7603
    },
    {
      "epoch": 0.22183324581364142,
      "grad_norm": 0.9686223705673456,
      "learning_rate": 9.065754970256813e-06,
      "loss": 0.1698,
      "step": 7604
    },
    {
      "epoch": 0.22186241904428497,
      "grad_norm": 0.6550977232812051,
      "learning_rate": 9.06547997048505e-06,
      "loss": 0.1545,
      "step": 7605
    },
    {
      "epoch": 0.22189159227492852,
      "grad_norm": 0.8286044566302697,
      "learning_rate": 9.065204934417654e-06,
      "loss": 0.1617,
      "step": 7606
    },
    {
      "epoch": 0.22192076550557208,
      "grad_norm": 0.9547923364302132,
      "learning_rate": 9.064929862057075e-06,
      "loss": 0.17,
      "step": 7607
    },
    {
      "epoch": 0.22194993873621566,
      "grad_norm": 0.7569291356378777,
      "learning_rate": 9.064654753405775e-06,
      "loss": 0.1646,
      "step": 7608
    },
    {
      "epoch": 0.2219791119668592,
      "grad_norm": 1.008866085330164,
      "learning_rate": 9.064379608466207e-06,
      "loss": 0.1888,
      "step": 7609
    },
    {
      "epoch": 0.22200828519750276,
      "grad_norm": 0.8024048623313427,
      "learning_rate": 9.064104427240828e-06,
      "loss": 0.1932,
      "step": 7610
    },
    {
      "epoch": 0.22203745842814634,
      "grad_norm": 0.796585074083653,
      "learning_rate": 9.063829209732096e-06,
      "loss": 0.1537,
      "step": 7611
    },
    {
      "epoch": 0.2220666316587899,
      "grad_norm": 0.7401358079354325,
      "learning_rate": 9.063553955942465e-06,
      "loss": 0.1483,
      "step": 7612
    },
    {
      "epoch": 0.22209580488943345,
      "grad_norm": 0.7607351143856173,
      "learning_rate": 9.063278665874396e-06,
      "loss": 0.1658,
      "step": 7613
    },
    {
      "epoch": 0.222124978120077,
      "grad_norm": 0.7940261184520345,
      "learning_rate": 9.063003339530342e-06,
      "loss": 0.1508,
      "step": 7614
    },
    {
      "epoch": 0.22215415135072059,
      "grad_norm": 0.71268964953337,
      "learning_rate": 9.062727976912769e-06,
      "loss": 0.1631,
      "step": 7615
    },
    {
      "epoch": 0.22218332458136414,
      "grad_norm": 0.9682479161327682,
      "learning_rate": 9.062452578024128e-06,
      "loss": 0.1882,
      "step": 7616
    },
    {
      "epoch": 0.2222124978120077,
      "grad_norm": 0.7615084534557112,
      "learning_rate": 9.062177142866879e-06,
      "loss": 0.1651,
      "step": 7617
    },
    {
      "epoch": 0.22224167104265127,
      "grad_norm": 0.9499185985324785,
      "learning_rate": 9.061901671443483e-06,
      "loss": 0.1679,
      "step": 7618
    },
    {
      "epoch": 0.22227084427329483,
      "grad_norm": 0.7525132228183636,
      "learning_rate": 9.061626163756398e-06,
      "loss": 0.1599,
      "step": 7619
    },
    {
      "epoch": 0.22230001750393838,
      "grad_norm": 0.7786034564604447,
      "learning_rate": 9.061350619808086e-06,
      "loss": 0.1937,
      "step": 7620
    },
    {
      "epoch": 0.22232919073458196,
      "grad_norm": 0.9028832343859958,
      "learning_rate": 9.061075039601003e-06,
      "loss": 0.1712,
      "step": 7621
    },
    {
      "epoch": 0.22235836396522551,
      "grad_norm": 0.7954470304362796,
      "learning_rate": 9.060799423137615e-06,
      "loss": 0.1555,
      "step": 7622
    },
    {
      "epoch": 0.22238753719586907,
      "grad_norm": 0.9866484570970352,
      "learning_rate": 9.060523770420376e-06,
      "loss": 0.1547,
      "step": 7623
    },
    {
      "epoch": 0.22241671042651262,
      "grad_norm": 0.7820834776528715,
      "learning_rate": 9.060248081451752e-06,
      "loss": 0.1512,
      "step": 7624
    },
    {
      "epoch": 0.2224458836571562,
      "grad_norm": 0.8186999136404977,
      "learning_rate": 9.059972356234202e-06,
      "loss": 0.1533,
      "step": 7625
    },
    {
      "epoch": 0.22247505688779975,
      "grad_norm": 0.991945352238341,
      "learning_rate": 9.059696594770186e-06,
      "loss": 0.1582,
      "step": 7626
    },
    {
      "epoch": 0.2225042301184433,
      "grad_norm": 0.8035245190630039,
      "learning_rate": 9.059420797062169e-06,
      "loss": 0.154,
      "step": 7627
    },
    {
      "epoch": 0.2225334033490869,
      "grad_norm": 1.0123527087147737,
      "learning_rate": 9.059144963112612e-06,
      "loss": 0.1683,
      "step": 7628
    },
    {
      "epoch": 0.22256257657973044,
      "grad_norm": 0.7261878728564278,
      "learning_rate": 9.058869092923979e-06,
      "loss": 0.1624,
      "step": 7629
    },
    {
      "epoch": 0.222591749810374,
      "grad_norm": 0.8661584585519259,
      "learning_rate": 9.058593186498731e-06,
      "loss": 0.1308,
      "step": 7630
    },
    {
      "epoch": 0.22262092304101755,
      "grad_norm": 0.8825084494799464,
      "learning_rate": 9.058317243839333e-06,
      "loss": 0.1872,
      "step": 7631
    },
    {
      "epoch": 0.22265009627166113,
      "grad_norm": 0.782826852717805,
      "learning_rate": 9.058041264948244e-06,
      "loss": 0.1744,
      "step": 7632
    },
    {
      "epoch": 0.22267926950230468,
      "grad_norm": 0.7472818214781765,
      "learning_rate": 9.057765249827935e-06,
      "loss": 0.165,
      "step": 7633
    },
    {
      "epoch": 0.22270844273294824,
      "grad_norm": 0.8827334418567251,
      "learning_rate": 9.057489198480864e-06,
      "loss": 0.1589,
      "step": 7634
    },
    {
      "epoch": 0.22273761596359182,
      "grad_norm": 0.7843369468588055,
      "learning_rate": 9.057213110909499e-06,
      "loss": 0.1557,
      "step": 7635
    },
    {
      "epoch": 0.22276678919423537,
      "grad_norm": 0.8039448951636196,
      "learning_rate": 9.056936987116304e-06,
      "loss": 0.1531,
      "step": 7636
    },
    {
      "epoch": 0.22279596242487892,
      "grad_norm": 0.9018143498707185,
      "learning_rate": 9.056660827103744e-06,
      "loss": 0.1699,
      "step": 7637
    },
    {
      "epoch": 0.2228251356555225,
      "grad_norm": 0.8453876767618507,
      "learning_rate": 9.056384630874284e-06,
      "loss": 0.1643,
      "step": 7638
    },
    {
      "epoch": 0.22285430888616606,
      "grad_norm": 1.112609241081302,
      "learning_rate": 9.056108398430392e-06,
      "loss": 0.1428,
      "step": 7639
    },
    {
      "epoch": 0.2228834821168096,
      "grad_norm": 1.1699841161145523,
      "learning_rate": 9.055832129774531e-06,
      "loss": 0.1922,
      "step": 7640
    },
    {
      "epoch": 0.22291265534745316,
      "grad_norm": 0.7491678728972175,
      "learning_rate": 9.05555582490917e-06,
      "loss": 0.1632,
      "step": 7641
    },
    {
      "epoch": 0.22294182857809675,
      "grad_norm": 1.2193349843145231,
      "learning_rate": 9.055279483836773e-06,
      "loss": 0.1593,
      "step": 7642
    },
    {
      "epoch": 0.2229710018087403,
      "grad_norm": 0.9511142377842252,
      "learning_rate": 9.05500310655981e-06,
      "loss": 0.1545,
      "step": 7643
    },
    {
      "epoch": 0.22300017503938385,
      "grad_norm": 0.8744041500940533,
      "learning_rate": 9.054726693080748e-06,
      "loss": 0.1463,
      "step": 7644
    },
    {
      "epoch": 0.22302934827002743,
      "grad_norm": 1.0206027985283834,
      "learning_rate": 9.054450243402054e-06,
      "loss": 0.1832,
      "step": 7645
    },
    {
      "epoch": 0.223058521500671,
      "grad_norm": 0.9566518601858032,
      "learning_rate": 9.054173757526195e-06,
      "loss": 0.1564,
      "step": 7646
    },
    {
      "epoch": 0.22308769473131454,
      "grad_norm": 0.7813565535186373,
      "learning_rate": 9.05389723545564e-06,
      "loss": 0.2013,
      "step": 7647
    },
    {
      "epoch": 0.22311686796195812,
      "grad_norm": 0.7598699186360195,
      "learning_rate": 9.053620677192859e-06,
      "loss": 0.1319,
      "step": 7648
    },
    {
      "epoch": 0.22314604119260167,
      "grad_norm": 0.9025666236147157,
      "learning_rate": 9.05334408274032e-06,
      "loss": 0.173,
      "step": 7649
    },
    {
      "epoch": 0.22317521442324523,
      "grad_norm": 0.984611023588771,
      "learning_rate": 9.053067452100493e-06,
      "loss": 0.1639,
      "step": 7650
    },
    {
      "epoch": 0.22320438765388878,
      "grad_norm": 0.6207124157241103,
      "learning_rate": 9.052790785275848e-06,
      "loss": 0.1552,
      "step": 7651
    },
    {
      "epoch": 0.22323356088453236,
      "grad_norm": 0.7937699128727909,
      "learning_rate": 9.052514082268853e-06,
      "loss": 0.1614,
      "step": 7652
    },
    {
      "epoch": 0.22326273411517591,
      "grad_norm": 0.9311623697082175,
      "learning_rate": 9.052237343081982e-06,
      "loss": 0.1566,
      "step": 7653
    },
    {
      "epoch": 0.22329190734581947,
      "grad_norm": 0.779768384518718,
      "learning_rate": 9.051960567717702e-06,
      "loss": 0.1512,
      "step": 7654
    },
    {
      "epoch": 0.22332108057646305,
      "grad_norm": 0.783404962765794,
      "learning_rate": 9.051683756178484e-06,
      "loss": 0.1609,
      "step": 7655
    },
    {
      "epoch": 0.2233502538071066,
      "grad_norm": 1.0170932451725594,
      "learning_rate": 9.051406908466803e-06,
      "loss": 0.186,
      "step": 7656
    },
    {
      "epoch": 0.22337942703775016,
      "grad_norm": 0.9983335483662694,
      "learning_rate": 9.051130024585125e-06,
      "loss": 0.1659,
      "step": 7657
    },
    {
      "epoch": 0.2234086002683937,
      "grad_norm": 0.9680024818605943,
      "learning_rate": 9.050853104535927e-06,
      "loss": 0.1702,
      "step": 7658
    },
    {
      "epoch": 0.2234377734990373,
      "grad_norm": 0.9655365420641593,
      "learning_rate": 9.05057614832168e-06,
      "loss": 0.1733,
      "step": 7659
    },
    {
      "epoch": 0.22346694672968084,
      "grad_norm": 0.8928412249761869,
      "learning_rate": 9.050299155944857e-06,
      "loss": 0.1479,
      "step": 7660
    },
    {
      "epoch": 0.2234961199603244,
      "grad_norm": 0.7972993805394739,
      "learning_rate": 9.050022127407928e-06,
      "loss": 0.1679,
      "step": 7661
    },
    {
      "epoch": 0.22352529319096798,
      "grad_norm": 0.9849039289849973,
      "learning_rate": 9.049745062713368e-06,
      "loss": 0.1443,
      "step": 7662
    },
    {
      "epoch": 0.22355446642161153,
      "grad_norm": 0.9891067194514748,
      "learning_rate": 9.049467961863652e-06,
      "loss": 0.1489,
      "step": 7663
    },
    {
      "epoch": 0.22358363965225508,
      "grad_norm": 0.8719737764767784,
      "learning_rate": 9.049190824861254e-06,
      "loss": 0.1379,
      "step": 7664
    },
    {
      "epoch": 0.22361281288289866,
      "grad_norm": 1.1786497385711123,
      "learning_rate": 9.048913651708643e-06,
      "loss": 0.1486,
      "step": 7665
    },
    {
      "epoch": 0.22364198611354222,
      "grad_norm": 0.9145057571417332,
      "learning_rate": 9.048636442408302e-06,
      "loss": 0.1503,
      "step": 7666
    },
    {
      "epoch": 0.22367115934418577,
      "grad_norm": 0.8945697892923155,
      "learning_rate": 9.0483591969627e-06,
      "loss": 0.1748,
      "step": 7667
    },
    {
      "epoch": 0.22370033257482932,
      "grad_norm": 0.9387624666252707,
      "learning_rate": 9.048081915374312e-06,
      "loss": 0.168,
      "step": 7668
    },
    {
      "epoch": 0.2237295058054729,
      "grad_norm": 1.0523629310613862,
      "learning_rate": 9.047804597645615e-06,
      "loss": 0.1904,
      "step": 7669
    },
    {
      "epoch": 0.22375867903611646,
      "grad_norm": 0.9639725962769458,
      "learning_rate": 9.047527243779086e-06,
      "loss": 0.1773,
      "step": 7670
    },
    {
      "epoch": 0.22378785226676,
      "grad_norm": 0.7872886284236622,
      "learning_rate": 9.047249853777201e-06,
      "loss": 0.1709,
      "step": 7671
    },
    {
      "epoch": 0.2238170254974036,
      "grad_norm": 0.9870747744907474,
      "learning_rate": 9.046972427642434e-06,
      "loss": 0.1828,
      "step": 7672
    },
    {
      "epoch": 0.22384619872804715,
      "grad_norm": 0.8699741891418625,
      "learning_rate": 9.046694965377263e-06,
      "loss": 0.1641,
      "step": 7673
    },
    {
      "epoch": 0.2238753719586907,
      "grad_norm": 0.9056957844811336,
      "learning_rate": 9.046417466984165e-06,
      "loss": 0.1732,
      "step": 7674
    },
    {
      "epoch": 0.22390454518933425,
      "grad_norm": 0.943514829476528,
      "learning_rate": 9.046139932465618e-06,
      "loss": 0.1731,
      "step": 7675
    },
    {
      "epoch": 0.22393371841997783,
      "grad_norm": 0.8878351010150347,
      "learning_rate": 9.045862361824101e-06,
      "loss": 0.1441,
      "step": 7676
    },
    {
      "epoch": 0.2239628916506214,
      "grad_norm": 1.0017549404017092,
      "learning_rate": 9.04558475506209e-06,
      "loss": 0.1479,
      "step": 7677
    },
    {
      "epoch": 0.22399206488126494,
      "grad_norm": 0.7349079912287529,
      "learning_rate": 9.045307112182064e-06,
      "loss": 0.1555,
      "step": 7678
    },
    {
      "epoch": 0.22402123811190852,
      "grad_norm": 0.8843178662615621,
      "learning_rate": 9.045029433186502e-06,
      "loss": 0.1641,
      "step": 7679
    },
    {
      "epoch": 0.22405041134255207,
      "grad_norm": 1.0304141828281947,
      "learning_rate": 9.044751718077883e-06,
      "loss": 0.162,
      "step": 7680
    },
    {
      "epoch": 0.22407958457319563,
      "grad_norm": 0.7978965128000045,
      "learning_rate": 9.044473966858684e-06,
      "loss": 0.1535,
      "step": 7681
    },
    {
      "epoch": 0.2241087578038392,
      "grad_norm": 0.8422392322066434,
      "learning_rate": 9.044196179531389e-06,
      "loss": 0.1747,
      "step": 7682
    },
    {
      "epoch": 0.22413793103448276,
      "grad_norm": 0.9792872894157268,
      "learning_rate": 9.043918356098476e-06,
      "loss": 0.1619,
      "step": 7683
    },
    {
      "epoch": 0.22416710426512632,
      "grad_norm": 0.8599377481402988,
      "learning_rate": 9.043640496562425e-06,
      "loss": 0.1542,
      "step": 7684
    },
    {
      "epoch": 0.22419627749576987,
      "grad_norm": 0.884419926074693,
      "learning_rate": 9.043362600925717e-06,
      "loss": 0.153,
      "step": 7685
    },
    {
      "epoch": 0.22422545072641345,
      "grad_norm": 0.947556215129273,
      "learning_rate": 9.043084669190832e-06,
      "loss": 0.1569,
      "step": 7686
    },
    {
      "epoch": 0.224254623957057,
      "grad_norm": 0.7913255157996661,
      "learning_rate": 9.042806701360254e-06,
      "loss": 0.1658,
      "step": 7687
    },
    {
      "epoch": 0.22428379718770056,
      "grad_norm": 0.7619208615017694,
      "learning_rate": 9.042528697436461e-06,
      "loss": 0.1452,
      "step": 7688
    },
    {
      "epoch": 0.22431297041834414,
      "grad_norm": 0.8566179978048031,
      "learning_rate": 9.042250657421938e-06,
      "loss": 0.1651,
      "step": 7689
    },
    {
      "epoch": 0.2243421436489877,
      "grad_norm": 0.8134848864736269,
      "learning_rate": 9.041972581319165e-06,
      "loss": 0.1654,
      "step": 7690
    },
    {
      "epoch": 0.22437131687963124,
      "grad_norm": 0.7432956393861663,
      "learning_rate": 9.041694469130628e-06,
      "loss": 0.1654,
      "step": 7691
    },
    {
      "epoch": 0.22440049011027483,
      "grad_norm": 0.7811840870532569,
      "learning_rate": 9.041416320858804e-06,
      "loss": 0.143,
      "step": 7692
    },
    {
      "epoch": 0.22442966334091838,
      "grad_norm": 0.8061525347883657,
      "learning_rate": 9.041138136506183e-06,
      "loss": 0.1653,
      "step": 7693
    },
    {
      "epoch": 0.22445883657156193,
      "grad_norm": 0.688503899823051,
      "learning_rate": 9.040859916075244e-06,
      "loss": 0.1478,
      "step": 7694
    },
    {
      "epoch": 0.22448800980220548,
      "grad_norm": 0.9075490749612564,
      "learning_rate": 9.040581659568472e-06,
      "loss": 0.139,
      "step": 7695
    },
    {
      "epoch": 0.22451718303284907,
      "grad_norm": 0.6928728556934488,
      "learning_rate": 9.040303366988353e-06,
      "loss": 0.1364,
      "step": 7696
    },
    {
      "epoch": 0.22454635626349262,
      "grad_norm": 0.6761159505016567,
      "learning_rate": 9.04002503833737e-06,
      "loss": 0.1437,
      "step": 7697
    },
    {
      "epoch": 0.22457552949413617,
      "grad_norm": 0.8425281803151635,
      "learning_rate": 9.039746673618007e-06,
      "loss": 0.1497,
      "step": 7698
    },
    {
      "epoch": 0.22460470272477975,
      "grad_norm": 0.9503518187886093,
      "learning_rate": 9.039468272832749e-06,
      "loss": 0.1809,
      "step": 7699
    },
    {
      "epoch": 0.2246338759554233,
      "grad_norm": 0.9150172811661464,
      "learning_rate": 9.039189835984085e-06,
      "loss": 0.1531,
      "step": 7700
    },
    {
      "epoch": 0.22466304918606686,
      "grad_norm": 0.9210411178398046,
      "learning_rate": 9.038911363074495e-06,
      "loss": 0.1634,
      "step": 7701
    },
    {
      "epoch": 0.2246922224167104,
      "grad_norm": 0.783002610336514,
      "learning_rate": 9.038632854106473e-06,
      "loss": 0.1898,
      "step": 7702
    },
    {
      "epoch": 0.224721395647354,
      "grad_norm": 0.7402063151144914,
      "learning_rate": 9.038354309082498e-06,
      "loss": 0.1468,
      "step": 7703
    },
    {
      "epoch": 0.22475056887799755,
      "grad_norm": 0.7865639516467352,
      "learning_rate": 9.038075728005061e-06,
      "loss": 0.1436,
      "step": 7704
    },
    {
      "epoch": 0.2247797421086411,
      "grad_norm": 0.8337676546211781,
      "learning_rate": 9.037797110876645e-06,
      "loss": 0.1687,
      "step": 7705
    },
    {
      "epoch": 0.22480891533928468,
      "grad_norm": 0.7662961401587532,
      "learning_rate": 9.037518457699744e-06,
      "loss": 0.145,
      "step": 7706
    },
    {
      "epoch": 0.22483808856992823,
      "grad_norm": 1.0198900699113442,
      "learning_rate": 9.03723976847684e-06,
      "loss": 0.1467,
      "step": 7707
    },
    {
      "epoch": 0.2248672618005718,
      "grad_norm": 0.6969708413849717,
      "learning_rate": 9.036961043210424e-06,
      "loss": 0.1569,
      "step": 7708
    },
    {
      "epoch": 0.22489643503121537,
      "grad_norm": 0.7766844709857786,
      "learning_rate": 9.036682281902984e-06,
      "loss": 0.1604,
      "step": 7709
    },
    {
      "epoch": 0.22492560826185892,
      "grad_norm": 0.8059386561736319,
      "learning_rate": 9.036403484557005e-06,
      "loss": 0.1539,
      "step": 7710
    },
    {
      "epoch": 0.22495478149250248,
      "grad_norm": 0.865283687640427,
      "learning_rate": 9.036124651174983e-06,
      "loss": 0.1607,
      "step": 7711
    },
    {
      "epoch": 0.22498395472314603,
      "grad_norm": 0.7670100636936602,
      "learning_rate": 9.035845781759403e-06,
      "loss": 0.1407,
      "step": 7712
    },
    {
      "epoch": 0.2250131279537896,
      "grad_norm": 1.0289788523648595,
      "learning_rate": 9.035566876312754e-06,
      "loss": 0.1828,
      "step": 7713
    },
    {
      "epoch": 0.22504230118443316,
      "grad_norm": 0.8626691997769976,
      "learning_rate": 9.035287934837529e-06,
      "loss": 0.158,
      "step": 7714
    },
    {
      "epoch": 0.22507147441507672,
      "grad_norm": 0.9646817622704134,
      "learning_rate": 9.035008957336215e-06,
      "loss": 0.1708,
      "step": 7715
    },
    {
      "epoch": 0.2251006476457203,
      "grad_norm": 0.8368005802837244,
      "learning_rate": 9.034729943811304e-06,
      "loss": 0.1681,
      "step": 7716
    },
    {
      "epoch": 0.22512982087636385,
      "grad_norm": 0.8280718769062629,
      "learning_rate": 9.034450894265288e-06,
      "loss": 0.1519,
      "step": 7717
    },
    {
      "epoch": 0.2251589941070074,
      "grad_norm": 1.0617486656027086,
      "learning_rate": 9.034171808700657e-06,
      "loss": 0.1667,
      "step": 7718
    },
    {
      "epoch": 0.22518816733765099,
      "grad_norm": 0.912307137057485,
      "learning_rate": 9.033892687119905e-06,
      "loss": 0.1751,
      "step": 7719
    },
    {
      "epoch": 0.22521734056829454,
      "grad_norm": 0.9032676593843462,
      "learning_rate": 9.03361352952552e-06,
      "loss": 0.1835,
      "step": 7720
    },
    {
      "epoch": 0.2252465137989381,
      "grad_norm": 1.1243956755698408,
      "learning_rate": 9.033334335919997e-06,
      "loss": 0.1802,
      "step": 7721
    },
    {
      "epoch": 0.22527568702958164,
      "grad_norm": 0.8308440720513559,
      "learning_rate": 9.033055106305828e-06,
      "loss": 0.1652,
      "step": 7722
    },
    {
      "epoch": 0.22530486026022523,
      "grad_norm": 0.7229023191329865,
      "learning_rate": 9.032775840685505e-06,
      "loss": 0.1764,
      "step": 7723
    },
    {
      "epoch": 0.22533403349086878,
      "grad_norm": 0.8759747801209383,
      "learning_rate": 9.032496539061523e-06,
      "loss": 0.1576,
      "step": 7724
    },
    {
      "epoch": 0.22536320672151233,
      "grad_norm": 0.8923359849564969,
      "learning_rate": 9.032217201436374e-06,
      "loss": 0.1818,
      "step": 7725
    },
    {
      "epoch": 0.2253923799521559,
      "grad_norm": 0.7070430046591983,
      "learning_rate": 9.031937827812552e-06,
      "loss": 0.1587,
      "step": 7726
    },
    {
      "epoch": 0.22542155318279947,
      "grad_norm": 0.7723221493766538,
      "learning_rate": 9.031658418192553e-06,
      "loss": 0.1787,
      "step": 7727
    },
    {
      "epoch": 0.22545072641344302,
      "grad_norm": 0.9411068171141572,
      "learning_rate": 9.031378972578867e-06,
      "loss": 0.1611,
      "step": 7728
    },
    {
      "epoch": 0.22547989964408657,
      "grad_norm": 0.7716916797419265,
      "learning_rate": 9.031099490973996e-06,
      "loss": 0.1493,
      "step": 7729
    },
    {
      "epoch": 0.22550907287473015,
      "grad_norm": 0.7524008958908079,
      "learning_rate": 9.030819973380429e-06,
      "loss": 0.1593,
      "step": 7730
    },
    {
      "epoch": 0.2255382461053737,
      "grad_norm": 0.9948539072114604,
      "learning_rate": 9.030540419800664e-06,
      "loss": 0.1501,
      "step": 7731
    },
    {
      "epoch": 0.22556741933601726,
      "grad_norm": 0.7505041602274153,
      "learning_rate": 9.030260830237195e-06,
      "loss": 0.1596,
      "step": 7732
    },
    {
      "epoch": 0.22559659256666084,
      "grad_norm": 0.7834396248817043,
      "learning_rate": 9.029981204692521e-06,
      "loss": 0.15,
      "step": 7733
    },
    {
      "epoch": 0.2256257657973044,
      "grad_norm": 0.9118209380807698,
      "learning_rate": 9.029701543169136e-06,
      "loss": 0.155,
      "step": 7734
    },
    {
      "epoch": 0.22565493902794795,
      "grad_norm": 0.7907698722510623,
      "learning_rate": 9.029421845669537e-06,
      "loss": 0.157,
      "step": 7735
    },
    {
      "epoch": 0.22568411225859153,
      "grad_norm": 0.9267610415957518,
      "learning_rate": 9.029142112196224e-06,
      "loss": 0.1624,
      "step": 7736
    },
    {
      "epoch": 0.22571328548923508,
      "grad_norm": 0.7296141636833037,
      "learning_rate": 9.02886234275169e-06,
      "loss": 0.1484,
      "step": 7737
    },
    {
      "epoch": 0.22574245871987864,
      "grad_norm": 0.7530974192188826,
      "learning_rate": 9.028582537338434e-06,
      "loss": 0.1591,
      "step": 7738
    },
    {
      "epoch": 0.2257716319505222,
      "grad_norm": 0.854773736858789,
      "learning_rate": 9.028302695958956e-06,
      "loss": 0.1734,
      "step": 7739
    },
    {
      "epoch": 0.22580080518116577,
      "grad_norm": 0.8437420622238747,
      "learning_rate": 9.028022818615753e-06,
      "loss": 0.1708,
      "step": 7740
    },
    {
      "epoch": 0.22582997841180932,
      "grad_norm": 0.8625507996665871,
      "learning_rate": 9.027742905311324e-06,
      "loss": 0.1676,
      "step": 7741
    },
    {
      "epoch": 0.22585915164245288,
      "grad_norm": 0.8691371780488519,
      "learning_rate": 9.02746295604817e-06,
      "loss": 0.1985,
      "step": 7742
    },
    {
      "epoch": 0.22588832487309646,
      "grad_norm": 0.9278230265563486,
      "learning_rate": 9.027182970828786e-06,
      "loss": 0.1826,
      "step": 7743
    },
    {
      "epoch": 0.22591749810374,
      "grad_norm": 0.7856522119252362,
      "learning_rate": 9.026902949655673e-06,
      "loss": 0.1448,
      "step": 7744
    },
    {
      "epoch": 0.22594667133438356,
      "grad_norm": 0.9002523993168426,
      "learning_rate": 9.026622892531333e-06,
      "loss": 0.1626,
      "step": 7745
    },
    {
      "epoch": 0.22597584456502712,
      "grad_norm": 0.7387181122476415,
      "learning_rate": 9.026342799458265e-06,
      "loss": 0.1374,
      "step": 7746
    },
    {
      "epoch": 0.2260050177956707,
      "grad_norm": 1.1592372854476964,
      "learning_rate": 9.026062670438969e-06,
      "loss": 0.1786,
      "step": 7747
    },
    {
      "epoch": 0.22603419102631425,
      "grad_norm": 0.8886203635134633,
      "learning_rate": 9.025782505475947e-06,
      "loss": 0.1643,
      "step": 7748
    },
    {
      "epoch": 0.2260633642569578,
      "grad_norm": 0.8658568934398153,
      "learning_rate": 9.0255023045717e-06,
      "loss": 0.1575,
      "step": 7749
    },
    {
      "epoch": 0.22609253748760139,
      "grad_norm": 0.8613802901972075,
      "learning_rate": 9.025222067728729e-06,
      "loss": 0.1712,
      "step": 7750
    },
    {
      "epoch": 0.22612171071824494,
      "grad_norm": 0.8466326761128169,
      "learning_rate": 9.024941794949536e-06,
      "loss": 0.1639,
      "step": 7751
    },
    {
      "epoch": 0.2261508839488885,
      "grad_norm": 0.9454126268379517,
      "learning_rate": 9.024661486236624e-06,
      "loss": 0.1487,
      "step": 7752
    },
    {
      "epoch": 0.22618005717953207,
      "grad_norm": 1.190906457612595,
      "learning_rate": 9.024381141592495e-06,
      "loss": 0.1684,
      "step": 7753
    },
    {
      "epoch": 0.22620923041017563,
      "grad_norm": 0.8217592094029972,
      "learning_rate": 9.024100761019652e-06,
      "loss": 0.159,
      "step": 7754
    },
    {
      "epoch": 0.22623840364081918,
      "grad_norm": 0.736328682964108,
      "learning_rate": 9.023820344520597e-06,
      "loss": 0.1481,
      "step": 7755
    },
    {
      "epoch": 0.22626757687146273,
      "grad_norm": 1.043382123408031,
      "learning_rate": 9.023539892097837e-06,
      "loss": 0.161,
      "step": 7756
    },
    {
      "epoch": 0.22629675010210631,
      "grad_norm": 0.7643653720367733,
      "learning_rate": 9.02325940375387e-06,
      "loss": 0.1741,
      "step": 7757
    },
    {
      "epoch": 0.22632592333274987,
      "grad_norm": 0.8874120344038124,
      "learning_rate": 9.022978879491207e-06,
      "loss": 0.1941,
      "step": 7758
    },
    {
      "epoch": 0.22635509656339342,
      "grad_norm": 0.831561926407437,
      "learning_rate": 9.022698319312346e-06,
      "loss": 0.1883,
      "step": 7759
    },
    {
      "epoch": 0.226384269794037,
      "grad_norm": 0.9981630667292514,
      "learning_rate": 9.022417723219797e-06,
      "loss": 0.1562,
      "step": 7760
    },
    {
      "epoch": 0.22641344302468056,
      "grad_norm": 1.485055949321117,
      "learning_rate": 9.02213709121606e-06,
      "loss": 0.1651,
      "step": 7761
    },
    {
      "epoch": 0.2264426162553241,
      "grad_norm": 1.0950257317704974,
      "learning_rate": 9.021856423303645e-06,
      "loss": 0.1634,
      "step": 7762
    },
    {
      "epoch": 0.2264717894859677,
      "grad_norm": 1.3654749199431029,
      "learning_rate": 9.021575719485056e-06,
      "loss": 0.1706,
      "step": 7763
    },
    {
      "epoch": 0.22650096271661124,
      "grad_norm": 1.133425702358128,
      "learning_rate": 9.0212949797628e-06,
      "loss": 0.1572,
      "step": 7764
    },
    {
      "epoch": 0.2265301359472548,
      "grad_norm": 0.8283512805983546,
      "learning_rate": 9.02101420413938e-06,
      "loss": 0.1636,
      "step": 7765
    },
    {
      "epoch": 0.22655930917789835,
      "grad_norm": 1.1206211136306208,
      "learning_rate": 9.020733392617306e-06,
      "loss": 0.1703,
      "step": 7766
    },
    {
      "epoch": 0.22658848240854193,
      "grad_norm": 0.9820767132312471,
      "learning_rate": 9.020452545199084e-06,
      "loss": 0.1867,
      "step": 7767
    },
    {
      "epoch": 0.22661765563918548,
      "grad_norm": 0.7701110439822542,
      "learning_rate": 9.020171661887223e-06,
      "loss": 0.1444,
      "step": 7768
    },
    {
      "epoch": 0.22664682886982904,
      "grad_norm": 0.8374307375015951,
      "learning_rate": 9.019890742684227e-06,
      "loss": 0.1785,
      "step": 7769
    },
    {
      "epoch": 0.22667600210047262,
      "grad_norm": 1.0855926879485878,
      "learning_rate": 9.019609787592607e-06,
      "loss": 0.183,
      "step": 7770
    },
    {
      "epoch": 0.22670517533111617,
      "grad_norm": 0.881521948980321,
      "learning_rate": 9.01932879661487e-06,
      "loss": 0.1556,
      "step": 7771
    },
    {
      "epoch": 0.22673434856175972,
      "grad_norm": 0.8624881531492817,
      "learning_rate": 9.019047769753527e-06,
      "loss": 0.1902,
      "step": 7772
    },
    {
      "epoch": 0.22676352179240328,
      "grad_norm": 0.765844828474478,
      "learning_rate": 9.018766707011082e-06,
      "loss": 0.1273,
      "step": 7773
    },
    {
      "epoch": 0.22679269502304686,
      "grad_norm": 0.9658090031841545,
      "learning_rate": 9.018485608390048e-06,
      "loss": 0.1399,
      "step": 7774
    },
    {
      "epoch": 0.2268218682536904,
      "grad_norm": 0.7638279854341149,
      "learning_rate": 9.018204473892935e-06,
      "loss": 0.1321,
      "step": 7775
    },
    {
      "epoch": 0.22685104148433397,
      "grad_norm": 0.7406486751853761,
      "learning_rate": 9.017923303522251e-06,
      "loss": 0.1457,
      "step": 7776
    },
    {
      "epoch": 0.22688021471497755,
      "grad_norm": 0.7685794357804425,
      "learning_rate": 9.017642097280506e-06,
      "loss": 0.1568,
      "step": 7777
    },
    {
      "epoch": 0.2269093879456211,
      "grad_norm": 0.7109188143174116,
      "learning_rate": 9.017360855170212e-06,
      "loss": 0.1365,
      "step": 7778
    },
    {
      "epoch": 0.22693856117626465,
      "grad_norm": 0.8266958434797916,
      "learning_rate": 9.01707957719388e-06,
      "loss": 0.1551,
      "step": 7779
    },
    {
      "epoch": 0.22696773440690823,
      "grad_norm": 0.8098626300968494,
      "learning_rate": 9.01679826335402e-06,
      "loss": 0.1662,
      "step": 7780
    },
    {
      "epoch": 0.2269969076375518,
      "grad_norm": 0.987600792423462,
      "learning_rate": 9.016516913653144e-06,
      "loss": 0.1809,
      "step": 7781
    },
    {
      "epoch": 0.22702608086819534,
      "grad_norm": 0.7391288497416076,
      "learning_rate": 9.016235528093764e-06,
      "loss": 0.1485,
      "step": 7782
    },
    {
      "epoch": 0.2270552540988389,
      "grad_norm": 0.7452053286443837,
      "learning_rate": 9.015954106678391e-06,
      "loss": 0.1455,
      "step": 7783
    },
    {
      "epoch": 0.22708442732948247,
      "grad_norm": 0.8935608228872053,
      "learning_rate": 9.01567264940954e-06,
      "loss": 0.179,
      "step": 7784
    },
    {
      "epoch": 0.22711360056012603,
      "grad_norm": 0.6328135737272962,
      "learning_rate": 9.01539115628972e-06,
      "loss": 0.1481,
      "step": 7785
    },
    {
      "epoch": 0.22714277379076958,
      "grad_norm": 0.8840860145091277,
      "learning_rate": 9.01510962732145e-06,
      "loss": 0.1723,
      "step": 7786
    },
    {
      "epoch": 0.22717194702141316,
      "grad_norm": 0.867258923074141,
      "learning_rate": 9.014828062507237e-06,
      "loss": 0.1682,
      "step": 7787
    },
    {
      "epoch": 0.22720112025205672,
      "grad_norm": 0.7557198857562272,
      "learning_rate": 9.014546461849597e-06,
      "loss": 0.1663,
      "step": 7788
    },
    {
      "epoch": 0.22723029348270027,
      "grad_norm": 0.9233793516447816,
      "learning_rate": 9.014264825351046e-06,
      "loss": 0.1603,
      "step": 7789
    },
    {
      "epoch": 0.22725946671334385,
      "grad_norm": 0.8904760694334602,
      "learning_rate": 9.013983153014097e-06,
      "loss": 0.1673,
      "step": 7790
    },
    {
      "epoch": 0.2272886399439874,
      "grad_norm": 0.9100790280131235,
      "learning_rate": 9.013701444841262e-06,
      "loss": 0.1546,
      "step": 7791
    },
    {
      "epoch": 0.22731781317463096,
      "grad_norm": 0.825827433111236,
      "learning_rate": 9.013419700835062e-06,
      "loss": 0.1516,
      "step": 7792
    },
    {
      "epoch": 0.2273469864052745,
      "grad_norm": 0.714287197207715,
      "learning_rate": 9.013137920998007e-06,
      "loss": 0.1378,
      "step": 7793
    },
    {
      "epoch": 0.2273761596359181,
      "grad_norm": 1.1462027908735675,
      "learning_rate": 9.012856105332615e-06,
      "loss": 0.1691,
      "step": 7794
    },
    {
      "epoch": 0.22740533286656164,
      "grad_norm": 1.0480933585120638,
      "learning_rate": 9.012574253841401e-06,
      "loss": 0.1668,
      "step": 7795
    },
    {
      "epoch": 0.2274345060972052,
      "grad_norm": 0.8537650131964035,
      "learning_rate": 9.012292366526884e-06,
      "loss": 0.1717,
      "step": 7796
    },
    {
      "epoch": 0.22746367932784878,
      "grad_norm": 0.9896746025215346,
      "learning_rate": 9.012010443391578e-06,
      "loss": 0.1705,
      "step": 7797
    },
    {
      "epoch": 0.22749285255849233,
      "grad_norm": 0.9716721601903818,
      "learning_rate": 9.011728484438e-06,
      "loss": 0.1754,
      "step": 7798
    },
    {
      "epoch": 0.22752202578913588,
      "grad_norm": 0.840118546389893,
      "learning_rate": 9.011446489668667e-06,
      "loss": 0.1485,
      "step": 7799
    },
    {
      "epoch": 0.22755119901977944,
      "grad_norm": 0.7677962194515048,
      "learning_rate": 9.011164459086099e-06,
      "loss": 0.1449,
      "step": 7800
    },
    {
      "epoch": 0.22758037225042302,
      "grad_norm": 0.9554203066954745,
      "learning_rate": 9.010882392692812e-06,
      "loss": 0.1818,
      "step": 7801
    },
    {
      "epoch": 0.22760954548106657,
      "grad_norm": 1.0770749099152734,
      "learning_rate": 9.010600290491323e-06,
      "loss": 0.1599,
      "step": 7802
    },
    {
      "epoch": 0.22763871871171013,
      "grad_norm": 0.7743675527840735,
      "learning_rate": 9.010318152484152e-06,
      "loss": 0.1906,
      "step": 7803
    },
    {
      "epoch": 0.2276678919423537,
      "grad_norm": 1.1281114450625802,
      "learning_rate": 9.01003597867382e-06,
      "loss": 0.1739,
      "step": 7804
    },
    {
      "epoch": 0.22769706517299726,
      "grad_norm": 1.1674191060660375,
      "learning_rate": 9.00975376906284e-06,
      "loss": 0.1616,
      "step": 7805
    },
    {
      "epoch": 0.2277262384036408,
      "grad_norm": 0.9705020625119505,
      "learning_rate": 9.009471523653742e-06,
      "loss": 0.1633,
      "step": 7806
    },
    {
      "epoch": 0.2277554116342844,
      "grad_norm": 0.7453195533992221,
      "learning_rate": 9.009189242449034e-06,
      "loss": 0.1524,
      "step": 7807
    },
    {
      "epoch": 0.22778458486492795,
      "grad_norm": 0.8832157465935807,
      "learning_rate": 9.008906925451243e-06,
      "loss": 0.1605,
      "step": 7808
    },
    {
      "epoch": 0.2278137580955715,
      "grad_norm": 1.1004881089961562,
      "learning_rate": 9.008624572662888e-06,
      "loss": 0.181,
      "step": 7809
    },
    {
      "epoch": 0.22784293132621505,
      "grad_norm": 0.7527910908048386,
      "learning_rate": 9.00834218408649e-06,
      "loss": 0.1729,
      "step": 7810
    },
    {
      "epoch": 0.22787210455685863,
      "grad_norm": 0.7625762769082549,
      "learning_rate": 9.00805975972457e-06,
      "loss": 0.1585,
      "step": 7811
    },
    {
      "epoch": 0.2279012777875022,
      "grad_norm": 0.9127263062150434,
      "learning_rate": 9.007777299579649e-06,
      "loss": 0.1595,
      "step": 7812
    },
    {
      "epoch": 0.22793045101814574,
      "grad_norm": 0.8264397871512847,
      "learning_rate": 9.007494803654249e-06,
      "loss": 0.1672,
      "step": 7813
    },
    {
      "epoch": 0.22795962424878932,
      "grad_norm": 0.9024746727207784,
      "learning_rate": 9.007212271950892e-06,
      "loss": 0.1555,
      "step": 7814
    },
    {
      "epoch": 0.22798879747943288,
      "grad_norm": 0.7173904426501031,
      "learning_rate": 9.006929704472101e-06,
      "loss": 0.1411,
      "step": 7815
    },
    {
      "epoch": 0.22801797071007643,
      "grad_norm": 0.7899023405750852,
      "learning_rate": 9.006647101220398e-06,
      "loss": 0.1432,
      "step": 7816
    },
    {
      "epoch": 0.22804714394071998,
      "grad_norm": 1.022143416772875,
      "learning_rate": 9.006364462198306e-06,
      "loss": 0.1841,
      "step": 7817
    },
    {
      "epoch": 0.22807631717136356,
      "grad_norm": 0.9567311761695345,
      "learning_rate": 9.006081787408348e-06,
      "loss": 0.1545,
      "step": 7818
    },
    {
      "epoch": 0.22810549040200712,
      "grad_norm": 0.9918120469913401,
      "learning_rate": 9.005799076853048e-06,
      "loss": 0.1541,
      "step": 7819
    },
    {
      "epoch": 0.22813466363265067,
      "grad_norm": 0.9430094332135609,
      "learning_rate": 9.00551633053493e-06,
      "loss": 0.1706,
      "step": 7820
    },
    {
      "epoch": 0.22816383686329425,
      "grad_norm": 0.8282651104457783,
      "learning_rate": 9.005233548456518e-06,
      "loss": 0.1478,
      "step": 7821
    },
    {
      "epoch": 0.2281930100939378,
      "grad_norm": 1.0210830831906763,
      "learning_rate": 9.004950730620338e-06,
      "loss": 0.1705,
      "step": 7822
    },
    {
      "epoch": 0.22822218332458136,
      "grad_norm": 0.9474757314337756,
      "learning_rate": 9.004667877028915e-06,
      "loss": 0.1779,
      "step": 7823
    },
    {
      "epoch": 0.22825135655522494,
      "grad_norm": 0.8493551949810291,
      "learning_rate": 9.004384987684771e-06,
      "loss": 0.1598,
      "step": 7824
    },
    {
      "epoch": 0.2282805297858685,
      "grad_norm": 0.9577551249206481,
      "learning_rate": 9.004102062590437e-06,
      "loss": 0.1329,
      "step": 7825
    },
    {
      "epoch": 0.22830970301651204,
      "grad_norm": 0.9244488280811329,
      "learning_rate": 9.003819101748432e-06,
      "loss": 0.1712,
      "step": 7826
    },
    {
      "epoch": 0.2283388762471556,
      "grad_norm": 0.9143973487115579,
      "learning_rate": 9.003536105161288e-06,
      "loss": 0.1776,
      "step": 7827
    },
    {
      "epoch": 0.22836804947779918,
      "grad_norm": 0.9323695028120131,
      "learning_rate": 9.003253072831529e-06,
      "loss": 0.1707,
      "step": 7828
    },
    {
      "epoch": 0.22839722270844273,
      "grad_norm": 1.087380405820929,
      "learning_rate": 9.00297000476168e-06,
      "loss": 0.1712,
      "step": 7829
    },
    {
      "epoch": 0.22842639593908629,
      "grad_norm": 0.7641626718912142,
      "learning_rate": 9.002686900954275e-06,
      "loss": 0.1462,
      "step": 7830
    },
    {
      "epoch": 0.22845556916972987,
      "grad_norm": 0.8528325098822728,
      "learning_rate": 9.002403761411832e-06,
      "loss": 0.1595,
      "step": 7831
    },
    {
      "epoch": 0.22848474240037342,
      "grad_norm": 0.9642626444010138,
      "learning_rate": 9.002120586136887e-06,
      "loss": 0.1678,
      "step": 7832
    },
    {
      "epoch": 0.22851391563101697,
      "grad_norm": 0.9639017949911085,
      "learning_rate": 9.001837375131963e-06,
      "loss": 0.1442,
      "step": 7833
    },
    {
      "epoch": 0.22854308886166055,
      "grad_norm": 0.8683937244315321,
      "learning_rate": 9.00155412839959e-06,
      "loss": 0.1438,
      "step": 7834
    },
    {
      "epoch": 0.2285722620923041,
      "grad_norm": 0.7520997942617577,
      "learning_rate": 9.001270845942298e-06,
      "loss": 0.1417,
      "step": 7835
    },
    {
      "epoch": 0.22860143532294766,
      "grad_norm": 0.9781807377410655,
      "learning_rate": 9.000987527762614e-06,
      "loss": 0.1924,
      "step": 7836
    },
    {
      "epoch": 0.2286306085535912,
      "grad_norm": 0.839190644885023,
      "learning_rate": 9.000704173863071e-06,
      "loss": 0.1753,
      "step": 7837
    },
    {
      "epoch": 0.2286597817842348,
      "grad_norm": 0.6885528433710029,
      "learning_rate": 9.000420784246194e-06,
      "loss": 0.1635,
      "step": 7838
    },
    {
      "epoch": 0.22868895501487835,
      "grad_norm": 0.7794359405780938,
      "learning_rate": 9.000137358914516e-06,
      "loss": 0.1565,
      "step": 7839
    },
    {
      "epoch": 0.2287181282455219,
      "grad_norm": 0.7247611656491247,
      "learning_rate": 8.999853897870565e-06,
      "loss": 0.1522,
      "step": 7840
    },
    {
      "epoch": 0.22874730147616548,
      "grad_norm": 1.0571012770404538,
      "learning_rate": 8.999570401116874e-06,
      "loss": 0.1601,
      "step": 7841
    },
    {
      "epoch": 0.22877647470680904,
      "grad_norm": 0.9208776416346971,
      "learning_rate": 8.999286868655974e-06,
      "loss": 0.1723,
      "step": 7842
    },
    {
      "epoch": 0.2288056479374526,
      "grad_norm": 0.8248710677339088,
      "learning_rate": 8.999003300490396e-06,
      "loss": 0.15,
      "step": 7843
    },
    {
      "epoch": 0.22883482116809614,
      "grad_norm": 0.7639701251864959,
      "learning_rate": 8.99871969662267e-06,
      "loss": 0.1621,
      "step": 7844
    },
    {
      "epoch": 0.22886399439873972,
      "grad_norm": 1.0376494613981613,
      "learning_rate": 8.998436057055332e-06,
      "loss": 0.1783,
      "step": 7845
    },
    {
      "epoch": 0.22889316762938328,
      "grad_norm": 0.7757586736623177,
      "learning_rate": 8.998152381790907e-06,
      "loss": 0.1491,
      "step": 7846
    },
    {
      "epoch": 0.22892234086002683,
      "grad_norm": 0.8881310885196493,
      "learning_rate": 8.997868670831935e-06,
      "loss": 0.1548,
      "step": 7847
    },
    {
      "epoch": 0.2289515140906704,
      "grad_norm": 0.7430788008706929,
      "learning_rate": 8.997584924180945e-06,
      "loss": 0.1647,
      "step": 7848
    },
    {
      "epoch": 0.22898068732131396,
      "grad_norm": 0.8635293715316253,
      "learning_rate": 8.99730114184047e-06,
      "loss": 0.1621,
      "step": 7849
    },
    {
      "epoch": 0.22900986055195752,
      "grad_norm": 0.78795988793354,
      "learning_rate": 8.997017323813046e-06,
      "loss": 0.1434,
      "step": 7850
    },
    {
      "epoch": 0.2290390337826011,
      "grad_norm": 0.7522582486832059,
      "learning_rate": 8.996733470101204e-06,
      "loss": 0.1809,
      "step": 7851
    },
    {
      "epoch": 0.22906820701324465,
      "grad_norm": 0.731333929964769,
      "learning_rate": 8.99644958070748e-06,
      "loss": 0.1564,
      "step": 7852
    },
    {
      "epoch": 0.2290973802438882,
      "grad_norm": 0.8006664639301642,
      "learning_rate": 8.99616565563441e-06,
      "loss": 0.1587,
      "step": 7853
    },
    {
      "epoch": 0.22912655347453176,
      "grad_norm": 0.7840244405115172,
      "learning_rate": 8.995881694884526e-06,
      "loss": 0.1699,
      "step": 7854
    },
    {
      "epoch": 0.22915572670517534,
      "grad_norm": 0.7834716981915015,
      "learning_rate": 8.995597698460364e-06,
      "loss": 0.144,
      "step": 7855
    },
    {
      "epoch": 0.2291848999358189,
      "grad_norm": 0.7539945566652915,
      "learning_rate": 8.99531366636446e-06,
      "loss": 0.1868,
      "step": 7856
    },
    {
      "epoch": 0.22921407316646245,
      "grad_norm": 0.8070913680316629,
      "learning_rate": 8.99502959859935e-06,
      "loss": 0.1511,
      "step": 7857
    },
    {
      "epoch": 0.22924324639710603,
      "grad_norm": 0.7146344547838658,
      "learning_rate": 8.994745495167567e-06,
      "loss": 0.1549,
      "step": 7858
    },
    {
      "epoch": 0.22927241962774958,
      "grad_norm": 0.8230625572382745,
      "learning_rate": 8.994461356071651e-06,
      "loss": 0.1788,
      "step": 7859
    },
    {
      "epoch": 0.22930159285839313,
      "grad_norm": 0.9757606580104359,
      "learning_rate": 8.99417718131414e-06,
      "loss": 0.1405,
      "step": 7860
    },
    {
      "epoch": 0.2293307660890367,
      "grad_norm": 0.7860021861901404,
      "learning_rate": 8.993892970897564e-06,
      "loss": 0.1302,
      "step": 7861
    },
    {
      "epoch": 0.22935993931968027,
      "grad_norm": 0.8665656633073543,
      "learning_rate": 8.993608724824467e-06,
      "loss": 0.1536,
      "step": 7862
    },
    {
      "epoch": 0.22938911255032382,
      "grad_norm": 0.7053428463513204,
      "learning_rate": 8.993324443097387e-06,
      "loss": 0.1569,
      "step": 7863
    },
    {
      "epoch": 0.22941828578096737,
      "grad_norm": 0.8591552881685272,
      "learning_rate": 8.993040125718857e-06,
      "loss": 0.1651,
      "step": 7864
    },
    {
      "epoch": 0.22944745901161095,
      "grad_norm": 0.8655036985855517,
      "learning_rate": 8.992755772691418e-06,
      "loss": 0.1622,
      "step": 7865
    },
    {
      "epoch": 0.2294766322422545,
      "grad_norm": 0.8914369730508105,
      "learning_rate": 8.99247138401761e-06,
      "loss": 0.1664,
      "step": 7866
    },
    {
      "epoch": 0.22950580547289806,
      "grad_norm": 0.941520779873375,
      "learning_rate": 8.99218695969997e-06,
      "loss": 0.1658,
      "step": 7867
    },
    {
      "epoch": 0.22953497870354164,
      "grad_norm": 0.9019841936882262,
      "learning_rate": 8.991902499741036e-06,
      "loss": 0.1598,
      "step": 7868
    },
    {
      "epoch": 0.2295641519341852,
      "grad_norm": 1.0984679069219512,
      "learning_rate": 8.991618004143353e-06,
      "loss": 0.1542,
      "step": 7869
    },
    {
      "epoch": 0.22959332516482875,
      "grad_norm": 30.538307535441056,
      "learning_rate": 8.991333472909455e-06,
      "loss": 0.2469,
      "step": 7870
    },
    {
      "epoch": 0.2296224983954723,
      "grad_norm": 0.9871575245444739,
      "learning_rate": 8.991048906041884e-06,
      "loss": 0.1521,
      "step": 7871
    },
    {
      "epoch": 0.22965167162611588,
      "grad_norm": 7.0030216908220515,
      "learning_rate": 8.990764303543183e-06,
      "loss": 0.1661,
      "step": 7872
    },
    {
      "epoch": 0.22968084485675944,
      "grad_norm": 0.8611120724498316,
      "learning_rate": 8.99047966541589e-06,
      "loss": 0.2029,
      "step": 7873
    },
    {
      "epoch": 0.229710018087403,
      "grad_norm": 1.0826007546623617,
      "learning_rate": 8.990194991662547e-06,
      "loss": 0.1964,
      "step": 7874
    },
    {
      "epoch": 0.22973919131804657,
      "grad_norm": 0.9013871563643052,
      "learning_rate": 8.989910282285696e-06,
      "loss": 0.1797,
      "step": 7875
    },
    {
      "epoch": 0.22976836454869012,
      "grad_norm": 0.6976822409689709,
      "learning_rate": 8.989625537287879e-06,
      "loss": 0.1487,
      "step": 7876
    },
    {
      "epoch": 0.22979753777933368,
      "grad_norm": 0.8141668230560807,
      "learning_rate": 8.989340756671637e-06,
      "loss": 0.1594,
      "step": 7877
    },
    {
      "epoch": 0.22982671100997726,
      "grad_norm": 0.797237042164959,
      "learning_rate": 8.989055940439513e-06,
      "loss": 0.1853,
      "step": 7878
    },
    {
      "epoch": 0.2298558842406208,
      "grad_norm": 0.8427973492284777,
      "learning_rate": 8.98877108859405e-06,
      "loss": 0.1854,
      "step": 7879
    },
    {
      "epoch": 0.22988505747126436,
      "grad_norm": 0.7827418496927713,
      "learning_rate": 8.98848620113779e-06,
      "loss": 0.1588,
      "step": 7880
    },
    {
      "epoch": 0.22991423070190792,
      "grad_norm": 0.8137145924122848,
      "learning_rate": 8.988201278073279e-06,
      "loss": 0.1877,
      "step": 7881
    },
    {
      "epoch": 0.2299434039325515,
      "grad_norm": 0.7185630838548984,
      "learning_rate": 8.987916319403058e-06,
      "loss": 0.145,
      "step": 7882
    },
    {
      "epoch": 0.22997257716319505,
      "grad_norm": 0.6945072273188773,
      "learning_rate": 8.987631325129672e-06,
      "loss": 0.1573,
      "step": 7883
    },
    {
      "epoch": 0.2300017503938386,
      "grad_norm": 0.6783951637049451,
      "learning_rate": 8.987346295255665e-06,
      "loss": 0.1542,
      "step": 7884
    },
    {
      "epoch": 0.2300309236244822,
      "grad_norm": 0.8143489706937942,
      "learning_rate": 8.987061229783583e-06,
      "loss": 0.1544,
      "step": 7885
    },
    {
      "epoch": 0.23006009685512574,
      "grad_norm": 0.9324773063743967,
      "learning_rate": 8.98677612871597e-06,
      "loss": 0.1507,
      "step": 7886
    },
    {
      "epoch": 0.2300892700857693,
      "grad_norm": 0.8604850345630668,
      "learning_rate": 8.986490992055371e-06,
      "loss": 0.1712,
      "step": 7887
    },
    {
      "epoch": 0.23011844331641285,
      "grad_norm": 0.9366105297218565,
      "learning_rate": 8.986205819804332e-06,
      "loss": 0.1654,
      "step": 7888
    },
    {
      "epoch": 0.23014761654705643,
      "grad_norm": 0.7698870467790884,
      "learning_rate": 8.9859206119654e-06,
      "loss": 0.1563,
      "step": 7889
    },
    {
      "epoch": 0.23017678977769998,
      "grad_norm": 0.8892805451272776,
      "learning_rate": 8.98563536854112e-06,
      "loss": 0.1482,
      "step": 7890
    },
    {
      "epoch": 0.23020596300834353,
      "grad_norm": 0.831349679074709,
      "learning_rate": 8.985350089534039e-06,
      "loss": 0.1876,
      "step": 7891
    },
    {
      "epoch": 0.23023513623898711,
      "grad_norm": 0.9095725014546474,
      "learning_rate": 8.985064774946704e-06,
      "loss": 0.1659,
      "step": 7892
    },
    {
      "epoch": 0.23026430946963067,
      "grad_norm": 0.6884617720193952,
      "learning_rate": 8.98477942478166e-06,
      "loss": 0.1585,
      "step": 7893
    },
    {
      "epoch": 0.23029348270027422,
      "grad_norm": 0.7678957873083491,
      "learning_rate": 8.984494039041458e-06,
      "loss": 0.156,
      "step": 7894
    },
    {
      "epoch": 0.2303226559309178,
      "grad_norm": 0.8958766686510108,
      "learning_rate": 8.984208617728645e-06,
      "loss": 0.1651,
      "step": 7895
    },
    {
      "epoch": 0.23035182916156136,
      "grad_norm": 0.7220839846605178,
      "learning_rate": 8.983923160845766e-06,
      "loss": 0.1646,
      "step": 7896
    },
    {
      "epoch": 0.2303810023922049,
      "grad_norm": 0.7298317309624487,
      "learning_rate": 8.983637668395375e-06,
      "loss": 0.1296,
      "step": 7897
    },
    {
      "epoch": 0.23041017562284846,
      "grad_norm": 0.9141541064688687,
      "learning_rate": 8.983352140380017e-06,
      "loss": 0.1598,
      "step": 7898
    },
    {
      "epoch": 0.23043934885349204,
      "grad_norm": 0.867744659092262,
      "learning_rate": 8.983066576802241e-06,
      "loss": 0.1861,
      "step": 7899
    },
    {
      "epoch": 0.2304685220841356,
      "grad_norm": 0.9990059552793454,
      "learning_rate": 8.9827809776646e-06,
      "loss": 0.1694,
      "step": 7900
    },
    {
      "epoch": 0.23049769531477915,
      "grad_norm": 0.8735981646512109,
      "learning_rate": 8.98249534296964e-06,
      "loss": 0.1724,
      "step": 7901
    },
    {
      "epoch": 0.23052686854542273,
      "grad_norm": 0.8882672213679386,
      "learning_rate": 8.98220967271991e-06,
      "loss": 0.1522,
      "step": 7902
    },
    {
      "epoch": 0.23055604177606628,
      "grad_norm": 0.9497935493498831,
      "learning_rate": 8.981923966917965e-06,
      "loss": 0.1771,
      "step": 7903
    },
    {
      "epoch": 0.23058521500670984,
      "grad_norm": 0.8874610411280186,
      "learning_rate": 8.981638225566352e-06,
      "loss": 0.1736,
      "step": 7904
    },
    {
      "epoch": 0.23061438823735342,
      "grad_norm": 0.9846342420793052,
      "learning_rate": 8.981352448667625e-06,
      "loss": 0.159,
      "step": 7905
    },
    {
      "epoch": 0.23064356146799697,
      "grad_norm": 0.9571905763581517,
      "learning_rate": 8.981066636224334e-06,
      "loss": 0.1644,
      "step": 7906
    },
    {
      "epoch": 0.23067273469864052,
      "grad_norm": 1.000066371424318,
      "learning_rate": 8.980780788239029e-06,
      "loss": 0.1565,
      "step": 7907
    },
    {
      "epoch": 0.23070190792928408,
      "grad_norm": 0.8388845856794662,
      "learning_rate": 8.980494904714263e-06,
      "loss": 0.1396,
      "step": 7908
    },
    {
      "epoch": 0.23073108115992766,
      "grad_norm": 1.1958278189996134,
      "learning_rate": 8.98020898565259e-06,
      "loss": 0.1526,
      "step": 7909
    },
    {
      "epoch": 0.2307602543905712,
      "grad_norm": 0.8659713240342617,
      "learning_rate": 8.979923031056561e-06,
      "loss": 0.1575,
      "step": 7910
    },
    {
      "epoch": 0.23078942762121477,
      "grad_norm": 0.8888592808462227,
      "learning_rate": 8.979637040928728e-06,
      "loss": 0.1713,
      "step": 7911
    },
    {
      "epoch": 0.23081860085185835,
      "grad_norm": 1.0438234571076548,
      "learning_rate": 8.979351015271648e-06,
      "loss": 0.1779,
      "step": 7912
    },
    {
      "epoch": 0.2308477740825019,
      "grad_norm": 0.9340396543834354,
      "learning_rate": 8.979064954087871e-06,
      "loss": 0.1784,
      "step": 7913
    },
    {
      "epoch": 0.23087694731314545,
      "grad_norm": 0.8961219508440874,
      "learning_rate": 8.97877885737995e-06,
      "loss": 0.1523,
      "step": 7914
    },
    {
      "epoch": 0.230906120543789,
      "grad_norm": 0.9385047700926765,
      "learning_rate": 8.978492725150444e-06,
      "loss": 0.1603,
      "step": 7915
    },
    {
      "epoch": 0.2309352937744326,
      "grad_norm": 1.1180766056341944,
      "learning_rate": 8.978206557401903e-06,
      "loss": 0.1799,
      "step": 7916
    },
    {
      "epoch": 0.23096446700507614,
      "grad_norm": 0.8924194484470929,
      "learning_rate": 8.977920354136885e-06,
      "loss": 0.1895,
      "step": 7917
    },
    {
      "epoch": 0.2309936402357197,
      "grad_norm": 0.7494284756242796,
      "learning_rate": 8.977634115357942e-06,
      "loss": 0.1388,
      "step": 7918
    },
    {
      "epoch": 0.23102281346636327,
      "grad_norm": 0.7633926722825112,
      "learning_rate": 8.977347841067631e-06,
      "loss": 0.1608,
      "step": 7919
    },
    {
      "epoch": 0.23105198669700683,
      "grad_norm": 0.8638611396990575,
      "learning_rate": 8.97706153126851e-06,
      "loss": 0.1641,
      "step": 7920
    },
    {
      "epoch": 0.23108115992765038,
      "grad_norm": 0.780354734871524,
      "learning_rate": 8.976775185963131e-06,
      "loss": 0.1378,
      "step": 7921
    },
    {
      "epoch": 0.23111033315829396,
      "grad_norm": 0.8743108969033693,
      "learning_rate": 8.976488805154054e-06,
      "loss": 0.193,
      "step": 7922
    },
    {
      "epoch": 0.23113950638893752,
      "grad_norm": 0.8404789222654602,
      "learning_rate": 8.976202388843833e-06,
      "loss": 0.1547,
      "step": 7923
    },
    {
      "epoch": 0.23116867961958107,
      "grad_norm": 0.858721831932792,
      "learning_rate": 8.975915937035029e-06,
      "loss": 0.1795,
      "step": 7924
    },
    {
      "epoch": 0.23119785285022462,
      "grad_norm": 0.8571207713848548,
      "learning_rate": 8.975629449730194e-06,
      "loss": 0.1759,
      "step": 7925
    },
    {
      "epoch": 0.2312270260808682,
      "grad_norm": 0.9479269031813717,
      "learning_rate": 8.975342926931888e-06,
      "loss": 0.16,
      "step": 7926
    },
    {
      "epoch": 0.23125619931151176,
      "grad_norm": 0.8689659096373238,
      "learning_rate": 8.97505636864267e-06,
      "loss": 0.1819,
      "step": 7927
    },
    {
      "epoch": 0.2312853725421553,
      "grad_norm": 0.7629244776263481,
      "learning_rate": 8.974769774865097e-06,
      "loss": 0.167,
      "step": 7928
    },
    {
      "epoch": 0.2313145457727989,
      "grad_norm": 0.8319382999924249,
      "learning_rate": 8.97448314560173e-06,
      "loss": 0.1928,
      "step": 7929
    },
    {
      "epoch": 0.23134371900344244,
      "grad_norm": 0.7636448846716699,
      "learning_rate": 8.974196480855126e-06,
      "loss": 0.1476,
      "step": 7930
    },
    {
      "epoch": 0.231372892234086,
      "grad_norm": 0.7868278619232064,
      "learning_rate": 8.973909780627845e-06,
      "loss": 0.1553,
      "step": 7931
    },
    {
      "epoch": 0.23140206546472955,
      "grad_norm": 1.783456156342513,
      "learning_rate": 8.973623044922444e-06,
      "loss": 0.1468,
      "step": 7932
    },
    {
      "epoch": 0.23143123869537313,
      "grad_norm": 0.7361392006785855,
      "learning_rate": 8.973336273741487e-06,
      "loss": 0.1935,
      "step": 7933
    },
    {
      "epoch": 0.23146041192601668,
      "grad_norm": 0.6799103841962983,
      "learning_rate": 8.973049467087531e-06,
      "loss": 0.1595,
      "step": 7934
    },
    {
      "epoch": 0.23148958515666024,
      "grad_norm": 0.8197974864462231,
      "learning_rate": 8.972762624963139e-06,
      "loss": 0.1556,
      "step": 7935
    },
    {
      "epoch": 0.23151875838730382,
      "grad_norm": 0.9292101613609204,
      "learning_rate": 8.972475747370869e-06,
      "loss": 0.1486,
      "step": 7936
    },
    {
      "epoch": 0.23154793161794737,
      "grad_norm": 0.8351032308641211,
      "learning_rate": 8.972188834313285e-06,
      "loss": 0.1508,
      "step": 7937
    },
    {
      "epoch": 0.23157710484859093,
      "grad_norm": 0.8236472852016963,
      "learning_rate": 8.971901885792947e-06,
      "loss": 0.1729,
      "step": 7938
    },
    {
      "epoch": 0.2316062780792345,
      "grad_norm": 0.754782621058893,
      "learning_rate": 8.971614901812417e-06,
      "loss": 0.1546,
      "step": 7939
    },
    {
      "epoch": 0.23163545130987806,
      "grad_norm": 0.8184346728606533,
      "learning_rate": 8.971327882374257e-06,
      "loss": 0.1498,
      "step": 7940
    },
    {
      "epoch": 0.2316646245405216,
      "grad_norm": 0.7842135080527304,
      "learning_rate": 8.97104082748103e-06,
      "loss": 0.1587,
      "step": 7941
    },
    {
      "epoch": 0.23169379777116517,
      "grad_norm": 0.8003555749338849,
      "learning_rate": 8.970753737135298e-06,
      "loss": 0.1545,
      "step": 7942
    },
    {
      "epoch": 0.23172297100180875,
      "grad_norm": 0.9322813589322437,
      "learning_rate": 8.970466611339625e-06,
      "loss": 0.1682,
      "step": 7943
    },
    {
      "epoch": 0.2317521442324523,
      "grad_norm": 0.7959947942319586,
      "learning_rate": 8.970179450096574e-06,
      "loss": 0.1509,
      "step": 7944
    },
    {
      "epoch": 0.23178131746309585,
      "grad_norm": 0.7854743779984855,
      "learning_rate": 8.96989225340871e-06,
      "loss": 0.1536,
      "step": 7945
    },
    {
      "epoch": 0.23181049069373943,
      "grad_norm": 1.0302643387878918,
      "learning_rate": 8.969605021278594e-06,
      "loss": 0.1548,
      "step": 7946
    },
    {
      "epoch": 0.231839663924383,
      "grad_norm": 0.8798981554529599,
      "learning_rate": 8.969317753708792e-06,
      "loss": 0.1501,
      "step": 7947
    },
    {
      "epoch": 0.23186883715502654,
      "grad_norm": 0.8036381869669064,
      "learning_rate": 8.96903045070187e-06,
      "loss": 0.1587,
      "step": 7948
    },
    {
      "epoch": 0.23189801038567012,
      "grad_norm": 0.981191173155209,
      "learning_rate": 8.968743112260389e-06,
      "loss": 0.1757,
      "step": 7949
    },
    {
      "epoch": 0.23192718361631368,
      "grad_norm": 0.9158301363845617,
      "learning_rate": 8.968455738386919e-06,
      "loss": 0.1604,
      "step": 7950
    },
    {
      "epoch": 0.23195635684695723,
      "grad_norm": 1.0966531889290108,
      "learning_rate": 8.968168329084022e-06,
      "loss": 0.1801,
      "step": 7951
    },
    {
      "epoch": 0.23198553007760078,
      "grad_norm": 0.9422267898448748,
      "learning_rate": 8.967880884354267e-06,
      "loss": 0.1644,
      "step": 7952
    },
    {
      "epoch": 0.23201470330824436,
      "grad_norm": 0.7561762332547298,
      "learning_rate": 8.967593404200219e-06,
      "loss": 0.1493,
      "step": 7953
    },
    {
      "epoch": 0.23204387653888792,
      "grad_norm": 1.1758815200025448,
      "learning_rate": 8.967305888624442e-06,
      "loss": 0.1709,
      "step": 7954
    },
    {
      "epoch": 0.23207304976953147,
      "grad_norm": 0.9566726405483177,
      "learning_rate": 8.967018337629508e-06,
      "loss": 0.1836,
      "step": 7955
    },
    {
      "epoch": 0.23210222300017505,
      "grad_norm": 0.7946192356490421,
      "learning_rate": 8.966730751217978e-06,
      "loss": 0.1681,
      "step": 7956
    },
    {
      "epoch": 0.2321313962308186,
      "grad_norm": 0.9160009772815291,
      "learning_rate": 8.966443129392426e-06,
      "loss": 0.1519,
      "step": 7957
    },
    {
      "epoch": 0.23216056946146216,
      "grad_norm": 1.0918327574228717,
      "learning_rate": 8.966155472155414e-06,
      "loss": 0.1702,
      "step": 7958
    },
    {
      "epoch": 0.2321897426921057,
      "grad_norm": 0.7626379469549405,
      "learning_rate": 8.965867779509513e-06,
      "loss": 0.155,
      "step": 7959
    },
    {
      "epoch": 0.2322189159227493,
      "grad_norm": 0.8618726573903422,
      "learning_rate": 8.965580051457292e-06,
      "loss": 0.1588,
      "step": 7960
    },
    {
      "epoch": 0.23224808915339284,
      "grad_norm": 0.9285347313650374,
      "learning_rate": 8.96529228800132e-06,
      "loss": 0.1753,
      "step": 7961
    },
    {
      "epoch": 0.2322772623840364,
      "grad_norm": 0.8710843645255506,
      "learning_rate": 8.965004489144165e-06,
      "loss": 0.1694,
      "step": 7962
    },
    {
      "epoch": 0.23230643561467998,
      "grad_norm": 0.7944912694806492,
      "learning_rate": 8.964716654888395e-06,
      "loss": 0.1671,
      "step": 7963
    },
    {
      "epoch": 0.23233560884532353,
      "grad_norm": 0.805655434799871,
      "learning_rate": 8.964428785236581e-06,
      "loss": 0.1552,
      "step": 7964
    },
    {
      "epoch": 0.23236478207596709,
      "grad_norm": 0.8562198493417296,
      "learning_rate": 8.964140880191294e-06,
      "loss": 0.1476,
      "step": 7965
    },
    {
      "epoch": 0.23239395530661067,
      "grad_norm": 0.7119304754090852,
      "learning_rate": 8.963852939755104e-06,
      "loss": 0.1662,
      "step": 7966
    },
    {
      "epoch": 0.23242312853725422,
      "grad_norm": 0.9471470810961862,
      "learning_rate": 8.96356496393058e-06,
      "loss": 0.1595,
      "step": 7967
    },
    {
      "epoch": 0.23245230176789777,
      "grad_norm": 0.802465025689359,
      "learning_rate": 8.963276952720294e-06,
      "loss": 0.1747,
      "step": 7968
    },
    {
      "epoch": 0.23248147499854133,
      "grad_norm": 0.653049839552109,
      "learning_rate": 8.96298890612682e-06,
      "loss": 0.1743,
      "step": 7969
    },
    {
      "epoch": 0.2325106482291849,
      "grad_norm": 0.7449545503708133,
      "learning_rate": 8.962700824152724e-06,
      "loss": 0.1519,
      "step": 7970
    },
    {
      "epoch": 0.23253982145982846,
      "grad_norm": 0.859759788278501,
      "learning_rate": 8.962412706800583e-06,
      "loss": 0.1643,
      "step": 7971
    },
    {
      "epoch": 0.23256899469047201,
      "grad_norm": 0.7912390424457756,
      "learning_rate": 8.962124554072966e-06,
      "loss": 0.1623,
      "step": 7972
    },
    {
      "epoch": 0.2325981679211156,
      "grad_norm": 0.96985934893583,
      "learning_rate": 8.961836365972448e-06,
      "loss": 0.1567,
      "step": 7973
    },
    {
      "epoch": 0.23262734115175915,
      "grad_norm": 0.8698070988113134,
      "learning_rate": 8.9615481425016e-06,
      "loss": 0.1413,
      "step": 7974
    },
    {
      "epoch": 0.2326565143824027,
      "grad_norm": 1.1161478727879717,
      "learning_rate": 8.961259883662997e-06,
      "loss": 0.1888,
      "step": 7975
    },
    {
      "epoch": 0.23268568761304628,
      "grad_norm": 0.9863245588204235,
      "learning_rate": 8.960971589459208e-06,
      "loss": 0.1671,
      "step": 7976
    },
    {
      "epoch": 0.23271486084368984,
      "grad_norm": 1.340980192673474,
      "learning_rate": 8.960683259892813e-06,
      "loss": 0.1938,
      "step": 7977
    },
    {
      "epoch": 0.2327440340743334,
      "grad_norm": 1.0935669382378899,
      "learning_rate": 8.960394894966383e-06,
      "loss": 0.1596,
      "step": 7978
    },
    {
      "epoch": 0.23277320730497694,
      "grad_norm": 0.7201980386984678,
      "learning_rate": 8.960106494682492e-06,
      "loss": 0.155,
      "step": 7979
    },
    {
      "epoch": 0.23280238053562052,
      "grad_norm": 0.9488219835295263,
      "learning_rate": 8.959818059043717e-06,
      "loss": 0.1618,
      "step": 7980
    },
    {
      "epoch": 0.23283155376626408,
      "grad_norm": 1.0172603853180795,
      "learning_rate": 8.959529588052631e-06,
      "loss": 0.1788,
      "step": 7981
    },
    {
      "epoch": 0.23286072699690763,
      "grad_norm": 0.7545652274753679,
      "learning_rate": 8.959241081711811e-06,
      "loss": 0.1527,
      "step": 7982
    },
    {
      "epoch": 0.2328899002275512,
      "grad_norm": 0.9252378447180483,
      "learning_rate": 8.95895254002383e-06,
      "loss": 0.1716,
      "step": 7983
    },
    {
      "epoch": 0.23291907345819476,
      "grad_norm": 0.7860081958102194,
      "learning_rate": 8.958663962991265e-06,
      "loss": 0.157,
      "step": 7984
    },
    {
      "epoch": 0.23294824668883832,
      "grad_norm": 1.022959670385792,
      "learning_rate": 8.958375350616695e-06,
      "loss": 0.1515,
      "step": 7985
    },
    {
      "epoch": 0.23297741991948187,
      "grad_norm": 0.8943316343739773,
      "learning_rate": 8.958086702902695e-06,
      "loss": 0.1801,
      "step": 7986
    },
    {
      "epoch": 0.23300659315012545,
      "grad_norm": 1.0773939516632338,
      "learning_rate": 8.957798019851842e-06,
      "loss": 0.1685,
      "step": 7987
    },
    {
      "epoch": 0.233035766380769,
      "grad_norm": 0.9049791383744568,
      "learning_rate": 8.957509301466712e-06,
      "loss": 0.1691,
      "step": 7988
    },
    {
      "epoch": 0.23306493961141256,
      "grad_norm": 0.8968414027056072,
      "learning_rate": 8.957220547749884e-06,
      "loss": 0.1465,
      "step": 7989
    },
    {
      "epoch": 0.23309411284205614,
      "grad_norm": 0.9636828632979421,
      "learning_rate": 8.956931758703935e-06,
      "loss": 0.1811,
      "step": 7990
    },
    {
      "epoch": 0.2331232860726997,
      "grad_norm": 0.9914752249113326,
      "learning_rate": 8.956642934331446e-06,
      "loss": 0.1709,
      "step": 7991
    },
    {
      "epoch": 0.23315245930334325,
      "grad_norm": 1.088550261313748,
      "learning_rate": 8.956354074634992e-06,
      "loss": 0.1863,
      "step": 7992
    },
    {
      "epoch": 0.23318163253398683,
      "grad_norm": 1.1213752797523344,
      "learning_rate": 8.956065179617153e-06,
      "loss": 0.1504,
      "step": 7993
    },
    {
      "epoch": 0.23321080576463038,
      "grad_norm": 1.5068470923028956,
      "learning_rate": 8.955776249280508e-06,
      "loss": 0.1528,
      "step": 7994
    },
    {
      "epoch": 0.23323997899527393,
      "grad_norm": 1.0904897897433061,
      "learning_rate": 8.955487283627638e-06,
      "loss": 0.2054,
      "step": 7995
    },
    {
      "epoch": 0.2332691522259175,
      "grad_norm": 1.0699673947294661,
      "learning_rate": 8.955198282661122e-06,
      "loss": 0.1348,
      "step": 7996
    },
    {
      "epoch": 0.23329832545656107,
      "grad_norm": 0.7775833248102009,
      "learning_rate": 8.954909246383539e-06,
      "loss": 0.1811,
      "step": 7997
    },
    {
      "epoch": 0.23332749868720462,
      "grad_norm": 0.9471724998179966,
      "learning_rate": 8.95462017479747e-06,
      "loss": 0.1642,
      "step": 7998
    },
    {
      "epoch": 0.23335667191784817,
      "grad_norm": 0.8766472407340916,
      "learning_rate": 8.954331067905498e-06,
      "loss": 0.1598,
      "step": 7999
    },
    {
      "epoch": 0.23338584514849176,
      "grad_norm": 0.7779864407460095,
      "learning_rate": 8.9540419257102e-06,
      "loss": 0.1601,
      "step": 8000
    },
    {
      "epoch": 0.2334150183791353,
      "grad_norm": 0.8068738332383827,
      "learning_rate": 8.953752748214161e-06,
      "loss": 0.1677,
      "step": 8001
    },
    {
      "epoch": 0.23344419160977886,
      "grad_norm": 0.9702547364467246,
      "learning_rate": 8.953463535419962e-06,
      "loss": 0.1731,
      "step": 8002
    },
    {
      "epoch": 0.23347336484042241,
      "grad_norm": 0.8231585920754951,
      "learning_rate": 8.953174287330182e-06,
      "loss": 0.1867,
      "step": 8003
    },
    {
      "epoch": 0.233502538071066,
      "grad_norm": 0.7479409714255466,
      "learning_rate": 8.952885003947407e-06,
      "loss": 0.1697,
      "step": 8004
    },
    {
      "epoch": 0.23353171130170955,
      "grad_norm": 0.7061660965729089,
      "learning_rate": 8.95259568527422e-06,
      "loss": 0.1417,
      "step": 8005
    },
    {
      "epoch": 0.2335608845323531,
      "grad_norm": 1.0359937169217408,
      "learning_rate": 8.952306331313199e-06,
      "loss": 0.2055,
      "step": 8006
    },
    {
      "epoch": 0.23359005776299668,
      "grad_norm": 0.8176126070362488,
      "learning_rate": 8.952016942066932e-06,
      "loss": 0.1684,
      "step": 8007
    },
    {
      "epoch": 0.23361923099364024,
      "grad_norm": 1.0809415238229225,
      "learning_rate": 8.951727517538001e-06,
      "loss": 0.1641,
      "step": 8008
    },
    {
      "epoch": 0.2336484042242838,
      "grad_norm": 0.8048304969169773,
      "learning_rate": 8.951438057728991e-06,
      "loss": 0.1588,
      "step": 8009
    },
    {
      "epoch": 0.23367757745492737,
      "grad_norm": 0.9538286575976014,
      "learning_rate": 8.951148562642485e-06,
      "loss": 0.1657,
      "step": 8010
    },
    {
      "epoch": 0.23370675068557092,
      "grad_norm": 1.1010655732932972,
      "learning_rate": 8.950859032281068e-06,
      "loss": 0.1975,
      "step": 8011
    },
    {
      "epoch": 0.23373592391621448,
      "grad_norm": 0.8423336991321413,
      "learning_rate": 8.950569466647322e-06,
      "loss": 0.1913,
      "step": 8012
    },
    {
      "epoch": 0.23376509714685803,
      "grad_norm": 0.9953928472588061,
      "learning_rate": 8.950279865743838e-06,
      "loss": 0.1685,
      "step": 8013
    },
    {
      "epoch": 0.2337942703775016,
      "grad_norm": 0.7296554387113371,
      "learning_rate": 8.949990229573198e-06,
      "loss": 0.1487,
      "step": 8014
    },
    {
      "epoch": 0.23382344360814517,
      "grad_norm": 0.7998938992446264,
      "learning_rate": 8.949700558137986e-06,
      "loss": 0.145,
      "step": 8015
    },
    {
      "epoch": 0.23385261683878872,
      "grad_norm": 0.8282895543551169,
      "learning_rate": 8.949410851440793e-06,
      "loss": 0.1876,
      "step": 8016
    },
    {
      "epoch": 0.2338817900694323,
      "grad_norm": 0.7432614557343605,
      "learning_rate": 8.949121109484202e-06,
      "loss": 0.1445,
      "step": 8017
    },
    {
      "epoch": 0.23391096330007585,
      "grad_norm": 0.827381807714969,
      "learning_rate": 8.9488313322708e-06,
      "loss": 0.1824,
      "step": 8018
    },
    {
      "epoch": 0.2339401365307194,
      "grad_norm": 0.7444319638690382,
      "learning_rate": 8.948541519803174e-06,
      "loss": 0.1404,
      "step": 8019
    },
    {
      "epoch": 0.233969309761363,
      "grad_norm": 0.7989216545674781,
      "learning_rate": 8.948251672083913e-06,
      "loss": 0.1606,
      "step": 8020
    },
    {
      "epoch": 0.23399848299200654,
      "grad_norm": 0.8655703677849157,
      "learning_rate": 8.947961789115602e-06,
      "loss": 0.1628,
      "step": 8021
    },
    {
      "epoch": 0.2340276562226501,
      "grad_norm": 0.878427661366001,
      "learning_rate": 8.947671870900833e-06,
      "loss": 0.1626,
      "step": 8022
    },
    {
      "epoch": 0.23405682945329365,
      "grad_norm": 0.7137478691908138,
      "learning_rate": 8.94738191744219e-06,
      "loss": 0.1424,
      "step": 8023
    },
    {
      "epoch": 0.23408600268393723,
      "grad_norm": 0.9131009998198611,
      "learning_rate": 8.947091928742265e-06,
      "loss": 0.174,
      "step": 8024
    },
    {
      "epoch": 0.23411517591458078,
      "grad_norm": 0.8295889966807191,
      "learning_rate": 8.946801904803643e-06,
      "loss": 0.157,
      "step": 8025
    },
    {
      "epoch": 0.23414434914522433,
      "grad_norm": 0.679940778230621,
      "learning_rate": 8.946511845628917e-06,
      "loss": 0.1654,
      "step": 8026
    },
    {
      "epoch": 0.23417352237586792,
      "grad_norm": 0.6610192945988974,
      "learning_rate": 8.946221751220676e-06,
      "loss": 0.1355,
      "step": 8027
    },
    {
      "epoch": 0.23420269560651147,
      "grad_norm": 3.442276636815862,
      "learning_rate": 8.945931621581511e-06,
      "loss": 0.1647,
      "step": 8028
    },
    {
      "epoch": 0.23423186883715502,
      "grad_norm": 0.8839468671166052,
      "learning_rate": 8.945641456714007e-06,
      "loss": 0.1424,
      "step": 8029
    },
    {
      "epoch": 0.23426104206779857,
      "grad_norm": 0.8889270339695862,
      "learning_rate": 8.94535125662076e-06,
      "loss": 0.1647,
      "step": 8030
    },
    {
      "epoch": 0.23429021529844216,
      "grad_norm": 1.0277466748950153,
      "learning_rate": 8.94506102130436e-06,
      "loss": 0.1728,
      "step": 8031
    },
    {
      "epoch": 0.2343193885290857,
      "grad_norm": 0.8233028025233522,
      "learning_rate": 8.944770750767393e-06,
      "loss": 0.1326,
      "step": 8032
    },
    {
      "epoch": 0.23434856175972926,
      "grad_norm": 0.7082904575322121,
      "learning_rate": 8.944480445012458e-06,
      "loss": 0.1366,
      "step": 8033
    },
    {
      "epoch": 0.23437773499037284,
      "grad_norm": 0.7853682305585855,
      "learning_rate": 8.94419010404214e-06,
      "loss": 0.1695,
      "step": 8034
    },
    {
      "epoch": 0.2344069082210164,
      "grad_norm": 0.805043308464724,
      "learning_rate": 8.943899727859038e-06,
      "loss": 0.1645,
      "step": 8035
    },
    {
      "epoch": 0.23443608145165995,
      "grad_norm": 0.8223772365652124,
      "learning_rate": 8.943609316465739e-06,
      "loss": 0.155,
      "step": 8036
    },
    {
      "epoch": 0.23446525468230353,
      "grad_norm": 0.8983760952745956,
      "learning_rate": 8.943318869864836e-06,
      "loss": 0.1343,
      "step": 8037
    },
    {
      "epoch": 0.23449442791294708,
      "grad_norm": 0.7157276833187896,
      "learning_rate": 8.943028388058925e-06,
      "loss": 0.1403,
      "step": 8038
    },
    {
      "epoch": 0.23452360114359064,
      "grad_norm": 0.7825010396865133,
      "learning_rate": 8.942737871050598e-06,
      "loss": 0.1321,
      "step": 8039
    },
    {
      "epoch": 0.2345527743742342,
      "grad_norm": 0.8076007038951147,
      "learning_rate": 8.942447318842449e-06,
      "loss": 0.1929,
      "step": 8040
    },
    {
      "epoch": 0.23458194760487777,
      "grad_norm": 0.8279545523036859,
      "learning_rate": 8.94215673143707e-06,
      "loss": 0.1736,
      "step": 8041
    },
    {
      "epoch": 0.23461112083552133,
      "grad_norm": 0.7908824169183024,
      "learning_rate": 8.941866108837058e-06,
      "loss": 0.1557,
      "step": 8042
    },
    {
      "epoch": 0.23464029406616488,
      "grad_norm": 0.8955458930943784,
      "learning_rate": 8.941575451045006e-06,
      "loss": 0.1511,
      "step": 8043
    },
    {
      "epoch": 0.23466946729680846,
      "grad_norm": 0.8377022011248544,
      "learning_rate": 8.941284758063508e-06,
      "loss": 0.1823,
      "step": 8044
    },
    {
      "epoch": 0.234698640527452,
      "grad_norm": 2.101691490561202,
      "learning_rate": 8.940994029895162e-06,
      "loss": 0.1541,
      "step": 8045
    },
    {
      "epoch": 0.23472781375809557,
      "grad_norm": 0.8641975686700145,
      "learning_rate": 8.940703266542561e-06,
      "loss": 0.1395,
      "step": 8046
    },
    {
      "epoch": 0.23475698698873912,
      "grad_norm": 0.7837571588736078,
      "learning_rate": 8.940412468008303e-06,
      "loss": 0.1741,
      "step": 8047
    },
    {
      "epoch": 0.2347861602193827,
      "grad_norm": 0.8798153568066842,
      "learning_rate": 8.940121634294983e-06,
      "loss": 0.1627,
      "step": 8048
    },
    {
      "epoch": 0.23481533345002625,
      "grad_norm": 0.8248862615696155,
      "learning_rate": 8.939830765405198e-06,
      "loss": 0.1569,
      "step": 8049
    },
    {
      "epoch": 0.2348445066806698,
      "grad_norm": 1.2675451855792133,
      "learning_rate": 8.939539861341544e-06,
      "loss": 0.1523,
      "step": 8050
    },
    {
      "epoch": 0.2348736799113134,
      "grad_norm": 0.8146104364588431,
      "learning_rate": 8.939248922106618e-06,
      "loss": 0.1699,
      "step": 8051
    },
    {
      "epoch": 0.23490285314195694,
      "grad_norm": 0.8045537304185456,
      "learning_rate": 8.938957947703019e-06,
      "loss": 0.1512,
      "step": 8052
    },
    {
      "epoch": 0.2349320263726005,
      "grad_norm": 0.9183869283521147,
      "learning_rate": 8.938666938133343e-06,
      "loss": 0.1859,
      "step": 8053
    },
    {
      "epoch": 0.23496119960324408,
      "grad_norm": 0.8897729225953223,
      "learning_rate": 8.938375893400189e-06,
      "loss": 0.1604,
      "step": 8054
    },
    {
      "epoch": 0.23499037283388763,
      "grad_norm": 0.7725779472047589,
      "learning_rate": 8.938084813506155e-06,
      "loss": 0.1525,
      "step": 8055
    },
    {
      "epoch": 0.23501954606453118,
      "grad_norm": 0.7989758230798651,
      "learning_rate": 8.937793698453841e-06,
      "loss": 0.1627,
      "step": 8056
    },
    {
      "epoch": 0.23504871929517474,
      "grad_norm": 0.8581415588098958,
      "learning_rate": 8.937502548245844e-06,
      "loss": 0.1553,
      "step": 8057
    },
    {
      "epoch": 0.23507789252581832,
      "grad_norm": 0.9810036306537716,
      "learning_rate": 8.937211362884764e-06,
      "loss": 0.1514,
      "step": 8058
    },
    {
      "epoch": 0.23510706575646187,
      "grad_norm": 0.702512980123612,
      "learning_rate": 8.9369201423732e-06,
      "loss": 0.1743,
      "step": 8059
    },
    {
      "epoch": 0.23513623898710542,
      "grad_norm": 0.9888176350519338,
      "learning_rate": 8.936628886713754e-06,
      "loss": 0.1582,
      "step": 8060
    },
    {
      "epoch": 0.235165412217749,
      "grad_norm": 0.9156496686672432,
      "learning_rate": 8.936337595909024e-06,
      "loss": 0.1596,
      "step": 8061
    },
    {
      "epoch": 0.23519458544839256,
      "grad_norm": 0.7957213009908428,
      "learning_rate": 8.936046269961614e-06,
      "loss": 0.152,
      "step": 8062
    },
    {
      "epoch": 0.2352237586790361,
      "grad_norm": 0.9959649863010526,
      "learning_rate": 8.93575490887412e-06,
      "loss": 0.2111,
      "step": 8063
    },
    {
      "epoch": 0.2352529319096797,
      "grad_norm": 1.0618771730079335,
      "learning_rate": 8.935463512649147e-06,
      "loss": 0.1481,
      "step": 8064
    },
    {
      "epoch": 0.23528210514032324,
      "grad_norm": 0.8232608343299364,
      "learning_rate": 8.935172081289293e-06,
      "loss": 0.1599,
      "step": 8065
    },
    {
      "epoch": 0.2353112783709668,
      "grad_norm": 0.9889686791904171,
      "learning_rate": 8.934880614797166e-06,
      "loss": 0.154,
      "step": 8066
    },
    {
      "epoch": 0.23534045160161035,
      "grad_norm": 0.8758211257441239,
      "learning_rate": 8.934589113175363e-06,
      "loss": 0.1548,
      "step": 8067
    },
    {
      "epoch": 0.23536962483225393,
      "grad_norm": 0.861350412594102,
      "learning_rate": 8.934297576426487e-06,
      "loss": 0.1443,
      "step": 8068
    },
    {
      "epoch": 0.23539879806289749,
      "grad_norm": 0.8082345622066743,
      "learning_rate": 8.93400600455314e-06,
      "loss": 0.154,
      "step": 8069
    },
    {
      "epoch": 0.23542797129354104,
      "grad_norm": 1.0046091692315662,
      "learning_rate": 8.933714397557928e-06,
      "loss": 0.1865,
      "step": 8070
    },
    {
      "epoch": 0.23545714452418462,
      "grad_norm": 0.9800636252704941,
      "learning_rate": 8.933422755443453e-06,
      "loss": 0.1387,
      "step": 8071
    },
    {
      "epoch": 0.23548631775482817,
      "grad_norm": 6.8535591367862825,
      "learning_rate": 8.933131078212318e-06,
      "loss": 0.1666,
      "step": 8072
    },
    {
      "epoch": 0.23551549098547173,
      "grad_norm": 1.024408699368027,
      "learning_rate": 8.932839365867127e-06,
      "loss": 0.1554,
      "step": 8073
    },
    {
      "epoch": 0.23554466421611528,
      "grad_norm": 0.8963925660044842,
      "learning_rate": 8.932547618410486e-06,
      "loss": 0.1703,
      "step": 8074
    },
    {
      "epoch": 0.23557383744675886,
      "grad_norm": 0.7748710339714768,
      "learning_rate": 8.932255835845e-06,
      "loss": 0.1505,
      "step": 8075
    },
    {
      "epoch": 0.2356030106774024,
      "grad_norm": 3.705315847541083,
      "learning_rate": 8.931964018173272e-06,
      "loss": 0.1912,
      "step": 8076
    },
    {
      "epoch": 0.23563218390804597,
      "grad_norm": 0.830467918815383,
      "learning_rate": 8.931672165397907e-06,
      "loss": 0.1723,
      "step": 8077
    },
    {
      "epoch": 0.23566135713868955,
      "grad_norm": 0.784473307751724,
      "learning_rate": 8.931380277521511e-06,
      "loss": 0.1519,
      "step": 8078
    },
    {
      "epoch": 0.2356905303693331,
      "grad_norm": 0.9859581011216128,
      "learning_rate": 8.931088354546691e-06,
      "loss": 0.1603,
      "step": 8079
    },
    {
      "epoch": 0.23571970359997665,
      "grad_norm": 0.8925307079247121,
      "learning_rate": 8.930796396476051e-06,
      "loss": 0.176,
      "step": 8080
    },
    {
      "epoch": 0.23574887683062024,
      "grad_norm": 0.7635710131029193,
      "learning_rate": 8.930504403312201e-06,
      "loss": 0.1947,
      "step": 8081
    },
    {
      "epoch": 0.2357780500612638,
      "grad_norm": 0.8758109918925939,
      "learning_rate": 8.930212375057747e-06,
      "loss": 0.1627,
      "step": 8082
    },
    {
      "epoch": 0.23580722329190734,
      "grad_norm": 0.9129551107737623,
      "learning_rate": 8.929920311715293e-06,
      "loss": 0.1575,
      "step": 8083
    },
    {
      "epoch": 0.2358363965225509,
      "grad_norm": 0.6878454110352461,
      "learning_rate": 8.92962821328745e-06,
      "loss": 0.1373,
      "step": 8084
    },
    {
      "epoch": 0.23586556975319448,
      "grad_norm": 0.8301824701868347,
      "learning_rate": 8.929336079776822e-06,
      "loss": 0.1652,
      "step": 8085
    },
    {
      "epoch": 0.23589474298383803,
      "grad_norm": 0.8809332968442258,
      "learning_rate": 8.929043911186021e-06,
      "loss": 0.1528,
      "step": 8086
    },
    {
      "epoch": 0.23592391621448158,
      "grad_norm": 0.8597079340281912,
      "learning_rate": 8.928751707517655e-06,
      "loss": 0.1596,
      "step": 8087
    },
    {
      "epoch": 0.23595308944512516,
      "grad_norm": 0.8951525859435204,
      "learning_rate": 8.92845946877433e-06,
      "loss": 0.1846,
      "step": 8088
    },
    {
      "epoch": 0.23598226267576872,
      "grad_norm": 0.8740482636392535,
      "learning_rate": 8.92816719495866e-06,
      "loss": 0.1852,
      "step": 8089
    },
    {
      "epoch": 0.23601143590641227,
      "grad_norm": 0.7349794775328545,
      "learning_rate": 8.927874886073247e-06,
      "loss": 0.166,
      "step": 8090
    },
    {
      "epoch": 0.23604060913705585,
      "grad_norm": 0.9817526374427119,
      "learning_rate": 8.927582542120707e-06,
      "loss": 0.1689,
      "step": 8091
    },
    {
      "epoch": 0.2360697823676994,
      "grad_norm": 0.9985307535530884,
      "learning_rate": 8.927290163103646e-06,
      "loss": 0.1799,
      "step": 8092
    },
    {
      "epoch": 0.23609895559834296,
      "grad_norm": 0.7443694265539429,
      "learning_rate": 8.926997749024677e-06,
      "loss": 0.1252,
      "step": 8093
    },
    {
      "epoch": 0.2361281288289865,
      "grad_norm": 1.0655059388918109,
      "learning_rate": 8.926705299886408e-06,
      "loss": 0.1545,
      "step": 8094
    },
    {
      "epoch": 0.2361573020596301,
      "grad_norm": 0.7822390842209348,
      "learning_rate": 8.926412815691454e-06,
      "loss": 0.1596,
      "step": 8095
    },
    {
      "epoch": 0.23618647529027365,
      "grad_norm": 0.6411630374731159,
      "learning_rate": 8.926120296442421e-06,
      "loss": 0.1466,
      "step": 8096
    },
    {
      "epoch": 0.2362156485209172,
      "grad_norm": 0.7433286019275946,
      "learning_rate": 8.925827742141926e-06,
      "loss": 0.1542,
      "step": 8097
    },
    {
      "epoch": 0.23624482175156078,
      "grad_norm": 0.8266822077356416,
      "learning_rate": 8.925535152792577e-06,
      "loss": 0.1556,
      "step": 8098
    },
    {
      "epoch": 0.23627399498220433,
      "grad_norm": 0.8447328919697852,
      "learning_rate": 8.925242528396986e-06,
      "loss": 0.1962,
      "step": 8099
    },
    {
      "epoch": 0.23630316821284789,
      "grad_norm": 0.867242488085671,
      "learning_rate": 8.924949868957769e-06,
      "loss": 0.1705,
      "step": 8100
    },
    {
      "epoch": 0.23633234144349144,
      "grad_norm": 0.861202562787116,
      "learning_rate": 8.924657174477535e-06,
      "loss": 0.176,
      "step": 8101
    },
    {
      "epoch": 0.23636151467413502,
      "grad_norm": 0.8235656420001373,
      "learning_rate": 8.924364444958898e-06,
      "loss": 0.1737,
      "step": 8102
    },
    {
      "epoch": 0.23639068790477857,
      "grad_norm": 0.7579535439089422,
      "learning_rate": 8.924071680404474e-06,
      "loss": 0.177,
      "step": 8103
    },
    {
      "epoch": 0.23641986113542213,
      "grad_norm": 0.8908424358661887,
      "learning_rate": 8.923778880816874e-06,
      "loss": 0.1625,
      "step": 8104
    },
    {
      "epoch": 0.2364490343660657,
      "grad_norm": 0.9356466203247811,
      "learning_rate": 8.923486046198712e-06,
      "loss": 0.1823,
      "step": 8105
    },
    {
      "epoch": 0.23647820759670926,
      "grad_norm": 0.7974110500492371,
      "learning_rate": 8.923193176552604e-06,
      "loss": 0.1761,
      "step": 8106
    },
    {
      "epoch": 0.23650738082735281,
      "grad_norm": 0.9337107920765044,
      "learning_rate": 8.922900271881163e-06,
      "loss": 0.1369,
      "step": 8107
    },
    {
      "epoch": 0.2365365540579964,
      "grad_norm": 0.7518255024182023,
      "learning_rate": 8.922607332187005e-06,
      "loss": 0.1502,
      "step": 8108
    },
    {
      "epoch": 0.23656572728863995,
      "grad_norm": 0.8621728466275934,
      "learning_rate": 8.922314357472745e-06,
      "loss": 0.1581,
      "step": 8109
    },
    {
      "epoch": 0.2365949005192835,
      "grad_norm": 1.0851661029928277,
      "learning_rate": 8.922021347741e-06,
      "loss": 0.1499,
      "step": 8110
    },
    {
      "epoch": 0.23662407374992706,
      "grad_norm": 0.79383978632219,
      "learning_rate": 8.921728302994385e-06,
      "loss": 0.141,
      "step": 8111
    },
    {
      "epoch": 0.23665324698057064,
      "grad_norm": 0.8758827526338778,
      "learning_rate": 8.921435223235514e-06,
      "loss": 0.1548,
      "step": 8112
    },
    {
      "epoch": 0.2366824202112142,
      "grad_norm": 1.003926531533097,
      "learning_rate": 8.921142108467007e-06,
      "loss": 0.1923,
      "step": 8113
    },
    {
      "epoch": 0.23671159344185774,
      "grad_norm": 0.7985070966603274,
      "learning_rate": 8.920848958691479e-06,
      "loss": 0.1885,
      "step": 8114
    },
    {
      "epoch": 0.23674076667250132,
      "grad_norm": 0.8518616953368908,
      "learning_rate": 8.920555773911547e-06,
      "loss": 0.1591,
      "step": 8115
    },
    {
      "epoch": 0.23676993990314488,
      "grad_norm": 0.9892504056410393,
      "learning_rate": 8.920262554129828e-06,
      "loss": 0.1658,
      "step": 8116
    },
    {
      "epoch": 0.23679911313378843,
      "grad_norm": 0.9199013954920274,
      "learning_rate": 8.919969299348943e-06,
      "loss": 0.1509,
      "step": 8117
    },
    {
      "epoch": 0.23682828636443198,
      "grad_norm": 1.0071317843558194,
      "learning_rate": 8.919676009571508e-06,
      "loss": 0.1915,
      "step": 8118
    },
    {
      "epoch": 0.23685745959507556,
      "grad_norm": 0.8415660230966282,
      "learning_rate": 8.919382684800138e-06,
      "loss": 0.1676,
      "step": 8119
    },
    {
      "epoch": 0.23688663282571912,
      "grad_norm": 1.0731547107912802,
      "learning_rate": 8.919089325037457e-06,
      "loss": 0.1498,
      "step": 8120
    },
    {
      "epoch": 0.23691580605636267,
      "grad_norm": 0.9496190979236668,
      "learning_rate": 8.918795930286084e-06,
      "loss": 0.1551,
      "step": 8121
    },
    {
      "epoch": 0.23694497928700625,
      "grad_norm": 0.731065271012625,
      "learning_rate": 8.918502500548633e-06,
      "loss": 0.1679,
      "step": 8122
    },
    {
      "epoch": 0.2369741525176498,
      "grad_norm": 1.0046200625359758,
      "learning_rate": 8.91820903582773e-06,
      "loss": 0.1899,
      "step": 8123
    },
    {
      "epoch": 0.23700332574829336,
      "grad_norm": 1.064180628499699,
      "learning_rate": 8.91791553612599e-06,
      "loss": 0.1892,
      "step": 8124
    },
    {
      "epoch": 0.23703249897893694,
      "grad_norm": 0.7097016029175054,
      "learning_rate": 8.917622001446035e-06,
      "loss": 0.174,
      "step": 8125
    },
    {
      "epoch": 0.2370616722095805,
      "grad_norm": 0.9185933572329031,
      "learning_rate": 8.917328431790488e-06,
      "loss": 0.1612,
      "step": 8126
    },
    {
      "epoch": 0.23709084544022405,
      "grad_norm": 0.9006503602478779,
      "learning_rate": 8.917034827161969e-06,
      "loss": 0.1731,
      "step": 8127
    },
    {
      "epoch": 0.2371200186708676,
      "grad_norm": 0.8489097841051876,
      "learning_rate": 8.916741187563094e-06,
      "loss": 0.1305,
      "step": 8128
    },
    {
      "epoch": 0.23714919190151118,
      "grad_norm": 0.9279074904873551,
      "learning_rate": 8.91644751299649e-06,
      "loss": 0.1641,
      "step": 8129
    },
    {
      "epoch": 0.23717836513215473,
      "grad_norm": 0.6544628540452366,
      "learning_rate": 8.91615380346478e-06,
      "loss": 0.1335,
      "step": 8130
    },
    {
      "epoch": 0.2372075383627983,
      "grad_norm": 0.7925203958586841,
      "learning_rate": 8.915860058970582e-06,
      "loss": 0.1345,
      "step": 8131
    },
    {
      "epoch": 0.23723671159344187,
      "grad_norm": 0.7910895955053553,
      "learning_rate": 8.91556627951652e-06,
      "loss": 0.1621,
      "step": 8132
    },
    {
      "epoch": 0.23726588482408542,
      "grad_norm": 0.7104863015512473,
      "learning_rate": 8.915272465105218e-06,
      "loss": 0.1854,
      "step": 8133
    },
    {
      "epoch": 0.23729505805472897,
      "grad_norm": 1.0036378804966084,
      "learning_rate": 8.914978615739297e-06,
      "loss": 0.1868,
      "step": 8134
    },
    {
      "epoch": 0.23732423128537256,
      "grad_norm": 0.8674590168823036,
      "learning_rate": 8.914684731421382e-06,
      "loss": 0.1628,
      "step": 8135
    },
    {
      "epoch": 0.2373534045160161,
      "grad_norm": 0.8672015430406163,
      "learning_rate": 8.914390812154094e-06,
      "loss": 0.1769,
      "step": 8136
    },
    {
      "epoch": 0.23738257774665966,
      "grad_norm": 0.6706117907279768,
      "learning_rate": 8.914096857940062e-06,
      "loss": 0.161,
      "step": 8137
    },
    {
      "epoch": 0.23741175097730322,
      "grad_norm": 0.8967270688761205,
      "learning_rate": 8.913802868781907e-06,
      "loss": 0.1873,
      "step": 8138
    },
    {
      "epoch": 0.2374409242079468,
      "grad_norm": 0.7705670336362179,
      "learning_rate": 8.913508844682255e-06,
      "loss": 0.1524,
      "step": 8139
    },
    {
      "epoch": 0.23747009743859035,
      "grad_norm": 0.822662568821872,
      "learning_rate": 8.91321478564373e-06,
      "loss": 0.1529,
      "step": 8140
    },
    {
      "epoch": 0.2374992706692339,
      "grad_norm": 0.8236205950678361,
      "learning_rate": 8.912920691668957e-06,
      "loss": 0.1762,
      "step": 8141
    },
    {
      "epoch": 0.23752844389987748,
      "grad_norm": 0.8853210785081229,
      "learning_rate": 8.912626562760563e-06,
      "loss": 0.1395,
      "step": 8142
    },
    {
      "epoch": 0.23755761713052104,
      "grad_norm": 0.7586904563452705,
      "learning_rate": 8.912332398921171e-06,
      "loss": 0.1518,
      "step": 8143
    },
    {
      "epoch": 0.2375867903611646,
      "grad_norm": 0.8651132750866162,
      "learning_rate": 8.91203820015341e-06,
      "loss": 0.1772,
      "step": 8144
    },
    {
      "epoch": 0.23761596359180814,
      "grad_norm": 0.9343750341887734,
      "learning_rate": 8.911743966459908e-06,
      "loss": 0.1534,
      "step": 8145
    },
    {
      "epoch": 0.23764513682245172,
      "grad_norm": 0.8400181663770149,
      "learning_rate": 8.911449697843286e-06,
      "loss": 0.1513,
      "step": 8146
    },
    {
      "epoch": 0.23767431005309528,
      "grad_norm": 0.8103376553564225,
      "learning_rate": 8.911155394306177e-06,
      "loss": 0.1513,
      "step": 8147
    },
    {
      "epoch": 0.23770348328373883,
      "grad_norm": 1.040231236289833,
      "learning_rate": 8.910861055851208e-06,
      "loss": 0.1707,
      "step": 8148
    },
    {
      "epoch": 0.2377326565143824,
      "grad_norm": 0.7340572763322634,
      "learning_rate": 8.910566682481001e-06,
      "loss": 0.1475,
      "step": 8149
    },
    {
      "epoch": 0.23776182974502597,
      "grad_norm": 0.8822008953144147,
      "learning_rate": 8.91027227419819e-06,
      "loss": 0.147,
      "step": 8150
    },
    {
      "epoch": 0.23779100297566952,
      "grad_norm": 0.7228118620003549,
      "learning_rate": 8.909977831005403e-06,
      "loss": 0.1408,
      "step": 8151
    },
    {
      "epoch": 0.2378201762063131,
      "grad_norm": 1.1023290703753612,
      "learning_rate": 8.909683352905267e-06,
      "loss": 0.1888,
      "step": 8152
    },
    {
      "epoch": 0.23784934943695665,
      "grad_norm": 0.8220589699407617,
      "learning_rate": 8.90938883990041e-06,
      "loss": 0.1543,
      "step": 8153
    },
    {
      "epoch": 0.2378785226676002,
      "grad_norm": 0.9489623908894284,
      "learning_rate": 8.909094291993464e-06,
      "loss": 0.157,
      "step": 8154
    },
    {
      "epoch": 0.23790769589824376,
      "grad_norm": 0.7905877103053048,
      "learning_rate": 8.908799709187057e-06,
      "loss": 0.1531,
      "step": 8155
    },
    {
      "epoch": 0.23793686912888734,
      "grad_norm": 0.8957629797758214,
      "learning_rate": 8.908505091483819e-06,
      "loss": 0.1514,
      "step": 8156
    },
    {
      "epoch": 0.2379660423595309,
      "grad_norm": 0.9786727450738257,
      "learning_rate": 8.90821043888638e-06,
      "loss": 0.1827,
      "step": 8157
    },
    {
      "epoch": 0.23799521559017445,
      "grad_norm": 0.7937521194492156,
      "learning_rate": 8.907915751397372e-06,
      "loss": 0.1646,
      "step": 8158
    },
    {
      "epoch": 0.23802438882081803,
      "grad_norm": 0.702109595124551,
      "learning_rate": 8.907621029019425e-06,
      "loss": 0.1629,
      "step": 8159
    },
    {
      "epoch": 0.23805356205146158,
      "grad_norm": 0.876485020854691,
      "learning_rate": 8.907326271755171e-06,
      "loss": 0.1617,
      "step": 8160
    },
    {
      "epoch": 0.23808273528210513,
      "grad_norm": 0.8172164952933865,
      "learning_rate": 8.90703147960724e-06,
      "loss": 0.1543,
      "step": 8161
    },
    {
      "epoch": 0.23811190851274872,
      "grad_norm": 1.0589003402978017,
      "learning_rate": 8.906736652578264e-06,
      "loss": 0.1711,
      "step": 8162
    },
    {
      "epoch": 0.23814108174339227,
      "grad_norm": 0.8320959954646029,
      "learning_rate": 8.906441790670877e-06,
      "loss": 0.1676,
      "step": 8163
    },
    {
      "epoch": 0.23817025497403582,
      "grad_norm": 0.7943563348025561,
      "learning_rate": 8.906146893887708e-06,
      "loss": 0.1526,
      "step": 8164
    },
    {
      "epoch": 0.23819942820467938,
      "grad_norm": 0.9243283488288019,
      "learning_rate": 8.905851962231393e-06,
      "loss": 0.1519,
      "step": 8165
    },
    {
      "epoch": 0.23822860143532296,
      "grad_norm": 0.6483119265403959,
      "learning_rate": 8.905556995704566e-06,
      "loss": 0.1452,
      "step": 8166
    },
    {
      "epoch": 0.2382577746659665,
      "grad_norm": 1.0631456207996153,
      "learning_rate": 8.905261994309857e-06,
      "loss": 0.1974,
      "step": 8167
    },
    {
      "epoch": 0.23828694789661006,
      "grad_norm": 0.7426459150555751,
      "learning_rate": 8.9049669580499e-06,
      "loss": 0.1361,
      "step": 8168
    },
    {
      "epoch": 0.23831612112725364,
      "grad_norm": 0.7508657482341585,
      "learning_rate": 8.904671886927334e-06,
      "loss": 0.1559,
      "step": 8169
    },
    {
      "epoch": 0.2383452943578972,
      "grad_norm": 0.9359231183611243,
      "learning_rate": 8.904376780944786e-06,
      "loss": 0.1678,
      "step": 8170
    },
    {
      "epoch": 0.23837446758854075,
      "grad_norm": 0.8394916958169752,
      "learning_rate": 8.904081640104895e-06,
      "loss": 0.1437,
      "step": 8171
    },
    {
      "epoch": 0.2384036408191843,
      "grad_norm": 0.7594288412337721,
      "learning_rate": 8.903786464410295e-06,
      "loss": 0.1254,
      "step": 8172
    },
    {
      "epoch": 0.23843281404982788,
      "grad_norm": 0.8464185946780548,
      "learning_rate": 8.903491253863622e-06,
      "loss": 0.1525,
      "step": 8173
    },
    {
      "epoch": 0.23846198728047144,
      "grad_norm": 0.8671937451759861,
      "learning_rate": 8.903196008467511e-06,
      "loss": 0.1619,
      "step": 8174
    },
    {
      "epoch": 0.238491160511115,
      "grad_norm": 0.9450577389185771,
      "learning_rate": 8.902900728224597e-06,
      "loss": 0.1697,
      "step": 8175
    },
    {
      "epoch": 0.23852033374175857,
      "grad_norm": 0.6310459193767253,
      "learning_rate": 8.902605413137517e-06,
      "loss": 0.1456,
      "step": 8176
    },
    {
      "epoch": 0.23854950697240213,
      "grad_norm": 0.8438666745577079,
      "learning_rate": 8.902310063208907e-06,
      "loss": 0.1753,
      "step": 8177
    },
    {
      "epoch": 0.23857868020304568,
      "grad_norm": 0.7813218821139011,
      "learning_rate": 8.902014678441406e-06,
      "loss": 0.1585,
      "step": 8178
    },
    {
      "epoch": 0.23860785343368926,
      "grad_norm": 0.7184763683271836,
      "learning_rate": 8.90171925883765e-06,
      "loss": 0.179,
      "step": 8179
    },
    {
      "epoch": 0.2386370266643328,
      "grad_norm": 0.788171670428232,
      "learning_rate": 8.901423804400273e-06,
      "loss": 0.1457,
      "step": 8180
    },
    {
      "epoch": 0.23866619989497637,
      "grad_norm": 0.8076824354053939,
      "learning_rate": 8.901128315131917e-06,
      "loss": 0.147,
      "step": 8181
    },
    {
      "epoch": 0.23869537312561992,
      "grad_norm": 0.7033556840440839,
      "learning_rate": 8.900832791035218e-06,
      "loss": 0.1428,
      "step": 8182
    },
    {
      "epoch": 0.2387245463562635,
      "grad_norm": 0.7371450897019787,
      "learning_rate": 8.900537232112816e-06,
      "loss": 0.1681,
      "step": 8183
    },
    {
      "epoch": 0.23875371958690705,
      "grad_norm": 0.7652072647129703,
      "learning_rate": 8.90024163836735e-06,
      "loss": 0.1788,
      "step": 8184
    },
    {
      "epoch": 0.2387828928175506,
      "grad_norm": 0.9637137372175031,
      "learning_rate": 8.899946009801455e-06,
      "loss": 0.1649,
      "step": 8185
    },
    {
      "epoch": 0.2388120660481942,
      "grad_norm": 0.8459098693789464,
      "learning_rate": 8.899650346417773e-06,
      "loss": 0.1829,
      "step": 8186
    },
    {
      "epoch": 0.23884123927883774,
      "grad_norm": 1.0011578724095007,
      "learning_rate": 8.899354648218947e-06,
      "loss": 0.1615,
      "step": 8187
    },
    {
      "epoch": 0.2388704125094813,
      "grad_norm": 0.7948233494079993,
      "learning_rate": 8.899058915207611e-06,
      "loss": 0.1681,
      "step": 8188
    },
    {
      "epoch": 0.23889958574012485,
      "grad_norm": 0.9046111595988586,
      "learning_rate": 8.898763147386408e-06,
      "loss": 0.1338,
      "step": 8189
    },
    {
      "epoch": 0.23892875897076843,
      "grad_norm": 0.8962855423839565,
      "learning_rate": 8.898467344757979e-06,
      "loss": 0.1772,
      "step": 8190
    },
    {
      "epoch": 0.23895793220141198,
      "grad_norm": 0.7896687967471685,
      "learning_rate": 8.898171507324964e-06,
      "loss": 0.1607,
      "step": 8191
    },
    {
      "epoch": 0.23898710543205554,
      "grad_norm": 1.0707583006921217,
      "learning_rate": 8.897875635090005e-06,
      "loss": 0.1556,
      "step": 8192
    },
    {
      "epoch": 0.23901627866269912,
      "grad_norm": 0.909136269454888,
      "learning_rate": 8.89757972805574e-06,
      "loss": 0.1516,
      "step": 8193
    },
    {
      "epoch": 0.23904545189334267,
      "grad_norm": 0.8802805681779381,
      "learning_rate": 8.897283786224817e-06,
      "loss": 0.1511,
      "step": 8194
    },
    {
      "epoch": 0.23907462512398622,
      "grad_norm": 0.749464207805033,
      "learning_rate": 8.896987809599874e-06,
      "loss": 0.1702,
      "step": 8195
    },
    {
      "epoch": 0.2391037983546298,
      "grad_norm": 0.6808348804245334,
      "learning_rate": 8.896691798183552e-06,
      "loss": 0.1477,
      "step": 8196
    },
    {
      "epoch": 0.23913297158527336,
      "grad_norm": 0.7649554312826676,
      "learning_rate": 8.896395751978498e-06,
      "loss": 0.1527,
      "step": 8197
    },
    {
      "epoch": 0.2391621448159169,
      "grad_norm": 0.9094753134461215,
      "learning_rate": 8.896099670987351e-06,
      "loss": 0.1826,
      "step": 8198
    },
    {
      "epoch": 0.23919131804656046,
      "grad_norm": 0.8199112485425468,
      "learning_rate": 8.895803555212757e-06,
      "loss": 0.1668,
      "step": 8199
    },
    {
      "epoch": 0.23922049127720404,
      "grad_norm": 0.7843508250661938,
      "learning_rate": 8.89550740465736e-06,
      "loss": 0.1617,
      "step": 8200
    },
    {
      "epoch": 0.2392496645078476,
      "grad_norm": 0.7023619564626988,
      "learning_rate": 8.895211219323802e-06,
      "loss": 0.1416,
      "step": 8201
    },
    {
      "epoch": 0.23927883773849115,
      "grad_norm": 0.8051786204993654,
      "learning_rate": 8.894914999214727e-06,
      "loss": 0.1609,
      "step": 8202
    },
    {
      "epoch": 0.23930801096913473,
      "grad_norm": 0.7610729256588492,
      "learning_rate": 8.894618744332783e-06,
      "loss": 0.1716,
      "step": 8203
    },
    {
      "epoch": 0.23933718419977829,
      "grad_norm": 0.8722026870288849,
      "learning_rate": 8.89432245468061e-06,
      "loss": 0.1743,
      "step": 8204
    },
    {
      "epoch": 0.23936635743042184,
      "grad_norm": 0.7253988078472988,
      "learning_rate": 8.894026130260858e-06,
      "loss": 0.1566,
      "step": 8205
    },
    {
      "epoch": 0.23939553066106542,
      "grad_norm": 0.7558594801533177,
      "learning_rate": 8.89372977107617e-06,
      "loss": 0.1722,
      "step": 8206
    },
    {
      "epoch": 0.23942470389170897,
      "grad_norm": 0.715904179026281,
      "learning_rate": 8.89343337712919e-06,
      "loss": 0.1419,
      "step": 8207
    },
    {
      "epoch": 0.23945387712235253,
      "grad_norm": 0.8812508029464208,
      "learning_rate": 8.893136948422569e-06,
      "loss": 0.1581,
      "step": 8208
    },
    {
      "epoch": 0.23948305035299608,
      "grad_norm": 0.8586838495986173,
      "learning_rate": 8.89284048495895e-06,
      "loss": 0.1779,
      "step": 8209
    },
    {
      "epoch": 0.23951222358363966,
      "grad_norm": 0.7406003304511822,
      "learning_rate": 8.892543986740979e-06,
      "loss": 0.1473,
      "step": 8210
    },
    {
      "epoch": 0.23954139681428321,
      "grad_norm": 0.8979035192711243,
      "learning_rate": 8.892247453771306e-06,
      "loss": 0.1515,
      "step": 8211
    },
    {
      "epoch": 0.23957057004492677,
      "grad_norm": 0.6963097051903574,
      "learning_rate": 8.891950886052576e-06,
      "loss": 0.1394,
      "step": 8212
    },
    {
      "epoch": 0.23959974327557035,
      "grad_norm": 1.267749439137429,
      "learning_rate": 8.891654283587438e-06,
      "loss": 0.165,
      "step": 8213
    },
    {
      "epoch": 0.2396289165062139,
      "grad_norm": 1.1354387491417997,
      "learning_rate": 8.891357646378538e-06,
      "loss": 0.1672,
      "step": 8214
    },
    {
      "epoch": 0.23965808973685745,
      "grad_norm": 0.7444284210006101,
      "learning_rate": 8.891060974428528e-06,
      "loss": 0.1744,
      "step": 8215
    },
    {
      "epoch": 0.239687262967501,
      "grad_norm": 0.6855454737424209,
      "learning_rate": 8.890764267740053e-06,
      "loss": 0.1379,
      "step": 8216
    },
    {
      "epoch": 0.2397164361981446,
      "grad_norm": 0.9024297879509084,
      "learning_rate": 8.890467526315765e-06,
      "loss": 0.1595,
      "step": 8217
    },
    {
      "epoch": 0.23974560942878814,
      "grad_norm": 0.7834463539446351,
      "learning_rate": 8.89017075015831e-06,
      "loss": 0.1689,
      "step": 8218
    },
    {
      "epoch": 0.2397747826594317,
      "grad_norm": 0.988813715135792,
      "learning_rate": 8.889873939270341e-06,
      "loss": 0.1837,
      "step": 8219
    },
    {
      "epoch": 0.23980395589007528,
      "grad_norm": 0.7267918856114794,
      "learning_rate": 8.889577093654504e-06,
      "loss": 0.1419,
      "step": 8220
    },
    {
      "epoch": 0.23983312912071883,
      "grad_norm": 0.7787868909140904,
      "learning_rate": 8.889280213313454e-06,
      "loss": 0.1544,
      "step": 8221
    },
    {
      "epoch": 0.23986230235136238,
      "grad_norm": 1.0797811006635678,
      "learning_rate": 8.888983298249838e-06,
      "loss": 0.1623,
      "step": 8222
    },
    {
      "epoch": 0.23989147558200596,
      "grad_norm": 0.8443592671417744,
      "learning_rate": 8.888686348466305e-06,
      "loss": 0.1674,
      "step": 8223
    },
    {
      "epoch": 0.23992064881264952,
      "grad_norm": 1.0697659872943215,
      "learning_rate": 8.88838936396551e-06,
      "loss": 0.1826,
      "step": 8224
    },
    {
      "epoch": 0.23994982204329307,
      "grad_norm": 0.7664760824723789,
      "learning_rate": 8.888092344750103e-06,
      "loss": 0.1366,
      "step": 8225
    },
    {
      "epoch": 0.23997899527393662,
      "grad_norm": 0.9754768917787945,
      "learning_rate": 8.887795290822736e-06,
      "loss": 0.1776,
      "step": 8226
    },
    {
      "epoch": 0.2400081685045802,
      "grad_norm": 0.8573701254119962,
      "learning_rate": 8.887498202186062e-06,
      "loss": 0.1644,
      "step": 8227
    },
    {
      "epoch": 0.24003734173522376,
      "grad_norm": 0.9220847422993835,
      "learning_rate": 8.88720107884273e-06,
      "loss": 0.1358,
      "step": 8228
    },
    {
      "epoch": 0.2400665149658673,
      "grad_norm": 0.9619544322343004,
      "learning_rate": 8.886903920795396e-06,
      "loss": 0.1627,
      "step": 8229
    },
    {
      "epoch": 0.2400956881965109,
      "grad_norm": 0.9877543280422053,
      "learning_rate": 8.88660672804671e-06,
      "loss": 0.1609,
      "step": 8230
    },
    {
      "epoch": 0.24012486142715445,
      "grad_norm": 0.770908935040861,
      "learning_rate": 8.886309500599328e-06,
      "loss": 0.1835,
      "step": 8231
    },
    {
      "epoch": 0.240154034657798,
      "grad_norm": 1.1567854629304466,
      "learning_rate": 8.886012238455903e-06,
      "loss": 0.1808,
      "step": 8232
    },
    {
      "epoch": 0.24018320788844155,
      "grad_norm": 1.1332016046396984,
      "learning_rate": 8.885714941619087e-06,
      "loss": 0.1654,
      "step": 8233
    },
    {
      "epoch": 0.24021238111908513,
      "grad_norm": 0.9002091516933857,
      "learning_rate": 8.885417610091536e-06,
      "loss": 0.1418,
      "step": 8234
    },
    {
      "epoch": 0.2402415543497287,
      "grad_norm": 1.2171802257120898,
      "learning_rate": 8.885120243875905e-06,
      "loss": 0.1466,
      "step": 8235
    },
    {
      "epoch": 0.24027072758037224,
      "grad_norm": 0.9221963422303984,
      "learning_rate": 8.884822842974847e-06,
      "loss": 0.1697,
      "step": 8236
    },
    {
      "epoch": 0.24029990081101582,
      "grad_norm": 0.9012719303878594,
      "learning_rate": 8.88452540739102e-06,
      "loss": 0.1695,
      "step": 8237
    },
    {
      "epoch": 0.24032907404165937,
      "grad_norm": 0.8564097549638308,
      "learning_rate": 8.884227937127076e-06,
      "loss": 0.1563,
      "step": 8238
    },
    {
      "epoch": 0.24035824727230293,
      "grad_norm": 0.8370971704673468,
      "learning_rate": 8.883930432185673e-06,
      "loss": 0.1455,
      "step": 8239
    },
    {
      "epoch": 0.2403874205029465,
      "grad_norm": 1.0679698209456254,
      "learning_rate": 8.883632892569466e-06,
      "loss": 0.1617,
      "step": 8240
    },
    {
      "epoch": 0.24041659373359006,
      "grad_norm": 1.118725383962962,
      "learning_rate": 8.88333531828111e-06,
      "loss": 0.1589,
      "step": 8241
    },
    {
      "epoch": 0.24044576696423361,
      "grad_norm": 0.9374701175420922,
      "learning_rate": 8.883037709323263e-06,
      "loss": 0.1632,
      "step": 8242
    },
    {
      "epoch": 0.24047494019487717,
      "grad_norm": 0.894106912041009,
      "learning_rate": 8.882740065698586e-06,
      "loss": 0.1641,
      "step": 8243
    },
    {
      "epoch": 0.24050411342552075,
      "grad_norm": 0.88794811179921,
      "learning_rate": 8.882442387409729e-06,
      "loss": 0.1426,
      "step": 8244
    },
    {
      "epoch": 0.2405332866561643,
      "grad_norm": 0.8991441307822194,
      "learning_rate": 8.882144674459354e-06,
      "loss": 0.1781,
      "step": 8245
    },
    {
      "epoch": 0.24056245988680786,
      "grad_norm": 0.8587688129744776,
      "learning_rate": 8.88184692685012e-06,
      "loss": 0.1424,
      "step": 8246
    },
    {
      "epoch": 0.24059163311745144,
      "grad_norm": 1.0713316984903607,
      "learning_rate": 8.88154914458468e-06,
      "loss": 0.1662,
      "step": 8247
    },
    {
      "epoch": 0.240620806348095,
      "grad_norm": 0.8101948080509492,
      "learning_rate": 8.881251327665699e-06,
      "loss": 0.1503,
      "step": 8248
    },
    {
      "epoch": 0.24064997957873854,
      "grad_norm": 0.7635063527922878,
      "learning_rate": 8.88095347609583e-06,
      "loss": 0.1642,
      "step": 8249
    },
    {
      "epoch": 0.24067915280938212,
      "grad_norm": 0.882359914169995,
      "learning_rate": 8.880655589877737e-06,
      "loss": 0.163,
      "step": 8250
    },
    {
      "epoch": 0.24070832604002568,
      "grad_norm": 0.6826838905996303,
      "learning_rate": 8.880357669014077e-06,
      "loss": 0.1562,
      "step": 8251
    },
    {
      "epoch": 0.24073749927066923,
      "grad_norm": 0.7851893250684556,
      "learning_rate": 8.88005971350751e-06,
      "loss": 0.1518,
      "step": 8252
    },
    {
      "epoch": 0.24076667250131278,
      "grad_norm": 0.8064569725792964,
      "learning_rate": 8.879761723360695e-06,
      "loss": 0.1783,
      "step": 8253
    },
    {
      "epoch": 0.24079584573195636,
      "grad_norm": 0.7934433704487039,
      "learning_rate": 8.879463698576294e-06,
      "loss": 0.1567,
      "step": 8254
    },
    {
      "epoch": 0.24082501896259992,
      "grad_norm": 0.7199144588868956,
      "learning_rate": 8.879165639156968e-06,
      "loss": 0.1509,
      "step": 8255
    },
    {
      "epoch": 0.24085419219324347,
      "grad_norm": 0.8761875305665132,
      "learning_rate": 8.878867545105377e-06,
      "loss": 0.1502,
      "step": 8256
    },
    {
      "epoch": 0.24088336542388705,
      "grad_norm": 1.013003472781489,
      "learning_rate": 8.87856941642418e-06,
      "loss": 0.1408,
      "step": 8257
    },
    {
      "epoch": 0.2409125386545306,
      "grad_norm": 0.9854636634111242,
      "learning_rate": 8.878271253116044e-06,
      "loss": 0.1632,
      "step": 8258
    },
    {
      "epoch": 0.24094171188517416,
      "grad_norm": 0.7951311412085198,
      "learning_rate": 8.877973055183629e-06,
      "loss": 0.2009,
      "step": 8259
    },
    {
      "epoch": 0.2409708851158177,
      "grad_norm": 1.0186181133174064,
      "learning_rate": 8.877674822629595e-06,
      "loss": 0.1964,
      "step": 8260
    },
    {
      "epoch": 0.2410000583464613,
      "grad_norm": 0.9672603265242955,
      "learning_rate": 8.877376555456604e-06,
      "loss": 0.1573,
      "step": 8261
    },
    {
      "epoch": 0.24102923157710485,
      "grad_norm": 0.8894235143992626,
      "learning_rate": 8.877078253667321e-06,
      "loss": 0.1908,
      "step": 8262
    },
    {
      "epoch": 0.2410584048077484,
      "grad_norm": 0.8754001686628599,
      "learning_rate": 8.876779917264412e-06,
      "loss": 0.1466,
      "step": 8263
    },
    {
      "epoch": 0.24108757803839198,
      "grad_norm": 1.0628663388415525,
      "learning_rate": 8.876481546250535e-06,
      "loss": 0.157,
      "step": 8264
    },
    {
      "epoch": 0.24111675126903553,
      "grad_norm": 0.9723407257991477,
      "learning_rate": 8.876183140628355e-06,
      "loss": 0.145,
      "step": 8265
    },
    {
      "epoch": 0.2411459244996791,
      "grad_norm": 0.9469882403125053,
      "learning_rate": 8.87588470040054e-06,
      "loss": 0.1625,
      "step": 8266
    },
    {
      "epoch": 0.24117509773032267,
      "grad_norm": 0.8699907055802648,
      "learning_rate": 8.87558622556975e-06,
      "loss": 0.1407,
      "step": 8267
    },
    {
      "epoch": 0.24120427096096622,
      "grad_norm": 0.9010022926823396,
      "learning_rate": 8.875287716138651e-06,
      "loss": 0.1565,
      "step": 8268
    },
    {
      "epoch": 0.24123344419160977,
      "grad_norm": 0.8853167383305223,
      "learning_rate": 8.87498917210991e-06,
      "loss": 0.147,
      "step": 8269
    },
    {
      "epoch": 0.24126261742225333,
      "grad_norm": 0.7458998076653564,
      "learning_rate": 8.87469059348619e-06,
      "loss": 0.1628,
      "step": 8270
    },
    {
      "epoch": 0.2412917906528969,
      "grad_norm": 0.9189429055503623,
      "learning_rate": 8.874391980270157e-06,
      "loss": 0.1468,
      "step": 8271
    },
    {
      "epoch": 0.24132096388354046,
      "grad_norm": 0.7682745223848754,
      "learning_rate": 8.874093332464477e-06,
      "loss": 0.15,
      "step": 8272
    },
    {
      "epoch": 0.24135013711418402,
      "grad_norm": 0.7591197071634669,
      "learning_rate": 8.873794650071819e-06,
      "loss": 0.1593,
      "step": 8273
    },
    {
      "epoch": 0.2413793103448276,
      "grad_norm": 1.1246996911394547,
      "learning_rate": 8.873495933094844e-06,
      "loss": 0.1581,
      "step": 8274
    },
    {
      "epoch": 0.24140848357547115,
      "grad_norm": 0.7076735575515133,
      "learning_rate": 8.873197181536223e-06,
      "loss": 0.1543,
      "step": 8275
    },
    {
      "epoch": 0.2414376568061147,
      "grad_norm": 0.9029068568351564,
      "learning_rate": 8.872898395398624e-06,
      "loss": 0.1755,
      "step": 8276
    },
    {
      "epoch": 0.24146683003675828,
      "grad_norm": 0.7549673909441204,
      "learning_rate": 8.87259957468471e-06,
      "loss": 0.1629,
      "step": 8277
    },
    {
      "epoch": 0.24149600326740184,
      "grad_norm": 0.7575082299161752,
      "learning_rate": 8.872300719397152e-06,
      "loss": 0.1514,
      "step": 8278
    },
    {
      "epoch": 0.2415251764980454,
      "grad_norm": 0.8352503239311698,
      "learning_rate": 8.872001829538619e-06,
      "loss": 0.1987,
      "step": 8279
    },
    {
      "epoch": 0.24155434972868894,
      "grad_norm": 0.7337178680236806,
      "learning_rate": 8.871702905111776e-06,
      "loss": 0.167,
      "step": 8280
    },
    {
      "epoch": 0.24158352295933253,
      "grad_norm": 0.7666388414207655,
      "learning_rate": 8.871403946119294e-06,
      "loss": 0.1666,
      "step": 8281
    },
    {
      "epoch": 0.24161269618997608,
      "grad_norm": 0.8030265459227783,
      "learning_rate": 8.871104952563843e-06,
      "loss": 0.174,
      "step": 8282
    },
    {
      "epoch": 0.24164186942061963,
      "grad_norm": 0.906392696766294,
      "learning_rate": 8.870805924448091e-06,
      "loss": 0.1606,
      "step": 8283
    },
    {
      "epoch": 0.2416710426512632,
      "grad_norm": 0.6662394594558032,
      "learning_rate": 8.870506861774708e-06,
      "loss": 0.1489,
      "step": 8284
    },
    {
      "epoch": 0.24170021588190677,
      "grad_norm": 0.8848343698281591,
      "learning_rate": 8.870207764546363e-06,
      "loss": 0.1632,
      "step": 8285
    },
    {
      "epoch": 0.24172938911255032,
      "grad_norm": 0.9298968774587074,
      "learning_rate": 8.869908632765727e-06,
      "loss": 0.1734,
      "step": 8286
    },
    {
      "epoch": 0.24175856234319387,
      "grad_norm": 0.8343997794773508,
      "learning_rate": 8.86960946643547e-06,
      "loss": 0.1832,
      "step": 8287
    },
    {
      "epoch": 0.24178773557383745,
      "grad_norm": 1.220524871783636,
      "learning_rate": 8.869310265558264e-06,
      "loss": 0.1592,
      "step": 8288
    },
    {
      "epoch": 0.241816908804481,
      "grad_norm": 0.8262776044773047,
      "learning_rate": 8.869011030136781e-06,
      "loss": 0.1787,
      "step": 8289
    },
    {
      "epoch": 0.24184608203512456,
      "grad_norm": 1.1037398922421195,
      "learning_rate": 8.868711760173688e-06,
      "loss": 0.1785,
      "step": 8290
    },
    {
      "epoch": 0.24187525526576814,
      "grad_norm": 0.7803727084389832,
      "learning_rate": 8.868412455671663e-06,
      "loss": 0.1632,
      "step": 8291
    },
    {
      "epoch": 0.2419044284964117,
      "grad_norm": 0.9759382256345664,
      "learning_rate": 8.868113116633374e-06,
      "loss": 0.184,
      "step": 8292
    },
    {
      "epoch": 0.24193360172705525,
      "grad_norm": 0.887876272034532,
      "learning_rate": 8.867813743061493e-06,
      "loss": 0.1361,
      "step": 8293
    },
    {
      "epoch": 0.24196277495769883,
      "grad_norm": 0.807529058890851,
      "learning_rate": 8.867514334958696e-06,
      "loss": 0.1425,
      "step": 8294
    },
    {
      "epoch": 0.24199194818834238,
      "grad_norm": 0.7849043958733483,
      "learning_rate": 8.867214892327653e-06,
      "loss": 0.1573,
      "step": 8295
    },
    {
      "epoch": 0.24202112141898594,
      "grad_norm": 0.9015404177738828,
      "learning_rate": 8.86691541517104e-06,
      "loss": 0.1952,
      "step": 8296
    },
    {
      "epoch": 0.2420502946496295,
      "grad_norm": 0.9219305059684217,
      "learning_rate": 8.866615903491529e-06,
      "loss": 0.152,
      "step": 8297
    },
    {
      "epoch": 0.24207946788027307,
      "grad_norm": 0.8353303290023836,
      "learning_rate": 8.866316357291793e-06,
      "loss": 0.161,
      "step": 8298
    },
    {
      "epoch": 0.24210864111091662,
      "grad_norm": 0.8584782625626277,
      "learning_rate": 8.866016776574509e-06,
      "loss": 0.1634,
      "step": 8299
    },
    {
      "epoch": 0.24213781434156018,
      "grad_norm": 0.8010399387149826,
      "learning_rate": 8.865717161342348e-06,
      "loss": 0.1501,
      "step": 8300
    },
    {
      "epoch": 0.24216698757220376,
      "grad_norm": 0.8403769802589146,
      "learning_rate": 8.86541751159799e-06,
      "loss": 0.1468,
      "step": 8301
    },
    {
      "epoch": 0.2421961608028473,
      "grad_norm": 0.9557938237322129,
      "learning_rate": 8.865117827344106e-06,
      "loss": 0.1571,
      "step": 8302
    },
    {
      "epoch": 0.24222533403349086,
      "grad_norm": 1.036574215317314,
      "learning_rate": 8.864818108583372e-06,
      "loss": 0.1533,
      "step": 8303
    },
    {
      "epoch": 0.24225450726413442,
      "grad_norm": 1.0665467470789982,
      "learning_rate": 8.864518355318465e-06,
      "loss": 0.1583,
      "step": 8304
    },
    {
      "epoch": 0.242283680494778,
      "grad_norm": 0.8899892947644182,
      "learning_rate": 8.864218567552061e-06,
      "loss": 0.16,
      "step": 8305
    },
    {
      "epoch": 0.24231285372542155,
      "grad_norm": 0.8293750946816629,
      "learning_rate": 8.863918745286836e-06,
      "loss": 0.1801,
      "step": 8306
    },
    {
      "epoch": 0.2423420269560651,
      "grad_norm": 0.9905375946112777,
      "learning_rate": 8.863618888525466e-06,
      "loss": 0.1587,
      "step": 8307
    },
    {
      "epoch": 0.24237120018670869,
      "grad_norm": 0.6796720878474997,
      "learning_rate": 8.863318997270628e-06,
      "loss": 0.1318,
      "step": 8308
    },
    {
      "epoch": 0.24240037341735224,
      "grad_norm": 0.8107026806590917,
      "learning_rate": 8.863019071525004e-06,
      "loss": 0.1567,
      "step": 8309
    },
    {
      "epoch": 0.2424295466479958,
      "grad_norm": 1.2963652228907871,
      "learning_rate": 8.862719111291265e-06,
      "loss": 0.1704,
      "step": 8310
    },
    {
      "epoch": 0.24245871987863937,
      "grad_norm": 0.7919308649984564,
      "learning_rate": 8.862419116572091e-06,
      "loss": 0.168,
      "step": 8311
    },
    {
      "epoch": 0.24248789310928293,
      "grad_norm": 0.7014596902903338,
      "learning_rate": 8.862119087370164e-06,
      "loss": 0.1485,
      "step": 8312
    },
    {
      "epoch": 0.24251706633992648,
      "grad_norm": 0.8910306467453531,
      "learning_rate": 8.861819023688158e-06,
      "loss": 0.1802,
      "step": 8313
    },
    {
      "epoch": 0.24254623957057003,
      "grad_norm": 0.6840901742528571,
      "learning_rate": 8.861518925528753e-06,
      "loss": 0.1311,
      "step": 8314
    },
    {
      "epoch": 0.2425754128012136,
      "grad_norm": 0.6571646828440548,
      "learning_rate": 8.861218792894631e-06,
      "loss": 0.1485,
      "step": 8315
    },
    {
      "epoch": 0.24260458603185717,
      "grad_norm": 0.9296671671227267,
      "learning_rate": 8.860918625788468e-06,
      "loss": 0.1413,
      "step": 8316
    },
    {
      "epoch": 0.24263375926250072,
      "grad_norm": 0.7680292881436243,
      "learning_rate": 8.860618424212945e-06,
      "loss": 0.1631,
      "step": 8317
    },
    {
      "epoch": 0.2426629324931443,
      "grad_norm": 0.7553524615331095,
      "learning_rate": 8.860318188170744e-06,
      "loss": 0.1505,
      "step": 8318
    },
    {
      "epoch": 0.24269210572378785,
      "grad_norm": 0.7744109809950512,
      "learning_rate": 8.860017917664543e-06,
      "loss": 0.1543,
      "step": 8319
    },
    {
      "epoch": 0.2427212789544314,
      "grad_norm": 0.741122023611664,
      "learning_rate": 8.859717612697023e-06,
      "loss": 0.1649,
      "step": 8320
    },
    {
      "epoch": 0.242750452185075,
      "grad_norm": 0.8842410624110942,
      "learning_rate": 8.859417273270866e-06,
      "loss": 0.146,
      "step": 8321
    },
    {
      "epoch": 0.24277962541571854,
      "grad_norm": 0.8396582509731948,
      "learning_rate": 8.859116899388752e-06,
      "loss": 0.1991,
      "step": 8322
    },
    {
      "epoch": 0.2428087986463621,
      "grad_norm": 0.8399060739518684,
      "learning_rate": 8.858816491053364e-06,
      "loss": 0.1617,
      "step": 8323
    },
    {
      "epoch": 0.24283797187700565,
      "grad_norm": 0.932869570379997,
      "learning_rate": 8.858516048267383e-06,
      "loss": 0.1553,
      "step": 8324
    },
    {
      "epoch": 0.24286714510764923,
      "grad_norm": 0.9219004107075606,
      "learning_rate": 8.85821557103349e-06,
      "loss": 0.1521,
      "step": 8325
    },
    {
      "epoch": 0.24289631833829278,
      "grad_norm": 0.7569424998936463,
      "learning_rate": 8.857915059354373e-06,
      "loss": 0.1229,
      "step": 8326
    },
    {
      "epoch": 0.24292549156893634,
      "grad_norm": 0.9706743467715019,
      "learning_rate": 8.85761451323271e-06,
      "loss": 0.155,
      "step": 8327
    },
    {
      "epoch": 0.24295466479957992,
      "grad_norm": 0.9094527668747412,
      "learning_rate": 8.857313932671186e-06,
      "loss": 0.1528,
      "step": 8328
    },
    {
      "epoch": 0.24298383803022347,
      "grad_norm": 0.9351577674569229,
      "learning_rate": 8.857013317672484e-06,
      "loss": 0.1449,
      "step": 8329
    },
    {
      "epoch": 0.24301301126086702,
      "grad_norm": 0.7590114397369029,
      "learning_rate": 8.856712668239287e-06,
      "loss": 0.144,
      "step": 8330
    },
    {
      "epoch": 0.24304218449151058,
      "grad_norm": 0.8761829605476987,
      "learning_rate": 8.85641198437428e-06,
      "loss": 0.1637,
      "step": 8331
    },
    {
      "epoch": 0.24307135772215416,
      "grad_norm": 0.9394578365099572,
      "learning_rate": 8.856111266080149e-06,
      "loss": 0.1459,
      "step": 8332
    },
    {
      "epoch": 0.2431005309527977,
      "grad_norm": 0.7659881061961028,
      "learning_rate": 8.855810513359574e-06,
      "loss": 0.1548,
      "step": 8333
    },
    {
      "epoch": 0.24312970418344126,
      "grad_norm": 0.8870597339994251,
      "learning_rate": 8.855509726215247e-06,
      "loss": 0.1614,
      "step": 8334
    },
    {
      "epoch": 0.24315887741408485,
      "grad_norm": 0.9362402687968784,
      "learning_rate": 8.855208904649848e-06,
      "loss": 0.1602,
      "step": 8335
    },
    {
      "epoch": 0.2431880506447284,
      "grad_norm": 0.920572264713605,
      "learning_rate": 8.854908048666064e-06,
      "loss": 0.1549,
      "step": 8336
    },
    {
      "epoch": 0.24321722387537195,
      "grad_norm": 0.7838104690539485,
      "learning_rate": 8.85460715826658e-06,
      "loss": 0.1584,
      "step": 8337
    },
    {
      "epoch": 0.24324639710601553,
      "grad_norm": 0.8296277648008216,
      "learning_rate": 8.854306233454085e-06,
      "loss": 0.1636,
      "step": 8338
    },
    {
      "epoch": 0.24327557033665909,
      "grad_norm": 0.9261275509176963,
      "learning_rate": 8.854005274231264e-06,
      "loss": 0.1583,
      "step": 8339
    },
    {
      "epoch": 0.24330474356730264,
      "grad_norm": 0.7284314849938635,
      "learning_rate": 8.853704280600803e-06,
      "loss": 0.151,
      "step": 8340
    },
    {
      "epoch": 0.2433339167979462,
      "grad_norm": 0.855592558560831,
      "learning_rate": 8.853403252565391e-06,
      "loss": 0.1499,
      "step": 8341
    },
    {
      "epoch": 0.24336309002858977,
      "grad_norm": 1.0361960340942988,
      "learning_rate": 8.853102190127714e-06,
      "loss": 0.1929,
      "step": 8342
    },
    {
      "epoch": 0.24339226325923333,
      "grad_norm": 0.8606824807586467,
      "learning_rate": 8.852801093290461e-06,
      "loss": 0.1676,
      "step": 8343
    },
    {
      "epoch": 0.24342143648987688,
      "grad_norm": 0.802848091187224,
      "learning_rate": 8.852499962056321e-06,
      "loss": 0.1475,
      "step": 8344
    },
    {
      "epoch": 0.24345060972052046,
      "grad_norm": 0.941050678114156,
      "learning_rate": 8.852198796427978e-06,
      "loss": 0.1586,
      "step": 8345
    },
    {
      "epoch": 0.24347978295116401,
      "grad_norm": 0.8658456740404586,
      "learning_rate": 8.851897596408125e-06,
      "loss": 0.1487,
      "step": 8346
    },
    {
      "epoch": 0.24350895618180757,
      "grad_norm": 0.7557942088182621,
      "learning_rate": 8.85159636199945e-06,
      "loss": 0.1837,
      "step": 8347
    },
    {
      "epoch": 0.24353812941245115,
      "grad_norm": 0.9696259629692668,
      "learning_rate": 8.851295093204642e-06,
      "loss": 0.1666,
      "step": 8348
    },
    {
      "epoch": 0.2435673026430947,
      "grad_norm": 1.1376486002663677,
      "learning_rate": 8.850993790026391e-06,
      "loss": 0.1538,
      "step": 8349
    },
    {
      "epoch": 0.24359647587373826,
      "grad_norm": 0.7397184643381797,
      "learning_rate": 8.850692452467387e-06,
      "loss": 0.1669,
      "step": 8350
    },
    {
      "epoch": 0.2436256491043818,
      "grad_norm": 1.0140075529701373,
      "learning_rate": 8.850391080530319e-06,
      "loss": 0.1752,
      "step": 8351
    },
    {
      "epoch": 0.2436548223350254,
      "grad_norm": 1.0352251266250718,
      "learning_rate": 8.850089674217879e-06,
      "loss": 0.1754,
      "step": 8352
    },
    {
      "epoch": 0.24368399556566894,
      "grad_norm": 0.7166052530629659,
      "learning_rate": 8.849788233532759e-06,
      "loss": 0.1357,
      "step": 8353
    },
    {
      "epoch": 0.2437131687963125,
      "grad_norm": 0.8722454672563336,
      "learning_rate": 8.849486758477647e-06,
      "loss": 0.1563,
      "step": 8354
    },
    {
      "epoch": 0.24374234202695608,
      "grad_norm": 0.787635601188435,
      "learning_rate": 8.849185249055236e-06,
      "loss": 0.1512,
      "step": 8355
    },
    {
      "epoch": 0.24377151525759963,
      "grad_norm": 0.7581295989698724,
      "learning_rate": 8.848883705268219e-06,
      "loss": 0.1546,
      "step": 8356
    },
    {
      "epoch": 0.24380068848824318,
      "grad_norm": 0.850561381936157,
      "learning_rate": 8.848582127119285e-06,
      "loss": 0.1618,
      "step": 8357
    },
    {
      "epoch": 0.24382986171888674,
      "grad_norm": 0.7886592090366925,
      "learning_rate": 8.84828051461113e-06,
      "loss": 0.1723,
      "step": 8358
    },
    {
      "epoch": 0.24385903494953032,
      "grad_norm": 0.8313972585497709,
      "learning_rate": 8.847978867746446e-06,
      "loss": 0.1518,
      "step": 8359
    },
    {
      "epoch": 0.24388820818017387,
      "grad_norm": 0.8607898970493084,
      "learning_rate": 8.847677186527924e-06,
      "loss": 0.1642,
      "step": 8360
    },
    {
      "epoch": 0.24391738141081742,
      "grad_norm": 0.7871688502088359,
      "learning_rate": 8.84737547095826e-06,
      "loss": 0.1494,
      "step": 8361
    },
    {
      "epoch": 0.243946554641461,
      "grad_norm": 0.8363080403057463,
      "learning_rate": 8.847073721040145e-06,
      "loss": 0.1779,
      "step": 8362
    },
    {
      "epoch": 0.24397572787210456,
      "grad_norm": 0.8484381925285275,
      "learning_rate": 8.846771936776275e-06,
      "loss": 0.1782,
      "step": 8363
    },
    {
      "epoch": 0.2440049011027481,
      "grad_norm": 1.0067802342523158,
      "learning_rate": 8.846470118169343e-06,
      "loss": 0.1451,
      "step": 8364
    },
    {
      "epoch": 0.2440340743333917,
      "grad_norm": 0.8955634481195281,
      "learning_rate": 8.846168265222044e-06,
      "loss": 0.148,
      "step": 8365
    },
    {
      "epoch": 0.24406324756403525,
      "grad_norm": 1.0444161973369948,
      "learning_rate": 8.845866377937073e-06,
      "loss": 0.1753,
      "step": 8366
    },
    {
      "epoch": 0.2440924207946788,
      "grad_norm": 0.906266243851301,
      "learning_rate": 8.845564456317124e-06,
      "loss": 0.1451,
      "step": 8367
    },
    {
      "epoch": 0.24412159402532235,
      "grad_norm": 1.0563299544092395,
      "learning_rate": 8.845262500364896e-06,
      "loss": 0.1526,
      "step": 8368
    },
    {
      "epoch": 0.24415076725596593,
      "grad_norm": 0.8454233917176577,
      "learning_rate": 8.84496051008308e-06,
      "loss": 0.1537,
      "step": 8369
    },
    {
      "epoch": 0.2441799404866095,
      "grad_norm": 1.0749430110370448,
      "learning_rate": 8.844658485474376e-06,
      "loss": 0.1633,
      "step": 8370
    },
    {
      "epoch": 0.24420911371725304,
      "grad_norm": 1.0658178285046038,
      "learning_rate": 8.844356426541476e-06,
      "loss": 0.1558,
      "step": 8371
    },
    {
      "epoch": 0.24423828694789662,
      "grad_norm": 0.8166177664164356,
      "learning_rate": 8.844054333287081e-06,
      "loss": 0.1927,
      "step": 8372
    },
    {
      "epoch": 0.24426746017854017,
      "grad_norm": 1.1520638365934748,
      "learning_rate": 8.84375220571389e-06,
      "loss": 0.1737,
      "step": 8373
    },
    {
      "epoch": 0.24429663340918373,
      "grad_norm": 0.9214527141771043,
      "learning_rate": 8.843450043824593e-06,
      "loss": 0.1859,
      "step": 8374
    },
    {
      "epoch": 0.24432580663982728,
      "grad_norm": 0.7492326427282036,
      "learning_rate": 8.843147847621893e-06,
      "loss": 0.1419,
      "step": 8375
    },
    {
      "epoch": 0.24435497987047086,
      "grad_norm": 0.7411821285949995,
      "learning_rate": 8.842845617108485e-06,
      "loss": 0.1607,
      "step": 8376
    },
    {
      "epoch": 0.24438415310111442,
      "grad_norm": 0.7154000436837367,
      "learning_rate": 8.842543352287069e-06,
      "loss": 0.1481,
      "step": 8377
    },
    {
      "epoch": 0.24441332633175797,
      "grad_norm": 0.9138082082050378,
      "learning_rate": 8.842241053160345e-06,
      "loss": 0.1662,
      "step": 8378
    },
    {
      "epoch": 0.24444249956240155,
      "grad_norm": 0.964306699508199,
      "learning_rate": 8.841938719731008e-06,
      "loss": 0.1418,
      "step": 8379
    },
    {
      "epoch": 0.2444716727930451,
      "grad_norm": 0.7727967753547276,
      "learning_rate": 8.841636352001762e-06,
      "loss": 0.1754,
      "step": 8380
    },
    {
      "epoch": 0.24450084602368866,
      "grad_norm": 0.8789704182148401,
      "learning_rate": 8.841333949975302e-06,
      "loss": 0.1444,
      "step": 8381
    },
    {
      "epoch": 0.24453001925433224,
      "grad_norm": 0.7411244681921415,
      "learning_rate": 8.84103151365433e-06,
      "loss": 0.1458,
      "step": 8382
    },
    {
      "epoch": 0.2445591924849758,
      "grad_norm": 0.8487622912626777,
      "learning_rate": 8.840729043041545e-06,
      "loss": 0.1606,
      "step": 8383
    },
    {
      "epoch": 0.24458836571561934,
      "grad_norm": 0.9033530842049082,
      "learning_rate": 8.840426538139647e-06,
      "loss": 0.1865,
      "step": 8384
    },
    {
      "epoch": 0.2446175389462629,
      "grad_norm": 0.8992042468042295,
      "learning_rate": 8.84012399895134e-06,
      "loss": 0.1704,
      "step": 8385
    },
    {
      "epoch": 0.24464671217690648,
      "grad_norm": 0.7081280719070593,
      "learning_rate": 8.83982142547932e-06,
      "loss": 0.1592,
      "step": 8386
    },
    {
      "epoch": 0.24467588540755003,
      "grad_norm": 1.0275869945524918,
      "learning_rate": 8.839518817726293e-06,
      "loss": 0.1369,
      "step": 8387
    },
    {
      "epoch": 0.24470505863819358,
      "grad_norm": 0.7760137392426738,
      "learning_rate": 8.839216175694957e-06,
      "loss": 0.1523,
      "step": 8388
    },
    {
      "epoch": 0.24473423186883717,
      "grad_norm": 0.8148903229715854,
      "learning_rate": 8.838913499388018e-06,
      "loss": 0.1679,
      "step": 8389
    },
    {
      "epoch": 0.24476340509948072,
      "grad_norm": 0.9797201186920089,
      "learning_rate": 8.838610788808173e-06,
      "loss": 0.1422,
      "step": 8390
    },
    {
      "epoch": 0.24479257833012427,
      "grad_norm": 1.0186898166413612,
      "learning_rate": 8.838308043958128e-06,
      "loss": 0.1759,
      "step": 8391
    },
    {
      "epoch": 0.24482175156076785,
      "grad_norm": 1.027818751354689,
      "learning_rate": 8.838005264840585e-06,
      "loss": 0.1646,
      "step": 8392
    },
    {
      "epoch": 0.2448509247914114,
      "grad_norm": 1.225498641750817,
      "learning_rate": 8.837702451458248e-06,
      "loss": 0.1519,
      "step": 8393
    },
    {
      "epoch": 0.24488009802205496,
      "grad_norm": 0.9275017187124127,
      "learning_rate": 8.83739960381382e-06,
      "loss": 0.1519,
      "step": 8394
    },
    {
      "epoch": 0.2449092712526985,
      "grad_norm": 1.0713892531659432,
      "learning_rate": 8.837096721910004e-06,
      "loss": 0.1524,
      "step": 8395
    },
    {
      "epoch": 0.2449384444833421,
      "grad_norm": 1.162071648834617,
      "learning_rate": 8.836793805749504e-06,
      "loss": 0.1648,
      "step": 8396
    },
    {
      "epoch": 0.24496761771398565,
      "grad_norm": 0.7598401323146186,
      "learning_rate": 8.836490855335026e-06,
      "loss": 0.1681,
      "step": 8397
    },
    {
      "epoch": 0.2449967909446292,
      "grad_norm": 0.7747140963682789,
      "learning_rate": 8.83618787066927e-06,
      "loss": 0.1494,
      "step": 8398
    },
    {
      "epoch": 0.24502596417527278,
      "grad_norm": 0.8215032281001173,
      "learning_rate": 8.835884851754948e-06,
      "loss": 0.1581,
      "step": 8399
    },
    {
      "epoch": 0.24505513740591633,
      "grad_norm": 0.7220283583031136,
      "learning_rate": 8.83558179859476e-06,
      "loss": 0.155,
      "step": 8400
    },
    {
      "epoch": 0.2450843106365599,
      "grad_norm": 0.6641240567457919,
      "learning_rate": 8.835278711191414e-06,
      "loss": 0.1304,
      "step": 8401
    },
    {
      "epoch": 0.24511348386720344,
      "grad_norm": 0.7538929804627872,
      "learning_rate": 8.834975589547616e-06,
      "loss": 0.1456,
      "step": 8402
    },
    {
      "epoch": 0.24514265709784702,
      "grad_norm": 0.8066346810016369,
      "learning_rate": 8.83467243366607e-06,
      "loss": 0.1367,
      "step": 8403
    },
    {
      "epoch": 0.24517183032849058,
      "grad_norm": 0.7457877340021333,
      "learning_rate": 8.834369243549484e-06,
      "loss": 0.1419,
      "step": 8404
    },
    {
      "epoch": 0.24520100355913413,
      "grad_norm": 0.687374925843236,
      "learning_rate": 8.834066019200566e-06,
      "loss": 0.1444,
      "step": 8405
    },
    {
      "epoch": 0.2452301767897777,
      "grad_norm": 0.9135412221902482,
      "learning_rate": 8.83376276062202e-06,
      "loss": 0.1415,
      "step": 8406
    },
    {
      "epoch": 0.24525935002042126,
      "grad_norm": 0.8808876465180047,
      "learning_rate": 8.833459467816557e-06,
      "loss": 0.1384,
      "step": 8407
    },
    {
      "epoch": 0.24528852325106482,
      "grad_norm": 0.7754192923566724,
      "learning_rate": 8.833156140786883e-06,
      "loss": 0.1786,
      "step": 8408
    },
    {
      "epoch": 0.2453176964817084,
      "grad_norm": 0.8651372266868499,
      "learning_rate": 8.832852779535704e-06,
      "loss": 0.18,
      "step": 8409
    },
    {
      "epoch": 0.24534686971235195,
      "grad_norm": 0.8809853052651581,
      "learning_rate": 8.832549384065732e-06,
      "loss": 0.1696,
      "step": 8410
    },
    {
      "epoch": 0.2453760429429955,
      "grad_norm": 0.8133090326836228,
      "learning_rate": 8.832245954379674e-06,
      "loss": 0.1438,
      "step": 8411
    },
    {
      "epoch": 0.24540521617363906,
      "grad_norm": 0.9548720252177634,
      "learning_rate": 8.831942490480238e-06,
      "loss": 0.1598,
      "step": 8412
    },
    {
      "epoch": 0.24543438940428264,
      "grad_norm": 0.9654900151038924,
      "learning_rate": 8.831638992370136e-06,
      "loss": 0.164,
      "step": 8413
    },
    {
      "epoch": 0.2454635626349262,
      "grad_norm": 1.0402333658753493,
      "learning_rate": 8.831335460052075e-06,
      "loss": 0.1499,
      "step": 8414
    },
    {
      "epoch": 0.24549273586556974,
      "grad_norm": 1.0229072464196518,
      "learning_rate": 8.831031893528765e-06,
      "loss": 0.1668,
      "step": 8415
    },
    {
      "epoch": 0.24552190909621333,
      "grad_norm": 1.1475131038509692,
      "learning_rate": 8.830728292802917e-06,
      "loss": 0.1671,
      "step": 8416
    },
    {
      "epoch": 0.24555108232685688,
      "grad_norm": 0.9151730725841057,
      "learning_rate": 8.830424657877241e-06,
      "loss": 0.1521,
      "step": 8417
    },
    {
      "epoch": 0.24558025555750043,
      "grad_norm": 0.7484544291478948,
      "learning_rate": 8.830120988754448e-06,
      "loss": 0.1581,
      "step": 8418
    },
    {
      "epoch": 0.24560942878814399,
      "grad_norm": 0.835116926310824,
      "learning_rate": 8.82981728543725e-06,
      "loss": 0.1717,
      "step": 8419
    },
    {
      "epoch": 0.24563860201878757,
      "grad_norm": 2.437720013564385,
      "learning_rate": 8.829513547928357e-06,
      "loss": 0.186,
      "step": 8420
    },
    {
      "epoch": 0.24566777524943112,
      "grad_norm": 0.9567922993993235,
      "learning_rate": 8.829209776230481e-06,
      "loss": 0.1402,
      "step": 8421
    },
    {
      "epoch": 0.24569694848007467,
      "grad_norm": 0.6785539352419789,
      "learning_rate": 8.828905970346333e-06,
      "loss": 0.1562,
      "step": 8422
    },
    {
      "epoch": 0.24572612171071825,
      "grad_norm": 1.0096327952414297,
      "learning_rate": 8.82860213027863e-06,
      "loss": 0.1565,
      "step": 8423
    },
    {
      "epoch": 0.2457552949413618,
      "grad_norm": 0.9654423138572058,
      "learning_rate": 8.828298256030078e-06,
      "loss": 0.1736,
      "step": 8424
    },
    {
      "epoch": 0.24578446817200536,
      "grad_norm": 0.869416426857183,
      "learning_rate": 8.827994347603395e-06,
      "loss": 0.1433,
      "step": 8425
    },
    {
      "epoch": 0.24581364140264894,
      "grad_norm": 0.654907225293584,
      "learning_rate": 8.82769040500129e-06,
      "loss": 0.1526,
      "step": 8426
    },
    {
      "epoch": 0.2458428146332925,
      "grad_norm": 1.0479087753183833,
      "learning_rate": 8.827386428226481e-06,
      "loss": 0.1591,
      "step": 8427
    },
    {
      "epoch": 0.24587198786393605,
      "grad_norm": 0.86967230802673,
      "learning_rate": 8.827082417281679e-06,
      "loss": 0.1478,
      "step": 8428
    },
    {
      "epoch": 0.2459011610945796,
      "grad_norm": 0.7681872088185056,
      "learning_rate": 8.826778372169599e-06,
      "loss": 0.1431,
      "step": 8429
    },
    {
      "epoch": 0.24593033432522318,
      "grad_norm": 0.9663204434008695,
      "learning_rate": 8.826474292892954e-06,
      "loss": 0.1547,
      "step": 8430
    },
    {
      "epoch": 0.24595950755586674,
      "grad_norm": 0.8851904730109313,
      "learning_rate": 8.82617017945446e-06,
      "loss": 0.1549,
      "step": 8431
    },
    {
      "epoch": 0.2459886807865103,
      "grad_norm": 0.7937661421652855,
      "learning_rate": 8.825866031856833e-06,
      "loss": 0.1739,
      "step": 8432
    },
    {
      "epoch": 0.24601785401715387,
      "grad_norm": 0.7341655041980866,
      "learning_rate": 8.825561850102788e-06,
      "loss": 0.1457,
      "step": 8433
    },
    {
      "epoch": 0.24604702724779742,
      "grad_norm": 1.0127929178350177,
      "learning_rate": 8.82525763419504e-06,
      "loss": 0.1962,
      "step": 8434
    },
    {
      "epoch": 0.24607620047844098,
      "grad_norm": 0.9877385349978934,
      "learning_rate": 8.824953384136305e-06,
      "loss": 0.1924,
      "step": 8435
    },
    {
      "epoch": 0.24610537370908456,
      "grad_norm": 0.8526552191336515,
      "learning_rate": 8.824649099929297e-06,
      "loss": 0.1591,
      "step": 8436
    },
    {
      "epoch": 0.2461345469397281,
      "grad_norm": 0.8486905635216188,
      "learning_rate": 8.824344781576736e-06,
      "loss": 0.1717,
      "step": 8437
    },
    {
      "epoch": 0.24616372017037166,
      "grad_norm": 0.9146396786883396,
      "learning_rate": 8.82404042908134e-06,
      "loss": 0.1603,
      "step": 8438
    },
    {
      "epoch": 0.24619289340101522,
      "grad_norm": 0.9111314156629063,
      "learning_rate": 8.823736042445822e-06,
      "loss": 0.1741,
      "step": 8439
    },
    {
      "epoch": 0.2462220666316588,
      "grad_norm": 0.7504101195768871,
      "learning_rate": 8.8234316216729e-06,
      "loss": 0.1656,
      "step": 8440
    },
    {
      "epoch": 0.24625123986230235,
      "grad_norm": 0.8826741149882912,
      "learning_rate": 8.823127166765296e-06,
      "loss": 0.1782,
      "step": 8441
    },
    {
      "epoch": 0.2462804130929459,
      "grad_norm": 1.079721249237678,
      "learning_rate": 8.822822677725725e-06,
      "loss": 0.1427,
      "step": 8442
    },
    {
      "epoch": 0.24630958632358949,
      "grad_norm": 0.8250626002008481,
      "learning_rate": 8.822518154556904e-06,
      "loss": 0.1471,
      "step": 8443
    },
    {
      "epoch": 0.24633875955423304,
      "grad_norm": 2.595163956846552,
      "learning_rate": 8.822213597261553e-06,
      "loss": 0.1564,
      "step": 8444
    },
    {
      "epoch": 0.2463679327848766,
      "grad_norm": 1.028256115729576,
      "learning_rate": 8.821909005842393e-06,
      "loss": 0.1628,
      "step": 8445
    },
    {
      "epoch": 0.24639710601552015,
      "grad_norm": 0.8793545771654575,
      "learning_rate": 8.821604380302141e-06,
      "loss": 0.1517,
      "step": 8446
    },
    {
      "epoch": 0.24642627924616373,
      "grad_norm": 0.6827770932932468,
      "learning_rate": 8.82129972064352e-06,
      "loss": 0.1579,
      "step": 8447
    },
    {
      "epoch": 0.24645545247680728,
      "grad_norm": 0.8235675301541698,
      "learning_rate": 8.820995026869244e-06,
      "loss": 0.1639,
      "step": 8448
    },
    {
      "epoch": 0.24648462570745083,
      "grad_norm": 0.9152598512939685,
      "learning_rate": 8.820690298982037e-06,
      "loss": 0.1788,
      "step": 8449
    },
    {
      "epoch": 0.24651379893809441,
      "grad_norm": 0.840929740765244,
      "learning_rate": 8.82038553698462e-06,
      "loss": 0.1414,
      "step": 8450
    },
    {
      "epoch": 0.24654297216873797,
      "grad_norm": 0.7697768271526885,
      "learning_rate": 8.820080740879713e-06,
      "loss": 0.1521,
      "step": 8451
    },
    {
      "epoch": 0.24657214539938152,
      "grad_norm": 3.034581115019779,
      "learning_rate": 8.819775910670036e-06,
      "loss": 0.1517,
      "step": 8452
    },
    {
      "epoch": 0.2466013186300251,
      "grad_norm": 0.8956114868858843,
      "learning_rate": 8.819471046358313e-06,
      "loss": 0.1552,
      "step": 8453
    },
    {
      "epoch": 0.24663049186066865,
      "grad_norm": 1.075827573742953,
      "learning_rate": 8.819166147947263e-06,
      "loss": 0.1694,
      "step": 8454
    },
    {
      "epoch": 0.2466596650913122,
      "grad_norm": 0.7881868658598621,
      "learning_rate": 8.81886121543961e-06,
      "loss": 0.1531,
      "step": 8455
    },
    {
      "epoch": 0.24668883832195576,
      "grad_norm": 0.729149444478052,
      "learning_rate": 8.818556248838075e-06,
      "loss": 0.1895,
      "step": 8456
    },
    {
      "epoch": 0.24671801155259934,
      "grad_norm": 0.8361171197534403,
      "learning_rate": 8.818251248145382e-06,
      "loss": 0.1638,
      "step": 8457
    },
    {
      "epoch": 0.2467471847832429,
      "grad_norm": 0.8258725227718289,
      "learning_rate": 8.817946213364254e-06,
      "loss": 0.17,
      "step": 8458
    },
    {
      "epoch": 0.24677635801388645,
      "grad_norm": 1.0109646348966774,
      "learning_rate": 8.817641144497413e-06,
      "loss": 0.1714,
      "step": 8459
    },
    {
      "epoch": 0.24680553124453003,
      "grad_norm": 0.8278978580142614,
      "learning_rate": 8.817336041547582e-06,
      "loss": 0.1489,
      "step": 8460
    },
    {
      "epoch": 0.24683470447517358,
      "grad_norm": 0.8980349193116316,
      "learning_rate": 8.817030904517488e-06,
      "loss": 0.1738,
      "step": 8461
    },
    {
      "epoch": 0.24686387770581714,
      "grad_norm": 1.0060006588202914,
      "learning_rate": 8.816725733409852e-06,
      "loss": 0.1641,
      "step": 8462
    },
    {
      "epoch": 0.24689305093646072,
      "grad_norm": 1.1469604360476755,
      "learning_rate": 8.8164205282274e-06,
      "loss": 0.1559,
      "step": 8463
    },
    {
      "epoch": 0.24692222416710427,
      "grad_norm": 0.9779373329827378,
      "learning_rate": 8.816115288972857e-06,
      "loss": 0.1691,
      "step": 8464
    },
    {
      "epoch": 0.24695139739774782,
      "grad_norm": 1.5967700458984158,
      "learning_rate": 8.815810015648947e-06,
      "loss": 0.1711,
      "step": 8465
    },
    {
      "epoch": 0.24698057062839138,
      "grad_norm": 0.9709621836414194,
      "learning_rate": 8.815504708258398e-06,
      "loss": 0.1709,
      "step": 8466
    },
    {
      "epoch": 0.24700974385903496,
      "grad_norm": 0.9165203118739379,
      "learning_rate": 8.815199366803932e-06,
      "loss": 0.1869,
      "step": 8467
    },
    {
      "epoch": 0.2470389170896785,
      "grad_norm": 0.8066612396376657,
      "learning_rate": 8.814893991288277e-06,
      "loss": 0.1504,
      "step": 8468
    },
    {
      "epoch": 0.24706809032032206,
      "grad_norm": 0.7944070270789971,
      "learning_rate": 8.814588581714158e-06,
      "loss": 0.179,
      "step": 8469
    },
    {
      "epoch": 0.24709726355096565,
      "grad_norm": 0.8136541392797165,
      "learning_rate": 8.814283138084305e-06,
      "loss": 0.1585,
      "step": 8470
    },
    {
      "epoch": 0.2471264367816092,
      "grad_norm": 0.8970606040478348,
      "learning_rate": 8.813977660401442e-06,
      "loss": 0.1446,
      "step": 8471
    },
    {
      "epoch": 0.24715561001225275,
      "grad_norm": 0.6884550119953908,
      "learning_rate": 8.813672148668296e-06,
      "loss": 0.1317,
      "step": 8472
    },
    {
      "epoch": 0.2471847832428963,
      "grad_norm": 0.792880746072291,
      "learning_rate": 8.813366602887596e-06,
      "loss": 0.1705,
      "step": 8473
    },
    {
      "epoch": 0.2472139564735399,
      "grad_norm": 0.8942780596943223,
      "learning_rate": 8.81306102306207e-06,
      "loss": 0.1696,
      "step": 8474
    },
    {
      "epoch": 0.24724312970418344,
      "grad_norm": 0.9207073793628455,
      "learning_rate": 8.812755409194444e-06,
      "loss": 0.1902,
      "step": 8475
    },
    {
      "epoch": 0.247272302934827,
      "grad_norm": 0.7593633274691847,
      "learning_rate": 8.81244976128745e-06,
      "loss": 0.1565,
      "step": 8476
    },
    {
      "epoch": 0.24730147616547057,
      "grad_norm": 0.9856712072108497,
      "learning_rate": 8.812144079343814e-06,
      "loss": 0.1913,
      "step": 8477
    },
    {
      "epoch": 0.24733064939611413,
      "grad_norm": 0.9154848599457953,
      "learning_rate": 8.811838363366263e-06,
      "loss": 0.1612,
      "step": 8478
    },
    {
      "epoch": 0.24735982262675768,
      "grad_norm": 0.7981364221196089,
      "learning_rate": 8.811532613357532e-06,
      "loss": 0.1938,
      "step": 8479
    },
    {
      "epoch": 0.24738899585740126,
      "grad_norm": 0.8215717143394589,
      "learning_rate": 8.811226829320347e-06,
      "loss": 0.1933,
      "step": 8480
    },
    {
      "epoch": 0.24741816908804481,
      "grad_norm": 0.8889634002739583,
      "learning_rate": 8.810921011257439e-06,
      "loss": 0.16,
      "step": 8481
    },
    {
      "epoch": 0.24744734231868837,
      "grad_norm": 0.7136621367510201,
      "learning_rate": 8.810615159171539e-06,
      "loss": 0.1527,
      "step": 8482
    },
    {
      "epoch": 0.24747651554933192,
      "grad_norm": 0.768581865624633,
      "learning_rate": 8.810309273065374e-06,
      "loss": 0.1462,
      "step": 8483
    },
    {
      "epoch": 0.2475056887799755,
      "grad_norm": 0.7718778497787705,
      "learning_rate": 8.810003352941679e-06,
      "loss": 0.1498,
      "step": 8484
    },
    {
      "epoch": 0.24753486201061906,
      "grad_norm": 0.8938330577938709,
      "learning_rate": 8.809697398803183e-06,
      "loss": 0.1766,
      "step": 8485
    },
    {
      "epoch": 0.2475640352412626,
      "grad_norm": 0.7270221163387063,
      "learning_rate": 8.809391410652618e-06,
      "loss": 0.1661,
      "step": 8486
    },
    {
      "epoch": 0.2475932084719062,
      "grad_norm": 0.8740024671496396,
      "learning_rate": 8.809085388492716e-06,
      "loss": 0.167,
      "step": 8487
    },
    {
      "epoch": 0.24762238170254974,
      "grad_norm": 1.0996909584243546,
      "learning_rate": 8.808779332326208e-06,
      "loss": 0.1993,
      "step": 8488
    },
    {
      "epoch": 0.2476515549331933,
      "grad_norm": 0.8376569971712228,
      "learning_rate": 8.808473242155828e-06,
      "loss": 0.1428,
      "step": 8489
    },
    {
      "epoch": 0.24768072816383685,
      "grad_norm": 0.8162306345311063,
      "learning_rate": 8.808167117984308e-06,
      "loss": 0.1546,
      "step": 8490
    },
    {
      "epoch": 0.24770990139448043,
      "grad_norm": 1.145727284508113,
      "learning_rate": 8.807860959814381e-06,
      "loss": 0.2119,
      "step": 8491
    },
    {
      "epoch": 0.24773907462512398,
      "grad_norm": 0.8791723337086851,
      "learning_rate": 8.807554767648782e-06,
      "loss": 0.1759,
      "step": 8492
    },
    {
      "epoch": 0.24776824785576754,
      "grad_norm": 0.7711098477920081,
      "learning_rate": 8.80724854149024e-06,
      "loss": 0.1558,
      "step": 8493
    },
    {
      "epoch": 0.24779742108641112,
      "grad_norm": 0.879832302265608,
      "learning_rate": 8.806942281341496e-06,
      "loss": 0.165,
      "step": 8494
    },
    {
      "epoch": 0.24782659431705467,
      "grad_norm": 1.0633651975194989,
      "learning_rate": 8.806635987205276e-06,
      "loss": 0.149,
      "step": 8495
    },
    {
      "epoch": 0.24785576754769822,
      "grad_norm": 0.9650721712161302,
      "learning_rate": 8.80632965908432e-06,
      "loss": 0.1769,
      "step": 8496
    },
    {
      "epoch": 0.2478849407783418,
      "grad_norm": 1.115058116156931,
      "learning_rate": 8.806023296981364e-06,
      "loss": 0.1764,
      "step": 8497
    },
    {
      "epoch": 0.24791411400898536,
      "grad_norm": 0.8621447225082789,
      "learning_rate": 8.805716900899137e-06,
      "loss": 0.1581,
      "step": 8498
    },
    {
      "epoch": 0.2479432872396289,
      "grad_norm": 0.877791472812501,
      "learning_rate": 8.805410470840378e-06,
      "loss": 0.1538,
      "step": 8499
    },
    {
      "epoch": 0.24797246047027247,
      "grad_norm": 0.850704153107941,
      "learning_rate": 8.805104006807825e-06,
      "loss": 0.1705,
      "step": 8500
    },
    {
      "epoch": 0.24800163370091605,
      "grad_norm": 1.1570560093546498,
      "learning_rate": 8.80479750880421e-06,
      "loss": 0.1484,
      "step": 8501
    },
    {
      "epoch": 0.2480308069315596,
      "grad_norm": 1.1830242487341915,
      "learning_rate": 8.804490976832272e-06,
      "loss": 0.1528,
      "step": 8502
    },
    {
      "epoch": 0.24805998016220315,
      "grad_norm": 0.8592503509249563,
      "learning_rate": 8.804184410894747e-06,
      "loss": 0.1621,
      "step": 8503
    },
    {
      "epoch": 0.24808915339284673,
      "grad_norm": 1.5333932107566217,
      "learning_rate": 8.803877810994373e-06,
      "loss": 0.1643,
      "step": 8504
    },
    {
      "epoch": 0.2481183266234903,
      "grad_norm": 0.9298591687473505,
      "learning_rate": 8.803571177133884e-06,
      "loss": 0.1664,
      "step": 8505
    },
    {
      "epoch": 0.24814749985413384,
      "grad_norm": 0.9366701256850264,
      "learning_rate": 8.80326450931602e-06,
      "loss": 0.1675,
      "step": 8506
    },
    {
      "epoch": 0.24817667308477742,
      "grad_norm": 1.0417008665791745,
      "learning_rate": 8.802957807543517e-06,
      "loss": 0.1484,
      "step": 8507
    },
    {
      "epoch": 0.24820584631542097,
      "grad_norm": 0.6713970157192196,
      "learning_rate": 8.802651071819118e-06,
      "loss": 0.1784,
      "step": 8508
    },
    {
      "epoch": 0.24823501954606453,
      "grad_norm": 0.7218572054930797,
      "learning_rate": 8.802344302145555e-06,
      "loss": 0.1619,
      "step": 8509
    },
    {
      "epoch": 0.24826419277670808,
      "grad_norm": 0.8067575500397708,
      "learning_rate": 8.80203749852557e-06,
      "loss": 0.1829,
      "step": 8510
    },
    {
      "epoch": 0.24829336600735166,
      "grad_norm": 0.6142079062235674,
      "learning_rate": 8.801730660961902e-06,
      "loss": 0.1457,
      "step": 8511
    },
    {
      "epoch": 0.24832253923799522,
      "grad_norm": 0.7805665552257766,
      "learning_rate": 8.80142378945729e-06,
      "loss": 0.141,
      "step": 8512
    },
    {
      "epoch": 0.24835171246863877,
      "grad_norm": 0.8006755079437554,
      "learning_rate": 8.801116884014475e-06,
      "loss": 0.162,
      "step": 8513
    },
    {
      "epoch": 0.24838088569928235,
      "grad_norm": 0.9079863639047883,
      "learning_rate": 8.800809944636195e-06,
      "loss": 0.1377,
      "step": 8514
    },
    {
      "epoch": 0.2484100589299259,
      "grad_norm": 0.7832487583579651,
      "learning_rate": 8.800502971325193e-06,
      "loss": 0.1521,
      "step": 8515
    },
    {
      "epoch": 0.24843923216056946,
      "grad_norm": 0.892213394264103,
      "learning_rate": 8.800195964084205e-06,
      "loss": 0.185,
      "step": 8516
    },
    {
      "epoch": 0.248468405391213,
      "grad_norm": 0.9631334992011685,
      "learning_rate": 8.799888922915975e-06,
      "loss": 0.1519,
      "step": 8517
    },
    {
      "epoch": 0.2484975786218566,
      "grad_norm": 0.7927291508948916,
      "learning_rate": 8.799581847823247e-06,
      "loss": 0.1579,
      "step": 8518
    },
    {
      "epoch": 0.24852675185250014,
      "grad_norm": 0.782056273651509,
      "learning_rate": 8.799274738808757e-06,
      "loss": 0.1464,
      "step": 8519
    },
    {
      "epoch": 0.2485559250831437,
      "grad_norm": 0.8473680251759129,
      "learning_rate": 8.798967595875247e-06,
      "loss": 0.1684,
      "step": 8520
    },
    {
      "epoch": 0.24858509831378728,
      "grad_norm": 0.8015432953151329,
      "learning_rate": 8.798660419025464e-06,
      "loss": 0.1628,
      "step": 8521
    },
    {
      "epoch": 0.24861427154443083,
      "grad_norm": 1.0293800914323845,
      "learning_rate": 8.798353208262147e-06,
      "loss": 0.187,
      "step": 8522
    },
    {
      "epoch": 0.24864344477507438,
      "grad_norm": 0.908218371022314,
      "learning_rate": 8.79804596358804e-06,
      "loss": 0.1727,
      "step": 8523
    },
    {
      "epoch": 0.24867261800571797,
      "grad_norm": 1.1202371764316217,
      "learning_rate": 8.797738685005883e-06,
      "loss": 0.1757,
      "step": 8524
    },
    {
      "epoch": 0.24870179123636152,
      "grad_norm": 1.0151239456504162,
      "learning_rate": 8.797431372518424e-06,
      "loss": 0.151,
      "step": 8525
    },
    {
      "epoch": 0.24873096446700507,
      "grad_norm": 0.7940225277400671,
      "learning_rate": 8.797124026128403e-06,
      "loss": 0.1655,
      "step": 8526
    },
    {
      "epoch": 0.24876013769764863,
      "grad_norm": 0.725528592028752,
      "learning_rate": 8.796816645838566e-06,
      "loss": 0.1543,
      "step": 8527
    },
    {
      "epoch": 0.2487893109282922,
      "grad_norm": 0.9403967739044864,
      "learning_rate": 8.796509231651655e-06,
      "loss": 0.1857,
      "step": 8528
    },
    {
      "epoch": 0.24881848415893576,
      "grad_norm": 0.9240299974494754,
      "learning_rate": 8.796201783570417e-06,
      "loss": 0.1642,
      "step": 8529
    },
    {
      "epoch": 0.2488476573895793,
      "grad_norm": 0.7519665485351806,
      "learning_rate": 8.795894301597596e-06,
      "loss": 0.1623,
      "step": 8530
    },
    {
      "epoch": 0.2488768306202229,
      "grad_norm": 0.8571661523051104,
      "learning_rate": 8.795586785735935e-06,
      "loss": 0.1726,
      "step": 8531
    },
    {
      "epoch": 0.24890600385086645,
      "grad_norm": 0.947172515776725,
      "learning_rate": 8.795279235988183e-06,
      "loss": 0.1604,
      "step": 8532
    },
    {
      "epoch": 0.24893517708151,
      "grad_norm": 0.8151591283214685,
      "learning_rate": 8.794971652357083e-06,
      "loss": 0.1589,
      "step": 8533
    },
    {
      "epoch": 0.24896435031215358,
      "grad_norm": 0.8089411185537707,
      "learning_rate": 8.794664034845383e-06,
      "loss": 0.1738,
      "step": 8534
    },
    {
      "epoch": 0.24899352354279713,
      "grad_norm": 0.7158728657870362,
      "learning_rate": 8.794356383455826e-06,
      "loss": 0.1651,
      "step": 8535
    },
    {
      "epoch": 0.2490226967734407,
      "grad_norm": 0.8113539840458991,
      "learning_rate": 8.794048698191165e-06,
      "loss": 0.1578,
      "step": 8536
    },
    {
      "epoch": 0.24905187000408424,
      "grad_norm": 0.6200616649387198,
      "learning_rate": 8.79374097905414e-06,
      "loss": 0.1495,
      "step": 8537
    },
    {
      "epoch": 0.24908104323472782,
      "grad_norm": 0.9316102662343403,
      "learning_rate": 8.793433226047501e-06,
      "loss": 0.1726,
      "step": 8538
    },
    {
      "epoch": 0.24911021646537138,
      "grad_norm": 0.8846989390645414,
      "learning_rate": 8.793125439173997e-06,
      "loss": 0.1464,
      "step": 8539
    },
    {
      "epoch": 0.24913938969601493,
      "grad_norm": 0.7452266622871222,
      "learning_rate": 8.792817618436375e-06,
      "loss": 0.164,
      "step": 8540
    },
    {
      "epoch": 0.2491685629266585,
      "grad_norm": 0.8744304124780296,
      "learning_rate": 8.792509763837382e-06,
      "loss": 0.1474,
      "step": 8541
    },
    {
      "epoch": 0.24919773615730206,
      "grad_norm": 0.8878039406492998,
      "learning_rate": 8.792201875379767e-06,
      "loss": 0.1688,
      "step": 8542
    },
    {
      "epoch": 0.24922690938794562,
      "grad_norm": 0.9490768869872591,
      "learning_rate": 8.791893953066279e-06,
      "loss": 0.1524,
      "step": 8543
    },
    {
      "epoch": 0.24925608261858917,
      "grad_norm": 0.8695160326896547,
      "learning_rate": 8.791585996899667e-06,
      "loss": 0.158,
      "step": 8544
    },
    {
      "epoch": 0.24928525584923275,
      "grad_norm": 0.9238782009657288,
      "learning_rate": 8.79127800688268e-06,
      "loss": 0.1801,
      "step": 8545
    },
    {
      "epoch": 0.2493144290798763,
      "grad_norm": 0.911790527348034,
      "learning_rate": 8.790969983018067e-06,
      "loss": 0.1786,
      "step": 8546
    },
    {
      "epoch": 0.24934360231051986,
      "grad_norm": 0.9008959008504507,
      "learning_rate": 8.790661925308582e-06,
      "loss": 0.1675,
      "step": 8547
    },
    {
      "epoch": 0.24937277554116344,
      "grad_norm": 0.8888191495930848,
      "learning_rate": 8.79035383375697e-06,
      "loss": 0.157,
      "step": 8548
    },
    {
      "epoch": 0.249401948771807,
      "grad_norm": 0.9852866117505581,
      "learning_rate": 8.790045708365983e-06,
      "loss": 0.1563,
      "step": 8549
    },
    {
      "epoch": 0.24943112200245054,
      "grad_norm": 0.8088381591791408,
      "learning_rate": 8.789737549138376e-06,
      "loss": 0.1505,
      "step": 8550
    },
    {
      "epoch": 0.24946029523309413,
      "grad_norm": 0.7571559136577702,
      "learning_rate": 8.789429356076895e-06,
      "loss": 0.1529,
      "step": 8551
    },
    {
      "epoch": 0.24948946846373768,
      "grad_norm": 0.9057581284918355,
      "learning_rate": 8.789121129184292e-06,
      "loss": 0.1319,
      "step": 8552
    },
    {
      "epoch": 0.24951864169438123,
      "grad_norm": 0.7759745420464187,
      "learning_rate": 8.78881286846332e-06,
      "loss": 0.145,
      "step": 8553
    },
    {
      "epoch": 0.24954781492502479,
      "grad_norm": 0.693580534777219,
      "learning_rate": 8.788504573916735e-06,
      "loss": 0.15,
      "step": 8554
    },
    {
      "epoch": 0.24957698815566837,
      "grad_norm": 0.7697521924643248,
      "learning_rate": 8.788196245547283e-06,
      "loss": 0.1468,
      "step": 8555
    },
    {
      "epoch": 0.24960616138631192,
      "grad_norm": 0.7953894129472417,
      "learning_rate": 8.787887883357718e-06,
      "loss": 0.1826,
      "step": 8556
    },
    {
      "epoch": 0.24963533461695547,
      "grad_norm": 0.7171112382319759,
      "learning_rate": 8.787579487350795e-06,
      "loss": 0.1663,
      "step": 8557
    },
    {
      "epoch": 0.24966450784759905,
      "grad_norm": 0.8971018760703712,
      "learning_rate": 8.787271057529267e-06,
      "loss": 0.168,
      "step": 8558
    },
    {
      "epoch": 0.2496936810782426,
      "grad_norm": 0.914590183249974,
      "learning_rate": 8.786962593895887e-06,
      "loss": 0.1697,
      "step": 8559
    },
    {
      "epoch": 0.24972285430888616,
      "grad_norm": 0.7356187797125372,
      "learning_rate": 8.786654096453411e-06,
      "loss": 0.1627,
      "step": 8560
    },
    {
      "epoch": 0.24975202753952971,
      "grad_norm": 0.7085676442730278,
      "learning_rate": 8.786345565204588e-06,
      "loss": 0.1481,
      "step": 8561
    },
    {
      "epoch": 0.2497812007701733,
      "grad_norm": 0.8035803998376765,
      "learning_rate": 8.786037000152176e-06,
      "loss": 0.1521,
      "step": 8562
    },
    {
      "epoch": 0.24981037400081685,
      "grad_norm": 0.8456489905190574,
      "learning_rate": 8.785728401298931e-06,
      "loss": 0.1561,
      "step": 8563
    },
    {
      "epoch": 0.2498395472314604,
      "grad_norm": 0.806132692376375,
      "learning_rate": 8.785419768647606e-06,
      "loss": 0.1635,
      "step": 8564
    },
    {
      "epoch": 0.24986872046210398,
      "grad_norm": 0.697632255673314,
      "learning_rate": 8.785111102200958e-06,
      "loss": 0.1273,
      "step": 8565
    },
    {
      "epoch": 0.24989789369274754,
      "grad_norm": 0.7291816114635233,
      "learning_rate": 8.78480240196174e-06,
      "loss": 0.1439,
      "step": 8566
    },
    {
      "epoch": 0.2499270669233911,
      "grad_norm": 0.8278397948964653,
      "learning_rate": 8.784493667932709e-06,
      "loss": 0.1608,
      "step": 8567
    },
    {
      "epoch": 0.24995624015403467,
      "grad_norm": 0.8764478466593116,
      "learning_rate": 8.784184900116623e-06,
      "loss": 0.1725,
      "step": 8568
    },
    {
      "epoch": 0.24998541338467822,
      "grad_norm": 0.8910826173893902,
      "learning_rate": 8.783876098516239e-06,
      "loss": 0.1613,
      "step": 8569
    },
    {
      "epoch": 0.2500145866153218,
      "grad_norm": 0.9454110100942791,
      "learning_rate": 8.783567263134312e-06,
      "loss": 0.1909,
      "step": 8570
    },
    {
      "epoch": 0.25004375984596533,
      "grad_norm": 0.7174902051083285,
      "learning_rate": 8.783258393973597e-06,
      "loss": 0.1531,
      "step": 8571
    },
    {
      "epoch": 0.2500729330766089,
      "grad_norm": 0.8168221964766684,
      "learning_rate": 8.782949491036856e-06,
      "loss": 0.1601,
      "step": 8572
    },
    {
      "epoch": 0.2501021063072525,
      "grad_norm": 0.7669406975016696,
      "learning_rate": 8.782640554326847e-06,
      "loss": 0.1343,
      "step": 8573
    },
    {
      "epoch": 0.25013127953789605,
      "grad_norm": 0.8471735608476396,
      "learning_rate": 8.782331583846323e-06,
      "loss": 0.1638,
      "step": 8574
    },
    {
      "epoch": 0.2501604527685396,
      "grad_norm": 0.6773378519508466,
      "learning_rate": 8.782022579598046e-06,
      "loss": 0.135,
      "step": 8575
    },
    {
      "epoch": 0.25018962599918315,
      "grad_norm": 0.9158554370246568,
      "learning_rate": 8.781713541584775e-06,
      "loss": 0.1768,
      "step": 8576
    },
    {
      "epoch": 0.2502187992298267,
      "grad_norm": 1.1025626378261217,
      "learning_rate": 8.78140446980927e-06,
      "loss": 0.1492,
      "step": 8577
    },
    {
      "epoch": 0.25024797246047026,
      "grad_norm": 0.9874043507033101,
      "learning_rate": 8.781095364274286e-06,
      "loss": 0.151,
      "step": 8578
    },
    {
      "epoch": 0.2502771456911138,
      "grad_norm": 0.9573220286234951,
      "learning_rate": 8.780786224982585e-06,
      "loss": 0.1588,
      "step": 8579
    },
    {
      "epoch": 0.2503063189217574,
      "grad_norm": 0.8466670019844869,
      "learning_rate": 8.780477051936928e-06,
      "loss": 0.1523,
      "step": 8580
    },
    {
      "epoch": 0.250335492152401,
      "grad_norm": 0.8922545595963715,
      "learning_rate": 8.780167845140075e-06,
      "loss": 0.1722,
      "step": 8581
    },
    {
      "epoch": 0.2503646653830445,
      "grad_norm": 0.7860626236954013,
      "learning_rate": 8.779858604594786e-06,
      "loss": 0.1589,
      "step": 8582
    },
    {
      "epoch": 0.2503938386136881,
      "grad_norm": 0.8939785190141168,
      "learning_rate": 8.779549330303822e-06,
      "loss": 0.1522,
      "step": 8583
    },
    {
      "epoch": 0.25042301184433163,
      "grad_norm": 0.9091443804093601,
      "learning_rate": 8.779240022269941e-06,
      "loss": 0.172,
      "step": 8584
    },
    {
      "epoch": 0.2504521850749752,
      "grad_norm": 0.7720207746400931,
      "learning_rate": 8.778930680495911e-06,
      "loss": 0.1697,
      "step": 8585
    },
    {
      "epoch": 0.25048135830561874,
      "grad_norm": 0.8593576498178653,
      "learning_rate": 8.778621304984487e-06,
      "loss": 0.1549,
      "step": 8586
    },
    {
      "epoch": 0.25051053153626235,
      "grad_norm": 0.8583096187012206,
      "learning_rate": 8.778311895738436e-06,
      "loss": 0.1396,
      "step": 8587
    },
    {
      "epoch": 0.2505397047669059,
      "grad_norm": 0.7815697983140117,
      "learning_rate": 8.778002452760517e-06,
      "loss": 0.1694,
      "step": 8588
    },
    {
      "epoch": 0.25056887799754946,
      "grad_norm": 0.9921630331516273,
      "learning_rate": 8.777692976053496e-06,
      "loss": 0.1823,
      "step": 8589
    },
    {
      "epoch": 0.250598051228193,
      "grad_norm": 0.9075847254953955,
      "learning_rate": 8.77738346562013e-06,
      "loss": 0.1536,
      "step": 8590
    },
    {
      "epoch": 0.25062722445883656,
      "grad_norm": 0.9035407785795335,
      "learning_rate": 8.77707392146319e-06,
      "loss": 0.1569,
      "step": 8591
    },
    {
      "epoch": 0.2506563976894801,
      "grad_norm": 0.9623455556518227,
      "learning_rate": 8.776764343585434e-06,
      "loss": 0.1777,
      "step": 8592
    },
    {
      "epoch": 0.25068557092012367,
      "grad_norm": 0.7794053355405665,
      "learning_rate": 8.776454731989628e-06,
      "loss": 0.1787,
      "step": 8593
    },
    {
      "epoch": 0.2507147441507673,
      "grad_norm": 0.8189228578184276,
      "learning_rate": 8.776145086678535e-06,
      "loss": 0.1457,
      "step": 8594
    },
    {
      "epoch": 0.25074391738141083,
      "grad_norm": 0.8106201577521042,
      "learning_rate": 8.775835407654922e-06,
      "loss": 0.1808,
      "step": 8595
    },
    {
      "epoch": 0.2507730906120544,
      "grad_norm": 1.0761468091271227,
      "learning_rate": 8.77552569492155e-06,
      "loss": 0.1709,
      "step": 8596
    },
    {
      "epoch": 0.25080226384269794,
      "grad_norm": 0.7975032316074803,
      "learning_rate": 8.775215948481187e-06,
      "loss": 0.1448,
      "step": 8597
    },
    {
      "epoch": 0.2508314370733415,
      "grad_norm": 1.0595088346673573,
      "learning_rate": 8.774906168336595e-06,
      "loss": 0.1569,
      "step": 8598
    },
    {
      "epoch": 0.25086061030398504,
      "grad_norm": 0.7694985571630539,
      "learning_rate": 8.774596354490544e-06,
      "loss": 0.159,
      "step": 8599
    },
    {
      "epoch": 0.25088978353462865,
      "grad_norm": 1.1726649632400676,
      "learning_rate": 8.774286506945797e-06,
      "loss": 0.1853,
      "step": 8600
    },
    {
      "epoch": 0.2509189567652722,
      "grad_norm": 1.010183535285587,
      "learning_rate": 8.773976625705122e-06,
      "loss": 0.1998,
      "step": 8601
    },
    {
      "epoch": 0.25094812999591576,
      "grad_norm": 0.8032566842963813,
      "learning_rate": 8.773666710771283e-06,
      "loss": 0.1576,
      "step": 8602
    },
    {
      "epoch": 0.2509773032265593,
      "grad_norm": 0.7919812675501868,
      "learning_rate": 8.77335676214705e-06,
      "loss": 0.1656,
      "step": 8603
    },
    {
      "epoch": 0.25100647645720287,
      "grad_norm": 0.9404814588709256,
      "learning_rate": 8.773046779835189e-06,
      "loss": 0.158,
      "step": 8604
    },
    {
      "epoch": 0.2510356496878464,
      "grad_norm": 0.8010105402020736,
      "learning_rate": 8.772736763838466e-06,
      "loss": 0.1669,
      "step": 8605
    },
    {
      "epoch": 0.25106482291848997,
      "grad_norm": 1.118376939418988,
      "learning_rate": 8.772426714159648e-06,
      "loss": 0.1547,
      "step": 8606
    },
    {
      "epoch": 0.2510939961491336,
      "grad_norm": 0.7799989234003954,
      "learning_rate": 8.772116630801506e-06,
      "loss": 0.1542,
      "step": 8607
    },
    {
      "epoch": 0.25112316937977713,
      "grad_norm": 0.712752334450024,
      "learning_rate": 8.77180651376681e-06,
      "loss": 0.1582,
      "step": 8608
    },
    {
      "epoch": 0.2511523426104207,
      "grad_norm": 0.7919712574181268,
      "learning_rate": 8.771496363058323e-06,
      "loss": 0.1809,
      "step": 8609
    },
    {
      "epoch": 0.25118151584106424,
      "grad_norm": 1.0160139063748992,
      "learning_rate": 8.771186178678817e-06,
      "loss": 0.1618,
      "step": 8610
    },
    {
      "epoch": 0.2512106890717078,
      "grad_norm": 0.8429941639356404,
      "learning_rate": 8.770875960631063e-06,
      "loss": 0.1639,
      "step": 8611
    },
    {
      "epoch": 0.25123986230235135,
      "grad_norm": 1.0152709983300994,
      "learning_rate": 8.770565708917826e-06,
      "loss": 0.1607,
      "step": 8612
    },
    {
      "epoch": 0.2512690355329949,
      "grad_norm": 0.9282003931382112,
      "learning_rate": 8.77025542354188e-06,
      "loss": 0.1421,
      "step": 8613
    },
    {
      "epoch": 0.2512982087636385,
      "grad_norm": 0.8633570939081969,
      "learning_rate": 8.769945104505992e-06,
      "loss": 0.1563,
      "step": 8614
    },
    {
      "epoch": 0.25132738199428206,
      "grad_norm": 0.764496718697493,
      "learning_rate": 8.769634751812937e-06,
      "loss": 0.1615,
      "step": 8615
    },
    {
      "epoch": 0.2513565552249256,
      "grad_norm": 0.8117302432475778,
      "learning_rate": 8.769324365465482e-06,
      "loss": 0.1425,
      "step": 8616
    },
    {
      "epoch": 0.25138572845556917,
      "grad_norm": 0.9333843792401662,
      "learning_rate": 8.769013945466396e-06,
      "loss": 0.1695,
      "step": 8617
    },
    {
      "epoch": 0.2514149016862127,
      "grad_norm": 1.120504946358395,
      "learning_rate": 8.768703491818455e-06,
      "loss": 0.1693,
      "step": 8618
    },
    {
      "epoch": 0.2514440749168563,
      "grad_norm": 0.8375545723038,
      "learning_rate": 8.76839300452443e-06,
      "loss": 0.1578,
      "step": 8619
    },
    {
      "epoch": 0.25147324814749983,
      "grad_norm": 0.9141436940891724,
      "learning_rate": 8.76808248358709e-06,
      "loss": 0.1502,
      "step": 8620
    },
    {
      "epoch": 0.25150242137814344,
      "grad_norm": 0.9099046009644094,
      "learning_rate": 8.76777192900921e-06,
      "loss": 0.1797,
      "step": 8621
    },
    {
      "epoch": 0.251531594608787,
      "grad_norm": 0.7826325768943171,
      "learning_rate": 8.767461340793563e-06,
      "loss": 0.1521,
      "step": 8622
    },
    {
      "epoch": 0.25156076783943054,
      "grad_norm": 0.8693445201995545,
      "learning_rate": 8.767150718942919e-06,
      "loss": 0.1623,
      "step": 8623
    },
    {
      "epoch": 0.2515899410700741,
      "grad_norm": 1.0338015467666206,
      "learning_rate": 8.766840063460054e-06,
      "loss": 0.1561,
      "step": 8624
    },
    {
      "epoch": 0.25161911430071765,
      "grad_norm": 0.9980300350500342,
      "learning_rate": 8.766529374347738e-06,
      "loss": 0.1593,
      "step": 8625
    },
    {
      "epoch": 0.2516482875313612,
      "grad_norm": 0.8951577122410843,
      "learning_rate": 8.766218651608748e-06,
      "loss": 0.1547,
      "step": 8626
    },
    {
      "epoch": 0.2516774607620048,
      "grad_norm": 0.8125359105854942,
      "learning_rate": 8.765907895245857e-06,
      "loss": 0.1576,
      "step": 8627
    },
    {
      "epoch": 0.25170663399264837,
      "grad_norm": 0.9954761177039216,
      "learning_rate": 8.765597105261838e-06,
      "loss": 0.1688,
      "step": 8628
    },
    {
      "epoch": 0.2517358072232919,
      "grad_norm": 0.9543299198439987,
      "learning_rate": 8.765286281659469e-06,
      "loss": 0.1483,
      "step": 8629
    },
    {
      "epoch": 0.25176498045393547,
      "grad_norm": 0.8071287681668878,
      "learning_rate": 8.764975424441522e-06,
      "loss": 0.157,
      "step": 8630
    },
    {
      "epoch": 0.251794153684579,
      "grad_norm": 0.8540025790801501,
      "learning_rate": 8.764664533610774e-06,
      "loss": 0.1868,
      "step": 8631
    },
    {
      "epoch": 0.2518233269152226,
      "grad_norm": 0.9667079527671832,
      "learning_rate": 8.764353609169997e-06,
      "loss": 0.1524,
      "step": 8632
    },
    {
      "epoch": 0.25185250014586613,
      "grad_norm": 0.9058758410702621,
      "learning_rate": 8.764042651121973e-06,
      "loss": 0.1516,
      "step": 8633
    },
    {
      "epoch": 0.25188167337650974,
      "grad_norm": 0.8409416216707294,
      "learning_rate": 8.763731659469473e-06,
      "loss": 0.1475,
      "step": 8634
    },
    {
      "epoch": 0.2519108466071533,
      "grad_norm": 0.8538141851741363,
      "learning_rate": 8.763420634215277e-06,
      "loss": 0.1428,
      "step": 8635
    },
    {
      "epoch": 0.25194001983779685,
      "grad_norm": 0.9442534273461484,
      "learning_rate": 8.763109575362156e-06,
      "loss": 0.1615,
      "step": 8636
    },
    {
      "epoch": 0.2519691930684404,
      "grad_norm": 0.805499568697246,
      "learning_rate": 8.762798482912895e-06,
      "loss": 0.1632,
      "step": 8637
    },
    {
      "epoch": 0.25199836629908395,
      "grad_norm": 0.6903477934349642,
      "learning_rate": 8.762487356870267e-06,
      "loss": 0.1724,
      "step": 8638
    },
    {
      "epoch": 0.2520275395297275,
      "grad_norm": 0.7868907082652199,
      "learning_rate": 8.762176197237048e-06,
      "loss": 0.1323,
      "step": 8639
    },
    {
      "epoch": 0.25205671276037106,
      "grad_norm": 0.6980466504190983,
      "learning_rate": 8.76186500401602e-06,
      "loss": 0.1552,
      "step": 8640
    },
    {
      "epoch": 0.25208588599101467,
      "grad_norm": 0.792164302645313,
      "learning_rate": 8.761553777209957e-06,
      "loss": 0.1546,
      "step": 8641
    },
    {
      "epoch": 0.2521150592216582,
      "grad_norm": 0.94007538803066,
      "learning_rate": 8.761242516821642e-06,
      "loss": 0.1372,
      "step": 8642
    },
    {
      "epoch": 0.2521442324523018,
      "grad_norm": 0.8307347612519377,
      "learning_rate": 8.760931222853851e-06,
      "loss": 0.1457,
      "step": 8643
    },
    {
      "epoch": 0.25217340568294533,
      "grad_norm": 0.9547156420905087,
      "learning_rate": 8.760619895309364e-06,
      "loss": 0.1611,
      "step": 8644
    },
    {
      "epoch": 0.2522025789135889,
      "grad_norm": 1.2277498281310582,
      "learning_rate": 8.76030853419096e-06,
      "loss": 0.1869,
      "step": 8645
    },
    {
      "epoch": 0.25223175214423244,
      "grad_norm": 0.7589041422482073,
      "learning_rate": 8.759997139501418e-06,
      "loss": 0.1893,
      "step": 8646
    },
    {
      "epoch": 0.252260925374876,
      "grad_norm": 1.1921327947013587,
      "learning_rate": 8.759685711243519e-06,
      "loss": 0.1566,
      "step": 8647
    },
    {
      "epoch": 0.2522900986055196,
      "grad_norm": 1.0194506263989331,
      "learning_rate": 8.759374249420046e-06,
      "loss": 0.1679,
      "step": 8648
    },
    {
      "epoch": 0.25231927183616315,
      "grad_norm": 0.8559406641656732,
      "learning_rate": 8.759062754033776e-06,
      "loss": 0.1517,
      "step": 8649
    },
    {
      "epoch": 0.2523484450668067,
      "grad_norm": 0.8029563133814118,
      "learning_rate": 8.75875122508749e-06,
      "loss": 0.1579,
      "step": 8650
    },
    {
      "epoch": 0.25237761829745026,
      "grad_norm": 1.0332525877684775,
      "learning_rate": 8.758439662583972e-06,
      "loss": 0.1528,
      "step": 8651
    },
    {
      "epoch": 0.2524067915280938,
      "grad_norm": 1.0776228863205328,
      "learning_rate": 8.758128066526002e-06,
      "loss": 0.165,
      "step": 8652
    },
    {
      "epoch": 0.25243596475873736,
      "grad_norm": 0.9647404425923645,
      "learning_rate": 8.75781643691636e-06,
      "loss": 0.1493,
      "step": 8653
    },
    {
      "epoch": 0.2524651379893809,
      "grad_norm": 1.3075908157331049,
      "learning_rate": 8.757504773757831e-06,
      "loss": 0.1519,
      "step": 8654
    },
    {
      "epoch": 0.2524943112200245,
      "grad_norm": 0.9019791019347585,
      "learning_rate": 8.757193077053197e-06,
      "loss": 0.1432,
      "step": 8655
    },
    {
      "epoch": 0.2525234844506681,
      "grad_norm": 0.8670379469726758,
      "learning_rate": 8.756881346805238e-06,
      "loss": 0.1371,
      "step": 8656
    },
    {
      "epoch": 0.25255265768131163,
      "grad_norm": 0.9192781121346693,
      "learning_rate": 8.75656958301674e-06,
      "loss": 0.1419,
      "step": 8657
    },
    {
      "epoch": 0.2525818309119552,
      "grad_norm": 0.8267824674551398,
      "learning_rate": 8.756257785690488e-06,
      "loss": 0.1417,
      "step": 8658
    },
    {
      "epoch": 0.25261100414259874,
      "grad_norm": 0.7242660404992772,
      "learning_rate": 8.755945954829259e-06,
      "loss": 0.1454,
      "step": 8659
    },
    {
      "epoch": 0.2526401773732423,
      "grad_norm": 0.9708102460840208,
      "learning_rate": 8.755634090435845e-06,
      "loss": 0.158,
      "step": 8660
    },
    {
      "epoch": 0.2526693506038859,
      "grad_norm": 0.8964518881575788,
      "learning_rate": 8.755322192513026e-06,
      "loss": 0.154,
      "step": 8661
    },
    {
      "epoch": 0.25269852383452945,
      "grad_norm": 0.7597045241854307,
      "learning_rate": 8.755010261063583e-06,
      "loss": 0.1633,
      "step": 8662
    },
    {
      "epoch": 0.252727697065173,
      "grad_norm": 0.6959537002629588,
      "learning_rate": 8.754698296090306e-06,
      "loss": 0.1346,
      "step": 8663
    },
    {
      "epoch": 0.25275687029581656,
      "grad_norm": 1.1404850589425124,
      "learning_rate": 8.754386297595982e-06,
      "loss": 0.1723,
      "step": 8664
    },
    {
      "epoch": 0.2527860435264601,
      "grad_norm": 0.8324400735924496,
      "learning_rate": 8.754074265583391e-06,
      "loss": 0.1613,
      "step": 8665
    },
    {
      "epoch": 0.25281521675710367,
      "grad_norm": 0.8231664541683732,
      "learning_rate": 8.753762200055323e-06,
      "loss": 0.1457,
      "step": 8666
    },
    {
      "epoch": 0.2528443899877472,
      "grad_norm": 0.7615114736289824,
      "learning_rate": 8.75345010101456e-06,
      "loss": 0.148,
      "step": 8667
    },
    {
      "epoch": 0.25287356321839083,
      "grad_norm": 0.6634927283572258,
      "learning_rate": 8.753137968463891e-06,
      "loss": 0.1614,
      "step": 8668
    },
    {
      "epoch": 0.2529027364490344,
      "grad_norm": 0.7274235145822803,
      "learning_rate": 8.752825802406104e-06,
      "loss": 0.1622,
      "step": 8669
    },
    {
      "epoch": 0.25293190967967794,
      "grad_norm": 0.8576516402955692,
      "learning_rate": 8.752513602843984e-06,
      "loss": 0.1695,
      "step": 8670
    },
    {
      "epoch": 0.2529610829103215,
      "grad_norm": 3.6622315857967838,
      "learning_rate": 8.752201369780317e-06,
      "loss": 0.156,
      "step": 8671
    },
    {
      "epoch": 0.25299025614096504,
      "grad_norm": 1.0653715826093315,
      "learning_rate": 8.751889103217892e-06,
      "loss": 0.1438,
      "step": 8672
    },
    {
      "epoch": 0.2530194293716086,
      "grad_norm": 0.9536320888241338,
      "learning_rate": 8.751576803159495e-06,
      "loss": 0.1403,
      "step": 8673
    },
    {
      "epoch": 0.25304860260225215,
      "grad_norm": 0.9389850085117988,
      "learning_rate": 8.751264469607919e-06,
      "loss": 0.1532,
      "step": 8674
    },
    {
      "epoch": 0.25307777583289576,
      "grad_norm": 0.9264477765314943,
      "learning_rate": 8.750952102565949e-06,
      "loss": 0.1612,
      "step": 8675
    },
    {
      "epoch": 0.2531069490635393,
      "grad_norm": 0.7896236278290961,
      "learning_rate": 8.750639702036372e-06,
      "loss": 0.1576,
      "step": 8676
    },
    {
      "epoch": 0.25313612229418286,
      "grad_norm": 1.089660135807044,
      "learning_rate": 8.75032726802198e-06,
      "loss": 0.1816,
      "step": 8677
    },
    {
      "epoch": 0.2531652955248264,
      "grad_norm": 0.7963842850599411,
      "learning_rate": 8.75001480052556e-06,
      "loss": 0.167,
      "step": 8678
    },
    {
      "epoch": 0.25319446875546997,
      "grad_norm": 0.8047992319509558,
      "learning_rate": 8.749702299549908e-06,
      "loss": 0.159,
      "step": 8679
    },
    {
      "epoch": 0.2532236419861135,
      "grad_norm": 0.8361255619347895,
      "learning_rate": 8.749389765097805e-06,
      "loss": 0.1534,
      "step": 8680
    },
    {
      "epoch": 0.2532528152167571,
      "grad_norm": 0.8121742773997878,
      "learning_rate": 8.749077197172044e-06,
      "loss": 0.1673,
      "step": 8681
    },
    {
      "epoch": 0.2532819884474007,
      "grad_norm": 0.7805005133041115,
      "learning_rate": 8.74876459577542e-06,
      "loss": 0.174,
      "step": 8682
    },
    {
      "epoch": 0.25331116167804424,
      "grad_norm": 0.7274078253360706,
      "learning_rate": 8.748451960910718e-06,
      "loss": 0.1759,
      "step": 8683
    },
    {
      "epoch": 0.2533403349086878,
      "grad_norm": 1.0429367089407238,
      "learning_rate": 8.748139292580733e-06,
      "loss": 0.2104,
      "step": 8684
    },
    {
      "epoch": 0.25336950813933135,
      "grad_norm": 0.8506984585250397,
      "learning_rate": 8.747826590788256e-06,
      "loss": 0.1652,
      "step": 8685
    },
    {
      "epoch": 0.2533986813699749,
      "grad_norm": 1.1421366549445577,
      "learning_rate": 8.747513855536077e-06,
      "loss": 0.1337,
      "step": 8686
    },
    {
      "epoch": 0.25342785460061845,
      "grad_norm": 1.1113881068498233,
      "learning_rate": 8.747201086826989e-06,
      "loss": 0.1407,
      "step": 8687
    },
    {
      "epoch": 0.25345702783126206,
      "grad_norm": 0.9908063541335966,
      "learning_rate": 8.746888284663784e-06,
      "loss": 0.174,
      "step": 8688
    },
    {
      "epoch": 0.2534862010619056,
      "grad_norm": 0.7712802509983608,
      "learning_rate": 8.746575449049255e-06,
      "loss": 0.1406,
      "step": 8689
    },
    {
      "epoch": 0.25351537429254917,
      "grad_norm": 0.7431100563887824,
      "learning_rate": 8.746262579986194e-06,
      "loss": 0.1513,
      "step": 8690
    },
    {
      "epoch": 0.2535445475231927,
      "grad_norm": 0.7644407130429836,
      "learning_rate": 8.745949677477396e-06,
      "loss": 0.1537,
      "step": 8691
    },
    {
      "epoch": 0.2535737207538363,
      "grad_norm": 0.9870168221703469,
      "learning_rate": 8.745636741525654e-06,
      "loss": 0.1978,
      "step": 8692
    },
    {
      "epoch": 0.2536028939844798,
      "grad_norm": 0.8982865652069405,
      "learning_rate": 8.745323772133761e-06,
      "loss": 0.1409,
      "step": 8693
    },
    {
      "epoch": 0.2536320672151234,
      "grad_norm": 0.9042036941833452,
      "learning_rate": 8.745010769304509e-06,
      "loss": 0.1413,
      "step": 8694
    },
    {
      "epoch": 0.253661240445767,
      "grad_norm": 0.9012370192724426,
      "learning_rate": 8.744697733040696e-06,
      "loss": 0.177,
      "step": 8695
    },
    {
      "epoch": 0.25369041367641054,
      "grad_norm": 0.7438421399011913,
      "learning_rate": 8.744384663345118e-06,
      "loss": 0.1522,
      "step": 8696
    },
    {
      "epoch": 0.2537195869070541,
      "grad_norm": 0.843966325383359,
      "learning_rate": 8.744071560220567e-06,
      "loss": 0.1901,
      "step": 8697
    },
    {
      "epoch": 0.25374876013769765,
      "grad_norm": 0.7354098162433835,
      "learning_rate": 8.743758423669837e-06,
      "loss": 0.1535,
      "step": 8698
    },
    {
      "epoch": 0.2537779333683412,
      "grad_norm": 1.2117025504985062,
      "learning_rate": 8.743445253695725e-06,
      "loss": 0.1744,
      "step": 8699
    },
    {
      "epoch": 0.25380710659898476,
      "grad_norm": 0.9362173993868674,
      "learning_rate": 8.743132050301031e-06,
      "loss": 0.1653,
      "step": 8700
    },
    {
      "epoch": 0.2538362798296283,
      "grad_norm": 0.8310313706059644,
      "learning_rate": 8.742818813488545e-06,
      "loss": 0.15,
      "step": 8701
    },
    {
      "epoch": 0.2538654530602719,
      "grad_norm": 0.8139816868476868,
      "learning_rate": 8.742505543261066e-06,
      "loss": 0.1394,
      "step": 8702
    },
    {
      "epoch": 0.25389462629091547,
      "grad_norm": 0.7197049985343916,
      "learning_rate": 8.742192239621391e-06,
      "loss": 0.159,
      "step": 8703
    },
    {
      "epoch": 0.253923799521559,
      "grad_norm": 0.7073837646585768,
      "learning_rate": 8.741878902572318e-06,
      "loss": 0.1603,
      "step": 8704
    },
    {
      "epoch": 0.2539529727522026,
      "grad_norm": 0.7041714988132413,
      "learning_rate": 8.741565532116643e-06,
      "loss": 0.1687,
      "step": 8705
    },
    {
      "epoch": 0.25398214598284613,
      "grad_norm": 0.7522600859004025,
      "learning_rate": 8.741252128257164e-06,
      "loss": 0.1433,
      "step": 8706
    },
    {
      "epoch": 0.2540113192134897,
      "grad_norm": 0.7407409471862808,
      "learning_rate": 8.740938690996678e-06,
      "loss": 0.1498,
      "step": 8707
    },
    {
      "epoch": 0.25404049244413324,
      "grad_norm": 0.8667489606012565,
      "learning_rate": 8.740625220337987e-06,
      "loss": 0.1435,
      "step": 8708
    },
    {
      "epoch": 0.25406966567477685,
      "grad_norm": 0.6547311529300278,
      "learning_rate": 8.740311716283884e-06,
      "loss": 0.1362,
      "step": 8709
    },
    {
      "epoch": 0.2540988389054204,
      "grad_norm": 0.8371369414539442,
      "learning_rate": 8.739998178837172e-06,
      "loss": 0.1499,
      "step": 8710
    },
    {
      "epoch": 0.25412801213606395,
      "grad_norm": 0.8356802734640116,
      "learning_rate": 8.739684608000651e-06,
      "loss": 0.1419,
      "step": 8711
    },
    {
      "epoch": 0.2541571853667075,
      "grad_norm": 0.7991211929141547,
      "learning_rate": 8.739371003777117e-06,
      "loss": 0.1506,
      "step": 8712
    },
    {
      "epoch": 0.25418635859735106,
      "grad_norm": 0.7427853021170802,
      "learning_rate": 8.73905736616937e-06,
      "loss": 0.1538,
      "step": 8713
    },
    {
      "epoch": 0.2542155318279946,
      "grad_norm": 0.8113598601341298,
      "learning_rate": 8.738743695180214e-06,
      "loss": 0.1589,
      "step": 8714
    },
    {
      "epoch": 0.2542447050586382,
      "grad_norm": 0.8569630973636471,
      "learning_rate": 8.738429990812445e-06,
      "loss": 0.1637,
      "step": 8715
    },
    {
      "epoch": 0.2542738782892818,
      "grad_norm": 0.9100324245795195,
      "learning_rate": 8.738116253068866e-06,
      "loss": 0.16,
      "step": 8716
    },
    {
      "epoch": 0.2543030515199253,
      "grad_norm": 0.8638207685398084,
      "learning_rate": 8.737802481952277e-06,
      "loss": 0.1536,
      "step": 8717
    },
    {
      "epoch": 0.2543322247505689,
      "grad_norm": 0.845447471564934,
      "learning_rate": 8.73748867746548e-06,
      "loss": 0.1394,
      "step": 8718
    },
    {
      "epoch": 0.25436139798121243,
      "grad_norm": 0.873617854106875,
      "learning_rate": 8.737174839611277e-06,
      "loss": 0.1692,
      "step": 8719
    },
    {
      "epoch": 0.254390571211856,
      "grad_norm": 0.9311736290655294,
      "learning_rate": 8.736860968392469e-06,
      "loss": 0.1448,
      "step": 8720
    },
    {
      "epoch": 0.25441974444249954,
      "grad_norm": 1.1191446734340702,
      "learning_rate": 8.736547063811858e-06,
      "loss": 0.1659,
      "step": 8721
    },
    {
      "epoch": 0.25444891767314315,
      "grad_norm": 1.1348476700458443,
      "learning_rate": 8.736233125872247e-06,
      "loss": 0.1582,
      "step": 8722
    },
    {
      "epoch": 0.2544780909037867,
      "grad_norm": 1.1460856678321898,
      "learning_rate": 8.735919154576438e-06,
      "loss": 0.1621,
      "step": 8723
    },
    {
      "epoch": 0.25450726413443026,
      "grad_norm": 1.1746904149793125,
      "learning_rate": 8.735605149927236e-06,
      "loss": 0.1658,
      "step": 8724
    },
    {
      "epoch": 0.2545364373650738,
      "grad_norm": 1.329046099020268,
      "learning_rate": 8.735291111927441e-06,
      "loss": 0.1516,
      "step": 8725
    },
    {
      "epoch": 0.25456561059571736,
      "grad_norm": 1.1640390457061447,
      "learning_rate": 8.73497704057986e-06,
      "loss": 0.1882,
      "step": 8726
    },
    {
      "epoch": 0.2545947838263609,
      "grad_norm": 1.0280124658537055,
      "learning_rate": 8.734662935887295e-06,
      "loss": 0.1573,
      "step": 8727
    },
    {
      "epoch": 0.25462395705700447,
      "grad_norm": 0.9639943602244389,
      "learning_rate": 8.73434879785255e-06,
      "loss": 0.17,
      "step": 8728
    },
    {
      "epoch": 0.2546531302876481,
      "grad_norm": 0.825427524115082,
      "learning_rate": 8.734034626478432e-06,
      "loss": 0.1587,
      "step": 8729
    },
    {
      "epoch": 0.25468230351829163,
      "grad_norm": 0.9130862061586684,
      "learning_rate": 8.733720421767744e-06,
      "loss": 0.1663,
      "step": 8730
    },
    {
      "epoch": 0.2547114767489352,
      "grad_norm": 0.8498724775905401,
      "learning_rate": 8.733406183723293e-06,
      "loss": 0.1486,
      "step": 8731
    },
    {
      "epoch": 0.25474064997957874,
      "grad_norm": 1.0775770106421836,
      "learning_rate": 8.73309191234788e-06,
      "loss": 0.1389,
      "step": 8732
    },
    {
      "epoch": 0.2547698232102223,
      "grad_norm": 0.7640333168428327,
      "learning_rate": 8.732777607644314e-06,
      "loss": 0.1382,
      "step": 8733
    },
    {
      "epoch": 0.25479899644086584,
      "grad_norm": 0.8952833004604634,
      "learning_rate": 8.7324632696154e-06,
      "loss": 0.1836,
      "step": 8734
    },
    {
      "epoch": 0.2548281696715094,
      "grad_norm": 0.849439438598427,
      "learning_rate": 8.732148898263946e-06,
      "loss": 0.1611,
      "step": 8735
    },
    {
      "epoch": 0.254857342902153,
      "grad_norm": 1.0782415390561326,
      "learning_rate": 8.73183449359276e-06,
      "loss": 0.196,
      "step": 8736
    },
    {
      "epoch": 0.25488651613279656,
      "grad_norm": 1.0005187587540993,
      "learning_rate": 8.731520055604642e-06,
      "loss": 0.2017,
      "step": 8737
    },
    {
      "epoch": 0.2549156893634401,
      "grad_norm": 0.7896312246550041,
      "learning_rate": 8.731205584302406e-06,
      "loss": 0.1589,
      "step": 8738
    },
    {
      "epoch": 0.25494486259408367,
      "grad_norm": 0.944690480624181,
      "learning_rate": 8.730891079688856e-06,
      "loss": 0.1515,
      "step": 8739
    },
    {
      "epoch": 0.2549740358247272,
      "grad_norm": 1.028373765804265,
      "learning_rate": 8.730576541766803e-06,
      "loss": 0.1718,
      "step": 8740
    },
    {
      "epoch": 0.25500320905537077,
      "grad_norm": 0.9030042930995393,
      "learning_rate": 8.730261970539052e-06,
      "loss": 0.1606,
      "step": 8741
    },
    {
      "epoch": 0.2550323822860144,
      "grad_norm": 0.8539913410412551,
      "learning_rate": 8.729947366008413e-06,
      "loss": 0.1404,
      "step": 8742
    },
    {
      "epoch": 0.25506155551665793,
      "grad_norm": 0.8820475575043571,
      "learning_rate": 8.729632728177695e-06,
      "loss": 0.1377,
      "step": 8743
    },
    {
      "epoch": 0.2550907287473015,
      "grad_norm": 0.7477643030610125,
      "learning_rate": 8.729318057049704e-06,
      "loss": 0.1599,
      "step": 8744
    },
    {
      "epoch": 0.25511990197794504,
      "grad_norm": 0.7396971754806884,
      "learning_rate": 8.729003352627255e-06,
      "loss": 0.1551,
      "step": 8745
    },
    {
      "epoch": 0.2551490752085886,
      "grad_norm": 1.1071026612148385,
      "learning_rate": 8.728688614913152e-06,
      "loss": 0.1372,
      "step": 8746
    },
    {
      "epoch": 0.25517824843923215,
      "grad_norm": 0.9409646097818506,
      "learning_rate": 8.728373843910207e-06,
      "loss": 0.1521,
      "step": 8747
    },
    {
      "epoch": 0.2552074216698757,
      "grad_norm": 0.9150115926042979,
      "learning_rate": 8.728059039621231e-06,
      "loss": 0.1638,
      "step": 8748
    },
    {
      "epoch": 0.2552365949005193,
      "grad_norm": 0.9937005959563189,
      "learning_rate": 8.727744202049035e-06,
      "loss": 0.1609,
      "step": 8749
    },
    {
      "epoch": 0.25526576813116286,
      "grad_norm": 0.8958513950848999,
      "learning_rate": 8.727429331196426e-06,
      "loss": 0.1703,
      "step": 8750
    },
    {
      "epoch": 0.2552949413618064,
      "grad_norm": 0.8248501867962532,
      "learning_rate": 8.72711442706622e-06,
      "loss": 0.1424,
      "step": 8751
    },
    {
      "epoch": 0.25532411459244997,
      "grad_norm": 0.9571136844958456,
      "learning_rate": 8.726799489661225e-06,
      "loss": 0.1659,
      "step": 8752
    },
    {
      "epoch": 0.2553532878230935,
      "grad_norm": 1.0667095653183198,
      "learning_rate": 8.726484518984256e-06,
      "loss": 0.1616,
      "step": 8753
    },
    {
      "epoch": 0.2553824610537371,
      "grad_norm": 0.8618680921339075,
      "learning_rate": 8.72616951503812e-06,
      "loss": 0.1573,
      "step": 8754
    },
    {
      "epoch": 0.25541163428438063,
      "grad_norm": 0.8299326664354898,
      "learning_rate": 8.725854477825632e-06,
      "loss": 0.179,
      "step": 8755
    },
    {
      "epoch": 0.25544080751502424,
      "grad_norm": 0.8713894055508062,
      "learning_rate": 8.725539407349606e-06,
      "loss": 0.1391,
      "step": 8756
    },
    {
      "epoch": 0.2554699807456678,
      "grad_norm": 0.8007962491614893,
      "learning_rate": 8.725224303612854e-06,
      "loss": 0.1533,
      "step": 8757
    },
    {
      "epoch": 0.25549915397631134,
      "grad_norm": 0.971945767080681,
      "learning_rate": 8.724909166618187e-06,
      "loss": 0.1811,
      "step": 8758
    },
    {
      "epoch": 0.2555283272069549,
      "grad_norm": 0.8146285945171134,
      "learning_rate": 8.724593996368422e-06,
      "loss": 0.1613,
      "step": 8759
    },
    {
      "epoch": 0.25555750043759845,
      "grad_norm": 0.8374369013562248,
      "learning_rate": 8.72427879286637e-06,
      "loss": 0.1497,
      "step": 8760
    },
    {
      "epoch": 0.255586673668242,
      "grad_norm": 0.9625265198360471,
      "learning_rate": 8.723963556114847e-06,
      "loss": 0.1552,
      "step": 8761
    },
    {
      "epoch": 0.25561584689888556,
      "grad_norm": 0.7911600922588395,
      "learning_rate": 8.723648286116664e-06,
      "loss": 0.1567,
      "step": 8762
    },
    {
      "epoch": 0.25564502012952917,
      "grad_norm": 0.6944363270822046,
      "learning_rate": 8.723332982874639e-06,
      "loss": 0.16,
      "step": 8763
    },
    {
      "epoch": 0.2556741933601727,
      "grad_norm": 0.7716485153147591,
      "learning_rate": 8.723017646391587e-06,
      "loss": 0.1689,
      "step": 8764
    },
    {
      "epoch": 0.2557033665908163,
      "grad_norm": 0.8354276672058369,
      "learning_rate": 8.722702276670323e-06,
      "loss": 0.1541,
      "step": 8765
    },
    {
      "epoch": 0.2557325398214598,
      "grad_norm": 0.695295395783904,
      "learning_rate": 8.72238687371366e-06,
      "loss": 0.1488,
      "step": 8766
    },
    {
      "epoch": 0.2557617130521034,
      "grad_norm": 0.7860631251591896,
      "learning_rate": 8.722071437524415e-06,
      "loss": 0.1492,
      "step": 8767
    },
    {
      "epoch": 0.25579088628274693,
      "grad_norm": 0.8912421457723493,
      "learning_rate": 8.721755968105406e-06,
      "loss": 0.1689,
      "step": 8768
    },
    {
      "epoch": 0.2558200595133905,
      "grad_norm": 0.7598400483680359,
      "learning_rate": 8.721440465459448e-06,
      "loss": 0.1569,
      "step": 8769
    },
    {
      "epoch": 0.2558492327440341,
      "grad_norm": 0.857580910539667,
      "learning_rate": 8.721124929589358e-06,
      "loss": 0.1437,
      "step": 8770
    },
    {
      "epoch": 0.25587840597467765,
      "grad_norm": 0.6838812636405465,
      "learning_rate": 8.720809360497953e-06,
      "loss": 0.1589,
      "step": 8771
    },
    {
      "epoch": 0.2559075792053212,
      "grad_norm": 0.9153310537215729,
      "learning_rate": 8.720493758188049e-06,
      "loss": 0.1506,
      "step": 8772
    },
    {
      "epoch": 0.25593675243596475,
      "grad_norm": 0.82615409438108,
      "learning_rate": 8.720178122662466e-06,
      "loss": 0.152,
      "step": 8773
    },
    {
      "epoch": 0.2559659256666083,
      "grad_norm": 0.8605169310453629,
      "learning_rate": 8.71986245392402e-06,
      "loss": 0.1637,
      "step": 8774
    },
    {
      "epoch": 0.25599509889725186,
      "grad_norm": 0.9090589536210341,
      "learning_rate": 8.719546751975531e-06,
      "loss": 0.1457,
      "step": 8775
    },
    {
      "epoch": 0.25602427212789547,
      "grad_norm": 0.7772837033436167,
      "learning_rate": 8.719231016819817e-06,
      "loss": 0.1443,
      "step": 8776
    },
    {
      "epoch": 0.256053445358539,
      "grad_norm": 1.0667870199913696,
      "learning_rate": 8.718915248459697e-06,
      "loss": 0.1499,
      "step": 8777
    },
    {
      "epoch": 0.2560826185891826,
      "grad_norm": 0.8081053989600095,
      "learning_rate": 8.718599446897987e-06,
      "loss": 0.1692,
      "step": 8778
    },
    {
      "epoch": 0.25611179181982613,
      "grad_norm": 0.983564666362862,
      "learning_rate": 8.718283612137508e-06,
      "loss": 0.1746,
      "step": 8779
    },
    {
      "epoch": 0.2561409650504697,
      "grad_norm": 0.8528117300126145,
      "learning_rate": 8.717967744181084e-06,
      "loss": 0.1757,
      "step": 8780
    },
    {
      "epoch": 0.25617013828111324,
      "grad_norm": 0.741080976782169,
      "learning_rate": 8.717651843031529e-06,
      "loss": 0.1547,
      "step": 8781
    },
    {
      "epoch": 0.2561993115117568,
      "grad_norm": 1.1153756248130342,
      "learning_rate": 8.717335908691667e-06,
      "loss": 0.1674,
      "step": 8782
    },
    {
      "epoch": 0.2562284847424004,
      "grad_norm": 0.8409951078676309,
      "learning_rate": 8.717019941164317e-06,
      "loss": 0.1452,
      "step": 8783
    },
    {
      "epoch": 0.25625765797304395,
      "grad_norm": 0.6809635702314836,
      "learning_rate": 8.7167039404523e-06,
      "loss": 0.1911,
      "step": 8784
    },
    {
      "epoch": 0.2562868312036875,
      "grad_norm": 1.1079818340434244,
      "learning_rate": 8.71638790655844e-06,
      "loss": 0.1412,
      "step": 8785
    },
    {
      "epoch": 0.25631600443433106,
      "grad_norm": 0.9838662069191417,
      "learning_rate": 8.716071839485552e-06,
      "loss": 0.1679,
      "step": 8786
    },
    {
      "epoch": 0.2563451776649746,
      "grad_norm": 0.6506205885962708,
      "learning_rate": 8.715755739236464e-06,
      "loss": 0.1546,
      "step": 8787
    },
    {
      "epoch": 0.25637435089561816,
      "grad_norm": 0.718062480563348,
      "learning_rate": 8.715439605813994e-06,
      "loss": 0.1545,
      "step": 8788
    },
    {
      "epoch": 0.2564035241262617,
      "grad_norm": 0.8612418407748099,
      "learning_rate": 8.715123439220968e-06,
      "loss": 0.141,
      "step": 8789
    },
    {
      "epoch": 0.2564326973569053,
      "grad_norm": 0.634017237768269,
      "learning_rate": 8.714807239460206e-06,
      "loss": 0.1696,
      "step": 8790
    },
    {
      "epoch": 0.2564618705875489,
      "grad_norm": 0.7110658042582967,
      "learning_rate": 8.714491006534532e-06,
      "loss": 0.155,
      "step": 8791
    },
    {
      "epoch": 0.25649104381819243,
      "grad_norm": 1.2230597945265116,
      "learning_rate": 8.714174740446769e-06,
      "loss": 0.1521,
      "step": 8792
    },
    {
      "epoch": 0.256520217048836,
      "grad_norm": 0.7733159226290556,
      "learning_rate": 8.713858441199741e-06,
      "loss": 0.179,
      "step": 8793
    },
    {
      "epoch": 0.25654939027947954,
      "grad_norm": 0.9072653388804234,
      "learning_rate": 8.713542108796271e-06,
      "loss": 0.1584,
      "step": 8794
    },
    {
      "epoch": 0.2565785635101231,
      "grad_norm": 1.1565474412328696,
      "learning_rate": 8.713225743239183e-06,
      "loss": 0.1729,
      "step": 8795
    },
    {
      "epoch": 0.25660773674076665,
      "grad_norm": 0.8551553752200075,
      "learning_rate": 8.712909344531302e-06,
      "loss": 0.1369,
      "step": 8796
    },
    {
      "epoch": 0.25663690997141025,
      "grad_norm": 0.9607543688190029,
      "learning_rate": 8.712592912675454e-06,
      "loss": 0.1688,
      "step": 8797
    },
    {
      "epoch": 0.2566660832020538,
      "grad_norm": 0.9504308927374339,
      "learning_rate": 8.712276447674462e-06,
      "loss": 0.1917,
      "step": 8798
    },
    {
      "epoch": 0.25669525643269736,
      "grad_norm": 1.0782985099457012,
      "learning_rate": 8.711959949531152e-06,
      "loss": 0.183,
      "step": 8799
    },
    {
      "epoch": 0.2567244296633409,
      "grad_norm": 1.1792973780465728,
      "learning_rate": 8.71164341824835e-06,
      "loss": 0.1563,
      "step": 8800
    },
    {
      "epoch": 0.25675360289398447,
      "grad_norm": 0.8086254196557137,
      "learning_rate": 8.711326853828881e-06,
      "loss": 0.1761,
      "step": 8801
    },
    {
      "epoch": 0.256782776124628,
      "grad_norm": 0.8422364878747902,
      "learning_rate": 8.711010256275572e-06,
      "loss": 0.1523,
      "step": 8802
    },
    {
      "epoch": 0.25681194935527163,
      "grad_norm": 1.0218613293672558,
      "learning_rate": 8.710693625591249e-06,
      "loss": 0.1478,
      "step": 8803
    },
    {
      "epoch": 0.2568411225859152,
      "grad_norm": 1.067926623148478,
      "learning_rate": 8.71037696177874e-06,
      "loss": 0.1506,
      "step": 8804
    },
    {
      "epoch": 0.25687029581655874,
      "grad_norm": 0.8521451983426845,
      "learning_rate": 8.710060264840872e-06,
      "loss": 0.1609,
      "step": 8805
    },
    {
      "epoch": 0.2568994690472023,
      "grad_norm": 1.0538250897279264,
      "learning_rate": 8.70974353478047e-06,
      "loss": 0.1773,
      "step": 8806
    },
    {
      "epoch": 0.25692864227784584,
      "grad_norm": 0.8845549457939076,
      "learning_rate": 8.709426771600363e-06,
      "loss": 0.1599,
      "step": 8807
    },
    {
      "epoch": 0.2569578155084894,
      "grad_norm": 1.0316144930372386,
      "learning_rate": 8.70910997530338e-06,
      "loss": 0.1897,
      "step": 8808
    },
    {
      "epoch": 0.25698698873913295,
      "grad_norm": 0.8275803806540443,
      "learning_rate": 8.70879314589235e-06,
      "loss": 0.1785,
      "step": 8809
    },
    {
      "epoch": 0.25701616196977656,
      "grad_norm": 0.9629761451867813,
      "learning_rate": 8.708476283370098e-06,
      "loss": 0.1811,
      "step": 8810
    },
    {
      "epoch": 0.2570453352004201,
      "grad_norm": 0.9505803037569778,
      "learning_rate": 8.708159387739456e-06,
      "loss": 0.158,
      "step": 8811
    },
    {
      "epoch": 0.25707450843106366,
      "grad_norm": 0.7218256849695398,
      "learning_rate": 8.70784245900325e-06,
      "loss": 0.1709,
      "step": 8812
    },
    {
      "epoch": 0.2571036816617072,
      "grad_norm": 0.8765072740167568,
      "learning_rate": 8.707525497164316e-06,
      "loss": 0.1731,
      "step": 8813
    },
    {
      "epoch": 0.25713285489235077,
      "grad_norm": 0.8155519387229843,
      "learning_rate": 8.707208502225476e-06,
      "loss": 0.1466,
      "step": 8814
    },
    {
      "epoch": 0.2571620281229943,
      "grad_norm": 0.7103380025975924,
      "learning_rate": 8.706891474189566e-06,
      "loss": 0.1564,
      "step": 8815
    },
    {
      "epoch": 0.2571912013536379,
      "grad_norm": 0.7816166744788744,
      "learning_rate": 8.706574413059411e-06,
      "loss": 0.148,
      "step": 8816
    },
    {
      "epoch": 0.2572203745842815,
      "grad_norm": 0.6888803989050883,
      "learning_rate": 8.706257318837846e-06,
      "loss": 0.1337,
      "step": 8817
    },
    {
      "epoch": 0.25724954781492504,
      "grad_norm": 0.7799227031136653,
      "learning_rate": 8.7059401915277e-06,
      "loss": 0.1365,
      "step": 8818
    },
    {
      "epoch": 0.2572787210455686,
      "grad_norm": 0.8660913193244866,
      "learning_rate": 8.705623031131805e-06,
      "loss": 0.1838,
      "step": 8819
    },
    {
      "epoch": 0.25730789427621215,
      "grad_norm": 0.6483985644983833,
      "learning_rate": 8.70530583765299e-06,
      "loss": 0.1331,
      "step": 8820
    },
    {
      "epoch": 0.2573370675068557,
      "grad_norm": 1.2646915220802024,
      "learning_rate": 8.704988611094093e-06,
      "loss": 0.1595,
      "step": 8821
    },
    {
      "epoch": 0.25736624073749925,
      "grad_norm": 0.7979583065309406,
      "learning_rate": 8.704671351457941e-06,
      "loss": 0.1565,
      "step": 8822
    },
    {
      "epoch": 0.2573954139681428,
      "grad_norm": 0.7451983996609904,
      "learning_rate": 8.704354058747366e-06,
      "loss": 0.1467,
      "step": 8823
    },
    {
      "epoch": 0.2574245871987864,
      "grad_norm": 1.0089067781043577,
      "learning_rate": 8.704036732965202e-06,
      "loss": 0.1668,
      "step": 8824
    },
    {
      "epoch": 0.25745376042942997,
      "grad_norm": 1.0520293847921796,
      "learning_rate": 8.703719374114283e-06,
      "loss": 0.1518,
      "step": 8825
    },
    {
      "epoch": 0.2574829336600735,
      "grad_norm": 0.8712651565054161,
      "learning_rate": 8.703401982197444e-06,
      "loss": 0.1662,
      "step": 8826
    },
    {
      "epoch": 0.2575121068907171,
      "grad_norm": 0.9988059629732003,
      "learning_rate": 8.703084557217513e-06,
      "loss": 0.1497,
      "step": 8827
    },
    {
      "epoch": 0.2575412801213606,
      "grad_norm": 0.8589199021224142,
      "learning_rate": 8.702767099177328e-06,
      "loss": 0.1649,
      "step": 8828
    },
    {
      "epoch": 0.2575704533520042,
      "grad_norm": 0.7726342037518046,
      "learning_rate": 8.702449608079722e-06,
      "loss": 0.1294,
      "step": 8829
    },
    {
      "epoch": 0.2575996265826478,
      "grad_norm": 0.7079604464746598,
      "learning_rate": 8.70213208392753e-06,
      "loss": 0.1504,
      "step": 8830
    },
    {
      "epoch": 0.25762879981329134,
      "grad_norm": 0.8668711322604143,
      "learning_rate": 8.701814526723588e-06,
      "loss": 0.1645,
      "step": 8831
    },
    {
      "epoch": 0.2576579730439349,
      "grad_norm": 0.8689087172820066,
      "learning_rate": 8.701496936470728e-06,
      "loss": 0.1598,
      "step": 8832
    },
    {
      "epoch": 0.25768714627457845,
      "grad_norm": 0.729254569499863,
      "learning_rate": 8.701179313171787e-06,
      "loss": 0.1745,
      "step": 8833
    },
    {
      "epoch": 0.257716319505222,
      "grad_norm": 0.7750600007600736,
      "learning_rate": 8.7008616568296e-06,
      "loss": 0.161,
      "step": 8834
    },
    {
      "epoch": 0.25774549273586556,
      "grad_norm": 1.1576861073778233,
      "learning_rate": 8.700543967447005e-06,
      "loss": 0.1961,
      "step": 8835
    },
    {
      "epoch": 0.2577746659665091,
      "grad_norm": 0.7936500970083683,
      "learning_rate": 8.700226245026838e-06,
      "loss": 0.1596,
      "step": 8836
    },
    {
      "epoch": 0.2578038391971527,
      "grad_norm": 0.82538331026121,
      "learning_rate": 8.699908489571931e-06,
      "loss": 0.1334,
      "step": 8837
    },
    {
      "epoch": 0.25783301242779627,
      "grad_norm": 0.7553905612520914,
      "learning_rate": 8.699590701085125e-06,
      "loss": 0.1621,
      "step": 8838
    },
    {
      "epoch": 0.2578621856584398,
      "grad_norm": 0.7823037908029749,
      "learning_rate": 8.699272879569258e-06,
      "loss": 0.161,
      "step": 8839
    },
    {
      "epoch": 0.2578913588890834,
      "grad_norm": 0.8909308772809414,
      "learning_rate": 8.698955025027165e-06,
      "loss": 0.1622,
      "step": 8840
    },
    {
      "epoch": 0.25792053211972693,
      "grad_norm": 0.6483082566578128,
      "learning_rate": 8.698637137461685e-06,
      "loss": 0.147,
      "step": 8841
    },
    {
      "epoch": 0.2579497053503705,
      "grad_norm": 1.1071666630884744,
      "learning_rate": 8.698319216875656e-06,
      "loss": 0.1652,
      "step": 8842
    },
    {
      "epoch": 0.25797887858101404,
      "grad_norm": 0.8406033754314253,
      "learning_rate": 8.698001263271914e-06,
      "loss": 0.1398,
      "step": 8843
    },
    {
      "epoch": 0.25800805181165765,
      "grad_norm": 0.7163675769592432,
      "learning_rate": 8.697683276653302e-06,
      "loss": 0.1765,
      "step": 8844
    },
    {
      "epoch": 0.2580372250423012,
      "grad_norm": 0.8801074754734578,
      "learning_rate": 8.697365257022654e-06,
      "loss": 0.1617,
      "step": 8845
    },
    {
      "epoch": 0.25806639827294475,
      "grad_norm": 0.8879008470690003,
      "learning_rate": 8.697047204382813e-06,
      "loss": 0.1408,
      "step": 8846
    },
    {
      "epoch": 0.2580955715035883,
      "grad_norm": 0.7716951003806196,
      "learning_rate": 8.696729118736618e-06,
      "loss": 0.1424,
      "step": 8847
    },
    {
      "epoch": 0.25812474473423186,
      "grad_norm": 0.8899398981891741,
      "learning_rate": 8.696411000086906e-06,
      "loss": 0.1855,
      "step": 8848
    },
    {
      "epoch": 0.2581539179648754,
      "grad_norm": 0.8839715875952767,
      "learning_rate": 8.69609284843652e-06,
      "loss": 0.1559,
      "step": 8849
    },
    {
      "epoch": 0.25818309119551897,
      "grad_norm": 1.0712819701383225,
      "learning_rate": 8.695774663788299e-06,
      "loss": 0.1678,
      "step": 8850
    },
    {
      "epoch": 0.2582122644261626,
      "grad_norm": 0.9844537685094489,
      "learning_rate": 8.695456446145084e-06,
      "loss": 0.1464,
      "step": 8851
    },
    {
      "epoch": 0.2582414376568061,
      "grad_norm": 1.0907816708370612,
      "learning_rate": 8.695138195509715e-06,
      "loss": 0.1625,
      "step": 8852
    },
    {
      "epoch": 0.2582706108874497,
      "grad_norm": 1.0740844466516966,
      "learning_rate": 8.694819911885034e-06,
      "loss": 0.1758,
      "step": 8853
    },
    {
      "epoch": 0.25829978411809323,
      "grad_norm": 0.8891126516343939,
      "learning_rate": 8.694501595273887e-06,
      "loss": 0.153,
      "step": 8854
    },
    {
      "epoch": 0.2583289573487368,
      "grad_norm": 0.8336304641396416,
      "learning_rate": 8.694183245679108e-06,
      "loss": 0.1636,
      "step": 8855
    },
    {
      "epoch": 0.25835813057938034,
      "grad_norm": 0.9731712741496811,
      "learning_rate": 8.693864863103546e-06,
      "loss": 0.1687,
      "step": 8856
    },
    {
      "epoch": 0.25838730381002395,
      "grad_norm": 0.9648990726717422,
      "learning_rate": 8.693546447550036e-06,
      "loss": 0.173,
      "step": 8857
    },
    {
      "epoch": 0.2584164770406675,
      "grad_norm": 0.904415710537177,
      "learning_rate": 8.693227999021428e-06,
      "loss": 0.1546,
      "step": 8858
    },
    {
      "epoch": 0.25844565027131106,
      "grad_norm": 0.7976806557549195,
      "learning_rate": 8.69290951752056e-06,
      "loss": 0.1497,
      "step": 8859
    },
    {
      "epoch": 0.2584748235019546,
      "grad_norm": 0.8187815959775875,
      "learning_rate": 8.69259100305028e-06,
      "loss": 0.1555,
      "step": 8860
    },
    {
      "epoch": 0.25850399673259816,
      "grad_norm": 0.7853515194328271,
      "learning_rate": 8.692272455613427e-06,
      "loss": 0.1534,
      "step": 8861
    },
    {
      "epoch": 0.2585331699632417,
      "grad_norm": 0.7234433521282186,
      "learning_rate": 8.691953875212848e-06,
      "loss": 0.1656,
      "step": 8862
    },
    {
      "epoch": 0.25856234319388527,
      "grad_norm": 0.8750074985201797,
      "learning_rate": 8.691635261851385e-06,
      "loss": 0.1457,
      "step": 8863
    },
    {
      "epoch": 0.2585915164245289,
      "grad_norm": 0.7847218231122696,
      "learning_rate": 8.691316615531885e-06,
      "loss": 0.1459,
      "step": 8864
    },
    {
      "epoch": 0.25862068965517243,
      "grad_norm": 0.7680291020795362,
      "learning_rate": 8.690997936257191e-06,
      "loss": 0.1697,
      "step": 8865
    },
    {
      "epoch": 0.258649862885816,
      "grad_norm": 0.6764550047370509,
      "learning_rate": 8.690679224030149e-06,
      "loss": 0.156,
      "step": 8866
    },
    {
      "epoch": 0.25867903611645954,
      "grad_norm": 0.8041923507195612,
      "learning_rate": 8.690360478853603e-06,
      "loss": 0.1414,
      "step": 8867
    },
    {
      "epoch": 0.2587082093471031,
      "grad_norm": 0.8314780058372228,
      "learning_rate": 8.6900417007304e-06,
      "loss": 0.145,
      "step": 8868
    },
    {
      "epoch": 0.25873738257774664,
      "grad_norm": 0.8227874731554615,
      "learning_rate": 8.689722889663386e-06,
      "loss": 0.1571,
      "step": 8869
    },
    {
      "epoch": 0.2587665558083902,
      "grad_norm": 0.7284739481406283,
      "learning_rate": 8.689404045655406e-06,
      "loss": 0.1593,
      "step": 8870
    },
    {
      "epoch": 0.2587957290390338,
      "grad_norm": 1.277646345397803,
      "learning_rate": 8.689085168709309e-06,
      "loss": 0.1556,
      "step": 8871
    },
    {
      "epoch": 0.25882490226967736,
      "grad_norm": 0.8730722697824033,
      "learning_rate": 8.688766258827938e-06,
      "loss": 0.1789,
      "step": 8872
    },
    {
      "epoch": 0.2588540755003209,
      "grad_norm": 0.8774586082077487,
      "learning_rate": 8.688447316014144e-06,
      "loss": 0.1462,
      "step": 8873
    },
    {
      "epoch": 0.25888324873096447,
      "grad_norm": 0.7898122849796687,
      "learning_rate": 8.688128340270772e-06,
      "loss": 0.1662,
      "step": 8874
    },
    {
      "epoch": 0.258912421961608,
      "grad_norm": 0.7686793143659327,
      "learning_rate": 8.68780933160067e-06,
      "loss": 0.147,
      "step": 8875
    },
    {
      "epoch": 0.2589415951922516,
      "grad_norm": 0.9601607688264278,
      "learning_rate": 8.687490290006689e-06,
      "loss": 0.1646,
      "step": 8876
    },
    {
      "epoch": 0.2589707684228951,
      "grad_norm": 0.9082518558436797,
      "learning_rate": 8.687171215491673e-06,
      "loss": 0.1979,
      "step": 8877
    },
    {
      "epoch": 0.25899994165353873,
      "grad_norm": 0.761382025265103,
      "learning_rate": 8.686852108058472e-06,
      "loss": 0.1427,
      "step": 8878
    },
    {
      "epoch": 0.2590291148841823,
      "grad_norm": 0.7154054704953288,
      "learning_rate": 8.686532967709938e-06,
      "loss": 0.1495,
      "step": 8879
    },
    {
      "epoch": 0.25905828811482584,
      "grad_norm": 0.7285945560701206,
      "learning_rate": 8.686213794448914e-06,
      "loss": 0.1534,
      "step": 8880
    },
    {
      "epoch": 0.2590874613454694,
      "grad_norm": 0.8378752925761715,
      "learning_rate": 8.685894588278256e-06,
      "loss": 0.148,
      "step": 8881
    },
    {
      "epoch": 0.25911663457611295,
      "grad_norm": 0.9615955944272917,
      "learning_rate": 8.685575349200812e-06,
      "loss": 0.164,
      "step": 8882
    },
    {
      "epoch": 0.2591458078067565,
      "grad_norm": 0.7677578971911746,
      "learning_rate": 8.685256077219428e-06,
      "loss": 0.1464,
      "step": 8883
    },
    {
      "epoch": 0.25917498103740005,
      "grad_norm": 0.877176485121053,
      "learning_rate": 8.684936772336961e-06,
      "loss": 0.1421,
      "step": 8884
    },
    {
      "epoch": 0.25920415426804366,
      "grad_norm": 0.9925513683909988,
      "learning_rate": 8.684617434556255e-06,
      "loss": 0.1452,
      "step": 8885
    },
    {
      "epoch": 0.2592333274986872,
      "grad_norm": 0.8638704131644256,
      "learning_rate": 8.684298063880166e-06,
      "loss": 0.1629,
      "step": 8886
    },
    {
      "epoch": 0.25926250072933077,
      "grad_norm": 0.7770560173656657,
      "learning_rate": 8.683978660311542e-06,
      "loss": 0.168,
      "step": 8887
    },
    {
      "epoch": 0.2592916739599743,
      "grad_norm": 0.8800620468543845,
      "learning_rate": 8.683659223853238e-06,
      "loss": 0.1867,
      "step": 8888
    },
    {
      "epoch": 0.2593208471906179,
      "grad_norm": 0.9077449434044605,
      "learning_rate": 8.683339754508102e-06,
      "loss": 0.1581,
      "step": 8889
    },
    {
      "epoch": 0.25935002042126143,
      "grad_norm": 0.7541267098540116,
      "learning_rate": 8.683020252278988e-06,
      "loss": 0.152,
      "step": 8890
    },
    {
      "epoch": 0.25937919365190504,
      "grad_norm": 0.9230556082843885,
      "learning_rate": 8.68270071716875e-06,
      "loss": 0.1671,
      "step": 8891
    },
    {
      "epoch": 0.2594083668825486,
      "grad_norm": 0.9330205850452694,
      "learning_rate": 8.682381149180239e-06,
      "loss": 0.1989,
      "step": 8892
    },
    {
      "epoch": 0.25943754011319214,
      "grad_norm": 0.8778281677680428,
      "learning_rate": 8.682061548316307e-06,
      "loss": 0.1331,
      "step": 8893
    },
    {
      "epoch": 0.2594667133438357,
      "grad_norm": 0.8308052619745485,
      "learning_rate": 8.681741914579807e-06,
      "loss": 0.1745,
      "step": 8894
    },
    {
      "epoch": 0.25949588657447925,
      "grad_norm": 0.7996080524419938,
      "learning_rate": 8.681422247973596e-06,
      "loss": 0.1615,
      "step": 8895
    },
    {
      "epoch": 0.2595250598051228,
      "grad_norm": 0.9020034260693374,
      "learning_rate": 8.681102548500526e-06,
      "loss": 0.1636,
      "step": 8896
    },
    {
      "epoch": 0.25955423303576636,
      "grad_norm": 0.8887946298156156,
      "learning_rate": 8.68078281616345e-06,
      "loss": 0.1743,
      "step": 8897
    },
    {
      "epoch": 0.25958340626640997,
      "grad_norm": 0.7243999100222906,
      "learning_rate": 8.680463050965227e-06,
      "loss": 0.1558,
      "step": 8898
    },
    {
      "epoch": 0.2596125794970535,
      "grad_norm": 0.8516601822735672,
      "learning_rate": 8.680143252908704e-06,
      "loss": 0.1774,
      "step": 8899
    },
    {
      "epoch": 0.2596417527276971,
      "grad_norm": 0.9026393380363806,
      "learning_rate": 8.679823421996745e-06,
      "loss": 0.1433,
      "step": 8900
    },
    {
      "epoch": 0.2596709259583406,
      "grad_norm": 0.6900676970466267,
      "learning_rate": 8.679503558232197e-06,
      "loss": 0.1449,
      "step": 8901
    },
    {
      "epoch": 0.2597000991889842,
      "grad_norm": 0.9630417895997558,
      "learning_rate": 8.679183661617923e-06,
      "loss": 0.1735,
      "step": 8902
    },
    {
      "epoch": 0.25972927241962773,
      "grad_norm": 0.76408418310192,
      "learning_rate": 8.678863732156773e-06,
      "loss": 0.1499,
      "step": 8903
    },
    {
      "epoch": 0.2597584456502713,
      "grad_norm": 0.8353763808398833,
      "learning_rate": 8.678543769851606e-06,
      "loss": 0.1333,
      "step": 8904
    },
    {
      "epoch": 0.2597876188809149,
      "grad_norm": 0.7340719249282294,
      "learning_rate": 8.678223774705279e-06,
      "loss": 0.1486,
      "step": 8905
    },
    {
      "epoch": 0.25981679211155845,
      "grad_norm": 0.7884093911045592,
      "learning_rate": 8.677903746720648e-06,
      "loss": 0.1886,
      "step": 8906
    },
    {
      "epoch": 0.259845965342202,
      "grad_norm": 0.9833278264474146,
      "learning_rate": 8.677583685900572e-06,
      "loss": 0.169,
      "step": 8907
    },
    {
      "epoch": 0.25987513857284555,
      "grad_norm": 0.9930250670443083,
      "learning_rate": 8.677263592247905e-06,
      "loss": 0.1481,
      "step": 8908
    },
    {
      "epoch": 0.2599043118034891,
      "grad_norm": 0.9653846703152127,
      "learning_rate": 8.676943465765506e-06,
      "loss": 0.1324,
      "step": 8909
    },
    {
      "epoch": 0.25993348503413266,
      "grad_norm": 0.9202175390113022,
      "learning_rate": 8.676623306456235e-06,
      "loss": 0.1332,
      "step": 8910
    },
    {
      "epoch": 0.2599626582647762,
      "grad_norm": 0.8731167581220094,
      "learning_rate": 8.676303114322948e-06,
      "loss": 0.1493,
      "step": 8911
    },
    {
      "epoch": 0.2599918314954198,
      "grad_norm": 0.886676344309571,
      "learning_rate": 8.675982889368503e-06,
      "loss": 0.1739,
      "step": 8912
    },
    {
      "epoch": 0.2600210047260634,
      "grad_norm": 0.7860685323189874,
      "learning_rate": 8.675662631595762e-06,
      "loss": 0.1703,
      "step": 8913
    },
    {
      "epoch": 0.26005017795670693,
      "grad_norm": 1.1572269862127893,
      "learning_rate": 8.675342341007582e-06,
      "loss": 0.1552,
      "step": 8914
    },
    {
      "epoch": 0.2600793511873505,
      "grad_norm": 0.7990010528377889,
      "learning_rate": 8.675022017606824e-06,
      "loss": 0.1511,
      "step": 8915
    },
    {
      "epoch": 0.26010852441799404,
      "grad_norm": 0.9849423360542807,
      "learning_rate": 8.674701661396345e-06,
      "loss": 0.1535,
      "step": 8916
    },
    {
      "epoch": 0.2601376976486376,
      "grad_norm": 0.7597937933589964,
      "learning_rate": 8.674381272379008e-06,
      "loss": 0.1351,
      "step": 8917
    },
    {
      "epoch": 0.2601668708792812,
      "grad_norm": 0.8573456198285051,
      "learning_rate": 8.674060850557673e-06,
      "loss": 0.154,
      "step": 8918
    },
    {
      "epoch": 0.26019604410992475,
      "grad_norm": 0.9188513446682759,
      "learning_rate": 8.673740395935198e-06,
      "loss": 0.1729,
      "step": 8919
    },
    {
      "epoch": 0.2602252173405683,
      "grad_norm": 1.2466147536443686,
      "learning_rate": 8.673419908514447e-06,
      "loss": 0.1777,
      "step": 8920
    },
    {
      "epoch": 0.26025439057121186,
      "grad_norm": 1.0470505186834715,
      "learning_rate": 8.67309938829828e-06,
      "loss": 0.1642,
      "step": 8921
    },
    {
      "epoch": 0.2602835638018554,
      "grad_norm": 1.1318805959368543,
      "learning_rate": 8.672778835289556e-06,
      "loss": 0.1586,
      "step": 8922
    },
    {
      "epoch": 0.26031273703249896,
      "grad_norm": 0.8774068696043981,
      "learning_rate": 8.672458249491143e-06,
      "loss": 0.1549,
      "step": 8923
    },
    {
      "epoch": 0.2603419102631425,
      "grad_norm": 0.9910230944103623,
      "learning_rate": 8.672137630905897e-06,
      "loss": 0.1942,
      "step": 8924
    },
    {
      "epoch": 0.2603710834937861,
      "grad_norm": 1.0426900066770715,
      "learning_rate": 8.671816979536682e-06,
      "loss": 0.1459,
      "step": 8925
    },
    {
      "epoch": 0.2604002567244297,
      "grad_norm": 0.872076369217735,
      "learning_rate": 8.671496295386363e-06,
      "loss": 0.1679,
      "step": 8926
    },
    {
      "epoch": 0.26042942995507323,
      "grad_norm": 0.7784845171279845,
      "learning_rate": 8.671175578457803e-06,
      "loss": 0.1801,
      "step": 8927
    },
    {
      "epoch": 0.2604586031857168,
      "grad_norm": 1.1143806082736285,
      "learning_rate": 8.670854828753862e-06,
      "loss": 0.1559,
      "step": 8928
    },
    {
      "epoch": 0.26048777641636034,
      "grad_norm": 0.8231650852474093,
      "learning_rate": 8.670534046277405e-06,
      "loss": 0.1738,
      "step": 8929
    },
    {
      "epoch": 0.2605169496470039,
      "grad_norm": 0.7152319986232941,
      "learning_rate": 8.670213231031299e-06,
      "loss": 0.1518,
      "step": 8930
    },
    {
      "epoch": 0.26054612287764745,
      "grad_norm": 0.8815074491040681,
      "learning_rate": 8.669892383018402e-06,
      "loss": 0.1901,
      "step": 8931
    },
    {
      "epoch": 0.26057529610829105,
      "grad_norm": 0.9281452692370107,
      "learning_rate": 8.669571502241582e-06,
      "loss": 0.1405,
      "step": 8932
    },
    {
      "epoch": 0.2606044693389346,
      "grad_norm": 0.843511794898961,
      "learning_rate": 8.669250588703706e-06,
      "loss": 0.1507,
      "step": 8933
    },
    {
      "epoch": 0.26063364256957816,
      "grad_norm": 0.8313034900109824,
      "learning_rate": 8.668929642407634e-06,
      "loss": 0.1737,
      "step": 8934
    },
    {
      "epoch": 0.2606628158002217,
      "grad_norm": 0.8363379483768809,
      "learning_rate": 8.668608663356237e-06,
      "loss": 0.1558,
      "step": 8935
    },
    {
      "epoch": 0.26069198903086527,
      "grad_norm": 0.8525148222277982,
      "learning_rate": 8.668287651552377e-06,
      "loss": 0.1657,
      "step": 8936
    },
    {
      "epoch": 0.2607211622615088,
      "grad_norm": 0.8191935090884097,
      "learning_rate": 8.66796660699892e-06,
      "loss": 0.1445,
      "step": 8937
    },
    {
      "epoch": 0.2607503354921524,
      "grad_norm": 0.8618948197418193,
      "learning_rate": 8.667645529698731e-06,
      "loss": 0.1542,
      "step": 8938
    },
    {
      "epoch": 0.260779508722796,
      "grad_norm": 0.8259232628387545,
      "learning_rate": 8.66732441965468e-06,
      "loss": 0.1305,
      "step": 8939
    },
    {
      "epoch": 0.26080868195343954,
      "grad_norm": 0.710543349995988,
      "learning_rate": 8.667003276869632e-06,
      "loss": 0.1302,
      "step": 8940
    },
    {
      "epoch": 0.2608378551840831,
      "grad_norm": 0.8812651562659243,
      "learning_rate": 8.666682101346456e-06,
      "loss": 0.1849,
      "step": 8941
    },
    {
      "epoch": 0.26086702841472664,
      "grad_norm": 0.8899631092618145,
      "learning_rate": 8.666360893088015e-06,
      "loss": 0.172,
      "step": 8942
    },
    {
      "epoch": 0.2608962016453702,
      "grad_norm": 0.8699659262285109,
      "learning_rate": 8.666039652097178e-06,
      "loss": 0.1272,
      "step": 8943
    },
    {
      "epoch": 0.26092537487601375,
      "grad_norm": 0.8170363413887268,
      "learning_rate": 8.665718378376816e-06,
      "loss": 0.1593,
      "step": 8944
    },
    {
      "epoch": 0.26095454810665736,
      "grad_norm": 0.980424642282014,
      "learning_rate": 8.665397071929796e-06,
      "loss": 0.1715,
      "step": 8945
    },
    {
      "epoch": 0.2609837213373009,
      "grad_norm": 0.8161710016929798,
      "learning_rate": 8.665075732758985e-06,
      "loss": 0.1482,
      "step": 8946
    },
    {
      "epoch": 0.26101289456794446,
      "grad_norm": 0.8648771296327689,
      "learning_rate": 8.664754360867252e-06,
      "loss": 0.1674,
      "step": 8947
    },
    {
      "epoch": 0.261042067798588,
      "grad_norm": 1.1281315541060049,
      "learning_rate": 8.664432956257468e-06,
      "loss": 0.1571,
      "step": 8948
    },
    {
      "epoch": 0.26107124102923157,
      "grad_norm": 0.9988097399923852,
      "learning_rate": 8.664111518932501e-06,
      "loss": 0.1841,
      "step": 8949
    },
    {
      "epoch": 0.2611004142598751,
      "grad_norm": 0.9501992764518179,
      "learning_rate": 8.663790048895222e-06,
      "loss": 0.1608,
      "step": 8950
    },
    {
      "epoch": 0.2611295874905187,
      "grad_norm": 0.9569829937773743,
      "learning_rate": 8.6634685461485e-06,
      "loss": 0.1523,
      "step": 8951
    },
    {
      "epoch": 0.2611587607211623,
      "grad_norm": 1.122425336542205,
      "learning_rate": 8.663147010695202e-06,
      "loss": 0.1558,
      "step": 8952
    },
    {
      "epoch": 0.26118793395180584,
      "grad_norm": 1.0707956436614317,
      "learning_rate": 8.662825442538206e-06,
      "loss": 0.1465,
      "step": 8953
    },
    {
      "epoch": 0.2612171071824494,
      "grad_norm": 0.6557527786271601,
      "learning_rate": 8.662503841680377e-06,
      "loss": 0.1436,
      "step": 8954
    },
    {
      "epoch": 0.26124628041309295,
      "grad_norm": 1.0398305195766397,
      "learning_rate": 8.662182208124588e-06,
      "loss": 0.1681,
      "step": 8955
    },
    {
      "epoch": 0.2612754536437365,
      "grad_norm": 1.0251881764708946,
      "learning_rate": 8.661860541873712e-06,
      "loss": 0.172,
      "step": 8956
    },
    {
      "epoch": 0.26130462687438005,
      "grad_norm": 1.1542249340298345,
      "learning_rate": 8.661538842930617e-06,
      "loss": 0.166,
      "step": 8957
    },
    {
      "epoch": 0.2613338001050236,
      "grad_norm": 0.8961954416241569,
      "learning_rate": 8.661217111298179e-06,
      "loss": 0.1572,
      "step": 8958
    },
    {
      "epoch": 0.2613629733356672,
      "grad_norm": 1.0721378267867974,
      "learning_rate": 8.660895346979268e-06,
      "loss": 0.1571,
      "step": 8959
    },
    {
      "epoch": 0.26139214656631077,
      "grad_norm": 1.233492676885101,
      "learning_rate": 8.660573549976755e-06,
      "loss": 0.1747,
      "step": 8960
    },
    {
      "epoch": 0.2614213197969543,
      "grad_norm": 0.7274276762163758,
      "learning_rate": 8.66025172029352e-06,
      "loss": 0.1454,
      "step": 8961
    },
    {
      "epoch": 0.2614504930275979,
      "grad_norm": 0.8377190903857192,
      "learning_rate": 8.65992985793243e-06,
      "loss": 0.1604,
      "step": 8962
    },
    {
      "epoch": 0.2614796662582414,
      "grad_norm": 1.08457859515976,
      "learning_rate": 8.659607962896356e-06,
      "loss": 0.1585,
      "step": 8963
    },
    {
      "epoch": 0.261508839488885,
      "grad_norm": 0.692446274962691,
      "learning_rate": 8.65928603518818e-06,
      "loss": 0.1327,
      "step": 8964
    },
    {
      "epoch": 0.26153801271952853,
      "grad_norm": 0.9453876214076944,
      "learning_rate": 8.65896407481077e-06,
      "loss": 0.1535,
      "step": 8965
    },
    {
      "epoch": 0.26156718595017214,
      "grad_norm": 0.8171866133815451,
      "learning_rate": 8.658642081767003e-06,
      "loss": 0.1611,
      "step": 8966
    },
    {
      "epoch": 0.2615963591808157,
      "grad_norm": 0.7267011421363825,
      "learning_rate": 8.658320056059752e-06,
      "loss": 0.1756,
      "step": 8967
    },
    {
      "epoch": 0.26162553241145925,
      "grad_norm": 0.6845048571919239,
      "learning_rate": 8.657997997691893e-06,
      "loss": 0.1624,
      "step": 8968
    },
    {
      "epoch": 0.2616547056421028,
      "grad_norm": 0.8526214663746644,
      "learning_rate": 8.657675906666301e-06,
      "loss": 0.1786,
      "step": 8969
    },
    {
      "epoch": 0.26168387887274636,
      "grad_norm": 0.7279969395521828,
      "learning_rate": 8.657353782985853e-06,
      "loss": 0.144,
      "step": 8970
    },
    {
      "epoch": 0.2617130521033899,
      "grad_norm": 6.897200323623024,
      "learning_rate": 8.657031626653423e-06,
      "loss": 0.2039,
      "step": 8971
    },
    {
      "epoch": 0.2617422253340335,
      "grad_norm": 0.9029048669461691,
      "learning_rate": 8.656709437671886e-06,
      "loss": 0.1441,
      "step": 8972
    },
    {
      "epoch": 0.26177139856467707,
      "grad_norm": 0.793420907658017,
      "learning_rate": 8.656387216044122e-06,
      "loss": 0.1268,
      "step": 8973
    },
    {
      "epoch": 0.2618005717953206,
      "grad_norm": 0.7511495038593089,
      "learning_rate": 8.656064961773006e-06,
      "loss": 0.1498,
      "step": 8974
    },
    {
      "epoch": 0.2618297450259642,
      "grad_norm": 0.9088885928694428,
      "learning_rate": 8.655742674861414e-06,
      "loss": 0.1448,
      "step": 8975
    },
    {
      "epoch": 0.26185891825660773,
      "grad_norm": 0.6745868296167258,
      "learning_rate": 8.655420355312224e-06,
      "loss": 0.1746,
      "step": 8976
    },
    {
      "epoch": 0.2618880914872513,
      "grad_norm": 1.1519673108011803,
      "learning_rate": 8.655098003128312e-06,
      "loss": 0.1735,
      "step": 8977
    },
    {
      "epoch": 0.26191726471789484,
      "grad_norm": 1.231659580622008,
      "learning_rate": 8.654775618312561e-06,
      "loss": 0.1455,
      "step": 8978
    },
    {
      "epoch": 0.26194643794853845,
      "grad_norm": 0.7365798841092727,
      "learning_rate": 8.654453200867842e-06,
      "loss": 0.1909,
      "step": 8979
    },
    {
      "epoch": 0.261975611179182,
      "grad_norm": 0.8951672978577805,
      "learning_rate": 8.654130750797041e-06,
      "loss": 0.156,
      "step": 8980
    },
    {
      "epoch": 0.26200478440982555,
      "grad_norm": 1.0670266903104302,
      "learning_rate": 8.65380826810303e-06,
      "loss": 0.1511,
      "step": 8981
    },
    {
      "epoch": 0.2620339576404691,
      "grad_norm": 0.7877643841195099,
      "learning_rate": 8.653485752788692e-06,
      "loss": 0.1423,
      "step": 8982
    },
    {
      "epoch": 0.26206313087111266,
      "grad_norm": 0.863593991044593,
      "learning_rate": 8.653163204856906e-06,
      "loss": 0.1511,
      "step": 8983
    },
    {
      "epoch": 0.2620923041017562,
      "grad_norm": 0.8138077050375636,
      "learning_rate": 8.65284062431055e-06,
      "loss": 0.1583,
      "step": 8984
    },
    {
      "epoch": 0.26212147733239977,
      "grad_norm": 0.7585225714676208,
      "learning_rate": 8.652518011152507e-06,
      "loss": 0.1727,
      "step": 8985
    },
    {
      "epoch": 0.2621506505630434,
      "grad_norm": 0.9090473772890441,
      "learning_rate": 8.652195365385652e-06,
      "loss": 0.1228,
      "step": 8986
    },
    {
      "epoch": 0.26217982379368693,
      "grad_norm": 0.7627118636691527,
      "learning_rate": 8.651872687012871e-06,
      "loss": 0.1982,
      "step": 8987
    },
    {
      "epoch": 0.2622089970243305,
      "grad_norm": 0.7793849449518128,
      "learning_rate": 8.651549976037042e-06,
      "loss": 0.1645,
      "step": 8988
    },
    {
      "epoch": 0.26223817025497403,
      "grad_norm": 0.6968624570367113,
      "learning_rate": 8.651227232461045e-06,
      "loss": 0.1493,
      "step": 8989
    },
    {
      "epoch": 0.2622673434856176,
      "grad_norm": 0.8330253397591032,
      "learning_rate": 8.650904456287765e-06,
      "loss": 0.197,
      "step": 8990
    },
    {
      "epoch": 0.26229651671626114,
      "grad_norm": 0.9423238449724195,
      "learning_rate": 8.65058164752008e-06,
      "loss": 0.1647,
      "step": 8991
    },
    {
      "epoch": 0.2623256899469047,
      "grad_norm": 0.7795221338227699,
      "learning_rate": 8.650258806160874e-06,
      "loss": 0.1633,
      "step": 8992
    },
    {
      "epoch": 0.2623548631775483,
      "grad_norm": 0.9439610475194946,
      "learning_rate": 8.649935932213029e-06,
      "loss": 0.1692,
      "step": 8993
    },
    {
      "epoch": 0.26238403640819186,
      "grad_norm": 1.2520198627267944,
      "learning_rate": 8.649613025679428e-06,
      "loss": 0.1469,
      "step": 8994
    },
    {
      "epoch": 0.2624132096388354,
      "grad_norm": 0.8153986117003251,
      "learning_rate": 8.649290086562952e-06,
      "loss": 0.1635,
      "step": 8995
    },
    {
      "epoch": 0.26244238286947896,
      "grad_norm": 0.9706801494347831,
      "learning_rate": 8.648967114866485e-06,
      "loss": 0.1399,
      "step": 8996
    },
    {
      "epoch": 0.2624715561001225,
      "grad_norm": 1.0048217905972299,
      "learning_rate": 8.648644110592912e-06,
      "loss": 0.158,
      "step": 8997
    },
    {
      "epoch": 0.26250072933076607,
      "grad_norm": 0.6717974086650561,
      "learning_rate": 8.648321073745113e-06,
      "loss": 0.1535,
      "step": 8998
    },
    {
      "epoch": 0.2625299025614097,
      "grad_norm": 1.2447905270372501,
      "learning_rate": 8.647998004325977e-06,
      "loss": 0.1781,
      "step": 8999
    },
    {
      "epoch": 0.26255907579205323,
      "grad_norm": 0.9021571185221067,
      "learning_rate": 8.647674902338384e-06,
      "loss": 0.158,
      "step": 9000
    },
    {
      "epoch": 0.2625882490226968,
      "grad_norm": 0.7515560073859457,
      "learning_rate": 8.647351767785221e-06,
      "loss": 0.1625,
      "step": 9001
    },
    {
      "epoch": 0.26261742225334034,
      "grad_norm": 0.8906606297136825,
      "learning_rate": 8.647028600669373e-06,
      "loss": 0.1646,
      "step": 9002
    },
    {
      "epoch": 0.2626465954839839,
      "grad_norm": 0.7556433769020066,
      "learning_rate": 8.646705400993722e-06,
      "loss": 0.1549,
      "step": 9003
    },
    {
      "epoch": 0.26267576871462744,
      "grad_norm": 0.7594058956556977,
      "learning_rate": 8.646382168761159e-06,
      "loss": 0.1482,
      "step": 9004
    },
    {
      "epoch": 0.262704941945271,
      "grad_norm": 0.7329656671449547,
      "learning_rate": 8.646058903974563e-06,
      "loss": 0.1623,
      "step": 9005
    },
    {
      "epoch": 0.2627341151759146,
      "grad_norm": 0.7762564542528988,
      "learning_rate": 8.645735606636825e-06,
      "loss": 0.1415,
      "step": 9006
    },
    {
      "epoch": 0.26276328840655816,
      "grad_norm": 0.9856730080145799,
      "learning_rate": 8.645412276750829e-06,
      "loss": 0.1517,
      "step": 9007
    },
    {
      "epoch": 0.2627924616372017,
      "grad_norm": 0.7124863843563982,
      "learning_rate": 8.645088914319464e-06,
      "loss": 0.1643,
      "step": 9008
    },
    {
      "epoch": 0.26282163486784527,
      "grad_norm": 0.9274028579170007,
      "learning_rate": 8.644765519345615e-06,
      "loss": 0.1647,
      "step": 9009
    },
    {
      "epoch": 0.2628508080984888,
      "grad_norm": 0.9549257484451333,
      "learning_rate": 8.644442091832168e-06,
      "loss": 0.1537,
      "step": 9010
    },
    {
      "epoch": 0.2628799813291324,
      "grad_norm": 0.6818318593964277,
      "learning_rate": 8.644118631782014e-06,
      "loss": 0.1314,
      "step": 9011
    },
    {
      "epoch": 0.2629091545597759,
      "grad_norm": 0.8348523017673742,
      "learning_rate": 8.643795139198037e-06,
      "loss": 0.1442,
      "step": 9012
    },
    {
      "epoch": 0.26293832779041953,
      "grad_norm": 0.8854657649503088,
      "learning_rate": 8.643471614083127e-06,
      "loss": 0.1873,
      "step": 9013
    },
    {
      "epoch": 0.2629675010210631,
      "grad_norm": 0.7212956182481403,
      "learning_rate": 8.643148056440174e-06,
      "loss": 0.1267,
      "step": 9014
    },
    {
      "epoch": 0.26299667425170664,
      "grad_norm": 0.7784757188427204,
      "learning_rate": 8.642824466272065e-06,
      "loss": 0.1537,
      "step": 9015
    },
    {
      "epoch": 0.2630258474823502,
      "grad_norm": 0.9231990584958093,
      "learning_rate": 8.642500843581687e-06,
      "loss": 0.1672,
      "step": 9016
    },
    {
      "epoch": 0.26305502071299375,
      "grad_norm": 0.8723099299784459,
      "learning_rate": 8.64217718837193e-06,
      "loss": 0.1497,
      "step": 9017
    },
    {
      "epoch": 0.2630841939436373,
      "grad_norm": 0.9186162309499485,
      "learning_rate": 8.641853500645685e-06,
      "loss": 0.1549,
      "step": 9018
    },
    {
      "epoch": 0.26311336717428085,
      "grad_norm": 0.8548084882452498,
      "learning_rate": 8.641529780405843e-06,
      "loss": 0.1778,
      "step": 9019
    },
    {
      "epoch": 0.26314254040492446,
      "grad_norm": 0.9450437817969356,
      "learning_rate": 8.641206027655293e-06,
      "loss": 0.1745,
      "step": 9020
    },
    {
      "epoch": 0.263171713635568,
      "grad_norm": 0.8052277640189358,
      "learning_rate": 8.640882242396922e-06,
      "loss": 0.1454,
      "step": 9021
    },
    {
      "epoch": 0.26320088686621157,
      "grad_norm": 0.9179526845228608,
      "learning_rate": 8.640558424633625e-06,
      "loss": 0.1511,
      "step": 9022
    },
    {
      "epoch": 0.2632300600968551,
      "grad_norm": 0.9192983316214682,
      "learning_rate": 8.640234574368292e-06,
      "loss": 0.1674,
      "step": 9023
    },
    {
      "epoch": 0.2632592333274987,
      "grad_norm": 0.9721749546684811,
      "learning_rate": 8.639910691603815e-06,
      "loss": 0.1589,
      "step": 9024
    },
    {
      "epoch": 0.26328840655814223,
      "grad_norm": 0.8077257913425597,
      "learning_rate": 8.63958677634308e-06,
      "loss": 0.1858,
      "step": 9025
    },
    {
      "epoch": 0.2633175797887858,
      "grad_norm": 0.738405800801672,
      "learning_rate": 8.639262828588988e-06,
      "loss": 0.1564,
      "step": 9026
    },
    {
      "epoch": 0.2633467530194294,
      "grad_norm": 0.7852468710522374,
      "learning_rate": 8.638938848344422e-06,
      "loss": 0.1719,
      "step": 9027
    },
    {
      "epoch": 0.26337592625007294,
      "grad_norm": 0.7730085408186513,
      "learning_rate": 8.63861483561228e-06,
      "loss": 0.1497,
      "step": 9028
    },
    {
      "epoch": 0.2634050994807165,
      "grad_norm": 0.9883000188980213,
      "learning_rate": 8.638290790395453e-06,
      "loss": 0.167,
      "step": 9029
    },
    {
      "epoch": 0.26343427271136005,
      "grad_norm": 0.8436573459162998,
      "learning_rate": 8.637966712696837e-06,
      "loss": 0.1643,
      "step": 9030
    },
    {
      "epoch": 0.2634634459420036,
      "grad_norm": 0.7092549122404046,
      "learning_rate": 8.637642602519321e-06,
      "loss": 0.1694,
      "step": 9031
    },
    {
      "epoch": 0.26349261917264716,
      "grad_norm": 1.0446338585028105,
      "learning_rate": 8.6373184598658e-06,
      "loss": 0.1352,
      "step": 9032
    },
    {
      "epoch": 0.26352179240329077,
      "grad_norm": 0.7027271973633433,
      "learning_rate": 8.636994284739167e-06,
      "loss": 0.1584,
      "step": 9033
    },
    {
      "epoch": 0.2635509656339343,
      "grad_norm": 0.9778596732464395,
      "learning_rate": 8.636670077142319e-06,
      "loss": 0.1623,
      "step": 9034
    },
    {
      "epoch": 0.2635801388645779,
      "grad_norm": 0.8677475585979214,
      "learning_rate": 8.636345837078149e-06,
      "loss": 0.1393,
      "step": 9035
    },
    {
      "epoch": 0.2636093120952214,
      "grad_norm": 0.8155722533322951,
      "learning_rate": 8.63602156454955e-06,
      "loss": 0.1377,
      "step": 9036
    },
    {
      "epoch": 0.263638485325865,
      "grad_norm": 0.9343654193152432,
      "learning_rate": 8.63569725955942e-06,
      "loss": 0.1707,
      "step": 9037
    },
    {
      "epoch": 0.26366765855650853,
      "grad_norm": 0.920493289169441,
      "learning_rate": 8.63537292211065e-06,
      "loss": 0.1643,
      "step": 9038
    },
    {
      "epoch": 0.2636968317871521,
      "grad_norm": 1.1822121690421965,
      "learning_rate": 8.63504855220614e-06,
      "loss": 0.1527,
      "step": 9039
    },
    {
      "epoch": 0.2637260050177957,
      "grad_norm": 0.8324666194814841,
      "learning_rate": 8.634724149848785e-06,
      "loss": 0.1497,
      "step": 9040
    },
    {
      "epoch": 0.26375517824843925,
      "grad_norm": 0.8232415364228624,
      "learning_rate": 8.634399715041479e-06,
      "loss": 0.1547,
      "step": 9041
    },
    {
      "epoch": 0.2637843514790828,
      "grad_norm": 1.0870219534930903,
      "learning_rate": 8.634075247787121e-06,
      "loss": 0.1638,
      "step": 9042
    },
    {
      "epoch": 0.26381352470972635,
      "grad_norm": 0.8903388992881184,
      "learning_rate": 8.633750748088608e-06,
      "loss": 0.1396,
      "step": 9043
    },
    {
      "epoch": 0.2638426979403699,
      "grad_norm": 1.0044263782715226,
      "learning_rate": 8.633426215948833e-06,
      "loss": 0.1733,
      "step": 9044
    },
    {
      "epoch": 0.26387187117101346,
      "grad_norm": 1.2698529262262845,
      "learning_rate": 8.633101651370696e-06,
      "loss": 0.1823,
      "step": 9045
    },
    {
      "epoch": 0.263901044401657,
      "grad_norm": 1.0729459899868792,
      "learning_rate": 8.632777054357098e-06,
      "loss": 0.2038,
      "step": 9046
    },
    {
      "epoch": 0.2639302176323006,
      "grad_norm": 0.7010983361020153,
      "learning_rate": 8.632452424910932e-06,
      "loss": 0.163,
      "step": 9047
    },
    {
      "epoch": 0.2639593908629442,
      "grad_norm": 1.0570119784118244,
      "learning_rate": 8.632127763035096e-06,
      "loss": 0.1752,
      "step": 9048
    },
    {
      "epoch": 0.26398856409358773,
      "grad_norm": 1.0045126713576789,
      "learning_rate": 8.631803068732493e-06,
      "loss": 0.1549,
      "step": 9049
    },
    {
      "epoch": 0.2640177373242313,
      "grad_norm": 0.966544410497631,
      "learning_rate": 8.631478342006019e-06,
      "loss": 0.1407,
      "step": 9050
    },
    {
      "epoch": 0.26404691055487484,
      "grad_norm": 0.9947474879539469,
      "learning_rate": 8.631153582858571e-06,
      "loss": 0.1771,
      "step": 9051
    },
    {
      "epoch": 0.2640760837855184,
      "grad_norm": 0.8973227562563453,
      "learning_rate": 8.630828791293053e-06,
      "loss": 0.1617,
      "step": 9052
    },
    {
      "epoch": 0.26410525701616194,
      "grad_norm": 1.1181779623509178,
      "learning_rate": 8.63050396731236e-06,
      "loss": 0.1372,
      "step": 9053
    },
    {
      "epoch": 0.26413443024680555,
      "grad_norm": 0.8669198873518774,
      "learning_rate": 8.630179110919396e-06,
      "loss": 0.1598,
      "step": 9054
    },
    {
      "epoch": 0.2641636034774491,
      "grad_norm": 0.8858988726842243,
      "learning_rate": 8.62985422211706e-06,
      "loss": 0.148,
      "step": 9055
    },
    {
      "epoch": 0.26419277670809266,
      "grad_norm": 1.0085743706771297,
      "learning_rate": 8.629529300908252e-06,
      "loss": 0.1687,
      "step": 9056
    },
    {
      "epoch": 0.2642219499387362,
      "grad_norm": 0.7803668383702542,
      "learning_rate": 8.629204347295871e-06,
      "loss": 0.1473,
      "step": 9057
    },
    {
      "epoch": 0.26425112316937976,
      "grad_norm": 0.9095071387023369,
      "learning_rate": 8.628879361282822e-06,
      "loss": 0.1715,
      "step": 9058
    },
    {
      "epoch": 0.2642802964000233,
      "grad_norm": 0.7505477609686817,
      "learning_rate": 8.628554342872001e-06,
      "loss": 0.1432,
      "step": 9059
    },
    {
      "epoch": 0.2643094696306669,
      "grad_norm": 0.799754619139009,
      "learning_rate": 8.628229292066317e-06,
      "loss": 0.1556,
      "step": 9060
    },
    {
      "epoch": 0.2643386428613105,
      "grad_norm": 0.8988449170981903,
      "learning_rate": 8.627904208868667e-06,
      "loss": 0.1534,
      "step": 9061
    },
    {
      "epoch": 0.26436781609195403,
      "grad_norm": 0.8730500814357809,
      "learning_rate": 8.627579093281954e-06,
      "loss": 0.144,
      "step": 9062
    },
    {
      "epoch": 0.2643969893225976,
      "grad_norm": 0.8647646197225216,
      "learning_rate": 8.62725394530908e-06,
      "loss": 0.1594,
      "step": 9063
    },
    {
      "epoch": 0.26442616255324114,
      "grad_norm": 0.704882418996132,
      "learning_rate": 8.62692876495295e-06,
      "loss": 0.1342,
      "step": 9064
    },
    {
      "epoch": 0.2644553357838847,
      "grad_norm": 0.8287469086871684,
      "learning_rate": 8.626603552216463e-06,
      "loss": 0.146,
      "step": 9065
    },
    {
      "epoch": 0.26448450901452825,
      "grad_norm": 0.7172902656043784,
      "learning_rate": 8.626278307102527e-06,
      "loss": 0.1673,
      "step": 9066
    },
    {
      "epoch": 0.26451368224517186,
      "grad_norm": 0.8710773055210546,
      "learning_rate": 8.625953029614045e-06,
      "loss": 0.1723,
      "step": 9067
    },
    {
      "epoch": 0.2645428554758154,
      "grad_norm": 0.9036504981894505,
      "learning_rate": 8.625627719753919e-06,
      "loss": 0.1723,
      "step": 9068
    },
    {
      "epoch": 0.26457202870645896,
      "grad_norm": 0.827967608980461,
      "learning_rate": 8.625302377525055e-06,
      "loss": 0.1419,
      "step": 9069
    },
    {
      "epoch": 0.2646012019371025,
      "grad_norm": 1.1107301199535424,
      "learning_rate": 8.624977002930356e-06,
      "loss": 0.1516,
      "step": 9070
    },
    {
      "epoch": 0.26463037516774607,
      "grad_norm": 1.0366582363229322,
      "learning_rate": 8.624651595972729e-06,
      "loss": 0.156,
      "step": 9071
    },
    {
      "epoch": 0.2646595483983896,
      "grad_norm": 0.8720778348750352,
      "learning_rate": 8.624326156655075e-06,
      "loss": 0.1832,
      "step": 9072
    },
    {
      "epoch": 0.2646887216290332,
      "grad_norm": 1.1158055232479938,
      "learning_rate": 8.624000684980305e-06,
      "loss": 0.1605,
      "step": 9073
    },
    {
      "epoch": 0.2647178948596768,
      "grad_norm": 0.9359495379112925,
      "learning_rate": 8.62367518095132e-06,
      "loss": 0.1451,
      "step": 9074
    },
    {
      "epoch": 0.26474706809032034,
      "grad_norm": 0.797724665799639,
      "learning_rate": 8.623349644571029e-06,
      "loss": 0.1413,
      "step": 9075
    },
    {
      "epoch": 0.2647762413209639,
      "grad_norm": 1.147284726854106,
      "learning_rate": 8.623024075842337e-06,
      "loss": 0.1632,
      "step": 9076
    },
    {
      "epoch": 0.26480541455160744,
      "grad_norm": 0.8992657168090905,
      "learning_rate": 8.622698474768151e-06,
      "loss": 0.182,
      "step": 9077
    },
    {
      "epoch": 0.264834587782251,
      "grad_norm": 0.7815571394446682,
      "learning_rate": 8.622372841351378e-06,
      "loss": 0.1639,
      "step": 9078
    },
    {
      "epoch": 0.26486376101289455,
      "grad_norm": 0.8564192048276966,
      "learning_rate": 8.622047175594926e-06,
      "loss": 0.1688,
      "step": 9079
    },
    {
      "epoch": 0.2648929342435381,
      "grad_norm": 0.8074776318284307,
      "learning_rate": 8.6217214775017e-06,
      "loss": 0.1461,
      "step": 9080
    },
    {
      "epoch": 0.2649221074741817,
      "grad_norm": 0.7674874346703437,
      "learning_rate": 8.62139574707461e-06,
      "loss": 0.1711,
      "step": 9081
    },
    {
      "epoch": 0.26495128070482526,
      "grad_norm": 0.9988993533004416,
      "learning_rate": 8.621069984316562e-06,
      "loss": 0.143,
      "step": 9082
    },
    {
      "epoch": 0.2649804539354688,
      "grad_norm": 0.7562372592021596,
      "learning_rate": 8.620744189230468e-06,
      "loss": 0.1508,
      "step": 9083
    },
    {
      "epoch": 0.26500962716611237,
      "grad_norm": 0.7517946916366848,
      "learning_rate": 8.620418361819231e-06,
      "loss": 0.1884,
      "step": 9084
    },
    {
      "epoch": 0.2650388003967559,
      "grad_norm": 0.8642858531473507,
      "learning_rate": 8.620092502085766e-06,
      "loss": 0.149,
      "step": 9085
    },
    {
      "epoch": 0.2650679736273995,
      "grad_norm": 0.8279634536047271,
      "learning_rate": 8.619766610032978e-06,
      "loss": 0.1563,
      "step": 9086
    },
    {
      "epoch": 0.2650971468580431,
      "grad_norm": 1.0595198042791127,
      "learning_rate": 8.619440685663777e-06,
      "loss": 0.1529,
      "step": 9087
    },
    {
      "epoch": 0.26512632008868664,
      "grad_norm": 0.8305228327930336,
      "learning_rate": 8.619114728981076e-06,
      "loss": 0.1795,
      "step": 9088
    },
    {
      "epoch": 0.2651554933193302,
      "grad_norm": 0.7461641349306766,
      "learning_rate": 8.61878873998778e-06,
      "loss": 0.132,
      "step": 9089
    },
    {
      "epoch": 0.26518466654997375,
      "grad_norm": 1.1785531779312541,
      "learning_rate": 8.618462718686803e-06,
      "loss": 0.1636,
      "step": 9090
    },
    {
      "epoch": 0.2652138397806173,
      "grad_norm": 0.7808261703990087,
      "learning_rate": 8.618136665081056e-06,
      "loss": 0.1692,
      "step": 9091
    },
    {
      "epoch": 0.26524301301126085,
      "grad_norm": 0.9399072229359386,
      "learning_rate": 8.617810579173448e-06,
      "loss": 0.1771,
      "step": 9092
    },
    {
      "epoch": 0.2652721862419044,
      "grad_norm": 0.8716638403910036,
      "learning_rate": 8.61748446096689e-06,
      "loss": 0.1472,
      "step": 9093
    },
    {
      "epoch": 0.265301359472548,
      "grad_norm": 0.8539748051850683,
      "learning_rate": 8.617158310464295e-06,
      "loss": 0.1618,
      "step": 9094
    },
    {
      "epoch": 0.26533053270319157,
      "grad_norm": 0.7179318488689149,
      "learning_rate": 8.616832127668573e-06,
      "loss": 0.1631,
      "step": 9095
    },
    {
      "epoch": 0.2653597059338351,
      "grad_norm": 1.0042392548906347,
      "learning_rate": 8.616505912582638e-06,
      "loss": 0.1398,
      "step": 9096
    },
    {
      "epoch": 0.2653888791644787,
      "grad_norm": 0.8236697306505159,
      "learning_rate": 8.616179665209402e-06,
      "loss": 0.1655,
      "step": 9097
    },
    {
      "epoch": 0.26541805239512223,
      "grad_norm": 0.825549036235817,
      "learning_rate": 8.615853385551776e-06,
      "loss": 0.1774,
      "step": 9098
    },
    {
      "epoch": 0.2654472256257658,
      "grad_norm": 0.9269318908416707,
      "learning_rate": 8.615527073612675e-06,
      "loss": 0.1542,
      "step": 9099
    },
    {
      "epoch": 0.26547639885640933,
      "grad_norm": 0.7275122950988118,
      "learning_rate": 8.615200729395011e-06,
      "loss": 0.1437,
      "step": 9100
    },
    {
      "epoch": 0.26550557208705294,
      "grad_norm": 0.614408299811067,
      "learning_rate": 8.614874352901698e-06,
      "loss": 0.1539,
      "step": 9101
    },
    {
      "epoch": 0.2655347453176965,
      "grad_norm": 0.8434856908633734,
      "learning_rate": 8.61454794413565e-06,
      "loss": 0.1675,
      "step": 9102
    },
    {
      "epoch": 0.26556391854834005,
      "grad_norm": 0.7641500041163647,
      "learning_rate": 8.61422150309978e-06,
      "loss": 0.173,
      "step": 9103
    },
    {
      "epoch": 0.2655930917789836,
      "grad_norm": 0.8790201535354943,
      "learning_rate": 8.613895029797003e-06,
      "loss": 0.1704,
      "step": 9104
    },
    {
      "epoch": 0.26562226500962716,
      "grad_norm": 0.7619920842925546,
      "learning_rate": 8.613568524230235e-06,
      "loss": 0.1482,
      "step": 9105
    },
    {
      "epoch": 0.2656514382402707,
      "grad_norm": 0.9080016098777615,
      "learning_rate": 8.61324198640239e-06,
      "loss": 0.1458,
      "step": 9106
    },
    {
      "epoch": 0.26568061147091426,
      "grad_norm": 0.8153199610735942,
      "learning_rate": 8.612915416316383e-06,
      "loss": 0.1588,
      "step": 9107
    },
    {
      "epoch": 0.26570978470155787,
      "grad_norm": 0.7773071241995667,
      "learning_rate": 8.612588813975128e-06,
      "loss": 0.1576,
      "step": 9108
    },
    {
      "epoch": 0.2657389579322014,
      "grad_norm": 0.7211843174080956,
      "learning_rate": 8.612262179381546e-06,
      "loss": 0.1743,
      "step": 9109
    },
    {
      "epoch": 0.265768131162845,
      "grad_norm": 0.9762819829991857,
      "learning_rate": 8.611935512538546e-06,
      "loss": 0.158,
      "step": 9110
    },
    {
      "epoch": 0.26579730439348853,
      "grad_norm": 0.8956469604263455,
      "learning_rate": 8.611608813449049e-06,
      "loss": 0.1683,
      "step": 9111
    },
    {
      "epoch": 0.2658264776241321,
      "grad_norm": 0.8204892734320686,
      "learning_rate": 8.61128208211597e-06,
      "loss": 0.1602,
      "step": 9112
    },
    {
      "epoch": 0.26585565085477564,
      "grad_norm": 0.7809749207214982,
      "learning_rate": 8.610955318542228e-06,
      "loss": 0.1442,
      "step": 9113
    },
    {
      "epoch": 0.26588482408541925,
      "grad_norm": 0.9870598039420582,
      "learning_rate": 8.610628522730739e-06,
      "loss": 0.1586,
      "step": 9114
    },
    {
      "epoch": 0.2659139973160628,
      "grad_norm": 1.0519012197283568,
      "learning_rate": 8.61030169468442e-06,
      "loss": 0.1703,
      "step": 9115
    },
    {
      "epoch": 0.26594317054670635,
      "grad_norm": 0.8214479126276892,
      "learning_rate": 8.60997483440619e-06,
      "loss": 0.1786,
      "step": 9116
    },
    {
      "epoch": 0.2659723437773499,
      "grad_norm": 0.8068036418888651,
      "learning_rate": 8.609647941898965e-06,
      "loss": 0.1677,
      "step": 9117
    },
    {
      "epoch": 0.26600151700799346,
      "grad_norm": 1.0475768614955818,
      "learning_rate": 8.609321017165666e-06,
      "loss": 0.1665,
      "step": 9118
    },
    {
      "epoch": 0.266030690238637,
      "grad_norm": 0.9301065775618507,
      "learning_rate": 8.60899406020921e-06,
      "loss": 0.1467,
      "step": 9119
    },
    {
      "epoch": 0.26605986346928057,
      "grad_norm": 0.8931493725532034,
      "learning_rate": 8.608667071032518e-06,
      "loss": 0.2,
      "step": 9120
    },
    {
      "epoch": 0.2660890366999242,
      "grad_norm": 0.9897605891146277,
      "learning_rate": 8.608340049638505e-06,
      "loss": 0.1879,
      "step": 9121
    },
    {
      "epoch": 0.26611820993056773,
      "grad_norm": 0.9371094007725559,
      "learning_rate": 8.608012996030096e-06,
      "loss": 0.1386,
      "step": 9122
    },
    {
      "epoch": 0.2661473831612113,
      "grad_norm": 0.7642659985859662,
      "learning_rate": 8.607685910210207e-06,
      "loss": 0.157,
      "step": 9123
    },
    {
      "epoch": 0.26617655639185483,
      "grad_norm": 0.760785170955767,
      "learning_rate": 8.607358792181758e-06,
      "loss": 0.1592,
      "step": 9124
    },
    {
      "epoch": 0.2662057296224984,
      "grad_norm": 0.9363709350453202,
      "learning_rate": 8.607031641947674e-06,
      "loss": 0.1596,
      "step": 9125
    },
    {
      "epoch": 0.26623490285314194,
      "grad_norm": 0.9553890540930121,
      "learning_rate": 8.60670445951087e-06,
      "loss": 0.1592,
      "step": 9126
    },
    {
      "epoch": 0.2662640760837855,
      "grad_norm": 0.8108281807974532,
      "learning_rate": 8.606377244874272e-06,
      "loss": 0.164,
      "step": 9127
    },
    {
      "epoch": 0.2662932493144291,
      "grad_norm": 0.744083984081861,
      "learning_rate": 8.606049998040798e-06,
      "loss": 0.1456,
      "step": 9128
    },
    {
      "epoch": 0.26632242254507266,
      "grad_norm": 0.9122152357575725,
      "learning_rate": 8.60572271901337e-06,
      "loss": 0.1505,
      "step": 9129
    },
    {
      "epoch": 0.2663515957757162,
      "grad_norm": 0.9656027725681182,
      "learning_rate": 8.60539540779491e-06,
      "loss": 0.1513,
      "step": 9130
    },
    {
      "epoch": 0.26638076900635976,
      "grad_norm": 0.6944694313875288,
      "learning_rate": 8.60506806438834e-06,
      "loss": 0.1507,
      "step": 9131
    },
    {
      "epoch": 0.2664099422370033,
      "grad_norm": 1.3818471606419696,
      "learning_rate": 8.604740688796585e-06,
      "loss": 0.1453,
      "step": 9132
    },
    {
      "epoch": 0.26643911546764687,
      "grad_norm": 1.0641447817434493,
      "learning_rate": 8.604413281022563e-06,
      "loss": 0.1693,
      "step": 9133
    },
    {
      "epoch": 0.2664682886982904,
      "grad_norm": 0.8243456389069569,
      "learning_rate": 8.604085841069202e-06,
      "loss": 0.1424,
      "step": 9134
    },
    {
      "epoch": 0.26649746192893403,
      "grad_norm": 1.0053326829075988,
      "learning_rate": 8.60375836893942e-06,
      "loss": 0.1505,
      "step": 9135
    },
    {
      "epoch": 0.2665266351595776,
      "grad_norm": 0.9620008886312827,
      "learning_rate": 8.603430864636147e-06,
      "loss": 0.153,
      "step": 9136
    },
    {
      "epoch": 0.26655580839022114,
      "grad_norm": 0.9968262005020653,
      "learning_rate": 8.603103328162303e-06,
      "loss": 0.1624,
      "step": 9137
    },
    {
      "epoch": 0.2665849816208647,
      "grad_norm": 0.8984514152344192,
      "learning_rate": 8.602775759520812e-06,
      "loss": 0.1381,
      "step": 9138
    },
    {
      "epoch": 0.26661415485150824,
      "grad_norm": 0.9913964856948402,
      "learning_rate": 8.602448158714598e-06,
      "loss": 0.1482,
      "step": 9139
    },
    {
      "epoch": 0.2666433280821518,
      "grad_norm": 0.8265462729925408,
      "learning_rate": 8.602120525746588e-06,
      "loss": 0.1453,
      "step": 9140
    },
    {
      "epoch": 0.26667250131279535,
      "grad_norm": 0.8347403841944075,
      "learning_rate": 8.601792860619704e-06,
      "loss": 0.1562,
      "step": 9141
    },
    {
      "epoch": 0.26670167454343896,
      "grad_norm": 0.8428107837326312,
      "learning_rate": 8.601465163336875e-06,
      "loss": 0.167,
      "step": 9142
    },
    {
      "epoch": 0.2667308477740825,
      "grad_norm": 0.9401110801896505,
      "learning_rate": 8.601137433901026e-06,
      "loss": 0.1616,
      "step": 9143
    },
    {
      "epoch": 0.26676002100472607,
      "grad_norm": 0.7493122862994414,
      "learning_rate": 8.600809672315079e-06,
      "loss": 0.161,
      "step": 9144
    },
    {
      "epoch": 0.2667891942353696,
      "grad_norm": 0.9069616054462428,
      "learning_rate": 8.600481878581963e-06,
      "loss": 0.1646,
      "step": 9145
    },
    {
      "epoch": 0.2668183674660132,
      "grad_norm": 0.7983840351539716,
      "learning_rate": 8.600154052704606e-06,
      "loss": 0.1826,
      "step": 9146
    },
    {
      "epoch": 0.2668475406966567,
      "grad_norm": 0.9543317972020042,
      "learning_rate": 8.599826194685932e-06,
      "loss": 0.1525,
      "step": 9147
    },
    {
      "epoch": 0.26687671392730034,
      "grad_norm": 0.883120951423925,
      "learning_rate": 8.599498304528869e-06,
      "loss": 0.1865,
      "step": 9148
    },
    {
      "epoch": 0.2669058871579439,
      "grad_norm": 0.819372229915627,
      "learning_rate": 8.599170382236343e-06,
      "loss": 0.1692,
      "step": 9149
    },
    {
      "epoch": 0.26693506038858744,
      "grad_norm": 0.8996655475799555,
      "learning_rate": 8.598842427811286e-06,
      "loss": 0.1698,
      "step": 9150
    },
    {
      "epoch": 0.266964233619231,
      "grad_norm": 0.7135545026287398,
      "learning_rate": 8.598514441256622e-06,
      "loss": 0.1663,
      "step": 9151
    },
    {
      "epoch": 0.26699340684987455,
      "grad_norm": 0.8362320184332745,
      "learning_rate": 8.59818642257528e-06,
      "loss": 0.1815,
      "step": 9152
    },
    {
      "epoch": 0.2670225800805181,
      "grad_norm": 0.996673344357169,
      "learning_rate": 8.597858371770189e-06,
      "loss": 0.1485,
      "step": 9153
    },
    {
      "epoch": 0.26705175331116165,
      "grad_norm": 0.8816343547125585,
      "learning_rate": 8.597530288844275e-06,
      "loss": 0.1591,
      "step": 9154
    },
    {
      "epoch": 0.26708092654180526,
      "grad_norm": 0.8489802573039555,
      "learning_rate": 8.597202173800471e-06,
      "loss": 0.1665,
      "step": 9155
    },
    {
      "epoch": 0.2671100997724488,
      "grad_norm": 1.0257458096727483,
      "learning_rate": 8.596874026641705e-06,
      "loss": 0.1559,
      "step": 9156
    },
    {
      "epoch": 0.26713927300309237,
      "grad_norm": 0.9692393709333937,
      "learning_rate": 8.596545847370904e-06,
      "loss": 0.1564,
      "step": 9157
    },
    {
      "epoch": 0.2671684462337359,
      "grad_norm": 0.9106118102650673,
      "learning_rate": 8.596217635991004e-06,
      "loss": 0.1649,
      "step": 9158
    },
    {
      "epoch": 0.2671976194643795,
      "grad_norm": 0.9422265893505043,
      "learning_rate": 8.59588939250493e-06,
      "loss": 0.1709,
      "step": 9159
    },
    {
      "epoch": 0.26722679269502303,
      "grad_norm": 0.8309488586446533,
      "learning_rate": 8.595561116915613e-06,
      "loss": 0.163,
      "step": 9160
    },
    {
      "epoch": 0.2672559659256666,
      "grad_norm": 0.9041403780788585,
      "learning_rate": 8.595232809225987e-06,
      "loss": 0.1828,
      "step": 9161
    },
    {
      "epoch": 0.2672851391563102,
      "grad_norm": 0.8975794841273685,
      "learning_rate": 8.594904469438979e-06,
      "loss": 0.1361,
      "step": 9162
    },
    {
      "epoch": 0.26731431238695375,
      "grad_norm": 1.0592758026772573,
      "learning_rate": 8.594576097557521e-06,
      "loss": 0.1608,
      "step": 9163
    },
    {
      "epoch": 0.2673434856175973,
      "grad_norm": 0.7650386674472878,
      "learning_rate": 8.594247693584547e-06,
      "loss": 0.1619,
      "step": 9164
    },
    {
      "epoch": 0.26737265884824085,
      "grad_norm": 0.840847459811344,
      "learning_rate": 8.593919257522988e-06,
      "loss": 0.1519,
      "step": 9165
    },
    {
      "epoch": 0.2674018320788844,
      "grad_norm": 0.9773620966100263,
      "learning_rate": 8.593590789375775e-06,
      "loss": 0.1611,
      "step": 9166
    },
    {
      "epoch": 0.26743100530952796,
      "grad_norm": 0.8697579112754361,
      "learning_rate": 8.59326228914584e-06,
      "loss": 0.1359,
      "step": 9167
    },
    {
      "epoch": 0.2674601785401715,
      "grad_norm": 0.9788890258356004,
      "learning_rate": 8.59293375683612e-06,
      "loss": 0.139,
      "step": 9168
    },
    {
      "epoch": 0.2674893517708151,
      "grad_norm": 0.8896247117464511,
      "learning_rate": 8.592605192449543e-06,
      "loss": 0.1617,
      "step": 9169
    },
    {
      "epoch": 0.2675185250014587,
      "grad_norm": 1.0945036165838098,
      "learning_rate": 8.592276595989045e-06,
      "loss": 0.1648,
      "step": 9170
    },
    {
      "epoch": 0.2675476982321022,
      "grad_norm": 1.1723938840064165,
      "learning_rate": 8.59194796745756e-06,
      "loss": 0.156,
      "step": 9171
    },
    {
      "epoch": 0.2675768714627458,
      "grad_norm": 0.944501312743487,
      "learning_rate": 8.591619306858019e-06,
      "loss": 0.1815,
      "step": 9172
    },
    {
      "epoch": 0.26760604469338933,
      "grad_norm": 0.9320193758861964,
      "learning_rate": 8.59129061419336e-06,
      "loss": 0.1816,
      "step": 9173
    },
    {
      "epoch": 0.2676352179240329,
      "grad_norm": 1.0080757883311802,
      "learning_rate": 8.590961889466514e-06,
      "loss": 0.1503,
      "step": 9174
    },
    {
      "epoch": 0.2676643911546765,
      "grad_norm": 0.9988390895799796,
      "learning_rate": 8.590633132680419e-06,
      "loss": 0.1685,
      "step": 9175
    },
    {
      "epoch": 0.26769356438532005,
      "grad_norm": 0.9220635792551475,
      "learning_rate": 8.590304343838008e-06,
      "loss": 0.1659,
      "step": 9176
    },
    {
      "epoch": 0.2677227376159636,
      "grad_norm": 0.7333628790025075,
      "learning_rate": 8.589975522942218e-06,
      "loss": 0.1321,
      "step": 9177
    },
    {
      "epoch": 0.26775191084660716,
      "grad_norm": 1.079258263839422,
      "learning_rate": 8.589646669995983e-06,
      "loss": 0.1724,
      "step": 9178
    },
    {
      "epoch": 0.2677810840772507,
      "grad_norm": 0.8545169594228665,
      "learning_rate": 8.589317785002238e-06,
      "loss": 0.1295,
      "step": 9179
    },
    {
      "epoch": 0.26781025730789426,
      "grad_norm": 0.7275041417697529,
      "learning_rate": 8.588988867963922e-06,
      "loss": 0.1383,
      "step": 9180
    },
    {
      "epoch": 0.2678394305385378,
      "grad_norm": 1.0811129047409436,
      "learning_rate": 8.58865991888397e-06,
      "loss": 0.1805,
      "step": 9181
    },
    {
      "epoch": 0.2678686037691814,
      "grad_norm": 0.8539191484350386,
      "learning_rate": 8.588330937765318e-06,
      "loss": 0.1751,
      "step": 9182
    },
    {
      "epoch": 0.267897776999825,
      "grad_norm": 0.7555783785508675,
      "learning_rate": 8.588001924610905e-06,
      "loss": 0.1371,
      "step": 9183
    },
    {
      "epoch": 0.26792695023046853,
      "grad_norm": 0.7832880033592341,
      "learning_rate": 8.587672879423668e-06,
      "loss": 0.1533,
      "step": 9184
    },
    {
      "epoch": 0.2679561234611121,
      "grad_norm": 0.7548938877031783,
      "learning_rate": 8.587343802206543e-06,
      "loss": 0.1607,
      "step": 9185
    },
    {
      "epoch": 0.26798529669175564,
      "grad_norm": 0.7232071859426559,
      "learning_rate": 8.587014692962468e-06,
      "loss": 0.1623,
      "step": 9186
    },
    {
      "epoch": 0.2680144699223992,
      "grad_norm": 0.7363097351957697,
      "learning_rate": 8.586685551694384e-06,
      "loss": 0.1472,
      "step": 9187
    },
    {
      "epoch": 0.26804364315304274,
      "grad_norm": 0.8764554487660825,
      "learning_rate": 8.586356378405228e-06,
      "loss": 0.1571,
      "step": 9188
    },
    {
      "epoch": 0.26807281638368635,
      "grad_norm": 0.6740339109139587,
      "learning_rate": 8.586027173097935e-06,
      "loss": 0.1431,
      "step": 9189
    },
    {
      "epoch": 0.2681019896143299,
      "grad_norm": 0.7955090497724493,
      "learning_rate": 8.58569793577545e-06,
      "loss": 0.1666,
      "step": 9190
    },
    {
      "epoch": 0.26813116284497346,
      "grad_norm": 0.8110620248158532,
      "learning_rate": 8.58536866644071e-06,
      "loss": 0.1699,
      "step": 9191
    },
    {
      "epoch": 0.268160336075617,
      "grad_norm": 0.9302065781587839,
      "learning_rate": 8.585039365096652e-06,
      "loss": 0.1531,
      "step": 9192
    },
    {
      "epoch": 0.26818950930626057,
      "grad_norm": 0.7246321762602458,
      "learning_rate": 8.584710031746222e-06,
      "loss": 0.1318,
      "step": 9193
    },
    {
      "epoch": 0.2682186825369041,
      "grad_norm": 0.8120322319483088,
      "learning_rate": 8.584380666392354e-06,
      "loss": 0.1788,
      "step": 9194
    },
    {
      "epoch": 0.26824785576754767,
      "grad_norm": 0.8096852428493483,
      "learning_rate": 8.584051269037992e-06,
      "loss": 0.1534,
      "step": 9195
    },
    {
      "epoch": 0.2682770289981913,
      "grad_norm": 0.8341933111889306,
      "learning_rate": 8.583721839686074e-06,
      "loss": 0.1392,
      "step": 9196
    },
    {
      "epoch": 0.26830620222883483,
      "grad_norm": 0.8384538346422006,
      "learning_rate": 8.583392378339546e-06,
      "loss": 0.1547,
      "step": 9197
    },
    {
      "epoch": 0.2683353754594784,
      "grad_norm": 0.8337948487907447,
      "learning_rate": 8.583062885001345e-06,
      "loss": 0.1398,
      "step": 9198
    },
    {
      "epoch": 0.26836454869012194,
      "grad_norm": 0.9001017741716905,
      "learning_rate": 8.582733359674413e-06,
      "loss": 0.1388,
      "step": 9199
    },
    {
      "epoch": 0.2683937219207655,
      "grad_norm": 0.8210396932280165,
      "learning_rate": 8.582403802361694e-06,
      "loss": 0.1401,
      "step": 9200
    },
    {
      "epoch": 0.26842289515140905,
      "grad_norm": 0.8767992996359955,
      "learning_rate": 8.58207421306613e-06,
      "loss": 0.1399,
      "step": 9201
    },
    {
      "epoch": 0.26845206838205266,
      "grad_norm": 0.7541149616576392,
      "learning_rate": 8.58174459179066e-06,
      "loss": 0.1668,
      "step": 9202
    },
    {
      "epoch": 0.2684812416126962,
      "grad_norm": 1.0436433724993603,
      "learning_rate": 8.58141493853823e-06,
      "loss": 0.1596,
      "step": 9203
    },
    {
      "epoch": 0.26851041484333976,
      "grad_norm": 0.839183303192571,
      "learning_rate": 8.581085253311783e-06,
      "loss": 0.1518,
      "step": 9204
    },
    {
      "epoch": 0.2685395880739833,
      "grad_norm": 0.885748581867198,
      "learning_rate": 8.580755536114262e-06,
      "loss": 0.1646,
      "step": 9205
    },
    {
      "epoch": 0.26856876130462687,
      "grad_norm": 0.9010468031908282,
      "learning_rate": 8.58042578694861e-06,
      "loss": 0.1846,
      "step": 9206
    },
    {
      "epoch": 0.2685979345352704,
      "grad_norm": 0.9110955868109781,
      "learning_rate": 8.580096005817771e-06,
      "loss": 0.1526,
      "step": 9207
    },
    {
      "epoch": 0.268627107765914,
      "grad_norm": 0.7531201583190602,
      "learning_rate": 8.57976619272469e-06,
      "loss": 0.1607,
      "step": 9208
    },
    {
      "epoch": 0.2686562809965576,
      "grad_norm": 0.7406639129407381,
      "learning_rate": 8.57943634767231e-06,
      "loss": 0.1371,
      "step": 9209
    },
    {
      "epoch": 0.26868545422720114,
      "grad_norm": 0.8488613798188147,
      "learning_rate": 8.579106470663578e-06,
      "loss": 0.1679,
      "step": 9210
    },
    {
      "epoch": 0.2687146274578447,
      "grad_norm": 0.8229960816134502,
      "learning_rate": 8.578776561701438e-06,
      "loss": 0.1632,
      "step": 9211
    },
    {
      "epoch": 0.26874380068848824,
      "grad_norm": 1.1316756306441187,
      "learning_rate": 8.578446620788834e-06,
      "loss": 0.1689,
      "step": 9212
    },
    {
      "epoch": 0.2687729739191318,
      "grad_norm": 0.7629383223594314,
      "learning_rate": 8.578116647928714e-06,
      "loss": 0.177,
      "step": 9213
    },
    {
      "epoch": 0.26880214714977535,
      "grad_norm": 1.161795672400416,
      "learning_rate": 8.577786643124022e-06,
      "loss": 0.1656,
      "step": 9214
    },
    {
      "epoch": 0.2688313203804189,
      "grad_norm": 0.7668669152188602,
      "learning_rate": 8.577456606377704e-06,
      "loss": 0.1644,
      "step": 9215
    },
    {
      "epoch": 0.2688604936110625,
      "grad_norm": 0.7187454993662719,
      "learning_rate": 8.577126537692707e-06,
      "loss": 0.1525,
      "step": 9216
    },
    {
      "epoch": 0.26888966684170607,
      "grad_norm": 0.98976293249415,
      "learning_rate": 8.576796437071982e-06,
      "loss": 0.182,
      "step": 9217
    },
    {
      "epoch": 0.2689188400723496,
      "grad_norm": 0.7323738644458602,
      "learning_rate": 8.576466304518469e-06,
      "loss": 0.1478,
      "step": 9218
    },
    {
      "epoch": 0.26894801330299317,
      "grad_norm": 0.7744388673034995,
      "learning_rate": 8.57613614003512e-06,
      "loss": 0.1584,
      "step": 9219
    },
    {
      "epoch": 0.2689771865336367,
      "grad_norm": 1.4568329485976723,
      "learning_rate": 8.57580594362488e-06,
      "loss": 0.1464,
      "step": 9220
    },
    {
      "epoch": 0.2690063597642803,
      "grad_norm": 0.8150362305649753,
      "learning_rate": 8.5754757152907e-06,
      "loss": 0.1603,
      "step": 9221
    },
    {
      "epoch": 0.26903553299492383,
      "grad_norm": 0.9979086478524237,
      "learning_rate": 8.575145455035525e-06,
      "loss": 0.1578,
      "step": 9222
    },
    {
      "epoch": 0.26906470622556744,
      "grad_norm": 0.8383760282009125,
      "learning_rate": 8.574815162862305e-06,
      "loss": 0.1614,
      "step": 9223
    },
    {
      "epoch": 0.269093879456211,
      "grad_norm": 0.8110399966876206,
      "learning_rate": 8.574484838773988e-06,
      "loss": 0.1571,
      "step": 9224
    },
    {
      "epoch": 0.26912305268685455,
      "grad_norm": 0.9286612870673736,
      "learning_rate": 8.574154482773524e-06,
      "loss": 0.1386,
      "step": 9225
    },
    {
      "epoch": 0.2691522259174981,
      "grad_norm": 0.8942256144728984,
      "learning_rate": 8.573824094863863e-06,
      "loss": 0.1884,
      "step": 9226
    },
    {
      "epoch": 0.26918139914814165,
      "grad_norm": 0.8160018496012329,
      "learning_rate": 8.573493675047953e-06,
      "loss": 0.1827,
      "step": 9227
    },
    {
      "epoch": 0.2692105723787852,
      "grad_norm": 0.9950852190666365,
      "learning_rate": 8.573163223328744e-06,
      "loss": 0.1467,
      "step": 9228
    },
    {
      "epoch": 0.2692397456094288,
      "grad_norm": 0.9832431634164503,
      "learning_rate": 8.572832739709187e-06,
      "loss": 0.1525,
      "step": 9229
    },
    {
      "epoch": 0.26926891884007237,
      "grad_norm": 0.8231481339351706,
      "learning_rate": 8.572502224192233e-06,
      "loss": 0.1583,
      "step": 9230
    },
    {
      "epoch": 0.2692980920707159,
      "grad_norm": 0.924052550024724,
      "learning_rate": 8.572171676780832e-06,
      "loss": 0.1417,
      "step": 9231
    },
    {
      "epoch": 0.2693272653013595,
      "grad_norm": 0.8464288011978807,
      "learning_rate": 8.571841097477933e-06,
      "loss": 0.1314,
      "step": 9232
    },
    {
      "epoch": 0.26935643853200303,
      "grad_norm": 0.8420754487561416,
      "learning_rate": 8.571510486286492e-06,
      "loss": 0.1333,
      "step": 9233
    },
    {
      "epoch": 0.2693856117626466,
      "grad_norm": 1.0327153316506936,
      "learning_rate": 8.571179843209457e-06,
      "loss": 0.182,
      "step": 9234
    },
    {
      "epoch": 0.26941478499329014,
      "grad_norm": 1.0459509761829675,
      "learning_rate": 8.57084916824978e-06,
      "loss": 0.1758,
      "step": 9235
    },
    {
      "epoch": 0.26944395822393374,
      "grad_norm": 0.9712306776005475,
      "learning_rate": 8.570518461410415e-06,
      "loss": 0.148,
      "step": 9236
    },
    {
      "epoch": 0.2694731314545773,
      "grad_norm": 0.7930466124645379,
      "learning_rate": 8.570187722694312e-06,
      "loss": 0.1654,
      "step": 9237
    },
    {
      "epoch": 0.26950230468522085,
      "grad_norm": 0.780649466370968,
      "learning_rate": 8.569856952104427e-06,
      "loss": 0.1626,
      "step": 9238
    },
    {
      "epoch": 0.2695314779158644,
      "grad_norm": 0.9327302768809439,
      "learning_rate": 8.56952614964371e-06,
      "loss": 0.1627,
      "step": 9239
    },
    {
      "epoch": 0.26956065114650796,
      "grad_norm": 0.8298760840573467,
      "learning_rate": 8.569195315315117e-06,
      "loss": 0.1435,
      "step": 9240
    },
    {
      "epoch": 0.2695898243771515,
      "grad_norm": 0.866636920406715,
      "learning_rate": 8.568864449121599e-06,
      "loss": 0.1935,
      "step": 9241
    },
    {
      "epoch": 0.26961899760779506,
      "grad_norm": 0.8687460907291265,
      "learning_rate": 8.568533551066113e-06,
      "loss": 0.1839,
      "step": 9242
    },
    {
      "epoch": 0.26964817083843867,
      "grad_norm": 0.9308339552873933,
      "learning_rate": 8.56820262115161e-06,
      "loss": 0.15,
      "step": 9243
    },
    {
      "epoch": 0.2696773440690822,
      "grad_norm": 1.3052139536501206,
      "learning_rate": 8.567871659381047e-06,
      "loss": 0.1576,
      "step": 9244
    },
    {
      "epoch": 0.2697065172997258,
      "grad_norm": 0.9432496306131262,
      "learning_rate": 8.567540665757375e-06,
      "loss": 0.1902,
      "step": 9245
    },
    {
      "epoch": 0.26973569053036933,
      "grad_norm": 0.7887585699622204,
      "learning_rate": 8.567209640283553e-06,
      "loss": 0.1457,
      "step": 9246
    },
    {
      "epoch": 0.2697648637610129,
      "grad_norm": 0.8241288397490717,
      "learning_rate": 8.566878582962534e-06,
      "loss": 0.1569,
      "step": 9247
    },
    {
      "epoch": 0.26979403699165644,
      "grad_norm": 0.9992394244678918,
      "learning_rate": 8.566547493797278e-06,
      "loss": 0.1625,
      "step": 9248
    },
    {
      "epoch": 0.2698232102223,
      "grad_norm": 0.8744530226184944,
      "learning_rate": 8.566216372790735e-06,
      "loss": 0.1481,
      "step": 9249
    },
    {
      "epoch": 0.2698523834529436,
      "grad_norm": 0.8531297003913738,
      "learning_rate": 8.565885219945862e-06,
      "loss": 0.1335,
      "step": 9250
    },
    {
      "epoch": 0.26988155668358715,
      "grad_norm": 0.721705219093551,
      "learning_rate": 8.565554035265618e-06,
      "loss": 0.1431,
      "step": 9251
    },
    {
      "epoch": 0.2699107299142307,
      "grad_norm": 0.8786835574171571,
      "learning_rate": 8.565222818752959e-06,
      "loss": 0.1788,
      "step": 9252
    },
    {
      "epoch": 0.26993990314487426,
      "grad_norm": 0.7672552773990936,
      "learning_rate": 8.564891570410842e-06,
      "loss": 0.1617,
      "step": 9253
    },
    {
      "epoch": 0.2699690763755178,
      "grad_norm": 0.7098568549686243,
      "learning_rate": 8.564560290242224e-06,
      "loss": 0.1427,
      "step": 9254
    },
    {
      "epoch": 0.26999824960616137,
      "grad_norm": 0.685298869254431,
      "learning_rate": 8.564228978250062e-06,
      "loss": 0.1575,
      "step": 9255
    },
    {
      "epoch": 0.2700274228368049,
      "grad_norm": 0.9341513204785777,
      "learning_rate": 8.563897634437316e-06,
      "loss": 0.1572,
      "step": 9256
    },
    {
      "epoch": 0.27005659606744853,
      "grad_norm": 0.6749370421864948,
      "learning_rate": 8.563566258806942e-06,
      "loss": 0.1654,
      "step": 9257
    },
    {
      "epoch": 0.2700857692980921,
      "grad_norm": 0.6732788259830977,
      "learning_rate": 8.5632348513619e-06,
      "loss": 0.1533,
      "step": 9258
    },
    {
      "epoch": 0.27011494252873564,
      "grad_norm": 0.9100626040318655,
      "learning_rate": 8.562903412105146e-06,
      "loss": 0.1599,
      "step": 9259
    },
    {
      "epoch": 0.2701441157593792,
      "grad_norm": 0.8611613812154474,
      "learning_rate": 8.562571941039641e-06,
      "loss": 0.167,
      "step": 9260
    },
    {
      "epoch": 0.27017328899002274,
      "grad_norm": 0.9697066162839673,
      "learning_rate": 8.562240438168345e-06,
      "loss": 0.1409,
      "step": 9261
    },
    {
      "epoch": 0.2702024622206663,
      "grad_norm": 0.9169661997507168,
      "learning_rate": 8.561908903494216e-06,
      "loss": 0.1657,
      "step": 9262
    },
    {
      "epoch": 0.2702316354513099,
      "grad_norm": 0.6697914017206752,
      "learning_rate": 8.561577337020217e-06,
      "loss": 0.1351,
      "step": 9263
    },
    {
      "epoch": 0.27026080868195346,
      "grad_norm": 0.6985498204266574,
      "learning_rate": 8.561245738749302e-06,
      "loss": 0.1519,
      "step": 9264
    },
    {
      "epoch": 0.270289981912597,
      "grad_norm": 0.6572420921352242,
      "learning_rate": 8.560914108684437e-06,
      "loss": 0.1522,
      "step": 9265
    },
    {
      "epoch": 0.27031915514324056,
      "grad_norm": 0.8025213477402136,
      "learning_rate": 8.560582446828582e-06,
      "loss": 0.1455,
      "step": 9266
    },
    {
      "epoch": 0.2703483283738841,
      "grad_norm": 0.8256942591098151,
      "learning_rate": 8.560250753184695e-06,
      "loss": 0.1438,
      "step": 9267
    },
    {
      "epoch": 0.27037750160452767,
      "grad_norm": 1.6282283347267548,
      "learning_rate": 8.559919027755741e-06,
      "loss": 0.1566,
      "step": 9268
    },
    {
      "epoch": 0.2704066748351712,
      "grad_norm": 0.8542265396487017,
      "learning_rate": 8.55958727054468e-06,
      "loss": 0.152,
      "step": 9269
    },
    {
      "epoch": 0.27043584806581483,
      "grad_norm": 0.8206370962531879,
      "learning_rate": 8.559255481554471e-06,
      "loss": 0.1656,
      "step": 9270
    },
    {
      "epoch": 0.2704650212964584,
      "grad_norm": 0.6080706314194168,
      "learning_rate": 8.558923660788081e-06,
      "loss": 0.1317,
      "step": 9271
    },
    {
      "epoch": 0.27049419452710194,
      "grad_norm": 0.8862959009360586,
      "learning_rate": 8.558591808248469e-06,
      "loss": 0.145,
      "step": 9272
    },
    {
      "epoch": 0.2705233677577455,
      "grad_norm": 0.8417583707498439,
      "learning_rate": 8.5582599239386e-06,
      "loss": 0.1855,
      "step": 9273
    },
    {
      "epoch": 0.27055254098838905,
      "grad_norm": 0.6334134148413211,
      "learning_rate": 8.557928007861433e-06,
      "loss": 0.1513,
      "step": 9274
    },
    {
      "epoch": 0.2705817142190326,
      "grad_norm": 0.7808936451747582,
      "learning_rate": 8.557596060019936e-06,
      "loss": 0.1381,
      "step": 9275
    },
    {
      "epoch": 0.27061088744967615,
      "grad_norm": 0.8780040742489097,
      "learning_rate": 8.557264080417071e-06,
      "loss": 0.1418,
      "step": 9276
    },
    {
      "epoch": 0.27064006068031976,
      "grad_norm": 0.788857754357467,
      "learning_rate": 8.556932069055803e-06,
      "loss": 0.1608,
      "step": 9277
    },
    {
      "epoch": 0.2706692339109633,
      "grad_norm": 0.8843795580702432,
      "learning_rate": 8.556600025939092e-06,
      "loss": 0.1721,
      "step": 9278
    },
    {
      "epoch": 0.27069840714160687,
      "grad_norm": 1.071660232202477,
      "learning_rate": 8.556267951069906e-06,
      "loss": 0.1824,
      "step": 9279
    },
    {
      "epoch": 0.2707275803722504,
      "grad_norm": 0.880605245956067,
      "learning_rate": 8.555935844451209e-06,
      "loss": 0.1354,
      "step": 9280
    },
    {
      "epoch": 0.270756753602894,
      "grad_norm": 0.7316997735124465,
      "learning_rate": 8.555603706085965e-06,
      "loss": 0.14,
      "step": 9281
    },
    {
      "epoch": 0.2707859268335375,
      "grad_norm": 1.1402191306652094,
      "learning_rate": 8.55527153597714e-06,
      "loss": 0.1342,
      "step": 9282
    },
    {
      "epoch": 0.2708151000641811,
      "grad_norm": 0.95009564932012,
      "learning_rate": 8.5549393341277e-06,
      "loss": 0.1573,
      "step": 9283
    },
    {
      "epoch": 0.2708442732948247,
      "grad_norm": 0.6920552480734122,
      "learning_rate": 8.554607100540609e-06,
      "loss": 0.1513,
      "step": 9284
    },
    {
      "epoch": 0.27087344652546824,
      "grad_norm": 1.107356065150215,
      "learning_rate": 8.554274835218834e-06,
      "loss": 0.1697,
      "step": 9285
    },
    {
      "epoch": 0.2709026197561118,
      "grad_norm": 0.8283999436443544,
      "learning_rate": 8.553942538165344e-06,
      "loss": 0.1527,
      "step": 9286
    },
    {
      "epoch": 0.27093179298675535,
      "grad_norm": 0.8953118067974883,
      "learning_rate": 8.5536102093831e-06,
      "loss": 0.1472,
      "step": 9287
    },
    {
      "epoch": 0.2709609662173989,
      "grad_norm": 0.8188603250966501,
      "learning_rate": 8.553277848875077e-06,
      "loss": 0.2096,
      "step": 9288
    },
    {
      "epoch": 0.27099013944804246,
      "grad_norm": 1.0244274232072392,
      "learning_rate": 8.552945456644234e-06,
      "loss": 0.1616,
      "step": 9289
    },
    {
      "epoch": 0.27101931267868606,
      "grad_norm": 0.8385400445991816,
      "learning_rate": 8.552613032693545e-06,
      "loss": 0.1605,
      "step": 9290
    },
    {
      "epoch": 0.2710484859093296,
      "grad_norm": 0.729594948400735,
      "learning_rate": 8.552280577025972e-06,
      "loss": 0.1561,
      "step": 9291
    },
    {
      "epoch": 0.27107765913997317,
      "grad_norm": 0.9424074248978331,
      "learning_rate": 8.551948089644487e-06,
      "loss": 0.1773,
      "step": 9292
    },
    {
      "epoch": 0.2711068323706167,
      "grad_norm": 0.8850560771163402,
      "learning_rate": 8.551615570552058e-06,
      "loss": 0.1766,
      "step": 9293
    },
    {
      "epoch": 0.2711360056012603,
      "grad_norm": 0.7019283025275048,
      "learning_rate": 8.551283019751652e-06,
      "loss": 0.1595,
      "step": 9294
    },
    {
      "epoch": 0.27116517883190383,
      "grad_norm": 0.7876402084548211,
      "learning_rate": 8.550950437246239e-06,
      "loss": 0.2028,
      "step": 9295
    },
    {
      "epoch": 0.2711943520625474,
      "grad_norm": 0.9726648168405307,
      "learning_rate": 8.55061782303879e-06,
      "loss": 0.2062,
      "step": 9296
    },
    {
      "epoch": 0.271223525293191,
      "grad_norm": 0.743316445583261,
      "learning_rate": 8.550285177132271e-06,
      "loss": 0.1621,
      "step": 9297
    },
    {
      "epoch": 0.27125269852383455,
      "grad_norm": 0.897330137360762,
      "learning_rate": 8.549952499529654e-06,
      "loss": 0.1589,
      "step": 9298
    },
    {
      "epoch": 0.2712818717544781,
      "grad_norm": 0.8456954463841175,
      "learning_rate": 8.54961979023391e-06,
      "loss": 0.1563,
      "step": 9299
    },
    {
      "epoch": 0.27131104498512165,
      "grad_norm": 0.8642499948240627,
      "learning_rate": 8.549287049248006e-06,
      "loss": 0.1468,
      "step": 9300
    },
    {
      "epoch": 0.2713402182157652,
      "grad_norm": 0.8681786148279045,
      "learning_rate": 8.548954276574914e-06,
      "loss": 0.1511,
      "step": 9301
    },
    {
      "epoch": 0.27136939144640876,
      "grad_norm": 0.8314107658375232,
      "learning_rate": 8.548621472217606e-06,
      "loss": 0.199,
      "step": 9302
    },
    {
      "epoch": 0.2713985646770523,
      "grad_norm": 0.883653189288504,
      "learning_rate": 8.548288636179053e-06,
      "loss": 0.1512,
      "step": 9303
    },
    {
      "epoch": 0.2714277379076959,
      "grad_norm": 0.9456358639701622,
      "learning_rate": 8.547955768462226e-06,
      "loss": 0.1523,
      "step": 9304
    },
    {
      "epoch": 0.2714569111383395,
      "grad_norm": 0.7569096835080417,
      "learning_rate": 8.547622869070096e-06,
      "loss": 0.1696,
      "step": 9305
    },
    {
      "epoch": 0.271486084368983,
      "grad_norm": 1.030717279030035,
      "learning_rate": 8.547289938005638e-06,
      "loss": 0.1561,
      "step": 9306
    },
    {
      "epoch": 0.2715152575996266,
      "grad_norm": 0.9282325955141839,
      "learning_rate": 8.54695697527182e-06,
      "loss": 0.1362,
      "step": 9307
    },
    {
      "epoch": 0.27154443083027013,
      "grad_norm": 1.1271936814291548,
      "learning_rate": 8.546623980871617e-06,
      "loss": 0.1802,
      "step": 9308
    },
    {
      "epoch": 0.2715736040609137,
      "grad_norm": 1.0773611159059693,
      "learning_rate": 8.546290954808004e-06,
      "loss": 0.1555,
      "step": 9309
    },
    {
      "epoch": 0.27160277729155724,
      "grad_norm": 0.8407925582473406,
      "learning_rate": 8.54595789708395e-06,
      "loss": 0.1732,
      "step": 9310
    },
    {
      "epoch": 0.27163195052220085,
      "grad_norm": 0.9216617540670842,
      "learning_rate": 8.54562480770243e-06,
      "loss": 0.1375,
      "step": 9311
    },
    {
      "epoch": 0.2716611237528444,
      "grad_norm": 0.9849609811805082,
      "learning_rate": 8.54529168666642e-06,
      "loss": 0.1558,
      "step": 9312
    },
    {
      "epoch": 0.27169029698348796,
      "grad_norm": 0.8263233447640331,
      "learning_rate": 8.544958533978891e-06,
      "loss": 0.1605,
      "step": 9313
    },
    {
      "epoch": 0.2717194702141315,
      "grad_norm": 0.7228264966155231,
      "learning_rate": 8.544625349642818e-06,
      "loss": 0.1453,
      "step": 9314
    },
    {
      "epoch": 0.27174864344477506,
      "grad_norm": 0.8717108165395171,
      "learning_rate": 8.544292133661178e-06,
      "loss": 0.1437,
      "step": 9315
    },
    {
      "epoch": 0.2717778166754186,
      "grad_norm": 0.6765522309707471,
      "learning_rate": 8.543958886036942e-06,
      "loss": 0.1644,
      "step": 9316
    },
    {
      "epoch": 0.2718069899060622,
      "grad_norm": 0.9209086600834943,
      "learning_rate": 8.543625606773088e-06,
      "loss": 0.1614,
      "step": 9317
    },
    {
      "epoch": 0.2718361631367058,
      "grad_norm": 0.6893649129641594,
      "learning_rate": 8.543292295872591e-06,
      "loss": 0.1314,
      "step": 9318
    },
    {
      "epoch": 0.27186533636734933,
      "grad_norm": 1.004828097157871,
      "learning_rate": 8.542958953338424e-06,
      "loss": 0.1766,
      "step": 9319
    },
    {
      "epoch": 0.2718945095979929,
      "grad_norm": 0.8221329832359012,
      "learning_rate": 8.542625579173567e-06,
      "loss": 0.1761,
      "step": 9320
    },
    {
      "epoch": 0.27192368282863644,
      "grad_norm": 0.7769165503015246,
      "learning_rate": 8.542292173380994e-06,
      "loss": 0.1485,
      "step": 9321
    },
    {
      "epoch": 0.27195285605928,
      "grad_norm": 0.8324451091458328,
      "learning_rate": 8.541958735963683e-06,
      "loss": 0.1413,
      "step": 9322
    },
    {
      "epoch": 0.27198202928992354,
      "grad_norm": 0.7786202238810676,
      "learning_rate": 8.54162526692461e-06,
      "loss": 0.1442,
      "step": 9323
    },
    {
      "epoch": 0.27201120252056715,
      "grad_norm": 0.74539655946189,
      "learning_rate": 8.541291766266751e-06,
      "loss": 0.1439,
      "step": 9324
    },
    {
      "epoch": 0.2720403757512107,
      "grad_norm": 0.7872587596658237,
      "learning_rate": 8.540958233993084e-06,
      "loss": 0.1492,
      "step": 9325
    },
    {
      "epoch": 0.27206954898185426,
      "grad_norm": 0.9161060159698041,
      "learning_rate": 8.540624670106587e-06,
      "loss": 0.1496,
      "step": 9326
    },
    {
      "epoch": 0.2720987222124978,
      "grad_norm": 0.7337597554029629,
      "learning_rate": 8.54029107461024e-06,
      "loss": 0.1731,
      "step": 9327
    },
    {
      "epoch": 0.27212789544314137,
      "grad_norm": 0.9281427391751094,
      "learning_rate": 8.539957447507019e-06,
      "loss": 0.1601,
      "step": 9328
    },
    {
      "epoch": 0.2721570686737849,
      "grad_norm": 0.8651842214497207,
      "learning_rate": 8.539623788799903e-06,
      "loss": 0.1496,
      "step": 9329
    },
    {
      "epoch": 0.27218624190442847,
      "grad_norm": 0.6802650700621498,
      "learning_rate": 8.53929009849187e-06,
      "loss": 0.1468,
      "step": 9330
    },
    {
      "epoch": 0.2722154151350721,
      "grad_norm": 1.1047225626386818,
      "learning_rate": 8.5389563765859e-06,
      "loss": 0.1367,
      "step": 9331
    },
    {
      "epoch": 0.27224458836571563,
      "grad_norm": 0.9881449060735117,
      "learning_rate": 8.538622623084973e-06,
      "loss": 0.1797,
      "step": 9332
    },
    {
      "epoch": 0.2722737615963592,
      "grad_norm": 0.7753754384464604,
      "learning_rate": 8.538288837992066e-06,
      "loss": 0.1803,
      "step": 9333
    },
    {
      "epoch": 0.27230293482700274,
      "grad_norm": 1.1058551155594245,
      "learning_rate": 8.537955021310162e-06,
      "loss": 0.1688,
      "step": 9334
    },
    {
      "epoch": 0.2723321080576463,
      "grad_norm": 0.7792812827582498,
      "learning_rate": 8.537621173042241e-06,
      "loss": 0.1629,
      "step": 9335
    },
    {
      "epoch": 0.27236128128828985,
      "grad_norm": 0.8340503383022526,
      "learning_rate": 8.537287293191283e-06,
      "loss": 0.1583,
      "step": 9336
    },
    {
      "epoch": 0.2723904545189334,
      "grad_norm": 0.7750323342153583,
      "learning_rate": 8.536953381760266e-06,
      "loss": 0.1425,
      "step": 9337
    },
    {
      "epoch": 0.272419627749577,
      "grad_norm": 0.8793720364119025,
      "learning_rate": 8.536619438752176e-06,
      "loss": 0.1596,
      "step": 9338
    },
    {
      "epoch": 0.27244880098022056,
      "grad_norm": 0.7476810066439837,
      "learning_rate": 8.536285464169992e-06,
      "loss": 0.1577,
      "step": 9339
    },
    {
      "epoch": 0.2724779742108641,
      "grad_norm": 0.7617103675329747,
      "learning_rate": 8.535951458016693e-06,
      "loss": 0.1679,
      "step": 9340
    },
    {
      "epoch": 0.27250714744150767,
      "grad_norm": 0.8757811291725777,
      "learning_rate": 8.535617420295267e-06,
      "loss": 0.1488,
      "step": 9341
    },
    {
      "epoch": 0.2725363206721512,
      "grad_norm": 0.9223600901517369,
      "learning_rate": 8.53528335100869e-06,
      "loss": 0.17,
      "step": 9342
    },
    {
      "epoch": 0.2725654939027948,
      "grad_norm": 0.9502397521295001,
      "learning_rate": 8.534949250159947e-06,
      "loss": 0.153,
      "step": 9343
    },
    {
      "epoch": 0.2725946671334384,
      "grad_norm": 0.8614990086082593,
      "learning_rate": 8.534615117752024e-06,
      "loss": 0.1478,
      "step": 9344
    },
    {
      "epoch": 0.27262384036408194,
      "grad_norm": 0.7105929316816207,
      "learning_rate": 8.534280953787899e-06,
      "loss": 0.1413,
      "step": 9345
    },
    {
      "epoch": 0.2726530135947255,
      "grad_norm": 1.1303127016511665,
      "learning_rate": 8.533946758270556e-06,
      "loss": 0.1684,
      "step": 9346
    },
    {
      "epoch": 0.27268218682536904,
      "grad_norm": 0.7069785066889098,
      "learning_rate": 8.533612531202981e-06,
      "loss": 0.1393,
      "step": 9347
    },
    {
      "epoch": 0.2727113600560126,
      "grad_norm": 0.7996427312250106,
      "learning_rate": 8.533278272588159e-06,
      "loss": 0.1656,
      "step": 9348
    },
    {
      "epoch": 0.27274053328665615,
      "grad_norm": 1.0170876726172817,
      "learning_rate": 8.53294398242907e-06,
      "loss": 0.1747,
      "step": 9349
    },
    {
      "epoch": 0.2727697065172997,
      "grad_norm": 0.7867301607587156,
      "learning_rate": 8.5326096607287e-06,
      "loss": 0.1608,
      "step": 9350
    },
    {
      "epoch": 0.2727988797479433,
      "grad_norm": 0.8138361970107523,
      "learning_rate": 8.532275307490034e-06,
      "loss": 0.1786,
      "step": 9351
    },
    {
      "epoch": 0.27282805297858687,
      "grad_norm": 0.899526759375119,
      "learning_rate": 8.531940922716058e-06,
      "loss": 0.1555,
      "step": 9352
    },
    {
      "epoch": 0.2728572262092304,
      "grad_norm": 0.8441496100724186,
      "learning_rate": 8.531606506409757e-06,
      "loss": 0.1737,
      "step": 9353
    },
    {
      "epoch": 0.272886399439874,
      "grad_norm": 0.8824631930854943,
      "learning_rate": 8.531272058574116e-06,
      "loss": 0.1447,
      "step": 9354
    },
    {
      "epoch": 0.2729155726705175,
      "grad_norm": 0.9767803676552924,
      "learning_rate": 8.530937579212122e-06,
      "loss": 0.169,
      "step": 9355
    },
    {
      "epoch": 0.2729447459011611,
      "grad_norm": 1.5006568498457795,
      "learning_rate": 8.530603068326759e-06,
      "loss": 0.168,
      "step": 9356
    },
    {
      "epoch": 0.27297391913180463,
      "grad_norm": 0.8441209861961562,
      "learning_rate": 8.530268525921015e-06,
      "loss": 0.1273,
      "step": 9357
    },
    {
      "epoch": 0.27300309236244824,
      "grad_norm": 1.0782498860134957,
      "learning_rate": 8.529933951997875e-06,
      "loss": 0.1558,
      "step": 9358
    },
    {
      "epoch": 0.2730322655930918,
      "grad_norm": 0.655681288690383,
      "learning_rate": 8.52959934656033e-06,
      "loss": 0.1514,
      "step": 9359
    },
    {
      "epoch": 0.27306143882373535,
      "grad_norm": 1.041019869902648,
      "learning_rate": 8.529264709611362e-06,
      "loss": 0.177,
      "step": 9360
    },
    {
      "epoch": 0.2730906120543789,
      "grad_norm": 0.8222929072925065,
      "learning_rate": 8.528930041153962e-06,
      "loss": 0.1569,
      "step": 9361
    },
    {
      "epoch": 0.27311978528502245,
      "grad_norm": 0.6549911906259803,
      "learning_rate": 8.528595341191117e-06,
      "loss": 0.149,
      "step": 9362
    },
    {
      "epoch": 0.273148958515666,
      "grad_norm": 0.9660138123216552,
      "learning_rate": 8.528260609725816e-06,
      "loss": 0.159,
      "step": 9363
    },
    {
      "epoch": 0.27317813174630956,
      "grad_norm": 0.8154555060234866,
      "learning_rate": 8.527925846761046e-06,
      "loss": 0.1625,
      "step": 9364
    },
    {
      "epoch": 0.27320730497695317,
      "grad_norm": 0.783703359651396,
      "learning_rate": 8.527591052299797e-06,
      "loss": 0.1676,
      "step": 9365
    },
    {
      "epoch": 0.2732364782075967,
      "grad_norm": 0.9158468762553383,
      "learning_rate": 8.527256226345056e-06,
      "loss": 0.1566,
      "step": 9366
    },
    {
      "epoch": 0.2732656514382403,
      "grad_norm": 0.9611408206840761,
      "learning_rate": 8.526921368899815e-06,
      "loss": 0.1441,
      "step": 9367
    },
    {
      "epoch": 0.27329482466888383,
      "grad_norm": 0.8413751222970743,
      "learning_rate": 8.52658647996706e-06,
      "loss": 0.1453,
      "step": 9368
    },
    {
      "epoch": 0.2733239978995274,
      "grad_norm": 0.9248861010144814,
      "learning_rate": 8.526251559549783e-06,
      "loss": 0.1636,
      "step": 9369
    },
    {
      "epoch": 0.27335317113017094,
      "grad_norm": 0.9136868728947342,
      "learning_rate": 8.525916607650975e-06,
      "loss": 0.1564,
      "step": 9370
    },
    {
      "epoch": 0.27338234436081454,
      "grad_norm": 0.9328527593817365,
      "learning_rate": 8.525581624273624e-06,
      "loss": 0.1588,
      "step": 9371
    },
    {
      "epoch": 0.2734115175914581,
      "grad_norm": 0.9736891820902451,
      "learning_rate": 8.525246609420724e-06,
      "loss": 0.1625,
      "step": 9372
    },
    {
      "epoch": 0.27344069082210165,
      "grad_norm": 0.9497070190972525,
      "learning_rate": 8.524911563095262e-06,
      "loss": 0.161,
      "step": 9373
    },
    {
      "epoch": 0.2734698640527452,
      "grad_norm": 0.927790040062506,
      "learning_rate": 8.524576485300231e-06,
      "loss": 0.144,
      "step": 9374
    },
    {
      "epoch": 0.27349903728338876,
      "grad_norm": 0.7191343826786696,
      "learning_rate": 8.524241376038623e-06,
      "loss": 0.1349,
      "step": 9375
    },
    {
      "epoch": 0.2735282105140323,
      "grad_norm": 0.944773784415641,
      "learning_rate": 8.523906235313428e-06,
      "loss": 0.1508,
      "step": 9376
    },
    {
      "epoch": 0.27355738374467586,
      "grad_norm": 0.7617543791306891,
      "learning_rate": 8.52357106312764e-06,
      "loss": 0.1475,
      "step": 9377
    },
    {
      "epoch": 0.2735865569753195,
      "grad_norm": 0.8210786254523115,
      "learning_rate": 8.523235859484253e-06,
      "loss": 0.1479,
      "step": 9378
    },
    {
      "epoch": 0.273615730205963,
      "grad_norm": 0.9006378895303033,
      "learning_rate": 8.522900624386254e-06,
      "loss": 0.1673,
      "step": 9379
    },
    {
      "epoch": 0.2736449034366066,
      "grad_norm": 0.7785757867311436,
      "learning_rate": 8.522565357836642e-06,
      "loss": 0.1475,
      "step": 9380
    },
    {
      "epoch": 0.27367407666725013,
      "grad_norm": 0.9006336390443282,
      "learning_rate": 8.522230059838405e-06,
      "loss": 0.1666,
      "step": 9381
    },
    {
      "epoch": 0.2737032498978937,
      "grad_norm": 0.9108054518379897,
      "learning_rate": 8.521894730394541e-06,
      "loss": 0.1625,
      "step": 9382
    },
    {
      "epoch": 0.27373242312853724,
      "grad_norm": 0.7737262733776332,
      "learning_rate": 8.521559369508041e-06,
      "loss": 0.1566,
      "step": 9383
    },
    {
      "epoch": 0.2737615963591808,
      "grad_norm": 0.8088826900645648,
      "learning_rate": 8.5212239771819e-06,
      "loss": 0.1429,
      "step": 9384
    },
    {
      "epoch": 0.2737907695898244,
      "grad_norm": 0.7963841820897791,
      "learning_rate": 8.52088855341911e-06,
      "loss": 0.1262,
      "step": 9385
    },
    {
      "epoch": 0.27381994282046795,
      "grad_norm": 0.7710471350944503,
      "learning_rate": 8.520553098222668e-06,
      "loss": 0.1207,
      "step": 9386
    },
    {
      "epoch": 0.2738491160511115,
      "grad_norm": 0.6119562969575766,
      "learning_rate": 8.52021761159557e-06,
      "loss": 0.1494,
      "step": 9387
    },
    {
      "epoch": 0.27387828928175506,
      "grad_norm": 0.8668420715332206,
      "learning_rate": 8.519882093540808e-06,
      "loss": 0.1467,
      "step": 9388
    },
    {
      "epoch": 0.2739074625123986,
      "grad_norm": 0.8984444470974137,
      "learning_rate": 8.519546544061381e-06,
      "loss": 0.1644,
      "step": 9389
    },
    {
      "epoch": 0.27393663574304217,
      "grad_norm": 0.879324483678071,
      "learning_rate": 8.51921096316028e-06,
      "loss": 0.1712,
      "step": 9390
    },
    {
      "epoch": 0.2739658089736857,
      "grad_norm": 0.8891892272802413,
      "learning_rate": 8.518875350840504e-06,
      "loss": 0.1455,
      "step": 9391
    },
    {
      "epoch": 0.27399498220432933,
      "grad_norm": 0.7005306166547945,
      "learning_rate": 8.51853970710505e-06,
      "loss": 0.1612,
      "step": 9392
    },
    {
      "epoch": 0.2740241554349729,
      "grad_norm": 1.0161531778115314,
      "learning_rate": 8.518204031956913e-06,
      "loss": 0.1308,
      "step": 9393
    },
    {
      "epoch": 0.27405332866561644,
      "grad_norm": 0.6532963668948017,
      "learning_rate": 8.51786832539909e-06,
      "loss": 0.1543,
      "step": 9394
    },
    {
      "epoch": 0.27408250189626,
      "grad_norm": 0.8509789010570168,
      "learning_rate": 8.51753258743458e-06,
      "loss": 0.1822,
      "step": 9395
    },
    {
      "epoch": 0.27411167512690354,
      "grad_norm": 0.9009064557668237,
      "learning_rate": 8.517196818066377e-06,
      "loss": 0.165,
      "step": 9396
    },
    {
      "epoch": 0.2741408483575471,
      "grad_norm": 0.6804187319634439,
      "learning_rate": 8.51686101729748e-06,
      "loss": 0.1404,
      "step": 9397
    },
    {
      "epoch": 0.27417002158819065,
      "grad_norm": 0.887818453724445,
      "learning_rate": 8.516525185130888e-06,
      "loss": 0.1455,
      "step": 9398
    },
    {
      "epoch": 0.27419919481883426,
      "grad_norm": 0.7398801558085408,
      "learning_rate": 8.5161893215696e-06,
      "loss": 0.142,
      "step": 9399
    },
    {
      "epoch": 0.2742283680494778,
      "grad_norm": 0.6916633544053097,
      "learning_rate": 8.515853426616612e-06,
      "loss": 0.1381,
      "step": 9400
    },
    {
      "epoch": 0.27425754128012136,
      "grad_norm": 0.7964698108646552,
      "learning_rate": 8.515517500274923e-06,
      "loss": 0.1479,
      "step": 9401
    },
    {
      "epoch": 0.2742867145107649,
      "grad_norm": 0.8310633527532242,
      "learning_rate": 8.515181542547534e-06,
      "loss": 0.1467,
      "step": 9402
    },
    {
      "epoch": 0.27431588774140847,
      "grad_norm": 0.7182431225015057,
      "learning_rate": 8.514845553437443e-06,
      "loss": 0.1397,
      "step": 9403
    },
    {
      "epoch": 0.274345060972052,
      "grad_norm": 1.0091043667757507,
      "learning_rate": 8.514509532947651e-06,
      "loss": 0.1658,
      "step": 9404
    },
    {
      "epoch": 0.27437423420269563,
      "grad_norm": 1.0161650994653664,
      "learning_rate": 8.514173481081156e-06,
      "loss": 0.1545,
      "step": 9405
    },
    {
      "epoch": 0.2744034074333392,
      "grad_norm": 0.8351714662634255,
      "learning_rate": 8.513837397840958e-06,
      "loss": 0.1802,
      "step": 9406
    },
    {
      "epoch": 0.27443258066398274,
      "grad_norm": 1.2116086747951689,
      "learning_rate": 8.51350128323006e-06,
      "loss": 0.1461,
      "step": 9407
    },
    {
      "epoch": 0.2744617538946263,
      "grad_norm": 0.9772749691648196,
      "learning_rate": 8.513165137251463e-06,
      "loss": 0.1611,
      "step": 9408
    },
    {
      "epoch": 0.27449092712526985,
      "grad_norm": 0.8097937103562994,
      "learning_rate": 8.512828959908162e-06,
      "loss": 0.1442,
      "step": 9409
    },
    {
      "epoch": 0.2745201003559134,
      "grad_norm": 0.9182900497252301,
      "learning_rate": 8.512492751203165e-06,
      "loss": 0.1462,
      "step": 9410
    },
    {
      "epoch": 0.27454927358655695,
      "grad_norm": 0.8817319234172042,
      "learning_rate": 8.512156511139471e-06,
      "loss": 0.1632,
      "step": 9411
    },
    {
      "epoch": 0.27457844681720056,
      "grad_norm": 0.8286976994626774,
      "learning_rate": 8.511820239720084e-06,
      "loss": 0.1743,
      "step": 9412
    },
    {
      "epoch": 0.2746076200478441,
      "grad_norm": 0.735404074180732,
      "learning_rate": 8.511483936948002e-06,
      "loss": 0.1411,
      "step": 9413
    },
    {
      "epoch": 0.27463679327848767,
      "grad_norm": 0.9814813185347944,
      "learning_rate": 8.51114760282623e-06,
      "loss": 0.1515,
      "step": 9414
    },
    {
      "epoch": 0.2746659665091312,
      "grad_norm": 0.9934159197137334,
      "learning_rate": 8.51081123735777e-06,
      "loss": 0.1687,
      "step": 9415
    },
    {
      "epoch": 0.2746951397397748,
      "grad_norm": 0.7681812285132499,
      "learning_rate": 8.510474840545627e-06,
      "loss": 0.1555,
      "step": 9416
    },
    {
      "epoch": 0.2747243129704183,
      "grad_norm": 0.9609235946924203,
      "learning_rate": 8.5101384123928e-06,
      "loss": 0.1711,
      "step": 9417
    },
    {
      "epoch": 0.2747534862010619,
      "grad_norm": 1.0237015564079377,
      "learning_rate": 8.509801952902296e-06,
      "loss": 0.155,
      "step": 9418
    },
    {
      "epoch": 0.2747826594317055,
      "grad_norm": 0.7614658059014379,
      "learning_rate": 8.50946546207712e-06,
      "loss": 0.1631,
      "step": 9419
    },
    {
      "epoch": 0.27481183266234904,
      "grad_norm": 0.8529999767142598,
      "learning_rate": 8.509128939920272e-06,
      "loss": 0.1568,
      "step": 9420
    },
    {
      "epoch": 0.2748410058929926,
      "grad_norm": 0.8678651961051029,
      "learning_rate": 8.508792386434759e-06,
      "loss": 0.16,
      "step": 9421
    },
    {
      "epoch": 0.27487017912363615,
      "grad_norm": 0.7979435258787866,
      "learning_rate": 8.508455801623586e-06,
      "loss": 0.1792,
      "step": 9422
    },
    {
      "epoch": 0.2748993523542797,
      "grad_norm": 1.0046558438662228,
      "learning_rate": 8.508119185489757e-06,
      "loss": 0.1365,
      "step": 9423
    },
    {
      "epoch": 0.27492852558492326,
      "grad_norm": 0.7355819122446643,
      "learning_rate": 8.507782538036276e-06,
      "loss": 0.1447,
      "step": 9424
    },
    {
      "epoch": 0.2749576988155668,
      "grad_norm": 0.8460955767277549,
      "learning_rate": 8.507445859266152e-06,
      "loss": 0.1353,
      "step": 9425
    },
    {
      "epoch": 0.2749868720462104,
      "grad_norm": 0.7817880534790024,
      "learning_rate": 8.507109149182387e-06,
      "loss": 0.1678,
      "step": 9426
    },
    {
      "epoch": 0.27501604527685397,
      "grad_norm": 0.7356257704051247,
      "learning_rate": 8.506772407787988e-06,
      "loss": 0.176,
      "step": 9427
    },
    {
      "epoch": 0.2750452185074975,
      "grad_norm": 0.8704584172582093,
      "learning_rate": 8.506435635085966e-06,
      "loss": 0.1321,
      "step": 9428
    },
    {
      "epoch": 0.2750743917381411,
      "grad_norm": 0.8179491883004322,
      "learning_rate": 8.50609883107932e-06,
      "loss": 0.1518,
      "step": 9429
    },
    {
      "epoch": 0.27510356496878463,
      "grad_norm": 0.819497659757066,
      "learning_rate": 8.505761995771061e-06,
      "loss": 0.1696,
      "step": 9430
    },
    {
      "epoch": 0.2751327381994282,
      "grad_norm": 0.917272713572263,
      "learning_rate": 8.505425129164198e-06,
      "loss": 0.1654,
      "step": 9431
    },
    {
      "epoch": 0.2751619114300718,
      "grad_norm": 1.0703230633705194,
      "learning_rate": 8.505088231261733e-06,
      "loss": 0.2001,
      "step": 9432
    },
    {
      "epoch": 0.27519108466071535,
      "grad_norm": 1.0264963239532443,
      "learning_rate": 8.50475130206668e-06,
      "loss": 0.173,
      "step": 9433
    },
    {
      "epoch": 0.2752202578913589,
      "grad_norm": 0.8749461127264444,
      "learning_rate": 8.504414341582043e-06,
      "loss": 0.1455,
      "step": 9434
    },
    {
      "epoch": 0.27524943112200245,
      "grad_norm": 0.7617261193989336,
      "learning_rate": 8.50407734981083e-06,
      "loss": 0.1396,
      "step": 9435
    },
    {
      "epoch": 0.275278604352646,
      "grad_norm": 0.8694962379193559,
      "learning_rate": 8.503740326756052e-06,
      "loss": 0.1468,
      "step": 9436
    },
    {
      "epoch": 0.27530777758328956,
      "grad_norm": 0.785782274498047,
      "learning_rate": 8.503403272420718e-06,
      "loss": 0.1726,
      "step": 9437
    },
    {
      "epoch": 0.2753369508139331,
      "grad_norm": 0.9280001677456263,
      "learning_rate": 8.503066186807833e-06,
      "loss": 0.1608,
      "step": 9438
    },
    {
      "epoch": 0.2753661240445767,
      "grad_norm": 0.7299456908462157,
      "learning_rate": 8.502729069920412e-06,
      "loss": 0.1505,
      "step": 9439
    },
    {
      "epoch": 0.2753952972752203,
      "grad_norm": 0.8092359613359347,
      "learning_rate": 8.502391921761462e-06,
      "loss": 0.1811,
      "step": 9440
    },
    {
      "epoch": 0.2754244705058638,
      "grad_norm": 0.9013716823187342,
      "learning_rate": 8.502054742333992e-06,
      "loss": 0.1653,
      "step": 9441
    },
    {
      "epoch": 0.2754536437365074,
      "grad_norm": 0.9886197901556381,
      "learning_rate": 8.501717531641012e-06,
      "loss": 0.1699,
      "step": 9442
    },
    {
      "epoch": 0.27548281696715093,
      "grad_norm": 0.8659035733514054,
      "learning_rate": 8.501380289685536e-06,
      "loss": 0.1467,
      "step": 9443
    },
    {
      "epoch": 0.2755119901977945,
      "grad_norm": 0.9791734451901493,
      "learning_rate": 8.501043016470572e-06,
      "loss": 0.1642,
      "step": 9444
    },
    {
      "epoch": 0.27554116342843804,
      "grad_norm": 0.7702547427096468,
      "learning_rate": 8.500705711999131e-06,
      "loss": 0.1478,
      "step": 9445
    },
    {
      "epoch": 0.27557033665908165,
      "grad_norm": 0.7875749626466658,
      "learning_rate": 8.500368376274226e-06,
      "loss": 0.1364,
      "step": 9446
    },
    {
      "epoch": 0.2755995098897252,
      "grad_norm": 0.8043242573994069,
      "learning_rate": 8.500031009298866e-06,
      "loss": 0.1662,
      "step": 9447
    },
    {
      "epoch": 0.27562868312036876,
      "grad_norm": 0.8088033639772654,
      "learning_rate": 8.499693611076067e-06,
      "loss": 0.1675,
      "step": 9448
    },
    {
      "epoch": 0.2756578563510123,
      "grad_norm": 0.8821848949372785,
      "learning_rate": 8.499356181608838e-06,
      "loss": 0.1531,
      "step": 9449
    },
    {
      "epoch": 0.27568702958165586,
      "grad_norm": 0.9874441023274517,
      "learning_rate": 8.499018720900192e-06,
      "loss": 0.1525,
      "step": 9450
    },
    {
      "epoch": 0.2757162028122994,
      "grad_norm": 0.7685083610959368,
      "learning_rate": 8.498681228953143e-06,
      "loss": 0.1653,
      "step": 9451
    },
    {
      "epoch": 0.27574537604294297,
      "grad_norm": 0.8866078506983858,
      "learning_rate": 8.498343705770702e-06,
      "loss": 0.1714,
      "step": 9452
    },
    {
      "epoch": 0.2757745492735866,
      "grad_norm": 0.9772436502810337,
      "learning_rate": 8.498006151355884e-06,
      "loss": 0.1776,
      "step": 9453
    },
    {
      "epoch": 0.27580372250423013,
      "grad_norm": 0.7397309353070088,
      "learning_rate": 8.497668565711702e-06,
      "loss": 0.1467,
      "step": 9454
    },
    {
      "epoch": 0.2758328957348737,
      "grad_norm": 1.0057359122021043,
      "learning_rate": 8.49733094884117e-06,
      "loss": 0.1493,
      "step": 9455
    },
    {
      "epoch": 0.27586206896551724,
      "grad_norm": 0.8780908547386509,
      "learning_rate": 8.496993300747302e-06,
      "loss": 0.1507,
      "step": 9456
    },
    {
      "epoch": 0.2758912421961608,
      "grad_norm": 0.9346196779829378,
      "learning_rate": 8.496655621433114e-06,
      "loss": 0.1378,
      "step": 9457
    },
    {
      "epoch": 0.27592041542680434,
      "grad_norm": 0.8424834810393838,
      "learning_rate": 8.496317910901619e-06,
      "loss": 0.1819,
      "step": 9458
    },
    {
      "epoch": 0.27594958865744795,
      "grad_norm": 0.8996576444638953,
      "learning_rate": 8.49598016915583e-06,
      "loss": 0.1397,
      "step": 9459
    },
    {
      "epoch": 0.2759787618880915,
      "grad_norm": 0.8663009147602474,
      "learning_rate": 8.495642396198767e-06,
      "loss": 0.1819,
      "step": 9460
    },
    {
      "epoch": 0.27600793511873506,
      "grad_norm": 0.8017268754536865,
      "learning_rate": 8.495304592033442e-06,
      "loss": 0.158,
      "step": 9461
    },
    {
      "epoch": 0.2760371083493786,
      "grad_norm": 0.7966559988575063,
      "learning_rate": 8.494966756662873e-06,
      "loss": 0.1544,
      "step": 9462
    },
    {
      "epoch": 0.27606628158002217,
      "grad_norm": 1.001757100621573,
      "learning_rate": 8.494628890090075e-06,
      "loss": 0.1584,
      "step": 9463
    },
    {
      "epoch": 0.2760954548106657,
      "grad_norm": 0.7751713493444756,
      "learning_rate": 8.494290992318063e-06,
      "loss": 0.1672,
      "step": 9464
    },
    {
      "epoch": 0.2761246280413093,
      "grad_norm": 0.6891467762655721,
      "learning_rate": 8.493953063349857e-06,
      "loss": 0.164,
      "step": 9465
    },
    {
      "epoch": 0.2761538012719529,
      "grad_norm": 0.7761812369257313,
      "learning_rate": 8.493615103188471e-06,
      "loss": 0.1585,
      "step": 9466
    },
    {
      "epoch": 0.27618297450259643,
      "grad_norm": 0.933854139528823,
      "learning_rate": 8.493277111836924e-06,
      "loss": 0.179,
      "step": 9467
    },
    {
      "epoch": 0.27621214773324,
      "grad_norm": 0.7236528190804553,
      "learning_rate": 8.492939089298233e-06,
      "loss": 0.1453,
      "step": 9468
    },
    {
      "epoch": 0.27624132096388354,
      "grad_norm": 0.7853520953354832,
      "learning_rate": 8.492601035575414e-06,
      "loss": 0.1698,
      "step": 9469
    },
    {
      "epoch": 0.2762704941945271,
      "grad_norm": 0.844320334934501,
      "learning_rate": 8.492262950671488e-06,
      "loss": 0.1439,
      "step": 9470
    },
    {
      "epoch": 0.27629966742517065,
      "grad_norm": 0.7426067971931045,
      "learning_rate": 8.491924834589472e-06,
      "loss": 0.14,
      "step": 9471
    },
    {
      "epoch": 0.2763288406558142,
      "grad_norm": 0.7512280644895644,
      "learning_rate": 8.491586687332385e-06,
      "loss": 0.1542,
      "step": 9472
    },
    {
      "epoch": 0.2763580138864578,
      "grad_norm": 0.7872685923164168,
      "learning_rate": 8.491248508903245e-06,
      "loss": 0.1692,
      "step": 9473
    },
    {
      "epoch": 0.27638718711710136,
      "grad_norm": 0.8758122269640476,
      "learning_rate": 8.490910299305073e-06,
      "loss": 0.2009,
      "step": 9474
    },
    {
      "epoch": 0.2764163603477449,
      "grad_norm": 0.9125559819894459,
      "learning_rate": 8.490572058540884e-06,
      "loss": 0.1255,
      "step": 9475
    },
    {
      "epoch": 0.27644553357838847,
      "grad_norm": 0.7770517431740911,
      "learning_rate": 8.490233786613703e-06,
      "loss": 0.1476,
      "step": 9476
    },
    {
      "epoch": 0.276474706809032,
      "grad_norm": 1.1274314402792818,
      "learning_rate": 8.489895483526548e-06,
      "loss": 0.1386,
      "step": 9477
    },
    {
      "epoch": 0.2765038800396756,
      "grad_norm": 1.9472034299099266,
      "learning_rate": 8.48955714928244e-06,
      "loss": 0.1423,
      "step": 9478
    },
    {
      "epoch": 0.27653305327031913,
      "grad_norm": 0.9428783171021343,
      "learning_rate": 8.489218783884399e-06,
      "loss": 0.1689,
      "step": 9479
    },
    {
      "epoch": 0.27656222650096274,
      "grad_norm": 1.1013294079397176,
      "learning_rate": 8.488880387335444e-06,
      "loss": 0.1655,
      "step": 9480
    },
    {
      "epoch": 0.2765913997316063,
      "grad_norm": 0.6931283174171184,
      "learning_rate": 8.488541959638599e-06,
      "loss": 0.1564,
      "step": 9481
    },
    {
      "epoch": 0.27662057296224984,
      "grad_norm": 1.1418914566979168,
      "learning_rate": 8.488203500796883e-06,
      "loss": 0.1539,
      "step": 9482
    },
    {
      "epoch": 0.2766497461928934,
      "grad_norm": 0.8650724531174553,
      "learning_rate": 8.48786501081332e-06,
      "loss": 0.1812,
      "step": 9483
    },
    {
      "epoch": 0.27667891942353695,
      "grad_norm": 0.8506173055726773,
      "learning_rate": 8.487526489690928e-06,
      "loss": 0.1628,
      "step": 9484
    },
    {
      "epoch": 0.2767080926541805,
      "grad_norm": 1.088489506144093,
      "learning_rate": 8.487187937432737e-06,
      "loss": 0.153,
      "step": 9485
    },
    {
      "epoch": 0.2767372658848241,
      "grad_norm": 0.9366524320763602,
      "learning_rate": 8.486849354041761e-06,
      "loss": 0.1483,
      "step": 9486
    },
    {
      "epoch": 0.27676643911546767,
      "grad_norm": 0.7796073498256102,
      "learning_rate": 8.486510739521027e-06,
      "loss": 0.1574,
      "step": 9487
    },
    {
      "epoch": 0.2767956123461112,
      "grad_norm": 0.806991179746802,
      "learning_rate": 8.486172093873557e-06,
      "loss": 0.1625,
      "step": 9488
    },
    {
      "epoch": 0.2768247855767548,
      "grad_norm": 0.8683205123133251,
      "learning_rate": 8.485833417102375e-06,
      "loss": 0.1441,
      "step": 9489
    },
    {
      "epoch": 0.2768539588073983,
      "grad_norm": 0.8616785742222477,
      "learning_rate": 8.485494709210506e-06,
      "loss": 0.1741,
      "step": 9490
    },
    {
      "epoch": 0.2768831320380419,
      "grad_norm": 0.8220876158014837,
      "learning_rate": 8.485155970200972e-06,
      "loss": 0.17,
      "step": 9491
    },
    {
      "epoch": 0.27691230526868543,
      "grad_norm": 0.7495882026570969,
      "learning_rate": 8.484817200076796e-06,
      "loss": 0.1583,
      "step": 9492
    },
    {
      "epoch": 0.27694147849932904,
      "grad_norm": 1.0705677075094195,
      "learning_rate": 8.484478398841003e-06,
      "loss": 0.1576,
      "step": 9493
    },
    {
      "epoch": 0.2769706517299726,
      "grad_norm": 0.5928858273713736,
      "learning_rate": 8.48413956649662e-06,
      "loss": 0.1551,
      "step": 9494
    },
    {
      "epoch": 0.27699982496061615,
      "grad_norm": 1.0627089887650565,
      "learning_rate": 8.483800703046672e-06,
      "loss": 0.1647,
      "step": 9495
    },
    {
      "epoch": 0.2770289981912597,
      "grad_norm": 0.7247697188724354,
      "learning_rate": 8.483461808494182e-06,
      "loss": 0.1297,
      "step": 9496
    },
    {
      "epoch": 0.27705817142190325,
      "grad_norm": 0.8934715469223729,
      "learning_rate": 8.483122882842177e-06,
      "loss": 0.1694,
      "step": 9497
    },
    {
      "epoch": 0.2770873446525468,
      "grad_norm": 0.9746300274126058,
      "learning_rate": 8.48278392609368e-06,
      "loss": 0.1727,
      "step": 9498
    },
    {
      "epoch": 0.27711651788319036,
      "grad_norm": 0.8096267207398115,
      "learning_rate": 8.482444938251722e-06,
      "loss": 0.146,
      "step": 9499
    },
    {
      "epoch": 0.27714569111383397,
      "grad_norm": 0.8776300149543106,
      "learning_rate": 8.482105919319325e-06,
      "loss": 0.1532,
      "step": 9500
    },
    {
      "epoch": 0.2771748643444775,
      "grad_norm": 0.9293347863312186,
      "learning_rate": 8.48176686929952e-06,
      "loss": 0.1595,
      "step": 9501
    },
    {
      "epoch": 0.2772040375751211,
      "grad_norm": 0.9528707708136305,
      "learning_rate": 8.481427788195329e-06,
      "loss": 0.1623,
      "step": 9502
    },
    {
      "epoch": 0.27723321080576463,
      "grad_norm": 0.9223367460630922,
      "learning_rate": 8.481088676009783e-06,
      "loss": 0.1412,
      "step": 9503
    },
    {
      "epoch": 0.2772623840364082,
      "grad_norm": 0.791567245986272,
      "learning_rate": 8.48074953274591e-06,
      "loss": 0.1523,
      "step": 9504
    },
    {
      "epoch": 0.27729155726705174,
      "grad_norm": 1.3407727074347742,
      "learning_rate": 8.480410358406735e-06,
      "loss": 0.1557,
      "step": 9505
    },
    {
      "epoch": 0.2773207304976953,
      "grad_norm": 0.706668075550768,
      "learning_rate": 8.480071152995285e-06,
      "loss": 0.1551,
      "step": 9506
    },
    {
      "epoch": 0.2773499037283389,
      "grad_norm": 0.8734217382710632,
      "learning_rate": 8.479731916514592e-06,
      "loss": 0.1492,
      "step": 9507
    },
    {
      "epoch": 0.27737907695898245,
      "grad_norm": 0.7561871450179262,
      "learning_rate": 8.479392648967684e-06,
      "loss": 0.1482,
      "step": 9508
    },
    {
      "epoch": 0.277408250189626,
      "grad_norm": 0.9958435262815148,
      "learning_rate": 8.479053350357587e-06,
      "loss": 0.1515,
      "step": 9509
    },
    {
      "epoch": 0.27743742342026956,
      "grad_norm": 1.005507379061439,
      "learning_rate": 8.478714020687334e-06,
      "loss": 0.1683,
      "step": 9510
    },
    {
      "epoch": 0.2774665966509131,
      "grad_norm": 0.8848887712016784,
      "learning_rate": 8.478374659959953e-06,
      "loss": 0.1549,
      "step": 9511
    },
    {
      "epoch": 0.27749576988155666,
      "grad_norm": 0.9422045957305167,
      "learning_rate": 8.478035268178473e-06,
      "loss": 0.1711,
      "step": 9512
    },
    {
      "epoch": 0.2775249431122002,
      "grad_norm": 0.7149755989431283,
      "learning_rate": 8.477695845345922e-06,
      "loss": 0.1752,
      "step": 9513
    },
    {
      "epoch": 0.2775541163428438,
      "grad_norm": 0.8667007312392415,
      "learning_rate": 8.477356391465336e-06,
      "loss": 0.1247,
      "step": 9514
    },
    {
      "epoch": 0.2775832895734874,
      "grad_norm": 0.9587353462019751,
      "learning_rate": 8.477016906539742e-06,
      "loss": 0.1755,
      "step": 9515
    },
    {
      "epoch": 0.27761246280413093,
      "grad_norm": 0.8329582723699501,
      "learning_rate": 8.476677390572167e-06,
      "loss": 0.171,
      "step": 9516
    },
    {
      "epoch": 0.2776416360347745,
      "grad_norm": 0.9834669720070736,
      "learning_rate": 8.47633784356565e-06,
      "loss": 0.1534,
      "step": 9517
    },
    {
      "epoch": 0.27767080926541804,
      "grad_norm": 0.6999872588764519,
      "learning_rate": 8.475998265523219e-06,
      "loss": 0.1417,
      "step": 9518
    },
    {
      "epoch": 0.2776999824960616,
      "grad_norm": 0.8368581948785472,
      "learning_rate": 8.475658656447903e-06,
      "loss": 0.1643,
      "step": 9519
    },
    {
      "epoch": 0.2777291557267052,
      "grad_norm": 0.8936896290158791,
      "learning_rate": 8.475319016342739e-06,
      "loss": 0.174,
      "step": 9520
    },
    {
      "epoch": 0.27775832895734875,
      "grad_norm": 0.9997248846685993,
      "learning_rate": 8.474979345210753e-06,
      "loss": 0.1411,
      "step": 9521
    },
    {
      "epoch": 0.2777875021879923,
      "grad_norm": 0.7923848027596828,
      "learning_rate": 8.474639643054983e-06,
      "loss": 0.1643,
      "step": 9522
    },
    {
      "epoch": 0.27781667541863586,
      "grad_norm": 0.7807511127005486,
      "learning_rate": 8.47429990987846e-06,
      "loss": 0.1337,
      "step": 9523
    },
    {
      "epoch": 0.2778458486492794,
      "grad_norm": 0.8125141038135897,
      "learning_rate": 8.473960145684217e-06,
      "loss": 0.1439,
      "step": 9524
    },
    {
      "epoch": 0.27787502187992297,
      "grad_norm": 0.8156162963513861,
      "learning_rate": 8.473620350475284e-06,
      "loss": 0.132,
      "step": 9525
    },
    {
      "epoch": 0.2779041951105665,
      "grad_norm": 0.6806743244156654,
      "learning_rate": 8.473280524254701e-06,
      "loss": 0.1529,
      "step": 9526
    },
    {
      "epoch": 0.27793336834121013,
      "grad_norm": 0.7297864166420638,
      "learning_rate": 8.472940667025497e-06,
      "loss": 0.1428,
      "step": 9527
    },
    {
      "epoch": 0.2779625415718537,
      "grad_norm": 0.91214494122196,
      "learning_rate": 8.472600778790709e-06,
      "loss": 0.1531,
      "step": 9528
    },
    {
      "epoch": 0.27799171480249724,
      "grad_norm": 0.8338650660903385,
      "learning_rate": 8.472260859553369e-06,
      "loss": 0.1624,
      "step": 9529
    },
    {
      "epoch": 0.2780208880331408,
      "grad_norm": 0.8308162285551156,
      "learning_rate": 8.471920909316514e-06,
      "loss": 0.1509,
      "step": 9530
    },
    {
      "epoch": 0.27805006126378434,
      "grad_norm": 0.8998302888977185,
      "learning_rate": 8.47158092808318e-06,
      "loss": 0.1563,
      "step": 9531
    },
    {
      "epoch": 0.2780792344944279,
      "grad_norm": 0.9354212032607615,
      "learning_rate": 8.471240915856396e-06,
      "loss": 0.1407,
      "step": 9532
    },
    {
      "epoch": 0.27810840772507145,
      "grad_norm": 0.7051327334938483,
      "learning_rate": 8.470900872639203e-06,
      "loss": 0.1387,
      "step": 9533
    },
    {
      "epoch": 0.27813758095571506,
      "grad_norm": 0.8094406200554851,
      "learning_rate": 8.470560798434636e-06,
      "loss": 0.174,
      "step": 9534
    },
    {
      "epoch": 0.2781667541863586,
      "grad_norm": 0.9467595326622934,
      "learning_rate": 8.47022069324573e-06,
      "loss": 0.175,
      "step": 9535
    },
    {
      "epoch": 0.27819592741700216,
      "grad_norm": 0.8008647499990702,
      "learning_rate": 8.469880557075525e-06,
      "loss": 0.174,
      "step": 9536
    },
    {
      "epoch": 0.2782251006476457,
      "grad_norm": 0.9201909051532778,
      "learning_rate": 8.469540389927052e-06,
      "loss": 0.1502,
      "step": 9537
    },
    {
      "epoch": 0.27825427387828927,
      "grad_norm": 0.7352171064832371,
      "learning_rate": 8.46920019180335e-06,
      "loss": 0.1471,
      "step": 9538
    },
    {
      "epoch": 0.2782834471089328,
      "grad_norm": 0.9244963989311303,
      "learning_rate": 8.468859962707459e-06,
      "loss": 0.1441,
      "step": 9539
    },
    {
      "epoch": 0.2783126203395764,
      "grad_norm": 0.839838171042912,
      "learning_rate": 8.468519702642413e-06,
      "loss": 0.152,
      "step": 9540
    },
    {
      "epoch": 0.27834179357022,
      "grad_norm": 0.7189624686847728,
      "learning_rate": 8.468179411611252e-06,
      "loss": 0.1429,
      "step": 9541
    },
    {
      "epoch": 0.27837096680086354,
      "grad_norm": 0.8024451840621278,
      "learning_rate": 8.467839089617011e-06,
      "loss": 0.1433,
      "step": 9542
    },
    {
      "epoch": 0.2784001400315071,
      "grad_norm": 0.9133265010862672,
      "learning_rate": 8.467498736662732e-06,
      "loss": 0.1524,
      "step": 9543
    },
    {
      "epoch": 0.27842931326215065,
      "grad_norm": 0.7505854001962619,
      "learning_rate": 8.467158352751453e-06,
      "loss": 0.1634,
      "step": 9544
    },
    {
      "epoch": 0.2784584864927942,
      "grad_norm": 0.8928417787444326,
      "learning_rate": 8.466817937886211e-06,
      "loss": 0.1807,
      "step": 9545
    },
    {
      "epoch": 0.27848765972343775,
      "grad_norm": 1.0277759705271292,
      "learning_rate": 8.466477492070046e-06,
      "loss": 0.1535,
      "step": 9546
    },
    {
      "epoch": 0.27851683295408136,
      "grad_norm": 0.7637001525940128,
      "learning_rate": 8.466137015305997e-06,
      "loss": 0.1444,
      "step": 9547
    },
    {
      "epoch": 0.2785460061847249,
      "grad_norm": 0.7501390401123026,
      "learning_rate": 8.465796507597106e-06,
      "loss": 0.1741,
      "step": 9548
    },
    {
      "epoch": 0.27857517941536847,
      "grad_norm": 0.9450607457332094,
      "learning_rate": 8.465455968946409e-06,
      "loss": 0.1468,
      "step": 9549
    },
    {
      "epoch": 0.278604352646012,
      "grad_norm": 0.8179128995100446,
      "learning_rate": 8.465115399356948e-06,
      "loss": 0.1581,
      "step": 9550
    },
    {
      "epoch": 0.2786335258766556,
      "grad_norm": 0.8206187225207779,
      "learning_rate": 8.464774798831766e-06,
      "loss": 0.1693,
      "step": 9551
    },
    {
      "epoch": 0.2786626991072991,
      "grad_norm": 1.0282827859906551,
      "learning_rate": 8.464434167373901e-06,
      "loss": 0.1726,
      "step": 9552
    },
    {
      "epoch": 0.2786918723379427,
      "grad_norm": 0.8731238924757797,
      "learning_rate": 8.464093504986395e-06,
      "loss": 0.1881,
      "step": 9553
    },
    {
      "epoch": 0.2787210455685863,
      "grad_norm": 0.9374981958347302,
      "learning_rate": 8.463752811672289e-06,
      "loss": 0.1401,
      "step": 9554
    },
    {
      "epoch": 0.27875021879922984,
      "grad_norm": 0.7063176768742483,
      "learning_rate": 8.463412087434624e-06,
      "loss": 0.1391,
      "step": 9555
    },
    {
      "epoch": 0.2787793920298734,
      "grad_norm": 0.8843815844052086,
      "learning_rate": 8.463071332276442e-06,
      "loss": 0.1707,
      "step": 9556
    },
    {
      "epoch": 0.27880856526051695,
      "grad_norm": 1.0458012025569847,
      "learning_rate": 8.462730546200788e-06,
      "loss": 0.1432,
      "step": 9557
    },
    {
      "epoch": 0.2788377384911605,
      "grad_norm": 0.8036876244199916,
      "learning_rate": 8.4623897292107e-06,
      "loss": 0.1602,
      "step": 9558
    },
    {
      "epoch": 0.27886691172180406,
      "grad_norm": 0.9752595131467424,
      "learning_rate": 8.462048881309226e-06,
      "loss": 0.1565,
      "step": 9559
    },
    {
      "epoch": 0.2788960849524476,
      "grad_norm": 1.0282166667261705,
      "learning_rate": 8.461708002499405e-06,
      "loss": 0.1741,
      "step": 9560
    },
    {
      "epoch": 0.2789252581830912,
      "grad_norm": 0.8807169214872331,
      "learning_rate": 8.46136709278428e-06,
      "loss": 0.1444,
      "step": 9561
    },
    {
      "epoch": 0.27895443141373477,
      "grad_norm": 0.8867295328036336,
      "learning_rate": 8.461026152166896e-06,
      "loss": 0.1609,
      "step": 9562
    },
    {
      "epoch": 0.2789836046443783,
      "grad_norm": 1.0239980761347185,
      "learning_rate": 8.460685180650297e-06,
      "loss": 0.1756,
      "step": 9563
    },
    {
      "epoch": 0.2790127778750219,
      "grad_norm": 0.8283350317446394,
      "learning_rate": 8.460344178237528e-06,
      "loss": 0.133,
      "step": 9564
    },
    {
      "epoch": 0.27904195110566543,
      "grad_norm": 0.9790011158432254,
      "learning_rate": 8.460003144931632e-06,
      "loss": 0.1457,
      "step": 9565
    },
    {
      "epoch": 0.279071124336309,
      "grad_norm": 0.9965715972386453,
      "learning_rate": 8.459662080735653e-06,
      "loss": 0.1542,
      "step": 9566
    },
    {
      "epoch": 0.27910029756695254,
      "grad_norm": 0.7819092938540751,
      "learning_rate": 8.459320985652635e-06,
      "loss": 0.1477,
      "step": 9567
    },
    {
      "epoch": 0.27912947079759615,
      "grad_norm": 0.8493448943608398,
      "learning_rate": 8.458979859685628e-06,
      "loss": 0.1371,
      "step": 9568
    },
    {
      "epoch": 0.2791586440282397,
      "grad_norm": 1.0223759261181469,
      "learning_rate": 8.458638702837673e-06,
      "loss": 0.1449,
      "step": 9569
    },
    {
      "epoch": 0.27918781725888325,
      "grad_norm": 0.9055193244735283,
      "learning_rate": 8.45829751511182e-06,
      "loss": 0.144,
      "step": 9570
    },
    {
      "epoch": 0.2792169904895268,
      "grad_norm": 0.9511476417475678,
      "learning_rate": 8.457956296511109e-06,
      "loss": 0.1478,
      "step": 9571
    },
    {
      "epoch": 0.27924616372017036,
      "grad_norm": 0.9198111942266042,
      "learning_rate": 8.457615047038592e-06,
      "loss": 0.1505,
      "step": 9572
    },
    {
      "epoch": 0.2792753369508139,
      "grad_norm": 0.698641892426292,
      "learning_rate": 8.45727376669731e-06,
      "loss": 0.1462,
      "step": 9573
    },
    {
      "epoch": 0.2793045101814575,
      "grad_norm": 0.9362354068875892,
      "learning_rate": 8.456932455490317e-06,
      "loss": 0.136,
      "step": 9574
    },
    {
      "epoch": 0.2793336834121011,
      "grad_norm": 0.982646526197458,
      "learning_rate": 8.456591113420656e-06,
      "loss": 0.161,
      "step": 9575
    },
    {
      "epoch": 0.27936285664274463,
      "grad_norm": 0.7905049599781792,
      "learning_rate": 8.45624974049137e-06,
      "loss": 0.1616,
      "step": 9576
    },
    {
      "epoch": 0.2793920298733882,
      "grad_norm": 0.7745858645449902,
      "learning_rate": 8.455908336705515e-06,
      "loss": 0.1476,
      "step": 9577
    },
    {
      "epoch": 0.27942120310403173,
      "grad_norm": 0.941160734241985,
      "learning_rate": 8.455566902066138e-06,
      "loss": 0.1436,
      "step": 9578
    },
    {
      "epoch": 0.2794503763346753,
      "grad_norm": 0.9515311332832856,
      "learning_rate": 8.45522543657628e-06,
      "loss": 0.1519,
      "step": 9579
    },
    {
      "epoch": 0.27947954956531884,
      "grad_norm": 0.7975605752727278,
      "learning_rate": 8.454883940238995e-06,
      "loss": 0.1649,
      "step": 9580
    },
    {
      "epoch": 0.27950872279596245,
      "grad_norm": 0.9336212618189144,
      "learning_rate": 8.454542413057335e-06,
      "loss": 0.1767,
      "step": 9581
    },
    {
      "epoch": 0.279537896026606,
      "grad_norm": 1.0130532601655777,
      "learning_rate": 8.45420085503434e-06,
      "loss": 0.1596,
      "step": 9582
    },
    {
      "epoch": 0.27956706925724956,
      "grad_norm": 1.213368749096683,
      "learning_rate": 8.453859266173065e-06,
      "loss": 0.1501,
      "step": 9583
    },
    {
      "epoch": 0.2795962424878931,
      "grad_norm": 0.8912051490678606,
      "learning_rate": 8.453517646476561e-06,
      "loss": 0.1771,
      "step": 9584
    },
    {
      "epoch": 0.27962541571853666,
      "grad_norm": 1.0348282245629992,
      "learning_rate": 8.453175995947876e-06,
      "loss": 0.1435,
      "step": 9585
    },
    {
      "epoch": 0.2796545889491802,
      "grad_norm": 0.9865785864347076,
      "learning_rate": 8.452834314590059e-06,
      "loss": 0.1538,
      "step": 9586
    },
    {
      "epoch": 0.27968376217982377,
      "grad_norm": 1.4387595409228264,
      "learning_rate": 8.452492602406162e-06,
      "loss": 0.1444,
      "step": 9587
    },
    {
      "epoch": 0.2797129354104674,
      "grad_norm": 0.9294671559167269,
      "learning_rate": 8.452150859399234e-06,
      "loss": 0.1581,
      "step": 9588
    },
    {
      "epoch": 0.27974210864111093,
      "grad_norm": 0.9480256468071755,
      "learning_rate": 8.451809085572327e-06,
      "loss": 0.1433,
      "step": 9589
    },
    {
      "epoch": 0.2797712818717545,
      "grad_norm": 0.8572738889356397,
      "learning_rate": 8.451467280928494e-06,
      "loss": 0.1634,
      "step": 9590
    },
    {
      "epoch": 0.27980045510239804,
      "grad_norm": 0.8235818251921146,
      "learning_rate": 8.451125445470784e-06,
      "loss": 0.1709,
      "step": 9591
    },
    {
      "epoch": 0.2798296283330416,
      "grad_norm": 0.9078272402439236,
      "learning_rate": 8.450783579202251e-06,
      "loss": 0.1835,
      "step": 9592
    },
    {
      "epoch": 0.27985880156368514,
      "grad_norm": 0.8456187301304922,
      "learning_rate": 8.450441682125944e-06,
      "loss": 0.1569,
      "step": 9593
    },
    {
      "epoch": 0.2798879747943287,
      "grad_norm": 0.8309217526061093,
      "learning_rate": 8.45009975424492e-06,
      "loss": 0.176,
      "step": 9594
    },
    {
      "epoch": 0.2799171480249723,
      "grad_norm": 0.9969650659314814,
      "learning_rate": 8.449757795562229e-06,
      "loss": 0.1537,
      "step": 9595
    },
    {
      "epoch": 0.27994632125561586,
      "grad_norm": 0.6570499741744189,
      "learning_rate": 8.44941580608092e-06,
      "loss": 0.1211,
      "step": 9596
    },
    {
      "epoch": 0.2799754944862594,
      "grad_norm": 0.9583072138632722,
      "learning_rate": 8.449073785804054e-06,
      "loss": 0.1848,
      "step": 9597
    },
    {
      "epoch": 0.28000466771690297,
      "grad_norm": 0.9519186905717256,
      "learning_rate": 8.448731734734678e-06,
      "loss": 0.1884,
      "step": 9598
    },
    {
      "epoch": 0.2800338409475465,
      "grad_norm": 0.8923949258864584,
      "learning_rate": 8.448389652875852e-06,
      "loss": 0.1632,
      "step": 9599
    },
    {
      "epoch": 0.2800630141781901,
      "grad_norm": 1.0578728689403207,
      "learning_rate": 8.448047540230624e-06,
      "loss": 0.1603,
      "step": 9600
    },
    {
      "epoch": 0.2800921874088337,
      "grad_norm": 0.7595943667885049,
      "learning_rate": 8.447705396802051e-06,
      "loss": 0.1552,
      "step": 9601
    },
    {
      "epoch": 0.28012136063947723,
      "grad_norm": 0.7514324199529084,
      "learning_rate": 8.447363222593186e-06,
      "loss": 0.1491,
      "step": 9602
    },
    {
      "epoch": 0.2801505338701208,
      "grad_norm": 0.978949857641314,
      "learning_rate": 8.447021017607087e-06,
      "loss": 0.1555,
      "step": 9603
    },
    {
      "epoch": 0.28017970710076434,
      "grad_norm": 0.8809412748670732,
      "learning_rate": 8.446678781846806e-06,
      "loss": 0.1505,
      "step": 9604
    },
    {
      "epoch": 0.2802088803314079,
      "grad_norm": 0.7514678066878479,
      "learning_rate": 8.4463365153154e-06,
      "loss": 0.1383,
      "step": 9605
    },
    {
      "epoch": 0.28023805356205145,
      "grad_norm": 0.7846690672438622,
      "learning_rate": 8.445994218015923e-06,
      "loss": 0.1549,
      "step": 9606
    },
    {
      "epoch": 0.280267226792695,
      "grad_norm": 0.9113320114314601,
      "learning_rate": 8.445651889951435e-06,
      "loss": 0.142,
      "step": 9607
    },
    {
      "epoch": 0.2802964000233386,
      "grad_norm": 0.7934733096175987,
      "learning_rate": 8.445309531124988e-06,
      "loss": 0.1468,
      "step": 9608
    },
    {
      "epoch": 0.28032557325398216,
      "grad_norm": 0.7543301037519203,
      "learning_rate": 8.44496714153964e-06,
      "loss": 0.1757,
      "step": 9609
    },
    {
      "epoch": 0.2803547464846257,
      "grad_norm": 0.8455391980296363,
      "learning_rate": 8.444624721198447e-06,
      "loss": 0.1423,
      "step": 9610
    },
    {
      "epoch": 0.28038391971526927,
      "grad_norm": 0.9840328904892477,
      "learning_rate": 8.444282270104467e-06,
      "loss": 0.1475,
      "step": 9611
    },
    {
      "epoch": 0.2804130929459128,
      "grad_norm": 0.7572645399233074,
      "learning_rate": 8.443939788260757e-06,
      "loss": 0.1761,
      "step": 9612
    },
    {
      "epoch": 0.2804422661765564,
      "grad_norm": 1.0025945055821288,
      "learning_rate": 8.443597275670376e-06,
      "loss": 0.1563,
      "step": 9613
    },
    {
      "epoch": 0.28047143940719993,
      "grad_norm": 0.9968358292689676,
      "learning_rate": 8.44325473233638e-06,
      "loss": 0.1589,
      "step": 9614
    },
    {
      "epoch": 0.28050061263784354,
      "grad_norm": 0.8747427905731093,
      "learning_rate": 8.442912158261828e-06,
      "loss": 0.1516,
      "step": 9615
    },
    {
      "epoch": 0.2805297858684871,
      "grad_norm": 0.8879882495231916,
      "learning_rate": 8.442569553449777e-06,
      "loss": 0.1297,
      "step": 9616
    },
    {
      "epoch": 0.28055895909913064,
      "grad_norm": 0.8453425011322785,
      "learning_rate": 8.442226917903287e-06,
      "loss": 0.1619,
      "step": 9617
    },
    {
      "epoch": 0.2805881323297742,
      "grad_norm": 0.9752282385155893,
      "learning_rate": 8.441884251625419e-06,
      "loss": 0.1631,
      "step": 9618
    },
    {
      "epoch": 0.28061730556041775,
      "grad_norm": 1.0441464487675278,
      "learning_rate": 8.441541554619228e-06,
      "loss": 0.1432,
      "step": 9619
    },
    {
      "epoch": 0.2806464787910613,
      "grad_norm": 0.838889876502053,
      "learning_rate": 8.441198826887776e-06,
      "loss": 0.1543,
      "step": 9620
    },
    {
      "epoch": 0.28067565202170486,
      "grad_norm": 0.9042981784842837,
      "learning_rate": 8.440856068434122e-06,
      "loss": 0.1393,
      "step": 9621
    },
    {
      "epoch": 0.28070482525234847,
      "grad_norm": 0.7590671300959784,
      "learning_rate": 8.440513279261327e-06,
      "loss": 0.1466,
      "step": 9622
    },
    {
      "epoch": 0.280733998482992,
      "grad_norm": 0.786209439269673,
      "learning_rate": 8.44017045937245e-06,
      "loss": 0.1441,
      "step": 9623
    },
    {
      "epoch": 0.2807631717136356,
      "grad_norm": 0.8337456741412318,
      "learning_rate": 8.439827608770552e-06,
      "loss": 0.1631,
      "step": 9624
    },
    {
      "epoch": 0.2807923449442791,
      "grad_norm": 0.699361095548608,
      "learning_rate": 8.439484727458696e-06,
      "loss": 0.1478,
      "step": 9625
    },
    {
      "epoch": 0.2808215181749227,
      "grad_norm": 0.7906992172976698,
      "learning_rate": 8.43914181543994e-06,
      "loss": 0.1605,
      "step": 9626
    },
    {
      "epoch": 0.28085069140556623,
      "grad_norm": 0.716958191351726,
      "learning_rate": 8.438798872717349e-06,
      "loss": 0.1668,
      "step": 9627
    },
    {
      "epoch": 0.2808798646362098,
      "grad_norm": 0.9336564764011152,
      "learning_rate": 8.43845589929398e-06,
      "loss": 0.1394,
      "step": 9628
    },
    {
      "epoch": 0.2809090378668534,
      "grad_norm": 0.921589429799974,
      "learning_rate": 8.438112895172899e-06,
      "loss": 0.1426,
      "step": 9629
    },
    {
      "epoch": 0.28093821109749695,
      "grad_norm": 0.722214393748142,
      "learning_rate": 8.437769860357166e-06,
      "loss": 0.1551,
      "step": 9630
    },
    {
      "epoch": 0.2809673843281405,
      "grad_norm": 0.7821484680067312,
      "learning_rate": 8.437426794849845e-06,
      "loss": 0.1826,
      "step": 9631
    },
    {
      "epoch": 0.28099655755878405,
      "grad_norm": 0.8398165397558471,
      "learning_rate": 8.437083698653998e-06,
      "loss": 0.1399,
      "step": 9632
    },
    {
      "epoch": 0.2810257307894276,
      "grad_norm": 1.0337390383108171,
      "learning_rate": 8.436740571772689e-06,
      "loss": 0.1588,
      "step": 9633
    },
    {
      "epoch": 0.28105490402007116,
      "grad_norm": 0.7992678715725394,
      "learning_rate": 8.436397414208979e-06,
      "loss": 0.1614,
      "step": 9634
    },
    {
      "epoch": 0.28108407725071477,
      "grad_norm": 0.7817699595069072,
      "learning_rate": 8.436054225965933e-06,
      "loss": 0.1623,
      "step": 9635
    },
    {
      "epoch": 0.2811132504813583,
      "grad_norm": 0.822464347563449,
      "learning_rate": 8.435711007046616e-06,
      "loss": 0.147,
      "step": 9636
    },
    {
      "epoch": 0.2811424237120019,
      "grad_norm": 0.8761331626179581,
      "learning_rate": 8.435367757454092e-06,
      "loss": 0.15,
      "step": 9637
    },
    {
      "epoch": 0.28117159694264543,
      "grad_norm": 0.8090730886916009,
      "learning_rate": 8.435024477191426e-06,
      "loss": 0.1492,
      "step": 9638
    },
    {
      "epoch": 0.281200770173289,
      "grad_norm": 0.832891485574726,
      "learning_rate": 8.434681166261679e-06,
      "loss": 0.1448,
      "step": 9639
    },
    {
      "epoch": 0.28122994340393254,
      "grad_norm": 0.8954288258009087,
      "learning_rate": 8.434337824667918e-06,
      "loss": 0.1782,
      "step": 9640
    },
    {
      "epoch": 0.2812591166345761,
      "grad_norm": 0.7956031882393465,
      "learning_rate": 8.43399445241321e-06,
      "loss": 0.1666,
      "step": 9641
    },
    {
      "epoch": 0.2812882898652197,
      "grad_norm": 1.387686999700781,
      "learning_rate": 8.433651049500619e-06,
      "loss": 0.1595,
      "step": 9642
    },
    {
      "epoch": 0.28131746309586325,
      "grad_norm": 0.804597063386392,
      "learning_rate": 8.433307615933211e-06,
      "loss": 0.1454,
      "step": 9643
    },
    {
      "epoch": 0.2813466363265068,
      "grad_norm": 0.7854458337786944,
      "learning_rate": 8.432964151714052e-06,
      "loss": 0.1516,
      "step": 9644
    },
    {
      "epoch": 0.28137580955715036,
      "grad_norm": 0.8839624480045271,
      "learning_rate": 8.43262065684621e-06,
      "loss": 0.193,
      "step": 9645
    },
    {
      "epoch": 0.2814049827877939,
      "grad_norm": 0.7793352517051193,
      "learning_rate": 8.432277131332749e-06,
      "loss": 0.1507,
      "step": 9646
    },
    {
      "epoch": 0.28143415601843746,
      "grad_norm": 0.9059971897460766,
      "learning_rate": 8.431933575176737e-06,
      "loss": 0.1532,
      "step": 9647
    },
    {
      "epoch": 0.281463329249081,
      "grad_norm": 0.9085606820922222,
      "learning_rate": 8.43158998838124e-06,
      "loss": 0.1561,
      "step": 9648
    },
    {
      "epoch": 0.2814925024797246,
      "grad_norm": 1.09575395504824,
      "learning_rate": 8.431246370949328e-06,
      "loss": 0.1497,
      "step": 9649
    },
    {
      "epoch": 0.2815216757103682,
      "grad_norm": 0.7822475216933066,
      "learning_rate": 8.430902722884068e-06,
      "loss": 0.1655,
      "step": 9650
    },
    {
      "epoch": 0.28155084894101173,
      "grad_norm": 0.971521671599174,
      "learning_rate": 8.43055904418853e-06,
      "loss": 0.1612,
      "step": 9651
    },
    {
      "epoch": 0.2815800221716553,
      "grad_norm": 0.7586145780635484,
      "learning_rate": 8.430215334865775e-06,
      "loss": 0.152,
      "step": 9652
    },
    {
      "epoch": 0.28160919540229884,
      "grad_norm": 0.8031869234102891,
      "learning_rate": 8.429871594918879e-06,
      "loss": 0.1523,
      "step": 9653
    },
    {
      "epoch": 0.2816383686329424,
      "grad_norm": 0.751065033773529,
      "learning_rate": 8.429527824350908e-06,
      "loss": 0.1598,
      "step": 9654
    },
    {
      "epoch": 0.28166754186358595,
      "grad_norm": 0.6919716421043798,
      "learning_rate": 8.429184023164932e-06,
      "loss": 0.1487,
      "step": 9655
    },
    {
      "epoch": 0.28169671509422956,
      "grad_norm": 1.2130296148788566,
      "learning_rate": 8.428840191364017e-06,
      "loss": 0.1484,
      "step": 9656
    },
    {
      "epoch": 0.2817258883248731,
      "grad_norm": 0.8612543317526583,
      "learning_rate": 8.428496328951237e-06,
      "loss": 0.1561,
      "step": 9657
    },
    {
      "epoch": 0.28175506155551666,
      "grad_norm": 0.8319945336881983,
      "learning_rate": 8.42815243592966e-06,
      "loss": 0.1498,
      "step": 9658
    },
    {
      "epoch": 0.2817842347861602,
      "grad_norm": 0.8900715989334957,
      "learning_rate": 8.427808512302358e-06,
      "loss": 0.1557,
      "step": 9659
    },
    {
      "epoch": 0.28181340801680377,
      "grad_norm": 0.9209050378328874,
      "learning_rate": 8.427464558072397e-06,
      "loss": 0.1565,
      "step": 9660
    },
    {
      "epoch": 0.2818425812474473,
      "grad_norm": 1.0024738651345637,
      "learning_rate": 8.427120573242853e-06,
      "loss": 0.1735,
      "step": 9661
    },
    {
      "epoch": 0.28187175447809093,
      "grad_norm": 0.904717257631177,
      "learning_rate": 8.426776557816793e-06,
      "loss": 0.1865,
      "step": 9662
    },
    {
      "epoch": 0.2819009277087345,
      "grad_norm": 0.9535330914955639,
      "learning_rate": 8.426432511797292e-06,
      "loss": 0.1484,
      "step": 9663
    },
    {
      "epoch": 0.28193010093937804,
      "grad_norm": 0.83583328816481,
      "learning_rate": 8.426088435187418e-06,
      "loss": 0.1493,
      "step": 9664
    },
    {
      "epoch": 0.2819592741700216,
      "grad_norm": 0.9611065676349803,
      "learning_rate": 8.425744327990244e-06,
      "loss": 0.163,
      "step": 9665
    },
    {
      "epoch": 0.28198844740066514,
      "grad_norm": 0.9520432319743514,
      "learning_rate": 8.425400190208842e-06,
      "loss": 0.1459,
      "step": 9666
    },
    {
      "epoch": 0.2820176206313087,
      "grad_norm": 0.891136038748808,
      "learning_rate": 8.425056021846285e-06,
      "loss": 0.1744,
      "step": 9667
    },
    {
      "epoch": 0.28204679386195225,
      "grad_norm": 0.8104828816106517,
      "learning_rate": 8.424711822905647e-06,
      "loss": 0.1303,
      "step": 9668
    },
    {
      "epoch": 0.28207596709259586,
      "grad_norm": 0.6664588376329665,
      "learning_rate": 8.42436759339e-06,
      "loss": 0.1399,
      "step": 9669
    },
    {
      "epoch": 0.2821051403232394,
      "grad_norm": 0.8861763069231229,
      "learning_rate": 8.424023333302414e-06,
      "loss": 0.1526,
      "step": 9670
    },
    {
      "epoch": 0.28213431355388296,
      "grad_norm": 0.7632870412812516,
      "learning_rate": 8.423679042645967e-06,
      "loss": 0.1447,
      "step": 9671
    },
    {
      "epoch": 0.2821634867845265,
      "grad_norm": 0.7678346797721908,
      "learning_rate": 8.423334721423729e-06,
      "loss": 0.1472,
      "step": 9672
    },
    {
      "epoch": 0.28219266001517007,
      "grad_norm": 0.8287153773092156,
      "learning_rate": 8.422990369638778e-06,
      "loss": 0.1729,
      "step": 9673
    },
    {
      "epoch": 0.2822218332458136,
      "grad_norm": 0.7733026353657988,
      "learning_rate": 8.422645987294184e-06,
      "loss": 0.1572,
      "step": 9674
    },
    {
      "epoch": 0.2822510064764572,
      "grad_norm": 0.9097808971524729,
      "learning_rate": 8.422301574393025e-06,
      "loss": 0.1423,
      "step": 9675
    },
    {
      "epoch": 0.2822801797071008,
      "grad_norm": 0.821824437294448,
      "learning_rate": 8.421957130938374e-06,
      "loss": 0.1319,
      "step": 9676
    },
    {
      "epoch": 0.28230935293774434,
      "grad_norm": 0.9400066976953318,
      "learning_rate": 8.421612656933306e-06,
      "loss": 0.1534,
      "step": 9677
    },
    {
      "epoch": 0.2823385261683879,
      "grad_norm": 0.9436382052241648,
      "learning_rate": 8.421268152380898e-06,
      "loss": 0.1781,
      "step": 9678
    },
    {
      "epoch": 0.28236769939903145,
      "grad_norm": 0.8874724888722365,
      "learning_rate": 8.420923617284224e-06,
      "loss": 0.1608,
      "step": 9679
    },
    {
      "epoch": 0.282396872629675,
      "grad_norm": 0.7266858505170246,
      "learning_rate": 8.420579051646363e-06,
      "loss": 0.1562,
      "step": 9680
    },
    {
      "epoch": 0.28242604586031855,
      "grad_norm": 0.7521659709929877,
      "learning_rate": 8.420234455470386e-06,
      "loss": 0.1501,
      "step": 9681
    },
    {
      "epoch": 0.2824552190909621,
      "grad_norm": 1.1471401215465622,
      "learning_rate": 8.419889828759374e-06,
      "loss": 0.1311,
      "step": 9682
    },
    {
      "epoch": 0.2824843923216057,
      "grad_norm": 0.688710553463838,
      "learning_rate": 8.419545171516399e-06,
      "loss": 0.1613,
      "step": 9683
    },
    {
      "epoch": 0.28251356555224927,
      "grad_norm": 0.786648071437947,
      "learning_rate": 8.419200483744544e-06,
      "loss": 0.1601,
      "step": 9684
    },
    {
      "epoch": 0.2825427387828928,
      "grad_norm": 0.7593508447780223,
      "learning_rate": 8.418855765446883e-06,
      "loss": 0.1436,
      "step": 9685
    },
    {
      "epoch": 0.2825719120135364,
      "grad_norm": 0.8258345204164949,
      "learning_rate": 8.418511016626492e-06,
      "loss": 0.1522,
      "step": 9686
    },
    {
      "epoch": 0.28260108524417993,
      "grad_norm": 0.6930969976952299,
      "learning_rate": 8.418166237286453e-06,
      "loss": 0.1307,
      "step": 9687
    },
    {
      "epoch": 0.2826302584748235,
      "grad_norm": 0.775159952248678,
      "learning_rate": 8.41782142742984e-06,
      "loss": 0.1471,
      "step": 9688
    },
    {
      "epoch": 0.2826594317054671,
      "grad_norm": 0.9026015798308387,
      "learning_rate": 8.417476587059735e-06,
      "loss": 0.1495,
      "step": 9689
    },
    {
      "epoch": 0.28268860493611064,
      "grad_norm": 0.8061822204561225,
      "learning_rate": 8.417131716179212e-06,
      "loss": 0.1628,
      "step": 9690
    },
    {
      "epoch": 0.2827177781667542,
      "grad_norm": 0.7828662242122386,
      "learning_rate": 8.416786814791355e-06,
      "loss": 0.1493,
      "step": 9691
    },
    {
      "epoch": 0.28274695139739775,
      "grad_norm": 0.929254933961137,
      "learning_rate": 8.416441882899241e-06,
      "loss": 0.1742,
      "step": 9692
    },
    {
      "epoch": 0.2827761246280413,
      "grad_norm": 0.7516171096257392,
      "learning_rate": 8.41609692050595e-06,
      "loss": 0.1509,
      "step": 9693
    },
    {
      "epoch": 0.28280529785868486,
      "grad_norm": 0.7647455961180056,
      "learning_rate": 8.415751927614559e-06,
      "loss": 0.1315,
      "step": 9694
    },
    {
      "epoch": 0.2828344710893284,
      "grad_norm": 0.8782312479853289,
      "learning_rate": 8.415406904228151e-06,
      "loss": 0.1624,
      "step": 9695
    },
    {
      "epoch": 0.282863644319972,
      "grad_norm": 1.0164220231613077,
      "learning_rate": 8.415061850349806e-06,
      "loss": 0.1498,
      "step": 9696
    },
    {
      "epoch": 0.28289281755061557,
      "grad_norm": 0.8547393999235816,
      "learning_rate": 8.414716765982604e-06,
      "loss": 0.1562,
      "step": 9697
    },
    {
      "epoch": 0.2829219907812591,
      "grad_norm": 1.264434929576917,
      "learning_rate": 8.414371651129627e-06,
      "loss": 0.1452,
      "step": 9698
    },
    {
      "epoch": 0.2829511640119027,
      "grad_norm": 1.0139116707185494,
      "learning_rate": 8.414026505793953e-06,
      "loss": 0.1603,
      "step": 9699
    },
    {
      "epoch": 0.28298033724254623,
      "grad_norm": 0.9295979925804307,
      "learning_rate": 8.413681329978666e-06,
      "loss": 0.1654,
      "step": 9700
    },
    {
      "epoch": 0.2830095104731898,
      "grad_norm": 1.2400910685047386,
      "learning_rate": 8.413336123686847e-06,
      "loss": 0.1797,
      "step": 9701
    },
    {
      "epoch": 0.28303868370383334,
      "grad_norm": 1.0754853612442141,
      "learning_rate": 8.412990886921579e-06,
      "loss": 0.1712,
      "step": 9702
    },
    {
      "epoch": 0.28306785693447695,
      "grad_norm": 0.9555839236093209,
      "learning_rate": 8.412645619685943e-06,
      "loss": 0.1592,
      "step": 9703
    },
    {
      "epoch": 0.2830970301651205,
      "grad_norm": 1.0459397894466103,
      "learning_rate": 8.41230032198302e-06,
      "loss": 0.157,
      "step": 9704
    },
    {
      "epoch": 0.28312620339576405,
      "grad_norm": 0.8341881986997727,
      "learning_rate": 8.411954993815894e-06,
      "loss": 0.1588,
      "step": 9705
    },
    {
      "epoch": 0.2831553766264076,
      "grad_norm": 0.8119137135313987,
      "learning_rate": 8.41160963518765e-06,
      "loss": 0.1536,
      "step": 9706
    },
    {
      "epoch": 0.28318454985705116,
      "grad_norm": 0.8152878389508309,
      "learning_rate": 8.411264246101369e-06,
      "loss": 0.1603,
      "step": 9707
    },
    {
      "epoch": 0.2832137230876947,
      "grad_norm": 0.8343559062998911,
      "learning_rate": 8.410918826560134e-06,
      "loss": 0.1478,
      "step": 9708
    },
    {
      "epoch": 0.28324289631833827,
      "grad_norm": 0.7597627365743722,
      "learning_rate": 8.410573376567031e-06,
      "loss": 0.1563,
      "step": 9709
    },
    {
      "epoch": 0.2832720695489819,
      "grad_norm": 0.903494677094035,
      "learning_rate": 8.410227896125142e-06,
      "loss": 0.1692,
      "step": 9710
    },
    {
      "epoch": 0.28330124277962543,
      "grad_norm": 1.3270150784866643,
      "learning_rate": 8.409882385237555e-06,
      "loss": 0.1397,
      "step": 9711
    },
    {
      "epoch": 0.283330416010269,
      "grad_norm": 0.8830790643507895,
      "learning_rate": 8.409536843907351e-06,
      "loss": 0.1489,
      "step": 9712
    },
    {
      "epoch": 0.28335958924091253,
      "grad_norm": 1.068664797549712,
      "learning_rate": 8.409191272137616e-06,
      "loss": 0.1576,
      "step": 9713
    },
    {
      "epoch": 0.2833887624715561,
      "grad_norm": 0.7253653285187763,
      "learning_rate": 8.408845669931434e-06,
      "loss": 0.1393,
      "step": 9714
    },
    {
      "epoch": 0.28341793570219964,
      "grad_norm": 0.7163191072519595,
      "learning_rate": 8.408500037291894e-06,
      "loss": 0.1533,
      "step": 9715
    },
    {
      "epoch": 0.28344710893284325,
      "grad_norm": 0.8467666868444809,
      "learning_rate": 8.408154374222076e-06,
      "loss": 0.1556,
      "step": 9716
    },
    {
      "epoch": 0.2834762821634868,
      "grad_norm": 0.8696130561027053,
      "learning_rate": 8.407808680725072e-06,
      "loss": 0.1469,
      "step": 9717
    },
    {
      "epoch": 0.28350545539413036,
      "grad_norm": 0.7964640245797752,
      "learning_rate": 8.407462956803965e-06,
      "loss": 0.151,
      "step": 9718
    },
    {
      "epoch": 0.2835346286247739,
      "grad_norm": 0.7264124165991117,
      "learning_rate": 8.407117202461841e-06,
      "loss": 0.1548,
      "step": 9719
    },
    {
      "epoch": 0.28356380185541746,
      "grad_norm": 0.7390467530436048,
      "learning_rate": 8.406771417701788e-06,
      "loss": 0.1956,
      "step": 9720
    },
    {
      "epoch": 0.283592975086061,
      "grad_norm": 0.9587720822857085,
      "learning_rate": 8.406425602526895e-06,
      "loss": 0.1286,
      "step": 9721
    },
    {
      "epoch": 0.28362214831670457,
      "grad_norm": 0.8603120053150856,
      "learning_rate": 8.406079756940246e-06,
      "loss": 0.1409,
      "step": 9722
    },
    {
      "epoch": 0.2836513215473482,
      "grad_norm": 1.0536283198004628,
      "learning_rate": 8.40573388094493e-06,
      "loss": 0.1568,
      "step": 9723
    },
    {
      "epoch": 0.28368049477799173,
      "grad_norm": 0.7891749487967007,
      "learning_rate": 8.405387974544036e-06,
      "loss": 0.139,
      "step": 9724
    },
    {
      "epoch": 0.2837096680086353,
      "grad_norm": 0.8603404080694517,
      "learning_rate": 8.405042037740649e-06,
      "loss": 0.1345,
      "step": 9725
    },
    {
      "epoch": 0.28373884123927884,
      "grad_norm": 1.4079981681800657,
      "learning_rate": 8.404696070537861e-06,
      "loss": 0.161,
      "step": 9726
    },
    {
      "epoch": 0.2837680144699224,
      "grad_norm": 0.79156746578909,
      "learning_rate": 8.404350072938758e-06,
      "loss": 0.142,
      "step": 9727
    },
    {
      "epoch": 0.28379718770056594,
      "grad_norm": 0.9428572683118923,
      "learning_rate": 8.404004044946432e-06,
      "loss": 0.1466,
      "step": 9728
    },
    {
      "epoch": 0.2838263609312095,
      "grad_norm": 0.6755371678286886,
      "learning_rate": 8.40365798656397e-06,
      "loss": 0.134,
      "step": 9729
    },
    {
      "epoch": 0.2838555341618531,
      "grad_norm": 0.8582491479733165,
      "learning_rate": 8.403311897794461e-06,
      "loss": 0.1629,
      "step": 9730
    },
    {
      "epoch": 0.28388470739249666,
      "grad_norm": 0.909304447218576,
      "learning_rate": 8.402965778640996e-06,
      "loss": 0.1608,
      "step": 9731
    },
    {
      "epoch": 0.2839138806231402,
      "grad_norm": 0.7001333232973245,
      "learning_rate": 8.402619629106667e-06,
      "loss": 0.1592,
      "step": 9732
    },
    {
      "epoch": 0.28394305385378377,
      "grad_norm": 0.8150591980831794,
      "learning_rate": 8.40227344919456e-06,
      "loss": 0.1535,
      "step": 9733
    },
    {
      "epoch": 0.2839722270844273,
      "grad_norm": 0.7028752977026115,
      "learning_rate": 8.401927238907768e-06,
      "loss": 0.1527,
      "step": 9734
    },
    {
      "epoch": 0.2840014003150709,
      "grad_norm": 0.7821393443659244,
      "learning_rate": 8.401580998249383e-06,
      "loss": 0.1499,
      "step": 9735
    },
    {
      "epoch": 0.2840305735457144,
      "grad_norm": 0.8229367088023415,
      "learning_rate": 8.401234727222495e-06,
      "loss": 0.1568,
      "step": 9736
    },
    {
      "epoch": 0.28405974677635804,
      "grad_norm": 0.8706227628537601,
      "learning_rate": 8.400888425830193e-06,
      "loss": 0.1495,
      "step": 9737
    },
    {
      "epoch": 0.2840889200070016,
      "grad_norm": 0.6997778137587798,
      "learning_rate": 8.400542094075572e-06,
      "loss": 0.1346,
      "step": 9738
    },
    {
      "epoch": 0.28411809323764514,
      "grad_norm": 0.7620811191556768,
      "learning_rate": 8.400195731961725e-06,
      "loss": 0.1352,
      "step": 9739
    },
    {
      "epoch": 0.2841472664682887,
      "grad_norm": 0.740916557473217,
      "learning_rate": 8.39984933949174e-06,
      "loss": 0.1481,
      "step": 9740
    },
    {
      "epoch": 0.28417643969893225,
      "grad_norm": 0.6581033953325057,
      "learning_rate": 8.399502916668712e-06,
      "loss": 0.1644,
      "step": 9741
    },
    {
      "epoch": 0.2842056129295758,
      "grad_norm": 0.8227219664659927,
      "learning_rate": 8.399156463495735e-06,
      "loss": 0.1415,
      "step": 9742
    },
    {
      "epoch": 0.2842347861602194,
      "grad_norm": 0.7457571557541653,
      "learning_rate": 8.398809979975898e-06,
      "loss": 0.1405,
      "step": 9743
    },
    {
      "epoch": 0.28426395939086296,
      "grad_norm": 0.6780960869226833,
      "learning_rate": 8.398463466112298e-06,
      "loss": 0.1503,
      "step": 9744
    },
    {
      "epoch": 0.2842931326215065,
      "grad_norm": 0.7994458526346818,
      "learning_rate": 8.398116921908028e-06,
      "loss": 0.1329,
      "step": 9745
    },
    {
      "epoch": 0.28432230585215007,
      "grad_norm": 0.9628546074911857,
      "learning_rate": 8.397770347366181e-06,
      "loss": 0.1415,
      "step": 9746
    },
    {
      "epoch": 0.2843514790827936,
      "grad_norm": 0.6365447065570922,
      "learning_rate": 8.397423742489852e-06,
      "loss": 0.1444,
      "step": 9747
    },
    {
      "epoch": 0.2843806523134372,
      "grad_norm": 0.7321654765354305,
      "learning_rate": 8.397077107282134e-06,
      "loss": 0.1412,
      "step": 9748
    },
    {
      "epoch": 0.28440982554408073,
      "grad_norm": 1.3495289834810458,
      "learning_rate": 8.396730441746121e-06,
      "loss": 0.1539,
      "step": 9749
    },
    {
      "epoch": 0.28443899877472434,
      "grad_norm": 0.7402820912285748,
      "learning_rate": 8.396383745884913e-06,
      "loss": 0.1613,
      "step": 9750
    },
    {
      "epoch": 0.2844681720053679,
      "grad_norm": 0.7990324437728387,
      "learning_rate": 8.3960370197016e-06,
      "loss": 0.1454,
      "step": 9751
    },
    {
      "epoch": 0.28449734523601145,
      "grad_norm": 0.8000174929224456,
      "learning_rate": 8.395690263199279e-06,
      "loss": 0.1382,
      "step": 9752
    },
    {
      "epoch": 0.284526518466655,
      "grad_norm": 0.8392983185148813,
      "learning_rate": 8.395343476381047e-06,
      "loss": 0.1462,
      "step": 9753
    },
    {
      "epoch": 0.28455569169729855,
      "grad_norm": 0.811664279287985,
      "learning_rate": 8.394996659249996e-06,
      "loss": 0.1662,
      "step": 9754
    },
    {
      "epoch": 0.2845848649279421,
      "grad_norm": 0.8364192688275434,
      "learning_rate": 8.394649811809228e-06,
      "loss": 0.1635,
      "step": 9755
    },
    {
      "epoch": 0.28461403815858566,
      "grad_norm": 0.8514969812664084,
      "learning_rate": 8.394302934061836e-06,
      "loss": 0.1534,
      "step": 9756
    },
    {
      "epoch": 0.28464321138922927,
      "grad_norm": 0.6980328385273792,
      "learning_rate": 8.393956026010917e-06,
      "loss": 0.1375,
      "step": 9757
    },
    {
      "epoch": 0.2846723846198728,
      "grad_norm": 0.890712888984573,
      "learning_rate": 8.39360908765957e-06,
      "loss": 0.1501,
      "step": 9758
    },
    {
      "epoch": 0.2847015578505164,
      "grad_norm": 0.7935256251004001,
      "learning_rate": 8.393262119010891e-06,
      "loss": 0.1417,
      "step": 9759
    },
    {
      "epoch": 0.2847307310811599,
      "grad_norm": 1.0583601653878323,
      "learning_rate": 8.392915120067979e-06,
      "loss": 0.139,
      "step": 9760
    },
    {
      "epoch": 0.2847599043118035,
      "grad_norm": 0.8763551162292266,
      "learning_rate": 8.392568090833928e-06,
      "loss": 0.1318,
      "step": 9761
    },
    {
      "epoch": 0.28478907754244703,
      "grad_norm": 0.910605598245978,
      "learning_rate": 8.392221031311842e-06,
      "loss": 0.1421,
      "step": 9762
    },
    {
      "epoch": 0.2848182507730906,
      "grad_norm": 1.0002623823839114,
      "learning_rate": 8.391873941504813e-06,
      "loss": 0.1672,
      "step": 9763
    },
    {
      "epoch": 0.2848474240037342,
      "grad_norm": 1.150826451710878,
      "learning_rate": 8.391526821415946e-06,
      "loss": 0.1459,
      "step": 9764
    },
    {
      "epoch": 0.28487659723437775,
      "grad_norm": 0.8333527000506046,
      "learning_rate": 8.391179671048335e-06,
      "loss": 0.1463,
      "step": 9765
    },
    {
      "epoch": 0.2849057704650213,
      "grad_norm": 0.8496365208020461,
      "learning_rate": 8.390832490405083e-06,
      "loss": 0.1778,
      "step": 9766
    },
    {
      "epoch": 0.28493494369566486,
      "grad_norm": 0.9566922452947795,
      "learning_rate": 8.390485279489288e-06,
      "loss": 0.1557,
      "step": 9767
    },
    {
      "epoch": 0.2849641169263084,
      "grad_norm": 0.9743776238793905,
      "learning_rate": 8.39013803830405e-06,
      "loss": 0.1963,
      "step": 9768
    },
    {
      "epoch": 0.28499329015695196,
      "grad_norm": 0.8105312690064024,
      "learning_rate": 8.389790766852468e-06,
      "loss": 0.1618,
      "step": 9769
    },
    {
      "epoch": 0.2850224633875955,
      "grad_norm": 0.9612419895376347,
      "learning_rate": 8.389443465137644e-06,
      "loss": 0.1372,
      "step": 9770
    },
    {
      "epoch": 0.2850516366182391,
      "grad_norm": 0.989031838983565,
      "learning_rate": 8.389096133162676e-06,
      "loss": 0.1557,
      "step": 9771
    },
    {
      "epoch": 0.2850808098488827,
      "grad_norm": 0.7003070634055556,
      "learning_rate": 8.388748770930668e-06,
      "loss": 0.1578,
      "step": 9772
    },
    {
      "epoch": 0.28510998307952623,
      "grad_norm": 0.6892256137104693,
      "learning_rate": 8.38840137844472e-06,
      "loss": 0.1542,
      "step": 9773
    },
    {
      "epoch": 0.2851391563101698,
      "grad_norm": 0.7613776122796025,
      "learning_rate": 8.388053955707933e-06,
      "loss": 0.1462,
      "step": 9774
    },
    {
      "epoch": 0.28516832954081334,
      "grad_norm": 0.8396255255344625,
      "learning_rate": 8.387706502723411e-06,
      "loss": 0.1711,
      "step": 9775
    },
    {
      "epoch": 0.2851975027714569,
      "grad_norm": 0.8393162253934431,
      "learning_rate": 8.387359019494253e-06,
      "loss": 0.1616,
      "step": 9776
    },
    {
      "epoch": 0.2852266760021005,
      "grad_norm": 0.9361006689228223,
      "learning_rate": 8.38701150602356e-06,
      "loss": 0.1436,
      "step": 9777
    },
    {
      "epoch": 0.28525584923274405,
      "grad_norm": 0.7737408120928901,
      "learning_rate": 8.386663962314439e-06,
      "loss": 0.1526,
      "step": 9778
    },
    {
      "epoch": 0.2852850224633876,
      "grad_norm": 1.0143515421180576,
      "learning_rate": 8.38631638836999e-06,
      "loss": 0.1591,
      "step": 9779
    },
    {
      "epoch": 0.28531419569403116,
      "grad_norm": 0.9423559033595247,
      "learning_rate": 8.385968784193318e-06,
      "loss": 0.1482,
      "step": 9780
    },
    {
      "epoch": 0.2853433689246747,
      "grad_norm": 0.8088790668770177,
      "learning_rate": 8.385621149787523e-06,
      "loss": 0.1648,
      "step": 9781
    },
    {
      "epoch": 0.28537254215531827,
      "grad_norm": 1.2837111217628376,
      "learning_rate": 8.385273485155712e-06,
      "loss": 0.1449,
      "step": 9782
    },
    {
      "epoch": 0.2854017153859618,
      "grad_norm": 0.7041231284703034,
      "learning_rate": 8.384925790300988e-06,
      "loss": 0.1561,
      "step": 9783
    },
    {
      "epoch": 0.2854308886166054,
      "grad_norm": 0.7609432455307122,
      "learning_rate": 8.384578065226452e-06,
      "loss": 0.1636,
      "step": 9784
    },
    {
      "epoch": 0.285460061847249,
      "grad_norm": 1.0170438604752867,
      "learning_rate": 8.384230309935212e-06,
      "loss": 0.1524,
      "step": 9785
    },
    {
      "epoch": 0.28548923507789253,
      "grad_norm": 0.6991345591064341,
      "learning_rate": 8.383882524430373e-06,
      "loss": 0.1635,
      "step": 9786
    },
    {
      "epoch": 0.2855184083085361,
      "grad_norm": 0.793002041694541,
      "learning_rate": 8.383534708715039e-06,
      "loss": 0.1455,
      "step": 9787
    },
    {
      "epoch": 0.28554758153917964,
      "grad_norm": 0.9108779665096352,
      "learning_rate": 8.383186862792313e-06,
      "loss": 0.1561,
      "step": 9788
    },
    {
      "epoch": 0.2855767547698232,
      "grad_norm": 0.7557158888877378,
      "learning_rate": 8.382838986665303e-06,
      "loss": 0.1413,
      "step": 9789
    },
    {
      "epoch": 0.28560592800046675,
      "grad_norm": 0.6168635431655549,
      "learning_rate": 8.382491080337114e-06,
      "loss": 0.1429,
      "step": 9790
    },
    {
      "epoch": 0.28563510123111036,
      "grad_norm": 0.8606945167870395,
      "learning_rate": 8.382143143810853e-06,
      "loss": 0.1843,
      "step": 9791
    },
    {
      "epoch": 0.2856642744617539,
      "grad_norm": 0.8543899192255275,
      "learning_rate": 8.381795177089625e-06,
      "loss": 0.1742,
      "step": 9792
    },
    {
      "epoch": 0.28569344769239746,
      "grad_norm": 0.8743916722210373,
      "learning_rate": 8.381447180176536e-06,
      "loss": 0.1625,
      "step": 9793
    },
    {
      "epoch": 0.285722620923041,
      "grad_norm": 0.8911906150549053,
      "learning_rate": 8.381099153074694e-06,
      "loss": 0.1668,
      "step": 9794
    },
    {
      "epoch": 0.28575179415368457,
      "grad_norm": 0.8898417053456381,
      "learning_rate": 8.380751095787206e-06,
      "loss": 0.1621,
      "step": 9795
    },
    {
      "epoch": 0.2857809673843281,
      "grad_norm": 0.8492672080647266,
      "learning_rate": 8.38040300831718e-06,
      "loss": 0.1547,
      "step": 9796
    },
    {
      "epoch": 0.2858101406149717,
      "grad_norm": 0.8231220735251121,
      "learning_rate": 8.380054890667721e-06,
      "loss": 0.1542,
      "step": 9797
    },
    {
      "epoch": 0.2858393138456153,
      "grad_norm": 0.9392926209405533,
      "learning_rate": 8.379706742841942e-06,
      "loss": 0.1501,
      "step": 9798
    },
    {
      "epoch": 0.28586848707625884,
      "grad_norm": 0.8668033463206155,
      "learning_rate": 8.379358564842945e-06,
      "loss": 0.1825,
      "step": 9799
    },
    {
      "epoch": 0.2858976603069024,
      "grad_norm": 0.782579551210596,
      "learning_rate": 8.379010356673842e-06,
      "loss": 0.1397,
      "step": 9800
    },
    {
      "epoch": 0.28592683353754594,
      "grad_norm": 0.8429579693804491,
      "learning_rate": 8.378662118337744e-06,
      "loss": 0.1711,
      "step": 9801
    },
    {
      "epoch": 0.2859560067681895,
      "grad_norm": 1.1811981160323473,
      "learning_rate": 8.378313849837754e-06,
      "loss": 0.1583,
      "step": 9802
    },
    {
      "epoch": 0.28598517999883305,
      "grad_norm": 0.7390792447280129,
      "learning_rate": 8.377965551176986e-06,
      "loss": 0.1611,
      "step": 9803
    },
    {
      "epoch": 0.28601435322947666,
      "grad_norm": 0.8515452839779818,
      "learning_rate": 8.377617222358547e-06,
      "loss": 0.1976,
      "step": 9804
    },
    {
      "epoch": 0.2860435264601202,
      "grad_norm": 1.138326602151265,
      "learning_rate": 8.377268863385548e-06,
      "loss": 0.1824,
      "step": 9805
    },
    {
      "epoch": 0.28607269969076377,
      "grad_norm": 0.6932803493196364,
      "learning_rate": 8.376920474261098e-06,
      "loss": 0.1539,
      "step": 9806
    },
    {
      "epoch": 0.2861018729214073,
      "grad_norm": 0.8042209766996735,
      "learning_rate": 8.37657205498831e-06,
      "loss": 0.1639,
      "step": 9807
    },
    {
      "epoch": 0.28613104615205087,
      "grad_norm": 0.9771992419589725,
      "learning_rate": 8.376223605570292e-06,
      "loss": 0.1563,
      "step": 9808
    },
    {
      "epoch": 0.2861602193826944,
      "grad_norm": 0.8513448143246213,
      "learning_rate": 8.375875126010156e-06,
      "loss": 0.1436,
      "step": 9809
    },
    {
      "epoch": 0.286189392613338,
      "grad_norm": 0.7190331498425138,
      "learning_rate": 8.375526616311012e-06,
      "loss": 0.157,
      "step": 9810
    },
    {
      "epoch": 0.2862185658439816,
      "grad_norm": 0.7185870328303054,
      "learning_rate": 8.375178076475971e-06,
      "loss": 0.1276,
      "step": 9811
    },
    {
      "epoch": 0.28624773907462514,
      "grad_norm": 1.1960626524080855,
      "learning_rate": 8.37482950650815e-06,
      "loss": 0.1703,
      "step": 9812
    },
    {
      "epoch": 0.2862769123052687,
      "grad_norm": 0.9473444616150966,
      "learning_rate": 8.374480906410651e-06,
      "loss": 0.1562,
      "step": 9813
    },
    {
      "epoch": 0.28630608553591225,
      "grad_norm": 1.5379768613731213,
      "learning_rate": 8.374132276186596e-06,
      "loss": 0.1624,
      "step": 9814
    },
    {
      "epoch": 0.2863352587665558,
      "grad_norm": 0.8579904917878909,
      "learning_rate": 8.373783615839093e-06,
      "loss": 0.1488,
      "step": 9815
    },
    {
      "epoch": 0.28636443199719935,
      "grad_norm": 0.8139606444233826,
      "learning_rate": 8.373434925371255e-06,
      "loss": 0.1467,
      "step": 9816
    },
    {
      "epoch": 0.2863936052278429,
      "grad_norm": 0.8252844186380475,
      "learning_rate": 8.373086204786195e-06,
      "loss": 0.1696,
      "step": 9817
    },
    {
      "epoch": 0.2864227784584865,
      "grad_norm": 0.846608937522948,
      "learning_rate": 8.372737454087026e-06,
      "loss": 0.1355,
      "step": 9818
    },
    {
      "epoch": 0.28645195168913007,
      "grad_norm": 0.6950788202532748,
      "learning_rate": 8.372388673276864e-06,
      "loss": 0.1526,
      "step": 9819
    },
    {
      "epoch": 0.2864811249197736,
      "grad_norm": 0.7839518495820436,
      "learning_rate": 8.37203986235882e-06,
      "loss": 0.1835,
      "step": 9820
    },
    {
      "epoch": 0.2865102981504172,
      "grad_norm": 0.8047945199413639,
      "learning_rate": 8.371691021336008e-06,
      "loss": 0.13,
      "step": 9821
    },
    {
      "epoch": 0.28653947138106073,
      "grad_norm": 0.6528806430649705,
      "learning_rate": 8.371342150211544e-06,
      "loss": 0.1504,
      "step": 9822
    },
    {
      "epoch": 0.2865686446117043,
      "grad_norm": 0.8877748140982329,
      "learning_rate": 8.370993248988543e-06,
      "loss": 0.1557,
      "step": 9823
    },
    {
      "epoch": 0.28659781784234784,
      "grad_norm": 1.015067989317165,
      "learning_rate": 8.370644317670118e-06,
      "loss": 0.1635,
      "step": 9824
    },
    {
      "epoch": 0.28662699107299144,
      "grad_norm": 0.9142522240636217,
      "learning_rate": 8.370295356259386e-06,
      "loss": 0.1652,
      "step": 9825
    },
    {
      "epoch": 0.286656164303635,
      "grad_norm": 0.8756461731394208,
      "learning_rate": 8.369946364759462e-06,
      "loss": 0.1646,
      "step": 9826
    },
    {
      "epoch": 0.28668533753427855,
      "grad_norm": 1.1130965289972723,
      "learning_rate": 8.36959734317346e-06,
      "loss": 0.1558,
      "step": 9827
    },
    {
      "epoch": 0.2867145107649221,
      "grad_norm": 0.8259424285503624,
      "learning_rate": 8.369248291504497e-06,
      "loss": 0.1576,
      "step": 9828
    },
    {
      "epoch": 0.28674368399556566,
      "grad_norm": 0.9562641163907196,
      "learning_rate": 8.368899209755691e-06,
      "loss": 0.1625,
      "step": 9829
    },
    {
      "epoch": 0.2867728572262092,
      "grad_norm": 1.2872070525831119,
      "learning_rate": 8.368550097930156e-06,
      "loss": 0.1291,
      "step": 9830
    },
    {
      "epoch": 0.2868020304568528,
      "grad_norm": 0.8581258318130291,
      "learning_rate": 8.368200956031011e-06,
      "loss": 0.145,
      "step": 9831
    },
    {
      "epoch": 0.28683120368749637,
      "grad_norm": 1.087307890910858,
      "learning_rate": 8.367851784061371e-06,
      "loss": 0.1416,
      "step": 9832
    },
    {
      "epoch": 0.2868603769181399,
      "grad_norm": 0.9742143876335019,
      "learning_rate": 8.367502582024354e-06,
      "loss": 0.1525,
      "step": 9833
    },
    {
      "epoch": 0.2868895501487835,
      "grad_norm": 1.0880570097246778,
      "learning_rate": 8.367153349923078e-06,
      "loss": 0.2012,
      "step": 9834
    },
    {
      "epoch": 0.28691872337942703,
      "grad_norm": 1.1997412533739402,
      "learning_rate": 8.366804087760662e-06,
      "loss": 0.1496,
      "step": 9835
    },
    {
      "epoch": 0.2869478966100706,
      "grad_norm": 0.8698108124035253,
      "learning_rate": 8.366454795540221e-06,
      "loss": 0.1756,
      "step": 9836
    },
    {
      "epoch": 0.28697706984071414,
      "grad_norm": 1.1371208532155463,
      "learning_rate": 8.366105473264877e-06,
      "loss": 0.1822,
      "step": 9837
    },
    {
      "epoch": 0.28700624307135775,
      "grad_norm": 1.349272065080481,
      "learning_rate": 8.365756120937746e-06,
      "loss": 0.1827,
      "step": 9838
    },
    {
      "epoch": 0.2870354163020013,
      "grad_norm": 1.159707127945847,
      "learning_rate": 8.365406738561948e-06,
      "loss": 0.1712,
      "step": 9839
    },
    {
      "epoch": 0.28706458953264485,
      "grad_norm": 0.7695853217531026,
      "learning_rate": 8.365057326140602e-06,
      "loss": 0.1624,
      "step": 9840
    },
    {
      "epoch": 0.2870937627632884,
      "grad_norm": 1.2176623436540903,
      "learning_rate": 8.364707883676826e-06,
      "loss": 0.1629,
      "step": 9841
    },
    {
      "epoch": 0.28712293599393196,
      "grad_norm": 0.7969760569180943,
      "learning_rate": 8.364358411173742e-06,
      "loss": 0.1564,
      "step": 9842
    },
    {
      "epoch": 0.2871521092245755,
      "grad_norm": 0.7393559798164078,
      "learning_rate": 8.36400890863447e-06,
      "loss": 0.1545,
      "step": 9843
    },
    {
      "epoch": 0.28718128245521907,
      "grad_norm": 0.7461860357689802,
      "learning_rate": 8.363659376062129e-06,
      "loss": 0.1378,
      "step": 9844
    },
    {
      "epoch": 0.2872104556858627,
      "grad_norm": 0.9308486159446256,
      "learning_rate": 8.36330981345984e-06,
      "loss": 0.1397,
      "step": 9845
    },
    {
      "epoch": 0.28723962891650623,
      "grad_norm": 0.8344962007898478,
      "learning_rate": 8.362960220830725e-06,
      "loss": 0.1647,
      "step": 9846
    },
    {
      "epoch": 0.2872688021471498,
      "grad_norm": 0.7315788043770404,
      "learning_rate": 8.362610598177904e-06,
      "loss": 0.1698,
      "step": 9847
    },
    {
      "epoch": 0.28729797537779334,
      "grad_norm": 0.9402750814257732,
      "learning_rate": 8.362260945504497e-06,
      "loss": 0.1553,
      "step": 9848
    },
    {
      "epoch": 0.2873271486084369,
      "grad_norm": 0.8061549441004859,
      "learning_rate": 8.361911262813628e-06,
      "loss": 0.1546,
      "step": 9849
    },
    {
      "epoch": 0.28735632183908044,
      "grad_norm": 0.847045834650828,
      "learning_rate": 8.361561550108417e-06,
      "loss": 0.1336,
      "step": 9850
    },
    {
      "epoch": 0.287385495069724,
      "grad_norm": 0.8061753933894321,
      "learning_rate": 8.361211807391987e-06,
      "loss": 0.1699,
      "step": 9851
    },
    {
      "epoch": 0.2874146683003676,
      "grad_norm": 0.8444227995422279,
      "learning_rate": 8.36086203466746e-06,
      "loss": 0.15,
      "step": 9852
    },
    {
      "epoch": 0.28744384153101116,
      "grad_norm": 0.8212005765713866,
      "learning_rate": 8.36051223193796e-06,
      "loss": 0.167,
      "step": 9853
    },
    {
      "epoch": 0.2874730147616547,
      "grad_norm": 0.7896960182652282,
      "learning_rate": 8.360162399206609e-06,
      "loss": 0.1541,
      "step": 9854
    },
    {
      "epoch": 0.28750218799229826,
      "grad_norm": 0.7054874119732321,
      "learning_rate": 8.35981253647653e-06,
      "loss": 0.156,
      "step": 9855
    },
    {
      "epoch": 0.2875313612229418,
      "grad_norm": 0.794441548546251,
      "learning_rate": 8.359462643750847e-06,
      "loss": 0.1455,
      "step": 9856
    },
    {
      "epoch": 0.28756053445358537,
      "grad_norm": 0.7243266466759475,
      "learning_rate": 8.359112721032682e-06,
      "loss": 0.1522,
      "step": 9857
    },
    {
      "epoch": 0.287589707684229,
      "grad_norm": 0.854715048031843,
      "learning_rate": 8.358762768325162e-06,
      "loss": 0.1588,
      "step": 9858
    },
    {
      "epoch": 0.28761888091487253,
      "grad_norm": 0.7871523200061623,
      "learning_rate": 8.35841278563141e-06,
      "loss": 0.1573,
      "step": 9859
    },
    {
      "epoch": 0.2876480541455161,
      "grad_norm": 0.9531408083364188,
      "learning_rate": 8.358062772954549e-06,
      "loss": 0.1682,
      "step": 9860
    },
    {
      "epoch": 0.28767722737615964,
      "grad_norm": 0.6812932180373655,
      "learning_rate": 8.357712730297707e-06,
      "loss": 0.147,
      "step": 9861
    },
    {
      "epoch": 0.2877064006068032,
      "grad_norm": 0.7838031131738635,
      "learning_rate": 8.357362657664005e-06,
      "loss": 0.1545,
      "step": 9862
    },
    {
      "epoch": 0.28773557383744675,
      "grad_norm": 1.1692794571002676,
      "learning_rate": 8.357012555056571e-06,
      "loss": 0.1631,
      "step": 9863
    },
    {
      "epoch": 0.2877647470680903,
      "grad_norm": 0.759695566501756,
      "learning_rate": 8.356662422478532e-06,
      "loss": 0.1505,
      "step": 9864
    },
    {
      "epoch": 0.2877939202987339,
      "grad_norm": 0.8668270726138102,
      "learning_rate": 8.356312259933013e-06,
      "loss": 0.179,
      "step": 9865
    },
    {
      "epoch": 0.28782309352937746,
      "grad_norm": 1.2673528714091171,
      "learning_rate": 8.355962067423135e-06,
      "loss": 0.1636,
      "step": 9866
    },
    {
      "epoch": 0.287852266760021,
      "grad_norm": 0.8678672769322022,
      "learning_rate": 8.355611844952033e-06,
      "loss": 0.1697,
      "step": 9867
    },
    {
      "epoch": 0.28788143999066457,
      "grad_norm": 0.8174206486742935,
      "learning_rate": 8.355261592522828e-06,
      "loss": 0.1515,
      "step": 9868
    },
    {
      "epoch": 0.2879106132213081,
      "grad_norm": 0.7988968203734829,
      "learning_rate": 8.354911310138647e-06,
      "loss": 0.1491,
      "step": 9869
    },
    {
      "epoch": 0.2879397864519517,
      "grad_norm": 0.6804670031915753,
      "learning_rate": 8.354560997802622e-06,
      "loss": 0.1627,
      "step": 9870
    },
    {
      "epoch": 0.2879689596825952,
      "grad_norm": 0.7502123958402307,
      "learning_rate": 8.354210655517876e-06,
      "loss": 0.1439,
      "step": 9871
    },
    {
      "epoch": 0.28799813291323884,
      "grad_norm": 0.9163857814759543,
      "learning_rate": 8.353860283287535e-06,
      "loss": 0.1749,
      "step": 9872
    },
    {
      "epoch": 0.2880273061438824,
      "grad_norm": 0.7503514897510583,
      "learning_rate": 8.353509881114734e-06,
      "loss": 0.1671,
      "step": 9873
    },
    {
      "epoch": 0.28805647937452594,
      "grad_norm": 0.9986993786625777,
      "learning_rate": 8.353159449002595e-06,
      "loss": 0.1709,
      "step": 9874
    },
    {
      "epoch": 0.2880856526051695,
      "grad_norm": 0.7901623980860604,
      "learning_rate": 8.352808986954251e-06,
      "loss": 0.1723,
      "step": 9875
    },
    {
      "epoch": 0.28811482583581305,
      "grad_norm": 0.6467608429051405,
      "learning_rate": 8.352458494972825e-06,
      "loss": 0.1469,
      "step": 9876
    },
    {
      "epoch": 0.2881439990664566,
      "grad_norm": 0.7686440350854351,
      "learning_rate": 8.352107973061455e-06,
      "loss": 0.1693,
      "step": 9877
    },
    {
      "epoch": 0.28817317229710016,
      "grad_norm": 0.7673670580493841,
      "learning_rate": 8.351757421223262e-06,
      "loss": 0.145,
      "step": 9878
    },
    {
      "epoch": 0.28820234552774376,
      "grad_norm": 0.7527342040107813,
      "learning_rate": 8.351406839461378e-06,
      "loss": 0.1473,
      "step": 9879
    },
    {
      "epoch": 0.2882315187583873,
      "grad_norm": 0.9030520454763455,
      "learning_rate": 8.351056227778935e-06,
      "loss": 0.1328,
      "step": 9880
    },
    {
      "epoch": 0.28826069198903087,
      "grad_norm": 0.826526670234512,
      "learning_rate": 8.350705586179063e-06,
      "loss": 0.1554,
      "step": 9881
    },
    {
      "epoch": 0.2882898652196744,
      "grad_norm": 0.9628114119687834,
      "learning_rate": 8.35035491466489e-06,
      "loss": 0.1413,
      "step": 9882
    },
    {
      "epoch": 0.288319038450318,
      "grad_norm": 0.7046764569947076,
      "learning_rate": 8.350004213239549e-06,
      "loss": 0.1451,
      "step": 9883
    },
    {
      "epoch": 0.28834821168096153,
      "grad_norm": 0.7790342088529321,
      "learning_rate": 8.349653481906169e-06,
      "loss": 0.1656,
      "step": 9884
    },
    {
      "epoch": 0.2883773849116051,
      "grad_norm": 0.9868218322854813,
      "learning_rate": 8.349302720667883e-06,
      "loss": 0.1703,
      "step": 9885
    },
    {
      "epoch": 0.2884065581422487,
      "grad_norm": 0.7137767194711725,
      "learning_rate": 8.348951929527822e-06,
      "loss": 0.137,
      "step": 9886
    },
    {
      "epoch": 0.28843573137289225,
      "grad_norm": 0.8227141535054573,
      "learning_rate": 8.348601108489118e-06,
      "loss": 0.1455,
      "step": 9887
    },
    {
      "epoch": 0.2884649046035358,
      "grad_norm": 0.8436155558799002,
      "learning_rate": 8.348250257554902e-06,
      "loss": 0.1618,
      "step": 9888
    },
    {
      "epoch": 0.28849407783417935,
      "grad_norm": 1.0102542206494416,
      "learning_rate": 8.347899376728307e-06,
      "loss": 0.1252,
      "step": 9889
    },
    {
      "epoch": 0.2885232510648229,
      "grad_norm": 0.6229948350671283,
      "learning_rate": 8.347548466012464e-06,
      "loss": 0.1235,
      "step": 9890
    },
    {
      "epoch": 0.28855242429546646,
      "grad_norm": 0.7487960006338579,
      "learning_rate": 8.34719752541051e-06,
      "loss": 0.154,
      "step": 9891
    },
    {
      "epoch": 0.28858159752611007,
      "grad_norm": 0.8203898754336488,
      "learning_rate": 8.346846554925577e-06,
      "loss": 0.1546,
      "step": 9892
    },
    {
      "epoch": 0.2886107707567536,
      "grad_norm": 0.741354464163945,
      "learning_rate": 8.346495554560794e-06,
      "loss": 0.1654,
      "step": 9893
    },
    {
      "epoch": 0.2886399439873972,
      "grad_norm": 0.928620964154592,
      "learning_rate": 8.346144524319298e-06,
      "loss": 0.1563,
      "step": 9894
    },
    {
      "epoch": 0.2886691172180407,
      "grad_norm": 0.7041305464807207,
      "learning_rate": 8.345793464204221e-06,
      "loss": 0.1405,
      "step": 9895
    },
    {
      "epoch": 0.2886982904486843,
      "grad_norm": 0.7809452957929224,
      "learning_rate": 8.345442374218702e-06,
      "loss": 0.1347,
      "step": 9896
    },
    {
      "epoch": 0.28872746367932783,
      "grad_norm": 0.7657322015755201,
      "learning_rate": 8.34509125436587e-06,
      "loss": 0.139,
      "step": 9897
    },
    {
      "epoch": 0.2887566369099714,
      "grad_norm": 0.7689742927200562,
      "learning_rate": 8.344740104648862e-06,
      "loss": 0.1622,
      "step": 9898
    },
    {
      "epoch": 0.288785810140615,
      "grad_norm": 0.9481928855239199,
      "learning_rate": 8.344388925070812e-06,
      "loss": 0.1568,
      "step": 9899
    },
    {
      "epoch": 0.28881498337125855,
      "grad_norm": 0.7098603340041709,
      "learning_rate": 8.344037715634859e-06,
      "loss": 0.1382,
      "step": 9900
    },
    {
      "epoch": 0.2888441566019021,
      "grad_norm": 1.4781048055873522,
      "learning_rate": 8.343686476344132e-06,
      "loss": 0.1657,
      "step": 9901
    },
    {
      "epoch": 0.28887332983254566,
      "grad_norm": 0.9229871341863847,
      "learning_rate": 8.343335207201773e-06,
      "loss": 0.1345,
      "step": 9902
    },
    {
      "epoch": 0.2889025030631892,
      "grad_norm": 0.7753778008196123,
      "learning_rate": 8.342983908210915e-06,
      "loss": 0.1233,
      "step": 9903
    },
    {
      "epoch": 0.28893167629383276,
      "grad_norm": 0.8450474439015068,
      "learning_rate": 8.342632579374693e-06,
      "loss": 0.1416,
      "step": 9904
    },
    {
      "epoch": 0.2889608495244763,
      "grad_norm": 0.7643050333338456,
      "learning_rate": 8.342281220696246e-06,
      "loss": 0.1737,
      "step": 9905
    },
    {
      "epoch": 0.2889900227551199,
      "grad_norm": 0.8086534856495711,
      "learning_rate": 8.341929832178712e-06,
      "loss": 0.1298,
      "step": 9906
    },
    {
      "epoch": 0.2890191959857635,
      "grad_norm": 1.0202434340116664,
      "learning_rate": 8.341578413825224e-06,
      "loss": 0.1546,
      "step": 9907
    },
    {
      "epoch": 0.28904836921640703,
      "grad_norm": 0.8856129755694068,
      "learning_rate": 8.341226965638922e-06,
      "loss": 0.1483,
      "step": 9908
    },
    {
      "epoch": 0.2890775424470506,
      "grad_norm": 1.0051947137453312,
      "learning_rate": 8.340875487622944e-06,
      "loss": 0.1575,
      "step": 9909
    },
    {
      "epoch": 0.28910671567769414,
      "grad_norm": 0.8923354587071173,
      "learning_rate": 8.340523979780426e-06,
      "loss": 0.1697,
      "step": 9910
    },
    {
      "epoch": 0.2891358889083377,
      "grad_norm": 0.7912794953994675,
      "learning_rate": 8.340172442114509e-06,
      "loss": 0.138,
      "step": 9911
    },
    {
      "epoch": 0.28916506213898124,
      "grad_norm": 0.7861918709653906,
      "learning_rate": 8.33982087462833e-06,
      "loss": 0.1494,
      "step": 9912
    },
    {
      "epoch": 0.28919423536962485,
      "grad_norm": 0.9112625258963118,
      "learning_rate": 8.339469277325025e-06,
      "loss": 0.1475,
      "step": 9913
    },
    {
      "epoch": 0.2892234086002684,
      "grad_norm": 0.8893112375319981,
      "learning_rate": 8.339117650207738e-06,
      "loss": 0.1506,
      "step": 9914
    },
    {
      "epoch": 0.28925258183091196,
      "grad_norm": 0.9022641279176083,
      "learning_rate": 8.338765993279604e-06,
      "loss": 0.1437,
      "step": 9915
    },
    {
      "epoch": 0.2892817550615555,
      "grad_norm": 0.9962674200604096,
      "learning_rate": 8.338414306543764e-06,
      "loss": 0.1698,
      "step": 9916
    },
    {
      "epoch": 0.28931092829219907,
      "grad_norm": 0.8840193722246299,
      "learning_rate": 8.33806259000336e-06,
      "loss": 0.1334,
      "step": 9917
    },
    {
      "epoch": 0.2893401015228426,
      "grad_norm": 0.8890595472257062,
      "learning_rate": 8.337710843661528e-06,
      "loss": 0.1557,
      "step": 9918
    },
    {
      "epoch": 0.2893692747534862,
      "grad_norm": 0.8783307524752315,
      "learning_rate": 8.337359067521411e-06,
      "loss": 0.1654,
      "step": 9919
    },
    {
      "epoch": 0.2893984479841298,
      "grad_norm": 0.7667862558799786,
      "learning_rate": 8.33700726158615e-06,
      "loss": 0.1601,
      "step": 9920
    },
    {
      "epoch": 0.28942762121477333,
      "grad_norm": 1.0607707908253314,
      "learning_rate": 8.336655425858885e-06,
      "loss": 0.1531,
      "step": 9921
    },
    {
      "epoch": 0.2894567944454169,
      "grad_norm": 0.8227189793116355,
      "learning_rate": 8.336303560342756e-06,
      "loss": 0.1605,
      "step": 9922
    },
    {
      "epoch": 0.28948596767606044,
      "grad_norm": 0.8533704546216876,
      "learning_rate": 8.335951665040904e-06,
      "loss": 0.1693,
      "step": 9923
    },
    {
      "epoch": 0.289515140906704,
      "grad_norm": 0.8029944727248988,
      "learning_rate": 8.335599739956474e-06,
      "loss": 0.1656,
      "step": 9924
    },
    {
      "epoch": 0.28954431413734755,
      "grad_norm": 0.8797297804836803,
      "learning_rate": 8.335247785092604e-06,
      "loss": 0.1341,
      "step": 9925
    },
    {
      "epoch": 0.28957348736799116,
      "grad_norm": 0.8583529494083441,
      "learning_rate": 8.33489580045244e-06,
      "loss": 0.1602,
      "step": 9926
    },
    {
      "epoch": 0.2896026605986347,
      "grad_norm": 0.7376563991148708,
      "learning_rate": 8.334543786039122e-06,
      "loss": 0.1736,
      "step": 9927
    },
    {
      "epoch": 0.28963183382927826,
      "grad_norm": 0.8326224275218932,
      "learning_rate": 8.33419174185579e-06,
      "loss": 0.1708,
      "step": 9928
    },
    {
      "epoch": 0.2896610070599218,
      "grad_norm": 0.7862764218821267,
      "learning_rate": 8.333839667905594e-06,
      "loss": 0.1532,
      "step": 9929
    },
    {
      "epoch": 0.28969018029056537,
      "grad_norm": 0.6965774250218687,
      "learning_rate": 8.333487564191672e-06,
      "loss": 0.1631,
      "step": 9930
    },
    {
      "epoch": 0.2897193535212089,
      "grad_norm": 0.7734103847833171,
      "learning_rate": 8.333135430717167e-06,
      "loss": 0.1421,
      "step": 9931
    },
    {
      "epoch": 0.2897485267518525,
      "grad_norm": 0.7402186181779997,
      "learning_rate": 8.332783267485227e-06,
      "loss": 0.1544,
      "step": 9932
    },
    {
      "epoch": 0.2897776999824961,
      "grad_norm": 0.8362642679456843,
      "learning_rate": 8.332431074498992e-06,
      "loss": 0.1569,
      "step": 9933
    },
    {
      "epoch": 0.28980687321313964,
      "grad_norm": 0.8623462724671603,
      "learning_rate": 8.33207885176161e-06,
      "loss": 0.14,
      "step": 9934
    },
    {
      "epoch": 0.2898360464437832,
      "grad_norm": 0.8445223702334346,
      "learning_rate": 8.331726599276221e-06,
      "loss": 0.1729,
      "step": 9935
    },
    {
      "epoch": 0.28986521967442674,
      "grad_norm": 0.885988204287692,
      "learning_rate": 8.331374317045974e-06,
      "loss": 0.1449,
      "step": 9936
    },
    {
      "epoch": 0.2898943929050703,
      "grad_norm": 0.7388917651173174,
      "learning_rate": 8.33102200507401e-06,
      "loss": 0.1503,
      "step": 9937
    },
    {
      "epoch": 0.28992356613571385,
      "grad_norm": 0.7618862505541767,
      "learning_rate": 8.33066966336348e-06,
      "loss": 0.158,
      "step": 9938
    },
    {
      "epoch": 0.2899527393663574,
      "grad_norm": 0.7039575963698365,
      "learning_rate": 8.330317291917525e-06,
      "loss": 0.1407,
      "step": 9939
    },
    {
      "epoch": 0.289981912597001,
      "grad_norm": 0.9483758954090866,
      "learning_rate": 8.32996489073929e-06,
      "loss": 0.1624,
      "step": 9940
    },
    {
      "epoch": 0.29001108582764457,
      "grad_norm": 0.7212525909556974,
      "learning_rate": 8.329612459831926e-06,
      "loss": 0.1492,
      "step": 9941
    },
    {
      "epoch": 0.2900402590582881,
      "grad_norm": 0.8573451154135282,
      "learning_rate": 8.329259999198577e-06,
      "loss": 0.1365,
      "step": 9942
    },
    {
      "epoch": 0.2900694322889317,
      "grad_norm": 0.9432659078236447,
      "learning_rate": 8.328907508842388e-06,
      "loss": 0.1544,
      "step": 9943
    },
    {
      "epoch": 0.2900986055195752,
      "grad_norm": 0.7315033120337017,
      "learning_rate": 8.328554988766509e-06,
      "loss": 0.1493,
      "step": 9944
    },
    {
      "epoch": 0.2901277787502188,
      "grad_norm": 1.167455282089146,
      "learning_rate": 8.328202438974083e-06,
      "loss": 0.1396,
      "step": 9945
    },
    {
      "epoch": 0.2901569519808624,
      "grad_norm": 1.1215715354094669,
      "learning_rate": 8.327849859468263e-06,
      "loss": 0.1559,
      "step": 9946
    },
    {
      "epoch": 0.29018612521150594,
      "grad_norm": 0.8945220203736282,
      "learning_rate": 8.327497250252192e-06,
      "loss": 0.1417,
      "step": 9947
    },
    {
      "epoch": 0.2902152984421495,
      "grad_norm": 1.0190752539257046,
      "learning_rate": 8.327144611329022e-06,
      "loss": 0.1674,
      "step": 9948
    },
    {
      "epoch": 0.29024447167279305,
      "grad_norm": 0.933557154708929,
      "learning_rate": 8.326791942701895e-06,
      "loss": 0.1578,
      "step": 9949
    },
    {
      "epoch": 0.2902736449034366,
      "grad_norm": 0.9863857215165585,
      "learning_rate": 8.326439244373968e-06,
      "loss": 0.1636,
      "step": 9950
    },
    {
      "epoch": 0.29030281813408015,
      "grad_norm": 1.4159947206301777,
      "learning_rate": 8.326086516348384e-06,
      "loss": 0.1558,
      "step": 9951
    },
    {
      "epoch": 0.2903319913647237,
      "grad_norm": 0.8933563458370728,
      "learning_rate": 8.325733758628292e-06,
      "loss": 0.1575,
      "step": 9952
    },
    {
      "epoch": 0.2903611645953673,
      "grad_norm": 1.0723245638303787,
      "learning_rate": 8.325380971216846e-06,
      "loss": 0.1594,
      "step": 9953
    },
    {
      "epoch": 0.29039033782601087,
      "grad_norm": 0.9162685575737721,
      "learning_rate": 8.325028154117191e-06,
      "loss": 0.1939,
      "step": 9954
    },
    {
      "epoch": 0.2904195110566544,
      "grad_norm": 0.7659311648089028,
      "learning_rate": 8.324675307332478e-06,
      "loss": 0.1496,
      "step": 9955
    },
    {
      "epoch": 0.290448684287298,
      "grad_norm": 0.9303261328242991,
      "learning_rate": 8.324322430865858e-06,
      "loss": 0.1618,
      "step": 9956
    },
    {
      "epoch": 0.29047785751794153,
      "grad_norm": 0.8380924897880033,
      "learning_rate": 8.32396952472048e-06,
      "loss": 0.137,
      "step": 9957
    },
    {
      "epoch": 0.2905070307485851,
      "grad_norm": 1.0817912252194481,
      "learning_rate": 8.323616588899497e-06,
      "loss": 0.1697,
      "step": 9958
    },
    {
      "epoch": 0.29053620397922864,
      "grad_norm": 0.7405987750868062,
      "learning_rate": 8.323263623406057e-06,
      "loss": 0.1583,
      "step": 9959
    },
    {
      "epoch": 0.29056537720987224,
      "grad_norm": 1.1149496128661358,
      "learning_rate": 8.322910628243314e-06,
      "loss": 0.1495,
      "step": 9960
    },
    {
      "epoch": 0.2905945504405158,
      "grad_norm": 0.8458534683375639,
      "learning_rate": 8.322557603414418e-06,
      "loss": 0.1503,
      "step": 9961
    },
    {
      "epoch": 0.29062372367115935,
      "grad_norm": 0.9372275666730926,
      "learning_rate": 8.322204548922521e-06,
      "loss": 0.1401,
      "step": 9962
    },
    {
      "epoch": 0.2906528969018029,
      "grad_norm": 0.7629056453068565,
      "learning_rate": 8.321851464770775e-06,
      "loss": 0.1817,
      "step": 9963
    },
    {
      "epoch": 0.29068207013244646,
      "grad_norm": 1.1246679773874921,
      "learning_rate": 8.321498350962331e-06,
      "loss": 0.1607,
      "step": 9964
    },
    {
      "epoch": 0.29071124336309,
      "grad_norm": 0.8833611714238749,
      "learning_rate": 8.321145207500343e-06,
      "loss": 0.1478,
      "step": 9965
    },
    {
      "epoch": 0.29074041659373356,
      "grad_norm": 0.8184608900826866,
      "learning_rate": 8.320792034387964e-06,
      "loss": 0.1447,
      "step": 9966
    },
    {
      "epoch": 0.2907695898243772,
      "grad_norm": 0.9607555983475776,
      "learning_rate": 8.320438831628345e-06,
      "loss": 0.164,
      "step": 9967
    },
    {
      "epoch": 0.2907987630550207,
      "grad_norm": 0.888310835837436,
      "learning_rate": 8.320085599224642e-06,
      "loss": 0.1523,
      "step": 9968
    },
    {
      "epoch": 0.2908279362856643,
      "grad_norm": 0.8757880741813339,
      "learning_rate": 8.319732337180008e-06,
      "loss": 0.1589,
      "step": 9969
    },
    {
      "epoch": 0.29085710951630783,
      "grad_norm": 0.8857833465418704,
      "learning_rate": 8.319379045497595e-06,
      "loss": 0.1686,
      "step": 9970
    },
    {
      "epoch": 0.2908862827469514,
      "grad_norm": 1.0051605070308964,
      "learning_rate": 8.319025724180559e-06,
      "loss": 0.1522,
      "step": 9971
    },
    {
      "epoch": 0.29091545597759494,
      "grad_norm": 0.8221400217366555,
      "learning_rate": 8.318672373232053e-06,
      "loss": 0.1533,
      "step": 9972
    },
    {
      "epoch": 0.29094462920823855,
      "grad_norm": 0.8176771708565067,
      "learning_rate": 8.318318992655232e-06,
      "loss": 0.1668,
      "step": 9973
    },
    {
      "epoch": 0.2909738024388821,
      "grad_norm": 0.8854928575385405,
      "learning_rate": 8.317965582453251e-06,
      "loss": 0.1623,
      "step": 9974
    },
    {
      "epoch": 0.29100297566952565,
      "grad_norm": 0.8171128503970739,
      "learning_rate": 8.317612142629268e-06,
      "loss": 0.1705,
      "step": 9975
    },
    {
      "epoch": 0.2910321489001692,
      "grad_norm": 0.9079090036597574,
      "learning_rate": 8.317258673186432e-06,
      "loss": 0.1538,
      "step": 9976
    },
    {
      "epoch": 0.29106132213081276,
      "grad_norm": 0.7968587160417797,
      "learning_rate": 8.316905174127906e-06,
      "loss": 0.1505,
      "step": 9977
    },
    {
      "epoch": 0.2910904953614563,
      "grad_norm": 0.8164199405906625,
      "learning_rate": 8.31655164545684e-06,
      "loss": 0.1415,
      "step": 9978
    },
    {
      "epoch": 0.29111966859209987,
      "grad_norm": 0.8377358511468971,
      "learning_rate": 8.316198087176393e-06,
      "loss": 0.1704,
      "step": 9979
    },
    {
      "epoch": 0.2911488418227435,
      "grad_norm": 0.6789145992788284,
      "learning_rate": 8.315844499289722e-06,
      "loss": 0.161,
      "step": 9980
    },
    {
      "epoch": 0.29117801505338703,
      "grad_norm": 0.9878176562475328,
      "learning_rate": 8.315490881799982e-06,
      "loss": 0.1962,
      "step": 9981
    },
    {
      "epoch": 0.2912071882840306,
      "grad_norm": 0.7551533792819878,
      "learning_rate": 8.315137234710332e-06,
      "loss": 0.1341,
      "step": 9982
    },
    {
      "epoch": 0.29123636151467414,
      "grad_norm": 1.044286035733997,
      "learning_rate": 8.314783558023927e-06,
      "loss": 0.1564,
      "step": 9983
    },
    {
      "epoch": 0.2912655347453177,
      "grad_norm": 0.7874113543302953,
      "learning_rate": 8.314429851743927e-06,
      "loss": 0.1464,
      "step": 9984
    },
    {
      "epoch": 0.29129470797596124,
      "grad_norm": 0.9118496172043951,
      "learning_rate": 8.314076115873485e-06,
      "loss": 0.1596,
      "step": 9985
    },
    {
      "epoch": 0.2913238812066048,
      "grad_norm": 0.966762188743754,
      "learning_rate": 8.313722350415767e-06,
      "loss": 0.1763,
      "step": 9986
    },
    {
      "epoch": 0.2913530544372484,
      "grad_norm": 0.8326195256442548,
      "learning_rate": 8.313368555373925e-06,
      "loss": 0.1515,
      "step": 9987
    },
    {
      "epoch": 0.29138222766789196,
      "grad_norm": 0.9867023969084737,
      "learning_rate": 8.313014730751119e-06,
      "loss": 0.1868,
      "step": 9988
    },
    {
      "epoch": 0.2914114008985355,
      "grad_norm": 0.9944314437123973,
      "learning_rate": 8.312660876550509e-06,
      "loss": 0.1834,
      "step": 9989
    },
    {
      "epoch": 0.29144057412917906,
      "grad_norm": 0.7193657700096948,
      "learning_rate": 8.312306992775254e-06,
      "loss": 0.1385,
      "step": 9990
    },
    {
      "epoch": 0.2914697473598226,
      "grad_norm": 0.7827293906850198,
      "learning_rate": 8.311953079428511e-06,
      "loss": 0.1443,
      "step": 9991
    },
    {
      "epoch": 0.29149892059046617,
      "grad_norm": 0.775521931058426,
      "learning_rate": 8.311599136513443e-06,
      "loss": 0.1345,
      "step": 9992
    },
    {
      "epoch": 0.2915280938211097,
      "grad_norm": 0.7918526820206877,
      "learning_rate": 8.311245164033208e-06,
      "loss": 0.1337,
      "step": 9993
    },
    {
      "epoch": 0.29155726705175333,
      "grad_norm": 0.9449828723661597,
      "learning_rate": 8.310891161990967e-06,
      "loss": 0.14,
      "step": 9994
    },
    {
      "epoch": 0.2915864402823969,
      "grad_norm": 0.8535034442896857,
      "learning_rate": 8.31053713038988e-06,
      "loss": 0.1706,
      "step": 9995
    },
    {
      "epoch": 0.29161561351304044,
      "grad_norm": 0.9759757025382537,
      "learning_rate": 8.31018306923311e-06,
      "loss": 0.1319,
      "step": 9996
    },
    {
      "epoch": 0.291644786743684,
      "grad_norm": 0.7421867198696321,
      "learning_rate": 8.30982897852381e-06,
      "loss": 0.1462,
      "step": 9997
    },
    {
      "epoch": 0.29167395997432755,
      "grad_norm": 0.8340210160364849,
      "learning_rate": 8.309474858265153e-06,
      "loss": 0.1441,
      "step": 9998
    },
    {
      "epoch": 0.2917031332049711,
      "grad_norm": 0.9196255388910509,
      "learning_rate": 8.309120708460291e-06,
      "loss": 0.1611,
      "step": 9999
    },
    {
      "epoch": 0.29173230643561465,
      "grad_norm": 0.7995903341022464,
      "learning_rate": 8.30876652911239e-06,
      "loss": 0.1376,
      "step": 10000
    },
    {
      "epoch": 0.29176147966625826,
      "grad_norm": 0.5775818656754369,
      "learning_rate": 8.308412320224612e-06,
      "loss": 0.1346,
      "step": 10001
    },
    {
      "epoch": 0.2917906528969018,
      "grad_norm": 0.7585769294015465,
      "learning_rate": 8.30805808180012e-06,
      "loss": 0.1564,
      "step": 10002
    },
    {
      "epoch": 0.29181982612754537,
      "grad_norm": 0.9220451686588651,
      "learning_rate": 8.307703813842071e-06,
      "loss": 0.1724,
      "step": 10003
    },
    {
      "epoch": 0.2918489993581889,
      "grad_norm": 0.7012222953328154,
      "learning_rate": 8.307349516353634e-06,
      "loss": 0.1286,
      "step": 10004
    },
    {
      "epoch": 0.2918781725888325,
      "grad_norm": 0.7923834248307075,
      "learning_rate": 8.306995189337973e-06,
      "loss": 0.1679,
      "step": 10005
    },
    {
      "epoch": 0.291907345819476,
      "grad_norm": 0.946432255528313,
      "learning_rate": 8.306640832798242e-06,
      "loss": 0.1706,
      "step": 10006
    },
    {
      "epoch": 0.29193651905011964,
      "grad_norm": 0.8277375669619255,
      "learning_rate": 8.306286446737616e-06,
      "loss": 0.1492,
      "step": 10007
    },
    {
      "epoch": 0.2919656922807632,
      "grad_norm": 0.9954771463628386,
      "learning_rate": 8.305932031159253e-06,
      "loss": 0.1566,
      "step": 10008
    },
    {
      "epoch": 0.29199486551140674,
      "grad_norm": 0.8562541686694175,
      "learning_rate": 8.305577586066317e-06,
      "loss": 0.1625,
      "step": 10009
    },
    {
      "epoch": 0.2920240387420503,
      "grad_norm": 1.2764191456792593,
      "learning_rate": 8.305223111461975e-06,
      "loss": 0.1524,
      "step": 10010
    },
    {
      "epoch": 0.29205321197269385,
      "grad_norm": 1.1348108065448188,
      "learning_rate": 8.30486860734939e-06,
      "loss": 0.1657,
      "step": 10011
    },
    {
      "epoch": 0.2920823852033374,
      "grad_norm": 0.6247148365727904,
      "learning_rate": 8.304514073731724e-06,
      "loss": 0.1542,
      "step": 10012
    },
    {
      "epoch": 0.29211155843398096,
      "grad_norm": 0.8604647433551958,
      "learning_rate": 8.304159510612149e-06,
      "loss": 0.1557,
      "step": 10013
    },
    {
      "epoch": 0.29214073166462456,
      "grad_norm": 1.0468673531886143,
      "learning_rate": 8.303804917993825e-06,
      "loss": 0.1381,
      "step": 10014
    },
    {
      "epoch": 0.2921699048952681,
      "grad_norm": 0.9221026240612864,
      "learning_rate": 8.303450295879917e-06,
      "loss": 0.1417,
      "step": 10015
    },
    {
      "epoch": 0.29219907812591167,
      "grad_norm": 0.9779188760797921,
      "learning_rate": 8.303095644273598e-06,
      "loss": 0.1761,
      "step": 10016
    },
    {
      "epoch": 0.2922282513565552,
      "grad_norm": 0.95628535233947,
      "learning_rate": 8.302740963178026e-06,
      "loss": 0.1462,
      "step": 10017
    },
    {
      "epoch": 0.2922574245871988,
      "grad_norm": 0.8146698359549647,
      "learning_rate": 8.302386252596372e-06,
      "loss": 0.1274,
      "step": 10018
    },
    {
      "epoch": 0.29228659781784233,
      "grad_norm": 1.0327793173177984,
      "learning_rate": 8.302031512531802e-06,
      "loss": 0.1451,
      "step": 10019
    },
    {
      "epoch": 0.2923157710484859,
      "grad_norm": 0.783726058461353,
      "learning_rate": 8.301676742987484e-06,
      "loss": 0.1518,
      "step": 10020
    },
    {
      "epoch": 0.2923449442791295,
      "grad_norm": 0.7114796690993442,
      "learning_rate": 8.301321943966583e-06,
      "loss": 0.1537,
      "step": 10021
    },
    {
      "epoch": 0.29237411750977305,
      "grad_norm": 0.8343504034340079,
      "learning_rate": 8.30096711547227e-06,
      "loss": 0.1378,
      "step": 10022
    },
    {
      "epoch": 0.2924032907404166,
      "grad_norm": 1.0848139303363526,
      "learning_rate": 8.300612257507707e-06,
      "loss": 0.1599,
      "step": 10023
    },
    {
      "epoch": 0.29243246397106015,
      "grad_norm": 0.7767597599214419,
      "learning_rate": 8.300257370076069e-06,
      "loss": 0.1409,
      "step": 10024
    },
    {
      "epoch": 0.2924616372017037,
      "grad_norm": 0.8318580216756729,
      "learning_rate": 8.29990245318052e-06,
      "loss": 0.1302,
      "step": 10025
    },
    {
      "epoch": 0.29249081043234726,
      "grad_norm": 0.9190345106170331,
      "learning_rate": 8.299547506824228e-06,
      "loss": 0.1367,
      "step": 10026
    },
    {
      "epoch": 0.2925199836629908,
      "grad_norm": 0.8454636319416081,
      "learning_rate": 8.299192531010365e-06,
      "loss": 0.141,
      "step": 10027
    },
    {
      "epoch": 0.2925491568936344,
      "grad_norm": 0.9496611352931372,
      "learning_rate": 8.298837525742099e-06,
      "loss": 0.1216,
      "step": 10028
    },
    {
      "epoch": 0.292578330124278,
      "grad_norm": 1.0058824718148227,
      "learning_rate": 8.298482491022597e-06,
      "loss": 0.1559,
      "step": 10029
    },
    {
      "epoch": 0.2926075033549215,
      "grad_norm": 0.7105909931539826,
      "learning_rate": 8.298127426855032e-06,
      "loss": 0.1563,
      "step": 10030
    },
    {
      "epoch": 0.2926366765855651,
      "grad_norm": 0.8594947440795527,
      "learning_rate": 8.297772333242572e-06,
      "loss": 0.1431,
      "step": 10031
    },
    {
      "epoch": 0.29266584981620863,
      "grad_norm": 0.9180978214471736,
      "learning_rate": 8.29741721018839e-06,
      "loss": 0.1788,
      "step": 10032
    },
    {
      "epoch": 0.2926950230468522,
      "grad_norm": 0.8572780249185475,
      "learning_rate": 8.297062057695653e-06,
      "loss": 0.1425,
      "step": 10033
    },
    {
      "epoch": 0.2927241962774958,
      "grad_norm": 0.8718833186795538,
      "learning_rate": 8.296706875767533e-06,
      "loss": 0.1757,
      "step": 10034
    },
    {
      "epoch": 0.29275336950813935,
      "grad_norm": 1.0267974594728448,
      "learning_rate": 8.2963516644072e-06,
      "loss": 0.1799,
      "step": 10035
    },
    {
      "epoch": 0.2927825427387829,
      "grad_norm": 0.9630029145702377,
      "learning_rate": 8.295996423617828e-06,
      "loss": 0.1456,
      "step": 10036
    },
    {
      "epoch": 0.29281171596942646,
      "grad_norm": 0.9891852313316698,
      "learning_rate": 8.295641153402586e-06,
      "loss": 0.1544,
      "step": 10037
    },
    {
      "epoch": 0.29284088920007,
      "grad_norm": 0.8636462159642979,
      "learning_rate": 8.295285853764647e-06,
      "loss": 0.1688,
      "step": 10038
    },
    {
      "epoch": 0.29287006243071356,
      "grad_norm": 0.9216727137755006,
      "learning_rate": 8.294930524707181e-06,
      "loss": 0.1628,
      "step": 10039
    },
    {
      "epoch": 0.2928992356613571,
      "grad_norm": 1.1656075453781285,
      "learning_rate": 8.294575166233364e-06,
      "loss": 0.16,
      "step": 10040
    },
    {
      "epoch": 0.2929284088920007,
      "grad_norm": 0.8517564097130488,
      "learning_rate": 8.294219778346366e-06,
      "loss": 0.1477,
      "step": 10041
    },
    {
      "epoch": 0.2929575821226443,
      "grad_norm": 0.8107151679766771,
      "learning_rate": 8.293864361049358e-06,
      "loss": 0.1482,
      "step": 10042
    },
    {
      "epoch": 0.29298675535328783,
      "grad_norm": 0.7619916270717272,
      "learning_rate": 8.293508914345517e-06,
      "loss": 0.1508,
      "step": 10043
    },
    {
      "epoch": 0.2930159285839314,
      "grad_norm": 0.7420486994078926,
      "learning_rate": 8.293153438238015e-06,
      "loss": 0.1631,
      "step": 10044
    },
    {
      "epoch": 0.29304510181457494,
      "grad_norm": 0.7447317873850063,
      "learning_rate": 8.292797932730023e-06,
      "loss": 0.1728,
      "step": 10045
    },
    {
      "epoch": 0.2930742750452185,
      "grad_norm": 0.7478213728555458,
      "learning_rate": 8.292442397824721e-06,
      "loss": 0.1699,
      "step": 10046
    },
    {
      "epoch": 0.29310344827586204,
      "grad_norm": 0.6485024024547189,
      "learning_rate": 8.292086833525277e-06,
      "loss": 0.1519,
      "step": 10047
    },
    {
      "epoch": 0.29313262150650565,
      "grad_norm": 0.7590094261702236,
      "learning_rate": 8.291731239834865e-06,
      "loss": 0.1584,
      "step": 10048
    },
    {
      "epoch": 0.2931617947371492,
      "grad_norm": 0.8811821365401096,
      "learning_rate": 8.291375616756666e-06,
      "loss": 0.1418,
      "step": 10049
    },
    {
      "epoch": 0.29319096796779276,
      "grad_norm": 0.8239299166996248,
      "learning_rate": 8.291019964293852e-06,
      "loss": 0.2009,
      "step": 10050
    },
    {
      "epoch": 0.2932201411984363,
      "grad_norm": 0.6186205133828201,
      "learning_rate": 8.290664282449594e-06,
      "loss": 0.1417,
      "step": 10051
    },
    {
      "epoch": 0.29324931442907987,
      "grad_norm": 0.6901163615293291,
      "learning_rate": 8.290308571227073e-06,
      "loss": 0.138,
      "step": 10052
    },
    {
      "epoch": 0.2932784876597234,
      "grad_norm": 0.8567262364175189,
      "learning_rate": 8.289952830629462e-06,
      "loss": 0.1611,
      "step": 10053
    },
    {
      "epoch": 0.293307660890367,
      "grad_norm": 0.826080934820219,
      "learning_rate": 8.289597060659937e-06,
      "loss": 0.1556,
      "step": 10054
    },
    {
      "epoch": 0.2933368341210106,
      "grad_norm": 0.741746620216231,
      "learning_rate": 8.289241261321674e-06,
      "loss": 0.1523,
      "step": 10055
    },
    {
      "epoch": 0.29336600735165413,
      "grad_norm": 0.8285011445736965,
      "learning_rate": 8.288885432617853e-06,
      "loss": 0.1785,
      "step": 10056
    },
    {
      "epoch": 0.2933951805822977,
      "grad_norm": 0.7628317405006118,
      "learning_rate": 8.288529574551645e-06,
      "loss": 0.1475,
      "step": 10057
    },
    {
      "epoch": 0.29342435381294124,
      "grad_norm": 0.7375996428106932,
      "learning_rate": 8.288173687126231e-06,
      "loss": 0.1475,
      "step": 10058
    },
    {
      "epoch": 0.2934535270435848,
      "grad_norm": 0.9312608079105043,
      "learning_rate": 8.287817770344789e-06,
      "loss": 0.1627,
      "step": 10059
    },
    {
      "epoch": 0.29348270027422835,
      "grad_norm": 0.6759824880556607,
      "learning_rate": 8.287461824210491e-06,
      "loss": 0.1598,
      "step": 10060
    },
    {
      "epoch": 0.29351187350487196,
      "grad_norm": 1.141226546046676,
      "learning_rate": 8.287105848726523e-06,
      "loss": 0.1404,
      "step": 10061
    },
    {
      "epoch": 0.2935410467355155,
      "grad_norm": 1.0168099581722219,
      "learning_rate": 8.286749843896058e-06,
      "loss": 0.1657,
      "step": 10062
    },
    {
      "epoch": 0.29357021996615906,
      "grad_norm": 1.096694261391748,
      "learning_rate": 8.286393809722272e-06,
      "loss": 0.1248,
      "step": 10063
    },
    {
      "epoch": 0.2935993931968026,
      "grad_norm": 1.1444504946474283,
      "learning_rate": 8.286037746208348e-06,
      "loss": 0.1403,
      "step": 10064
    },
    {
      "epoch": 0.29362856642744617,
      "grad_norm": 0.8336631442716201,
      "learning_rate": 8.285681653357465e-06,
      "loss": 0.1317,
      "step": 10065
    },
    {
      "epoch": 0.2936577396580897,
      "grad_norm": 1.1462786308024369,
      "learning_rate": 8.2853255311728e-06,
      "loss": 0.1669,
      "step": 10066
    },
    {
      "epoch": 0.2936869128887333,
      "grad_norm": 0.6630550088012315,
      "learning_rate": 8.28496937965753e-06,
      "loss": 0.1295,
      "step": 10067
    },
    {
      "epoch": 0.2937160861193769,
      "grad_norm": 0.7847929484569912,
      "learning_rate": 8.28461319881484e-06,
      "loss": 0.136,
      "step": 10068
    },
    {
      "epoch": 0.29374525935002044,
      "grad_norm": 0.800683760933707,
      "learning_rate": 8.284256988647907e-06,
      "loss": 0.1539,
      "step": 10069
    },
    {
      "epoch": 0.293774432580664,
      "grad_norm": 0.8101367953085461,
      "learning_rate": 8.283900749159912e-06,
      "loss": 0.1429,
      "step": 10070
    },
    {
      "epoch": 0.29380360581130754,
      "grad_norm": 0.8707948290695925,
      "learning_rate": 8.283544480354036e-06,
      "loss": 0.1364,
      "step": 10071
    },
    {
      "epoch": 0.2938327790419511,
      "grad_norm": 1.0013094993900706,
      "learning_rate": 8.283188182233458e-06,
      "loss": 0.1401,
      "step": 10072
    },
    {
      "epoch": 0.29386195227259465,
      "grad_norm": 0.810950696245934,
      "learning_rate": 8.282831854801359e-06,
      "loss": 0.1635,
      "step": 10073
    },
    {
      "epoch": 0.2938911255032382,
      "grad_norm": 0.9939931738019238,
      "learning_rate": 8.28247549806092e-06,
      "loss": 0.1494,
      "step": 10074
    },
    {
      "epoch": 0.2939202987338818,
      "grad_norm": 0.9989568642233154,
      "learning_rate": 8.282119112015325e-06,
      "loss": 0.1496,
      "step": 10075
    },
    {
      "epoch": 0.29394947196452537,
      "grad_norm": 0.6881224454052522,
      "learning_rate": 8.281762696667755e-06,
      "loss": 0.1391,
      "step": 10076
    },
    {
      "epoch": 0.2939786451951689,
      "grad_norm": 1.1326747256186758,
      "learning_rate": 8.281406252021389e-06,
      "loss": 0.1647,
      "step": 10077
    },
    {
      "epoch": 0.2940078184258125,
      "grad_norm": 0.9086346742634053,
      "learning_rate": 8.28104977807941e-06,
      "loss": 0.195,
      "step": 10078
    },
    {
      "epoch": 0.294036991656456,
      "grad_norm": 1.705925428776988,
      "learning_rate": 8.280693274845006e-06,
      "loss": 0.137,
      "step": 10079
    },
    {
      "epoch": 0.2940661648870996,
      "grad_norm": 1.0773313951320238,
      "learning_rate": 8.280336742321351e-06,
      "loss": 0.1713,
      "step": 10080
    },
    {
      "epoch": 0.29409533811774313,
      "grad_norm": 1.0134830466090594,
      "learning_rate": 8.279980180511636e-06,
      "loss": 0.1641,
      "step": 10081
    },
    {
      "epoch": 0.29412451134838674,
      "grad_norm": 0.7687904917679984,
      "learning_rate": 8.279623589419041e-06,
      "loss": 0.1658,
      "step": 10082
    },
    {
      "epoch": 0.2941536845790303,
      "grad_norm": 1.1714835304382243,
      "learning_rate": 8.279266969046748e-06,
      "loss": 0.1585,
      "step": 10083
    },
    {
      "epoch": 0.29418285780967385,
      "grad_norm": 0.7753132212968409,
      "learning_rate": 8.278910319397944e-06,
      "loss": 0.1488,
      "step": 10084
    },
    {
      "epoch": 0.2942120310403174,
      "grad_norm": 0.7890368342165006,
      "learning_rate": 8.27855364047581e-06,
      "loss": 0.1437,
      "step": 10085
    },
    {
      "epoch": 0.29424120427096095,
      "grad_norm": 0.998941095158542,
      "learning_rate": 8.27819693228353e-06,
      "loss": 0.1393,
      "step": 10086
    },
    {
      "epoch": 0.2942703775016045,
      "grad_norm": 1.1003955530389602,
      "learning_rate": 8.277840194824293e-06,
      "loss": 0.1545,
      "step": 10087
    },
    {
      "epoch": 0.2942995507322481,
      "grad_norm": 0.8417741980652979,
      "learning_rate": 8.277483428101282e-06,
      "loss": 0.1558,
      "step": 10088
    },
    {
      "epoch": 0.29432872396289167,
      "grad_norm": 0.8643247738015186,
      "learning_rate": 8.277126632117678e-06,
      "loss": 0.1614,
      "step": 10089
    },
    {
      "epoch": 0.2943578971935352,
      "grad_norm": 0.8045667629341287,
      "learning_rate": 8.276769806876672e-06,
      "loss": 0.1687,
      "step": 10090
    },
    {
      "epoch": 0.2943870704241788,
      "grad_norm": 0.7557136704949229,
      "learning_rate": 8.276412952381447e-06,
      "loss": 0.1519,
      "step": 10091
    },
    {
      "epoch": 0.29441624365482233,
      "grad_norm": 0.8300803948164209,
      "learning_rate": 8.27605606863519e-06,
      "loss": 0.1488,
      "step": 10092
    },
    {
      "epoch": 0.2944454168854659,
      "grad_norm": 0.8716545199341675,
      "learning_rate": 8.275699155641086e-06,
      "loss": 0.1232,
      "step": 10093
    },
    {
      "epoch": 0.29447459011610944,
      "grad_norm": 0.9740788443599583,
      "learning_rate": 8.275342213402323e-06,
      "loss": 0.1553,
      "step": 10094
    },
    {
      "epoch": 0.29450376334675304,
      "grad_norm": 0.908314983029271,
      "learning_rate": 8.274985241922085e-06,
      "loss": 0.159,
      "step": 10095
    },
    {
      "epoch": 0.2945329365773966,
      "grad_norm": 0.7413311019259263,
      "learning_rate": 8.274628241203559e-06,
      "loss": 0.1329,
      "step": 10096
    },
    {
      "epoch": 0.29456210980804015,
      "grad_norm": 0.9126331962160001,
      "learning_rate": 8.274271211249936e-06,
      "loss": 0.1348,
      "step": 10097
    },
    {
      "epoch": 0.2945912830386837,
      "grad_norm": 0.735422309302129,
      "learning_rate": 8.273914152064402e-06,
      "loss": 0.1381,
      "step": 10098
    },
    {
      "epoch": 0.29462045626932726,
      "grad_norm": 0.9076706463326716,
      "learning_rate": 8.273557063650142e-06,
      "loss": 0.1653,
      "step": 10099
    },
    {
      "epoch": 0.2946496294999708,
      "grad_norm": 0.8381643215388135,
      "learning_rate": 8.27319994601035e-06,
      "loss": 0.1624,
      "step": 10100
    },
    {
      "epoch": 0.29467880273061436,
      "grad_norm": 0.7525793137297849,
      "learning_rate": 8.272842799148204e-06,
      "loss": 0.1559,
      "step": 10101
    },
    {
      "epoch": 0.294707975961258,
      "grad_norm": 0.845378689064027,
      "learning_rate": 8.272485623066902e-06,
      "loss": 0.1535,
      "step": 10102
    },
    {
      "epoch": 0.2947371491919015,
      "grad_norm": 0.7426295645551895,
      "learning_rate": 8.272128417769631e-06,
      "loss": 0.1551,
      "step": 10103
    },
    {
      "epoch": 0.2947663224225451,
      "grad_norm": 0.7030265053522543,
      "learning_rate": 8.271771183259576e-06,
      "loss": 0.1632,
      "step": 10104
    },
    {
      "epoch": 0.29479549565318863,
      "grad_norm": 0.8667857278370785,
      "learning_rate": 8.27141391953993e-06,
      "loss": 0.1812,
      "step": 10105
    },
    {
      "epoch": 0.2948246688838322,
      "grad_norm": 0.757826397612349,
      "learning_rate": 8.271056626613882e-06,
      "loss": 0.1428,
      "step": 10106
    },
    {
      "epoch": 0.29485384211447574,
      "grad_norm": 0.7010888562714828,
      "learning_rate": 8.27069930448462e-06,
      "loss": 0.1496,
      "step": 10107
    },
    {
      "epoch": 0.2948830153451193,
      "grad_norm": 0.8489102107716576,
      "learning_rate": 8.270341953155337e-06,
      "loss": 0.1667,
      "step": 10108
    },
    {
      "epoch": 0.2949121885757629,
      "grad_norm": 0.7182762903971774,
      "learning_rate": 8.269984572629221e-06,
      "loss": 0.1508,
      "step": 10109
    },
    {
      "epoch": 0.29494136180640645,
      "grad_norm": 0.9205830169267184,
      "learning_rate": 8.269627162909464e-06,
      "loss": 0.1374,
      "step": 10110
    },
    {
      "epoch": 0.29497053503705,
      "grad_norm": 0.9013683714979126,
      "learning_rate": 8.269269723999254e-06,
      "loss": 0.1667,
      "step": 10111
    },
    {
      "epoch": 0.29499970826769356,
      "grad_norm": 0.9022818034859249,
      "learning_rate": 8.268912255901787e-06,
      "loss": 0.1521,
      "step": 10112
    },
    {
      "epoch": 0.2950288814983371,
      "grad_norm": 0.8998209867337773,
      "learning_rate": 8.268554758620251e-06,
      "loss": 0.1366,
      "step": 10113
    },
    {
      "epoch": 0.29505805472898067,
      "grad_norm": 0.9093486643780566,
      "learning_rate": 8.268197232157838e-06,
      "loss": 0.1428,
      "step": 10114
    },
    {
      "epoch": 0.2950872279596243,
      "grad_norm": 0.8043344415927015,
      "learning_rate": 8.26783967651774e-06,
      "loss": 0.1538,
      "step": 10115
    },
    {
      "epoch": 0.29511640119026783,
      "grad_norm": 0.708856084485211,
      "learning_rate": 8.267482091703149e-06,
      "loss": 0.1394,
      "step": 10116
    },
    {
      "epoch": 0.2951455744209114,
      "grad_norm": 1.2013644115603497,
      "learning_rate": 8.26712447771726e-06,
      "loss": 0.1527,
      "step": 10117
    },
    {
      "epoch": 0.29517474765155494,
      "grad_norm": 0.9668843632885805,
      "learning_rate": 8.266766834563262e-06,
      "loss": 0.16,
      "step": 10118
    },
    {
      "epoch": 0.2952039208821985,
      "grad_norm": 0.82648255757909,
      "learning_rate": 8.266409162244349e-06,
      "loss": 0.167,
      "step": 10119
    },
    {
      "epoch": 0.29523309411284204,
      "grad_norm": 0.8733450903456376,
      "learning_rate": 8.266051460763715e-06,
      "loss": 0.1487,
      "step": 10120
    },
    {
      "epoch": 0.2952622673434856,
      "grad_norm": 1.0592545546496737,
      "learning_rate": 8.265693730124554e-06,
      "loss": 0.1856,
      "step": 10121
    },
    {
      "epoch": 0.2952914405741292,
      "grad_norm": 0.8973038592028657,
      "learning_rate": 8.26533597033006e-06,
      "loss": 0.1414,
      "step": 10122
    },
    {
      "epoch": 0.29532061380477276,
      "grad_norm": 0.9415657241169062,
      "learning_rate": 8.264978181383423e-06,
      "loss": 0.167,
      "step": 10123
    },
    {
      "epoch": 0.2953497870354163,
      "grad_norm": 1.0662482061733354,
      "learning_rate": 8.264620363287844e-06,
      "loss": 0.1469,
      "step": 10124
    },
    {
      "epoch": 0.29537896026605986,
      "grad_norm": 0.954724171184433,
      "learning_rate": 8.26426251604651e-06,
      "loss": 0.1817,
      "step": 10125
    },
    {
      "epoch": 0.2954081334967034,
      "grad_norm": 0.9343136494408995,
      "learning_rate": 8.26390463966262e-06,
      "loss": 0.1478,
      "step": 10126
    },
    {
      "epoch": 0.29543730672734697,
      "grad_norm": 0.9635716826314182,
      "learning_rate": 8.263546734139372e-06,
      "loss": 0.1444,
      "step": 10127
    },
    {
      "epoch": 0.2954664799579905,
      "grad_norm": 0.7939738464961386,
      "learning_rate": 8.263188799479955e-06,
      "loss": 0.1521,
      "step": 10128
    },
    {
      "epoch": 0.29549565318863413,
      "grad_norm": 0.9325713030748594,
      "learning_rate": 8.262830835687568e-06,
      "loss": 0.1633,
      "step": 10129
    },
    {
      "epoch": 0.2955248264192777,
      "grad_norm": 1.0552677541169195,
      "learning_rate": 8.262472842765405e-06,
      "loss": 0.1472,
      "step": 10130
    },
    {
      "epoch": 0.29555399964992124,
      "grad_norm": 0.9015243805482912,
      "learning_rate": 8.262114820716665e-06,
      "loss": 0.1473,
      "step": 10131
    },
    {
      "epoch": 0.2955831728805648,
      "grad_norm": 0.7980512752168968,
      "learning_rate": 8.261756769544541e-06,
      "loss": 0.1392,
      "step": 10132
    },
    {
      "epoch": 0.29561234611120835,
      "grad_norm": 1.1291745660181034,
      "learning_rate": 8.261398689252234e-06,
      "loss": 0.1683,
      "step": 10133
    },
    {
      "epoch": 0.2956415193418519,
      "grad_norm": 1.0890608310415952,
      "learning_rate": 8.261040579842933e-06,
      "loss": 0.161,
      "step": 10134
    },
    {
      "epoch": 0.29567069257249545,
      "grad_norm": 0.8316305480572167,
      "learning_rate": 8.260682441319845e-06,
      "loss": 0.1539,
      "step": 10135
    },
    {
      "epoch": 0.29569986580313906,
      "grad_norm": 0.9928843511492947,
      "learning_rate": 8.26032427368616e-06,
      "loss": 0.1514,
      "step": 10136
    },
    {
      "epoch": 0.2957290390337826,
      "grad_norm": 0.8259319623286105,
      "learning_rate": 8.25996607694508e-06,
      "loss": 0.1602,
      "step": 10137
    },
    {
      "epoch": 0.29575821226442617,
      "grad_norm": 0.9239118835587717,
      "learning_rate": 8.2596078510998e-06,
      "loss": 0.1593,
      "step": 10138
    },
    {
      "epoch": 0.2957873854950697,
      "grad_norm": 0.8069166392881194,
      "learning_rate": 8.259249596153521e-06,
      "loss": 0.1392,
      "step": 10139
    },
    {
      "epoch": 0.2958165587257133,
      "grad_norm": 0.9033003788543029,
      "learning_rate": 8.258891312109435e-06,
      "loss": 0.1503,
      "step": 10140
    },
    {
      "epoch": 0.2958457319563568,
      "grad_norm": 0.6946109693948403,
      "learning_rate": 8.25853299897075e-06,
      "loss": 0.1545,
      "step": 10141
    },
    {
      "epoch": 0.2958749051870004,
      "grad_norm": 0.9662825112047134,
      "learning_rate": 8.258174656740659e-06,
      "loss": 0.1629,
      "step": 10142
    },
    {
      "epoch": 0.295904078417644,
      "grad_norm": 0.7973506026688649,
      "learning_rate": 8.257816285422362e-06,
      "loss": 0.1412,
      "step": 10143
    },
    {
      "epoch": 0.29593325164828754,
      "grad_norm": 0.7062174155878412,
      "learning_rate": 8.257457885019059e-06,
      "loss": 0.1593,
      "step": 10144
    },
    {
      "epoch": 0.2959624248789311,
      "grad_norm": 0.8000916472159385,
      "learning_rate": 8.25709945553395e-06,
      "loss": 0.1429,
      "step": 10145
    },
    {
      "epoch": 0.29599159810957465,
      "grad_norm": 0.9658574160510581,
      "learning_rate": 8.256740996970233e-06,
      "loss": 0.1582,
      "step": 10146
    },
    {
      "epoch": 0.2960207713402182,
      "grad_norm": 0.6404408032298828,
      "learning_rate": 8.256382509331111e-06,
      "loss": 0.1401,
      "step": 10147
    },
    {
      "epoch": 0.29604994457086176,
      "grad_norm": 0.7047901170856019,
      "learning_rate": 8.256023992619784e-06,
      "loss": 0.1565,
      "step": 10148
    },
    {
      "epoch": 0.29607911780150536,
      "grad_norm": 0.9099826941267748,
      "learning_rate": 8.255665446839452e-06,
      "loss": 0.1507,
      "step": 10149
    },
    {
      "epoch": 0.2961082910321489,
      "grad_norm": 0.7380877626178057,
      "learning_rate": 8.255306871993314e-06,
      "loss": 0.1369,
      "step": 10150
    },
    {
      "epoch": 0.29613746426279247,
      "grad_norm": 0.7092539338751792,
      "learning_rate": 8.254948268084577e-06,
      "loss": 0.1561,
      "step": 10151
    },
    {
      "epoch": 0.296166637493436,
      "grad_norm": 0.8490075569884373,
      "learning_rate": 8.254589635116436e-06,
      "loss": 0.1482,
      "step": 10152
    },
    {
      "epoch": 0.2961958107240796,
      "grad_norm": 0.7187741221837821,
      "learning_rate": 8.254230973092097e-06,
      "loss": 0.1334,
      "step": 10153
    },
    {
      "epoch": 0.29622498395472313,
      "grad_norm": 0.7570635529022741,
      "learning_rate": 8.253872282014759e-06,
      "loss": 0.1869,
      "step": 10154
    },
    {
      "epoch": 0.2962541571853667,
      "grad_norm": 0.7745372825322814,
      "learning_rate": 8.253513561887627e-06,
      "loss": 0.148,
      "step": 10155
    },
    {
      "epoch": 0.2962833304160103,
      "grad_norm": 0.9402534676032971,
      "learning_rate": 8.253154812713903e-06,
      "loss": 0.1736,
      "step": 10156
    },
    {
      "epoch": 0.29631250364665385,
      "grad_norm": 0.9180733339274976,
      "learning_rate": 8.252796034496791e-06,
      "loss": 0.1718,
      "step": 10157
    },
    {
      "epoch": 0.2963416768772974,
      "grad_norm": 0.8470308732823799,
      "learning_rate": 8.252437227239489e-06,
      "loss": 0.1631,
      "step": 10158
    },
    {
      "epoch": 0.29637085010794095,
      "grad_norm": 0.7592252529268745,
      "learning_rate": 8.252078390945206e-06,
      "loss": 0.1559,
      "step": 10159
    },
    {
      "epoch": 0.2964000233385845,
      "grad_norm": 0.8559338330101544,
      "learning_rate": 8.251719525617144e-06,
      "loss": 0.1336,
      "step": 10160
    },
    {
      "epoch": 0.29642919656922806,
      "grad_norm": 0.8125242378958871,
      "learning_rate": 8.251360631258503e-06,
      "loss": 0.1435,
      "step": 10161
    },
    {
      "epoch": 0.2964583697998716,
      "grad_norm": 0.657284100177316,
      "learning_rate": 8.251001707872495e-06,
      "loss": 0.1442,
      "step": 10162
    },
    {
      "epoch": 0.2964875430305152,
      "grad_norm": 0.6370550522371218,
      "learning_rate": 8.250642755462318e-06,
      "loss": 0.1543,
      "step": 10163
    },
    {
      "epoch": 0.2965167162611588,
      "grad_norm": 0.7726338232353873,
      "learning_rate": 8.250283774031175e-06,
      "loss": 0.1427,
      "step": 10164
    },
    {
      "epoch": 0.29654588949180233,
      "grad_norm": 0.8420372492225615,
      "learning_rate": 8.249924763582278e-06,
      "loss": 0.175,
      "step": 10165
    },
    {
      "epoch": 0.2965750627224459,
      "grad_norm": 0.8813049245433957,
      "learning_rate": 8.249565724118828e-06,
      "loss": 0.1377,
      "step": 10166
    },
    {
      "epoch": 0.29660423595308943,
      "grad_norm": 0.7851547264469557,
      "learning_rate": 8.249206655644032e-06,
      "loss": 0.1731,
      "step": 10167
    },
    {
      "epoch": 0.296633409183733,
      "grad_norm": 0.9119747534119117,
      "learning_rate": 8.248847558161093e-06,
      "loss": 0.1742,
      "step": 10168
    },
    {
      "epoch": 0.29666258241437654,
      "grad_norm": 0.9045248243802394,
      "learning_rate": 8.248488431673221e-06,
      "loss": 0.1656,
      "step": 10169
    },
    {
      "epoch": 0.29669175564502015,
      "grad_norm": 0.8469130071152402,
      "learning_rate": 8.248129276183616e-06,
      "loss": 0.1624,
      "step": 10170
    },
    {
      "epoch": 0.2967209288756637,
      "grad_norm": 0.7706688757352926,
      "learning_rate": 8.247770091695491e-06,
      "loss": 0.1475,
      "step": 10171
    },
    {
      "epoch": 0.29675010210630726,
      "grad_norm": 0.9874774742372416,
      "learning_rate": 8.24741087821205e-06,
      "loss": 0.152,
      "step": 10172
    },
    {
      "epoch": 0.2967792753369508,
      "grad_norm": 0.9054971824276945,
      "learning_rate": 8.247051635736498e-06,
      "loss": 0.1824,
      "step": 10173
    },
    {
      "epoch": 0.29680844856759436,
      "grad_norm": 0.9128203976783008,
      "learning_rate": 8.246692364272045e-06,
      "loss": 0.1541,
      "step": 10174
    },
    {
      "epoch": 0.2968376217982379,
      "grad_norm": 1.1635042957392343,
      "learning_rate": 8.246333063821898e-06,
      "loss": 0.1382,
      "step": 10175
    },
    {
      "epoch": 0.2968667950288815,
      "grad_norm": 0.9255205883852237,
      "learning_rate": 8.245973734389263e-06,
      "loss": 0.18,
      "step": 10176
    },
    {
      "epoch": 0.2968959682595251,
      "grad_norm": 0.7999807333850611,
      "learning_rate": 8.24561437597735e-06,
      "loss": 0.1883,
      "step": 10177
    },
    {
      "epoch": 0.29692514149016863,
      "grad_norm": 0.9396477424366436,
      "learning_rate": 8.24525498858937e-06,
      "loss": 0.1387,
      "step": 10178
    },
    {
      "epoch": 0.2969543147208122,
      "grad_norm": 1.026522258031758,
      "learning_rate": 8.244895572228524e-06,
      "loss": 0.1718,
      "step": 10179
    },
    {
      "epoch": 0.29698348795145574,
      "grad_norm": 0.8259889089290392,
      "learning_rate": 8.244536126898025e-06,
      "loss": 0.177,
      "step": 10180
    },
    {
      "epoch": 0.2970126611820993,
      "grad_norm": 0.9117572925297596,
      "learning_rate": 8.244176652601084e-06,
      "loss": 0.1825,
      "step": 10181
    },
    {
      "epoch": 0.29704183441274284,
      "grad_norm": 0.8321216196138452,
      "learning_rate": 8.243817149340906e-06,
      "loss": 0.1501,
      "step": 10182
    },
    {
      "epoch": 0.29707100764338645,
      "grad_norm": 0.7396207053227941,
      "learning_rate": 8.243457617120705e-06,
      "loss": 0.1601,
      "step": 10183
    },
    {
      "epoch": 0.29710018087403,
      "grad_norm": 0.9472317680086649,
      "learning_rate": 8.243098055943687e-06,
      "loss": 0.1638,
      "step": 10184
    },
    {
      "epoch": 0.29712935410467356,
      "grad_norm": 0.7518333084253055,
      "learning_rate": 8.242738465813066e-06,
      "loss": 0.1533,
      "step": 10185
    },
    {
      "epoch": 0.2971585273353171,
      "grad_norm": 0.7751418699107506,
      "learning_rate": 8.242378846732048e-06,
      "loss": 0.1578,
      "step": 10186
    },
    {
      "epoch": 0.29718770056596067,
      "grad_norm": 0.7512033072350223,
      "learning_rate": 8.242019198703848e-06,
      "loss": 0.1619,
      "step": 10187
    },
    {
      "epoch": 0.2972168737966042,
      "grad_norm": 0.8138719776250573,
      "learning_rate": 8.241659521731672e-06,
      "loss": 0.1805,
      "step": 10188
    },
    {
      "epoch": 0.2972460470272478,
      "grad_norm": 0.705947187282989,
      "learning_rate": 8.241299815818735e-06,
      "loss": 0.1452,
      "step": 10189
    },
    {
      "epoch": 0.2972752202578914,
      "grad_norm": 0.8837392994603281,
      "learning_rate": 8.240940080968247e-06,
      "loss": 0.1459,
      "step": 10190
    },
    {
      "epoch": 0.29730439348853493,
      "grad_norm": 0.7240890297328977,
      "learning_rate": 8.240580317183419e-06,
      "loss": 0.1457,
      "step": 10191
    },
    {
      "epoch": 0.2973335667191785,
      "grad_norm": 0.7813744532725917,
      "learning_rate": 8.240220524467464e-06,
      "loss": 0.1622,
      "step": 10192
    },
    {
      "epoch": 0.29736273994982204,
      "grad_norm": 0.9073252697563148,
      "learning_rate": 8.239860702823595e-06,
      "loss": 0.1398,
      "step": 10193
    },
    {
      "epoch": 0.2973919131804656,
      "grad_norm": 0.7897438605174113,
      "learning_rate": 8.23950085225502e-06,
      "loss": 0.1438,
      "step": 10194
    },
    {
      "epoch": 0.29742108641110915,
      "grad_norm": 1.2624818808897202,
      "learning_rate": 8.239140972764956e-06,
      "loss": 0.1853,
      "step": 10195
    },
    {
      "epoch": 0.2974502596417527,
      "grad_norm": 1.672777151134189,
      "learning_rate": 8.238781064356616e-06,
      "loss": 0.1582,
      "step": 10196
    },
    {
      "epoch": 0.2974794328723963,
      "grad_norm": 0.8680324500789193,
      "learning_rate": 8.238421127033209e-06,
      "loss": 0.1537,
      "step": 10197
    },
    {
      "epoch": 0.29750860610303986,
      "grad_norm": 0.7200645621115024,
      "learning_rate": 8.238061160797955e-06,
      "loss": 0.1411,
      "step": 10198
    },
    {
      "epoch": 0.2975377793336834,
      "grad_norm": 0.9405584044150864,
      "learning_rate": 8.237701165654061e-06,
      "loss": 0.143,
      "step": 10199
    },
    {
      "epoch": 0.29756695256432697,
      "grad_norm": 0.904492235212471,
      "learning_rate": 8.237341141604744e-06,
      "loss": 0.1489,
      "step": 10200
    },
    {
      "epoch": 0.2975961257949705,
      "grad_norm": 0.8956774962792353,
      "learning_rate": 8.23698108865322e-06,
      "loss": 0.147,
      "step": 10201
    },
    {
      "epoch": 0.2976252990256141,
      "grad_norm": 0.8081462348216187,
      "learning_rate": 8.2366210068027e-06,
      "loss": 0.1521,
      "step": 10202
    },
    {
      "epoch": 0.2976544722562577,
      "grad_norm": 0.8383484266872394,
      "learning_rate": 8.2362608960564e-06,
      "loss": 0.138,
      "step": 10203
    },
    {
      "epoch": 0.29768364548690124,
      "grad_norm": 0.7245719642499691,
      "learning_rate": 8.235900756417536e-06,
      "loss": 0.141,
      "step": 10204
    },
    {
      "epoch": 0.2977128187175448,
      "grad_norm": 0.8899043513103961,
      "learning_rate": 8.235540587889323e-06,
      "loss": 0.1451,
      "step": 10205
    },
    {
      "epoch": 0.29774199194818834,
      "grad_norm": 0.8104554293482947,
      "learning_rate": 8.235180390474974e-06,
      "loss": 0.169,
      "step": 10206
    },
    {
      "epoch": 0.2977711651788319,
      "grad_norm": 0.9087468555306801,
      "learning_rate": 8.23482016417771e-06,
      "loss": 0.1293,
      "step": 10207
    },
    {
      "epoch": 0.29780033840947545,
      "grad_norm": 0.7162043882960849,
      "learning_rate": 8.234459909000743e-06,
      "loss": 0.1351,
      "step": 10208
    },
    {
      "epoch": 0.297829511640119,
      "grad_norm": 0.7840134994351502,
      "learning_rate": 8.234099624947289e-06,
      "loss": 0.1674,
      "step": 10209
    },
    {
      "epoch": 0.2978586848707626,
      "grad_norm": 1.0159502554130149,
      "learning_rate": 8.233739312020565e-06,
      "loss": 0.1701,
      "step": 10210
    },
    {
      "epoch": 0.29788785810140617,
      "grad_norm": 0.8907157936985476,
      "learning_rate": 8.233378970223789e-06,
      "loss": 0.1721,
      "step": 10211
    },
    {
      "epoch": 0.2979170313320497,
      "grad_norm": 0.6892916911517295,
      "learning_rate": 8.23301859956018e-06,
      "loss": 0.1455,
      "step": 10212
    },
    {
      "epoch": 0.2979462045626933,
      "grad_norm": 0.7885020635472038,
      "learning_rate": 8.232658200032948e-06,
      "loss": 0.1685,
      "step": 10213
    },
    {
      "epoch": 0.2979753777933368,
      "grad_norm": 0.9534180734690023,
      "learning_rate": 8.232297771645318e-06,
      "loss": 0.1571,
      "step": 10214
    },
    {
      "epoch": 0.2980045510239804,
      "grad_norm": 0.7569478004758581,
      "learning_rate": 8.231937314400505e-06,
      "loss": 0.1542,
      "step": 10215
    },
    {
      "epoch": 0.29803372425462393,
      "grad_norm": 0.805664931552756,
      "learning_rate": 8.231576828301725e-06,
      "loss": 0.1614,
      "step": 10216
    },
    {
      "epoch": 0.29806289748526754,
      "grad_norm": 0.7313367021495826,
      "learning_rate": 8.2312163133522e-06,
      "loss": 0.1556,
      "step": 10217
    },
    {
      "epoch": 0.2980920707159111,
      "grad_norm": 0.7169676352550582,
      "learning_rate": 8.23085576955515e-06,
      "loss": 0.1415,
      "step": 10218
    },
    {
      "epoch": 0.29812124394655465,
      "grad_norm": 0.6631959426055198,
      "learning_rate": 8.230495196913788e-06,
      "loss": 0.1578,
      "step": 10219
    },
    {
      "epoch": 0.2981504171771982,
      "grad_norm": 0.813108070470013,
      "learning_rate": 8.230134595431337e-06,
      "loss": 0.1515,
      "step": 10220
    },
    {
      "epoch": 0.29817959040784175,
      "grad_norm": 0.8102880711543592,
      "learning_rate": 8.229773965111014e-06,
      "loss": 0.1363,
      "step": 10221
    },
    {
      "epoch": 0.2982087636384853,
      "grad_norm": 0.7438666778322964,
      "learning_rate": 8.229413305956043e-06,
      "loss": 0.1604,
      "step": 10222
    },
    {
      "epoch": 0.29823793686912886,
      "grad_norm": 0.8293960771238174,
      "learning_rate": 8.229052617969637e-06,
      "loss": 0.1505,
      "step": 10223
    },
    {
      "epoch": 0.29826711009977247,
      "grad_norm": 0.7914743911608186,
      "learning_rate": 8.228691901155022e-06,
      "loss": 0.1632,
      "step": 10224
    },
    {
      "epoch": 0.298296283330416,
      "grad_norm": 0.7882176192556702,
      "learning_rate": 8.228331155515417e-06,
      "loss": 0.146,
      "step": 10225
    },
    {
      "epoch": 0.2983254565610596,
      "grad_norm": 0.7575265575180946,
      "learning_rate": 8.227970381054042e-06,
      "loss": 0.1801,
      "step": 10226
    },
    {
      "epoch": 0.29835462979170313,
      "grad_norm": 0.6871519850408472,
      "learning_rate": 8.227609577774116e-06,
      "loss": 0.125,
      "step": 10227
    },
    {
      "epoch": 0.2983838030223467,
      "grad_norm": 1.143200474102427,
      "learning_rate": 8.227248745678865e-06,
      "loss": 0.1453,
      "step": 10228
    },
    {
      "epoch": 0.29841297625299024,
      "grad_norm": 0.8561203769958857,
      "learning_rate": 8.226887884771506e-06,
      "loss": 0.1404,
      "step": 10229
    },
    {
      "epoch": 0.29844214948363385,
      "grad_norm": 0.8693956702410475,
      "learning_rate": 8.226526995055263e-06,
      "loss": 0.1463,
      "step": 10230
    },
    {
      "epoch": 0.2984713227142774,
      "grad_norm": 0.8539535376655861,
      "learning_rate": 8.226166076533357e-06,
      "loss": 0.1588,
      "step": 10231
    },
    {
      "epoch": 0.29850049594492095,
      "grad_norm": 0.73328261929883,
      "learning_rate": 8.22580512920901e-06,
      "loss": 0.1264,
      "step": 10232
    },
    {
      "epoch": 0.2985296691755645,
      "grad_norm": 0.9856024289609955,
      "learning_rate": 8.225444153085445e-06,
      "loss": 0.1547,
      "step": 10233
    },
    {
      "epoch": 0.29855884240620806,
      "grad_norm": 0.6253266718298031,
      "learning_rate": 8.225083148165885e-06,
      "loss": 0.1387,
      "step": 10234
    },
    {
      "epoch": 0.2985880156368516,
      "grad_norm": 0.8751337165851122,
      "learning_rate": 8.224722114453553e-06,
      "loss": 0.1614,
      "step": 10235
    },
    {
      "epoch": 0.29861718886749516,
      "grad_norm": 0.7518899303442966,
      "learning_rate": 8.22436105195167e-06,
      "loss": 0.1718,
      "step": 10236
    },
    {
      "epoch": 0.2986463620981388,
      "grad_norm": 0.7498274503018532,
      "learning_rate": 8.223999960663463e-06,
      "loss": 0.1501,
      "step": 10237
    },
    {
      "epoch": 0.2986755353287823,
      "grad_norm": 0.7797355591447336,
      "learning_rate": 8.223638840592154e-06,
      "loss": 0.1387,
      "step": 10238
    },
    {
      "epoch": 0.2987047085594259,
      "grad_norm": 0.9011564981490462,
      "learning_rate": 8.223277691740966e-06,
      "loss": 0.1606,
      "step": 10239
    },
    {
      "epoch": 0.29873388179006943,
      "grad_norm": 1.04411022699535,
      "learning_rate": 8.222916514113125e-06,
      "loss": 0.1498,
      "step": 10240
    },
    {
      "epoch": 0.298763055020713,
      "grad_norm": 0.7183070537426233,
      "learning_rate": 8.222555307711852e-06,
      "loss": 0.1487,
      "step": 10241
    },
    {
      "epoch": 0.29879222825135654,
      "grad_norm": 0.8366805257443651,
      "learning_rate": 8.222194072540377e-06,
      "loss": 0.1476,
      "step": 10242
    },
    {
      "epoch": 0.2988214014820001,
      "grad_norm": 0.774981856596977,
      "learning_rate": 8.221832808601925e-06,
      "loss": 0.1492,
      "step": 10243
    },
    {
      "epoch": 0.2988505747126437,
      "grad_norm": 0.9555939794967316,
      "learning_rate": 8.221471515899714e-06,
      "loss": 0.1579,
      "step": 10244
    },
    {
      "epoch": 0.29887974794328726,
      "grad_norm": 0.8608671731454439,
      "learning_rate": 8.221110194436976e-06,
      "loss": 0.1329,
      "step": 10245
    },
    {
      "epoch": 0.2989089211739308,
      "grad_norm": 0.6972785003151737,
      "learning_rate": 8.220748844216936e-06,
      "loss": 0.1436,
      "step": 10246
    },
    {
      "epoch": 0.29893809440457436,
      "grad_norm": 0.7262080941899789,
      "learning_rate": 8.220387465242819e-06,
      "loss": 0.1566,
      "step": 10247
    },
    {
      "epoch": 0.2989672676352179,
      "grad_norm": 0.8696697212962222,
      "learning_rate": 8.22002605751785e-06,
      "loss": 0.1439,
      "step": 10248
    },
    {
      "epoch": 0.29899644086586147,
      "grad_norm": 0.8963960824103208,
      "learning_rate": 8.219664621045258e-06,
      "loss": 0.1463,
      "step": 10249
    },
    {
      "epoch": 0.299025614096505,
      "grad_norm": 0.9134689373427836,
      "learning_rate": 8.21930315582827e-06,
      "loss": 0.1756,
      "step": 10250
    },
    {
      "epoch": 0.29905478732714863,
      "grad_norm": 0.791892052304756,
      "learning_rate": 8.21894166187011e-06,
      "loss": 0.1523,
      "step": 10251
    },
    {
      "epoch": 0.2990839605577922,
      "grad_norm": 0.8090637785611028,
      "learning_rate": 8.21858013917401e-06,
      "loss": 0.1562,
      "step": 10252
    },
    {
      "epoch": 0.29911313378843574,
      "grad_norm": 1.021161273179451,
      "learning_rate": 8.218218587743192e-06,
      "loss": 0.1435,
      "step": 10253
    },
    {
      "epoch": 0.2991423070190793,
      "grad_norm": 0.7197261281266153,
      "learning_rate": 8.217857007580888e-06,
      "loss": 0.1464,
      "step": 10254
    },
    {
      "epoch": 0.29917148024972284,
      "grad_norm": 0.9457110521780498,
      "learning_rate": 8.217495398690324e-06,
      "loss": 0.1701,
      "step": 10255
    },
    {
      "epoch": 0.2992006534803664,
      "grad_norm": 0.9163208966796982,
      "learning_rate": 8.21713376107473e-06,
      "loss": 0.1465,
      "step": 10256
    },
    {
      "epoch": 0.29922982671100995,
      "grad_norm": 0.8249674709456052,
      "learning_rate": 8.216772094737332e-06,
      "loss": 0.1586,
      "step": 10257
    },
    {
      "epoch": 0.29925899994165356,
      "grad_norm": 0.8585279812762839,
      "learning_rate": 8.216410399681365e-06,
      "loss": 0.1451,
      "step": 10258
    },
    {
      "epoch": 0.2992881731722971,
      "grad_norm": 0.6816281737593278,
      "learning_rate": 8.21604867591005e-06,
      "loss": 0.1662,
      "step": 10259
    },
    {
      "epoch": 0.29931734640294066,
      "grad_norm": 0.9843714814859522,
      "learning_rate": 8.215686923426622e-06,
      "loss": 0.1723,
      "step": 10260
    },
    {
      "epoch": 0.2993465196335842,
      "grad_norm": 0.7630391592359387,
      "learning_rate": 8.215325142234307e-06,
      "loss": 0.1229,
      "step": 10261
    },
    {
      "epoch": 0.29937569286422777,
      "grad_norm": 0.9016404250362807,
      "learning_rate": 8.214963332336339e-06,
      "loss": 0.1385,
      "step": 10262
    },
    {
      "epoch": 0.2994048660948713,
      "grad_norm": 0.8493932257954774,
      "learning_rate": 8.214601493735942e-06,
      "loss": 0.1648,
      "step": 10263
    },
    {
      "epoch": 0.29943403932551493,
      "grad_norm": 0.9210046561471733,
      "learning_rate": 8.214239626436354e-06,
      "loss": 0.1739,
      "step": 10264
    },
    {
      "epoch": 0.2994632125561585,
      "grad_norm": 0.7523427517724842,
      "learning_rate": 8.2138777304408e-06,
      "loss": 0.1584,
      "step": 10265
    },
    {
      "epoch": 0.29949238578680204,
      "grad_norm": 0.8284269398140005,
      "learning_rate": 8.213515805752513e-06,
      "loss": 0.1504,
      "step": 10266
    },
    {
      "epoch": 0.2995215590174456,
      "grad_norm": 0.7850184968983689,
      "learning_rate": 8.213153852374726e-06,
      "loss": 0.1479,
      "step": 10267
    },
    {
      "epoch": 0.29955073224808915,
      "grad_norm": 0.9958689700645952,
      "learning_rate": 8.212791870310665e-06,
      "loss": 0.1479,
      "step": 10268
    },
    {
      "epoch": 0.2995799054787327,
      "grad_norm": 0.7823202795779842,
      "learning_rate": 8.212429859563569e-06,
      "loss": 0.1367,
      "step": 10269
    },
    {
      "epoch": 0.29960907870937625,
      "grad_norm": 0.9517726455881705,
      "learning_rate": 8.212067820136663e-06,
      "loss": 0.1482,
      "step": 10270
    },
    {
      "epoch": 0.29963825194001986,
      "grad_norm": 0.747457871674271,
      "learning_rate": 8.211705752033183e-06,
      "loss": 0.1494,
      "step": 10271
    },
    {
      "epoch": 0.2996674251706634,
      "grad_norm": 0.6483071425870026,
      "learning_rate": 8.211343655256361e-06,
      "loss": 0.1229,
      "step": 10272
    },
    {
      "epoch": 0.29969659840130697,
      "grad_norm": 0.9479736087280959,
      "learning_rate": 8.210981529809432e-06,
      "loss": 0.1444,
      "step": 10273
    },
    {
      "epoch": 0.2997257716319505,
      "grad_norm": 0.6231169873050983,
      "learning_rate": 8.210619375695622e-06,
      "loss": 0.15,
      "step": 10274
    },
    {
      "epoch": 0.2997549448625941,
      "grad_norm": 0.8844057892049493,
      "learning_rate": 8.210257192918172e-06,
      "loss": 0.1386,
      "step": 10275
    },
    {
      "epoch": 0.29978411809323763,
      "grad_norm": 0.7838326925631378,
      "learning_rate": 8.20989498148031e-06,
      "loss": 0.1394,
      "step": 10276
    },
    {
      "epoch": 0.2998132913238812,
      "grad_norm": 0.7596321222748872,
      "learning_rate": 8.209532741385273e-06,
      "loss": 0.14,
      "step": 10277
    },
    {
      "epoch": 0.2998424645545248,
      "grad_norm": 0.7995005177917673,
      "learning_rate": 8.209170472636293e-06,
      "loss": 0.1774,
      "step": 10278
    },
    {
      "epoch": 0.29987163778516834,
      "grad_norm": 0.8847720040873738,
      "learning_rate": 8.208808175236607e-06,
      "loss": 0.1642,
      "step": 10279
    },
    {
      "epoch": 0.2999008110158119,
      "grad_norm": 0.7847791299400547,
      "learning_rate": 8.208445849189445e-06,
      "loss": 0.1556,
      "step": 10280
    },
    {
      "epoch": 0.29992998424645545,
      "grad_norm": 0.8683120041271374,
      "learning_rate": 8.208083494498045e-06,
      "loss": 0.1385,
      "step": 10281
    },
    {
      "epoch": 0.299959157477099,
      "grad_norm": 0.9240785679067222,
      "learning_rate": 8.207721111165643e-06,
      "loss": 0.1565,
      "step": 10282
    },
    {
      "epoch": 0.29998833070774256,
      "grad_norm": 0.7920139812191519,
      "learning_rate": 8.207358699195471e-06,
      "loss": 0.1437,
      "step": 10283
    },
    {
      "epoch": 0.3000175039383861,
      "grad_norm": 1.0384454371715393,
      "learning_rate": 8.206996258590767e-06,
      "loss": 0.1624,
      "step": 10284
    },
    {
      "epoch": 0.3000466771690297,
      "grad_norm": 1.1790725332448648,
      "learning_rate": 8.206633789354766e-06,
      "loss": 0.147,
      "step": 10285
    },
    {
      "epoch": 0.30007585039967327,
      "grad_norm": 0.7010278425790094,
      "learning_rate": 8.206271291490704e-06,
      "loss": 0.1624,
      "step": 10286
    },
    {
      "epoch": 0.3001050236303168,
      "grad_norm": 1.2912617849744015,
      "learning_rate": 8.205908765001817e-06,
      "loss": 0.1483,
      "step": 10287
    },
    {
      "epoch": 0.3001341968609604,
      "grad_norm": 1.1345470992794995,
      "learning_rate": 8.205546209891341e-06,
      "loss": 0.1336,
      "step": 10288
    },
    {
      "epoch": 0.30016337009160393,
      "grad_norm": 0.7795416089869779,
      "learning_rate": 8.205183626162515e-06,
      "loss": 0.1351,
      "step": 10289
    },
    {
      "epoch": 0.3001925433222475,
      "grad_norm": 0.9744407377689432,
      "learning_rate": 8.204821013818576e-06,
      "loss": 0.1624,
      "step": 10290
    },
    {
      "epoch": 0.3002217165528911,
      "grad_norm": 1.1623602615663815,
      "learning_rate": 8.204458372862757e-06,
      "loss": 0.1511,
      "step": 10291
    },
    {
      "epoch": 0.30025088978353465,
      "grad_norm": 1.5405276564005022,
      "learning_rate": 8.2040957032983e-06,
      "loss": 0.1634,
      "step": 10292
    },
    {
      "epoch": 0.3002800630141782,
      "grad_norm": 0.8384976869936589,
      "learning_rate": 8.203733005128443e-06,
      "loss": 0.1395,
      "step": 10293
    },
    {
      "epoch": 0.30030923624482175,
      "grad_norm": 1.538099235954295,
      "learning_rate": 8.203370278356422e-06,
      "loss": 0.1606,
      "step": 10294
    },
    {
      "epoch": 0.3003384094754653,
      "grad_norm": 0.9786164778485515,
      "learning_rate": 8.203007522985474e-06,
      "loss": 0.1685,
      "step": 10295
    },
    {
      "epoch": 0.30036758270610886,
      "grad_norm": 0.9661251732423624,
      "learning_rate": 8.202644739018839e-06,
      "loss": 0.168,
      "step": 10296
    },
    {
      "epoch": 0.3003967559367524,
      "grad_norm": 0.6613973036691084,
      "learning_rate": 8.20228192645976e-06,
      "loss": 0.1399,
      "step": 10297
    },
    {
      "epoch": 0.300425929167396,
      "grad_norm": 0.8107857653177807,
      "learning_rate": 8.201919085311468e-06,
      "loss": 0.1361,
      "step": 10298
    },
    {
      "epoch": 0.3004551023980396,
      "grad_norm": 0.9302113463508935,
      "learning_rate": 8.20155621557721e-06,
      "loss": 0.154,
      "step": 10299
    },
    {
      "epoch": 0.30048427562868313,
      "grad_norm": 0.725640049113617,
      "learning_rate": 8.20119331726022e-06,
      "loss": 0.1506,
      "step": 10300
    },
    {
      "epoch": 0.3005134488593267,
      "grad_norm": 1.125724549815784,
      "learning_rate": 8.200830390363741e-06,
      "loss": 0.1611,
      "step": 10301
    },
    {
      "epoch": 0.30054262208997023,
      "grad_norm": 0.987039406035594,
      "learning_rate": 8.200467434891013e-06,
      "loss": 0.1486,
      "step": 10302
    },
    {
      "epoch": 0.3005717953206138,
      "grad_norm": 0.7619418183972609,
      "learning_rate": 8.200104450845276e-06,
      "loss": 0.1359,
      "step": 10303
    },
    {
      "epoch": 0.30060096855125734,
      "grad_norm": 0.9464439971922779,
      "learning_rate": 8.19974143822977e-06,
      "loss": 0.1679,
      "step": 10304
    },
    {
      "epoch": 0.30063014178190095,
      "grad_norm": 0.9102843331183711,
      "learning_rate": 8.199378397047737e-06,
      "loss": 0.146,
      "step": 10305
    },
    {
      "epoch": 0.3006593150125445,
      "grad_norm": 0.8757140850972511,
      "learning_rate": 8.199015327302416e-06,
      "loss": 0.1658,
      "step": 10306
    },
    {
      "epoch": 0.30068848824318806,
      "grad_norm": 1.0102006920627888,
      "learning_rate": 8.19865222899705e-06,
      "loss": 0.2044,
      "step": 10307
    },
    {
      "epoch": 0.3007176614738316,
      "grad_norm": 1.0609644563273757,
      "learning_rate": 8.198289102134883e-06,
      "loss": 0.1744,
      "step": 10308
    },
    {
      "epoch": 0.30074683470447516,
      "grad_norm": 0.938656871725238,
      "learning_rate": 8.197925946719152e-06,
      "loss": 0.171,
      "step": 10309
    },
    {
      "epoch": 0.3007760079351187,
      "grad_norm": 0.8870932026683985,
      "learning_rate": 8.197562762753102e-06,
      "loss": 0.1357,
      "step": 10310
    },
    {
      "epoch": 0.30080518116576227,
      "grad_norm": 0.8816972483532031,
      "learning_rate": 8.197199550239974e-06,
      "loss": 0.1605,
      "step": 10311
    },
    {
      "epoch": 0.3008343543964059,
      "grad_norm": 0.9581892569146573,
      "learning_rate": 8.196836309183014e-06,
      "loss": 0.1447,
      "step": 10312
    },
    {
      "epoch": 0.30086352762704943,
      "grad_norm": 0.8903482159640526,
      "learning_rate": 8.19647303958546e-06,
      "loss": 0.1508,
      "step": 10313
    },
    {
      "epoch": 0.300892700857693,
      "grad_norm": 1.0125581683334,
      "learning_rate": 8.19610974145056e-06,
      "loss": 0.1396,
      "step": 10314
    },
    {
      "epoch": 0.30092187408833654,
      "grad_norm": 0.9137559289052214,
      "learning_rate": 8.195746414781554e-06,
      "loss": 0.1712,
      "step": 10315
    },
    {
      "epoch": 0.3009510473189801,
      "grad_norm": 0.9293851882221098,
      "learning_rate": 8.195383059581685e-06,
      "loss": 0.1784,
      "step": 10316
    },
    {
      "epoch": 0.30098022054962364,
      "grad_norm": 0.7644480786179568,
      "learning_rate": 8.195019675854201e-06,
      "loss": 0.1461,
      "step": 10317
    },
    {
      "epoch": 0.30100939378026725,
      "grad_norm": 0.954902379280209,
      "learning_rate": 8.194656263602345e-06,
      "loss": 0.1467,
      "step": 10318
    },
    {
      "epoch": 0.3010385670109108,
      "grad_norm": 0.9365848295917715,
      "learning_rate": 8.194292822829359e-06,
      "loss": 0.1612,
      "step": 10319
    },
    {
      "epoch": 0.30106774024155436,
      "grad_norm": 0.889638080171425,
      "learning_rate": 8.19392935353849e-06,
      "loss": 0.1601,
      "step": 10320
    },
    {
      "epoch": 0.3010969134721979,
      "grad_norm": 0.900540095228079,
      "learning_rate": 8.193565855732982e-06,
      "loss": 0.1455,
      "step": 10321
    },
    {
      "epoch": 0.30112608670284147,
      "grad_norm": 0.6722095940519582,
      "learning_rate": 8.193202329416079e-06,
      "loss": 0.162,
      "step": 10322
    },
    {
      "epoch": 0.301155259933485,
      "grad_norm": 1.0135786558021573,
      "learning_rate": 8.19283877459103e-06,
      "loss": 0.1422,
      "step": 10323
    },
    {
      "epoch": 0.3011844331641286,
      "grad_norm": 0.9075304908466894,
      "learning_rate": 8.192475191261078e-06,
      "loss": 0.1362,
      "step": 10324
    },
    {
      "epoch": 0.3012136063947722,
      "grad_norm": 0.9742175270809147,
      "learning_rate": 8.19211157942947e-06,
      "loss": 0.168,
      "step": 10325
    },
    {
      "epoch": 0.30124277962541574,
      "grad_norm": 0.8020497956000485,
      "learning_rate": 8.19174793909945e-06,
      "loss": 0.1618,
      "step": 10326
    },
    {
      "epoch": 0.3012719528560593,
      "grad_norm": 0.8269449779623378,
      "learning_rate": 8.191384270274267e-06,
      "loss": 0.1741,
      "step": 10327
    },
    {
      "epoch": 0.30130112608670284,
      "grad_norm": 0.8019645339230919,
      "learning_rate": 8.191020572957168e-06,
      "loss": 0.1693,
      "step": 10328
    },
    {
      "epoch": 0.3013302993173464,
      "grad_norm": 0.8147885163158093,
      "learning_rate": 8.190656847151399e-06,
      "loss": 0.1385,
      "step": 10329
    },
    {
      "epoch": 0.30135947254798995,
      "grad_norm": 0.621541687023796,
      "learning_rate": 8.190293092860206e-06,
      "loss": 0.1617,
      "step": 10330
    },
    {
      "epoch": 0.3013886457786335,
      "grad_norm": 0.7998469469603753,
      "learning_rate": 8.18992931008684e-06,
      "loss": 0.171,
      "step": 10331
    },
    {
      "epoch": 0.3014178190092771,
      "grad_norm": 0.6410746273269085,
      "learning_rate": 8.189565498834545e-06,
      "loss": 0.148,
      "step": 10332
    },
    {
      "epoch": 0.30144699223992066,
      "grad_norm": 0.7768540062680194,
      "learning_rate": 8.18920165910657e-06,
      "loss": 0.1833,
      "step": 10333
    },
    {
      "epoch": 0.3014761654705642,
      "grad_norm": 0.7328658348242143,
      "learning_rate": 8.188837790906166e-06,
      "loss": 0.1354,
      "step": 10334
    },
    {
      "epoch": 0.30150533870120777,
      "grad_norm": 0.6725912272901395,
      "learning_rate": 8.18847389423658e-06,
      "loss": 0.1334,
      "step": 10335
    },
    {
      "epoch": 0.3015345119318513,
      "grad_norm": 0.6057666850636925,
      "learning_rate": 8.188109969101057e-06,
      "loss": 0.1394,
      "step": 10336
    },
    {
      "epoch": 0.3015636851624949,
      "grad_norm": 0.9733823505793422,
      "learning_rate": 8.187746015502851e-06,
      "loss": 0.161,
      "step": 10337
    },
    {
      "epoch": 0.30159285839313843,
      "grad_norm": 0.7831182117923976,
      "learning_rate": 8.187382033445209e-06,
      "loss": 0.1712,
      "step": 10338
    },
    {
      "epoch": 0.30162203162378204,
      "grad_norm": 0.9387900721378101,
      "learning_rate": 8.187018022931383e-06,
      "loss": 0.1333,
      "step": 10339
    },
    {
      "epoch": 0.3016512048544256,
      "grad_norm": 0.882741465257527,
      "learning_rate": 8.18665398396462e-06,
      "loss": 0.1591,
      "step": 10340
    },
    {
      "epoch": 0.30168037808506915,
      "grad_norm": 0.7772272118956491,
      "learning_rate": 8.186289916548169e-06,
      "loss": 0.1591,
      "step": 10341
    },
    {
      "epoch": 0.3017095513157127,
      "grad_norm": 0.7726661431253278,
      "learning_rate": 8.185925820685283e-06,
      "loss": 0.152,
      "step": 10342
    },
    {
      "epoch": 0.30173872454635625,
      "grad_norm": 0.8159915507142284,
      "learning_rate": 8.185561696379213e-06,
      "loss": 0.167,
      "step": 10343
    },
    {
      "epoch": 0.3017678977769998,
      "grad_norm": 0.7668837526204888,
      "learning_rate": 8.185197543633207e-06,
      "loss": 0.1474,
      "step": 10344
    },
    {
      "epoch": 0.3017970710076434,
      "grad_norm": 0.7665248750590504,
      "learning_rate": 8.18483336245052e-06,
      "loss": 0.1538,
      "step": 10345
    },
    {
      "epoch": 0.30182624423828697,
      "grad_norm": 0.8241660718574091,
      "learning_rate": 8.1844691528344e-06,
      "loss": 0.1423,
      "step": 10346
    },
    {
      "epoch": 0.3018554174689305,
      "grad_norm": 0.7919856246679993,
      "learning_rate": 8.1841049147881e-06,
      "loss": 0.1321,
      "step": 10347
    },
    {
      "epoch": 0.3018845906995741,
      "grad_norm": 0.742251352204128,
      "learning_rate": 8.183740648314871e-06,
      "loss": 0.1747,
      "step": 10348
    },
    {
      "epoch": 0.3019137639302176,
      "grad_norm": 0.8004211233653133,
      "learning_rate": 8.183376353417965e-06,
      "loss": 0.1432,
      "step": 10349
    },
    {
      "epoch": 0.3019429371608612,
      "grad_norm": 0.9220228761361982,
      "learning_rate": 8.183012030100634e-06,
      "loss": 0.1413,
      "step": 10350
    },
    {
      "epoch": 0.30197211039150473,
      "grad_norm": 0.656116739552237,
      "learning_rate": 8.182647678366133e-06,
      "loss": 0.1532,
      "step": 10351
    },
    {
      "epoch": 0.30200128362214834,
      "grad_norm": 0.8473274892563499,
      "learning_rate": 8.182283298217712e-06,
      "loss": 0.1712,
      "step": 10352
    },
    {
      "epoch": 0.3020304568527919,
      "grad_norm": 0.9386840779750911,
      "learning_rate": 8.181918889658626e-06,
      "loss": 0.175,
      "step": 10353
    },
    {
      "epoch": 0.30205963008343545,
      "grad_norm": 0.7958658354844564,
      "learning_rate": 8.18155445269213e-06,
      "loss": 0.1527,
      "step": 10354
    },
    {
      "epoch": 0.302088803314079,
      "grad_norm": 0.8519466938766371,
      "learning_rate": 8.181189987321472e-06,
      "loss": 0.1483,
      "step": 10355
    },
    {
      "epoch": 0.30211797654472256,
      "grad_norm": 0.847514887919152,
      "learning_rate": 8.180825493549911e-06,
      "loss": 0.1497,
      "step": 10356
    },
    {
      "epoch": 0.3021471497753661,
      "grad_norm": 0.7475612548723383,
      "learning_rate": 8.180460971380699e-06,
      "loss": 0.1508,
      "step": 10357
    },
    {
      "epoch": 0.30217632300600966,
      "grad_norm": 0.838551162543459,
      "learning_rate": 8.18009642081709e-06,
      "loss": 0.1632,
      "step": 10358
    },
    {
      "epoch": 0.30220549623665327,
      "grad_norm": 0.9474378218034406,
      "learning_rate": 8.17973184186234e-06,
      "loss": 0.1561,
      "step": 10359
    },
    {
      "epoch": 0.3022346694672968,
      "grad_norm": 0.9161082185613947,
      "learning_rate": 8.179367234519704e-06,
      "loss": 0.1462,
      "step": 10360
    },
    {
      "epoch": 0.3022638426979404,
      "grad_norm": 0.8657332818263342,
      "learning_rate": 8.179002598792435e-06,
      "loss": 0.1411,
      "step": 10361
    },
    {
      "epoch": 0.30229301592858393,
      "grad_norm": 0.9058986082532352,
      "learning_rate": 8.17863793468379e-06,
      "loss": 0.1604,
      "step": 10362
    },
    {
      "epoch": 0.3023221891592275,
      "grad_norm": 0.7833653167162915,
      "learning_rate": 8.178273242197025e-06,
      "loss": 0.1455,
      "step": 10363
    },
    {
      "epoch": 0.30235136238987104,
      "grad_norm": 0.752484372612084,
      "learning_rate": 8.177908521335395e-06,
      "loss": 0.1505,
      "step": 10364
    },
    {
      "epoch": 0.3023805356205146,
      "grad_norm": 0.8873062199169135,
      "learning_rate": 8.177543772102155e-06,
      "loss": 0.1355,
      "step": 10365
    },
    {
      "epoch": 0.3024097088511582,
      "grad_norm": 0.8772193812904566,
      "learning_rate": 8.177178994500564e-06,
      "loss": 0.1776,
      "step": 10366
    },
    {
      "epoch": 0.30243888208180175,
      "grad_norm": 0.7696517234416018,
      "learning_rate": 8.176814188533877e-06,
      "loss": 0.1451,
      "step": 10367
    },
    {
      "epoch": 0.3024680553124453,
      "grad_norm": 0.8426114226823732,
      "learning_rate": 8.17644935420535e-06,
      "loss": 0.1495,
      "step": 10368
    },
    {
      "epoch": 0.30249722854308886,
      "grad_norm": 0.8623379632947579,
      "learning_rate": 8.176084491518245e-06,
      "loss": 0.1502,
      "step": 10369
    },
    {
      "epoch": 0.3025264017737324,
      "grad_norm": 0.7152243468324411,
      "learning_rate": 8.175719600475813e-06,
      "loss": 0.1406,
      "step": 10370
    },
    {
      "epoch": 0.30255557500437597,
      "grad_norm": 0.9131510402685786,
      "learning_rate": 8.175354681081316e-06,
      "loss": 0.1512,
      "step": 10371
    },
    {
      "epoch": 0.3025847482350195,
      "grad_norm": 0.8318978801392292,
      "learning_rate": 8.174989733338009e-06,
      "loss": 0.1558,
      "step": 10372
    },
    {
      "epoch": 0.3026139214656631,
      "grad_norm": 0.7242443691616607,
      "learning_rate": 8.174624757249153e-06,
      "loss": 0.1456,
      "step": 10373
    },
    {
      "epoch": 0.3026430946963067,
      "grad_norm": 0.8414540738241345,
      "learning_rate": 8.174259752818003e-06,
      "loss": 0.1537,
      "step": 10374
    },
    {
      "epoch": 0.30267226792695023,
      "grad_norm": 0.7253542838346363,
      "learning_rate": 8.173894720047821e-06,
      "loss": 0.1369,
      "step": 10375
    },
    {
      "epoch": 0.3027014411575938,
      "grad_norm": 0.7510016927922919,
      "learning_rate": 8.173529658941865e-06,
      "loss": 0.1605,
      "step": 10376
    },
    {
      "epoch": 0.30273061438823734,
      "grad_norm": 0.7228815788623432,
      "learning_rate": 8.173164569503393e-06,
      "loss": 0.1385,
      "step": 10377
    },
    {
      "epoch": 0.3027597876188809,
      "grad_norm": 0.947579151274672,
      "learning_rate": 8.172799451735666e-06,
      "loss": 0.2099,
      "step": 10378
    },
    {
      "epoch": 0.3027889608495245,
      "grad_norm": 0.9877209902427233,
      "learning_rate": 8.17243430564194e-06,
      "loss": 0.1683,
      "step": 10379
    },
    {
      "epoch": 0.30281813408016806,
      "grad_norm": 0.8461806348773305,
      "learning_rate": 8.172069131225481e-06,
      "loss": 0.1508,
      "step": 10380
    },
    {
      "epoch": 0.3028473073108116,
      "grad_norm": 1.0047050747683886,
      "learning_rate": 8.171703928489548e-06,
      "loss": 0.1471,
      "step": 10381
    },
    {
      "epoch": 0.30287648054145516,
      "grad_norm": 0.7623453747136857,
      "learning_rate": 8.171338697437394e-06,
      "loss": 0.1327,
      "step": 10382
    },
    {
      "epoch": 0.3029056537720987,
      "grad_norm": 0.7533753878572584,
      "learning_rate": 8.170973438072289e-06,
      "loss": 0.1624,
      "step": 10383
    },
    {
      "epoch": 0.30293482700274227,
      "grad_norm": 0.8229887691219726,
      "learning_rate": 8.170608150397489e-06,
      "loss": 0.1401,
      "step": 10384
    },
    {
      "epoch": 0.3029640002333858,
      "grad_norm": 0.6903285197247508,
      "learning_rate": 8.170242834416256e-06,
      "loss": 0.1565,
      "step": 10385
    },
    {
      "epoch": 0.30299317346402943,
      "grad_norm": 0.8827581146553132,
      "learning_rate": 8.169877490131852e-06,
      "loss": 0.1414,
      "step": 10386
    },
    {
      "epoch": 0.303022346694673,
      "grad_norm": 0.8311994245631472,
      "learning_rate": 8.16951211754754e-06,
      "loss": 0.1255,
      "step": 10387
    },
    {
      "epoch": 0.30305151992531654,
      "grad_norm": 0.9045715324433118,
      "learning_rate": 8.169146716666578e-06,
      "loss": 0.1647,
      "step": 10388
    },
    {
      "epoch": 0.3030806931559601,
      "grad_norm": 0.7350612393133354,
      "learning_rate": 8.168781287492232e-06,
      "loss": 0.1418,
      "step": 10389
    },
    {
      "epoch": 0.30310986638660364,
      "grad_norm": 0.898199394934361,
      "learning_rate": 8.168415830027762e-06,
      "loss": 0.1561,
      "step": 10390
    },
    {
      "epoch": 0.3031390396172472,
      "grad_norm": 0.8829793775558368,
      "learning_rate": 8.168050344276434e-06,
      "loss": 0.1499,
      "step": 10391
    },
    {
      "epoch": 0.30316821284789075,
      "grad_norm": 0.8110120390463106,
      "learning_rate": 8.167684830241506e-06,
      "loss": 0.1282,
      "step": 10392
    },
    {
      "epoch": 0.30319738607853436,
      "grad_norm": 0.7874125730460129,
      "learning_rate": 8.167319287926247e-06,
      "loss": 0.1314,
      "step": 10393
    },
    {
      "epoch": 0.3032265593091779,
      "grad_norm": 0.8854226381926515,
      "learning_rate": 8.166953717333915e-06,
      "loss": 0.1629,
      "step": 10394
    },
    {
      "epoch": 0.30325573253982147,
      "grad_norm": 0.9192712835689084,
      "learning_rate": 8.166588118467778e-06,
      "loss": 0.1611,
      "step": 10395
    },
    {
      "epoch": 0.303284905770465,
      "grad_norm": 1.1209985968149403,
      "learning_rate": 8.166222491331097e-06,
      "loss": 0.1592,
      "step": 10396
    },
    {
      "epoch": 0.30331407900110857,
      "grad_norm": 0.8058659321648536,
      "learning_rate": 8.165856835927138e-06,
      "loss": 0.143,
      "step": 10397
    },
    {
      "epoch": 0.3033432522317521,
      "grad_norm": 0.8609479958453766,
      "learning_rate": 8.165491152259163e-06,
      "loss": 0.1882,
      "step": 10398
    },
    {
      "epoch": 0.3033724254623957,
      "grad_norm": 0.71334878740112,
      "learning_rate": 8.165125440330443e-06,
      "loss": 0.1528,
      "step": 10399
    },
    {
      "epoch": 0.3034015986930393,
      "grad_norm": 0.8731792207689436,
      "learning_rate": 8.164759700144235e-06,
      "loss": 0.1399,
      "step": 10400
    },
    {
      "epoch": 0.30343077192368284,
      "grad_norm": 0.7985744200953458,
      "learning_rate": 8.16439393170381e-06,
      "loss": 0.1398,
      "step": 10401
    },
    {
      "epoch": 0.3034599451543264,
      "grad_norm": 0.8038517488074439,
      "learning_rate": 8.164028135012429e-06,
      "loss": 0.1645,
      "step": 10402
    },
    {
      "epoch": 0.30348911838496995,
      "grad_norm": 0.8762013050995305,
      "learning_rate": 8.163662310073362e-06,
      "loss": 0.1447,
      "step": 10403
    },
    {
      "epoch": 0.3035182916156135,
      "grad_norm": 0.8191903536601476,
      "learning_rate": 8.163296456889873e-06,
      "loss": 0.1365,
      "step": 10404
    },
    {
      "epoch": 0.30354746484625705,
      "grad_norm": 0.7931625614152237,
      "learning_rate": 8.162930575465228e-06,
      "loss": 0.1434,
      "step": 10405
    },
    {
      "epoch": 0.30357663807690066,
      "grad_norm": 0.742726212924904,
      "learning_rate": 8.162564665802693e-06,
      "loss": 0.1678,
      "step": 10406
    },
    {
      "epoch": 0.3036058113075442,
      "grad_norm": 0.8429872191417145,
      "learning_rate": 8.162198727905536e-06,
      "loss": 0.1287,
      "step": 10407
    },
    {
      "epoch": 0.30363498453818777,
      "grad_norm": 0.7898703474677157,
      "learning_rate": 8.161832761777024e-06,
      "loss": 0.1505,
      "step": 10408
    },
    {
      "epoch": 0.3036641577688313,
      "grad_norm": 0.7891222648082646,
      "learning_rate": 8.161466767420426e-06,
      "loss": 0.1725,
      "step": 10409
    },
    {
      "epoch": 0.3036933309994749,
      "grad_norm": 0.8572627779806057,
      "learning_rate": 8.161100744839004e-06,
      "loss": 0.1568,
      "step": 10410
    },
    {
      "epoch": 0.30372250423011843,
      "grad_norm": 0.8907723320209793,
      "learning_rate": 8.160734694036031e-06,
      "loss": 0.1539,
      "step": 10411
    },
    {
      "epoch": 0.303751677460762,
      "grad_norm": 0.8754776835040876,
      "learning_rate": 8.160368615014771e-06,
      "loss": 0.1557,
      "step": 10412
    },
    {
      "epoch": 0.3037808506914056,
      "grad_norm": 0.7988711441501878,
      "learning_rate": 8.160002507778497e-06,
      "loss": 0.1639,
      "step": 10413
    },
    {
      "epoch": 0.30381002392204914,
      "grad_norm": 1.0929980824225913,
      "learning_rate": 8.159636372330475e-06,
      "loss": 0.167,
      "step": 10414
    },
    {
      "epoch": 0.3038391971526927,
      "grad_norm": 1.0097993171818347,
      "learning_rate": 8.159270208673973e-06,
      "loss": 0.1495,
      "step": 10415
    },
    {
      "epoch": 0.30386837038333625,
      "grad_norm": 0.802350305488741,
      "learning_rate": 8.15890401681226e-06,
      "loss": 0.1539,
      "step": 10416
    },
    {
      "epoch": 0.3038975436139798,
      "grad_norm": 0.8516468241319436,
      "learning_rate": 8.158537796748607e-06,
      "loss": 0.1612,
      "step": 10417
    },
    {
      "epoch": 0.30392671684462336,
      "grad_norm": 0.8920408566504613,
      "learning_rate": 8.158171548486281e-06,
      "loss": 0.1745,
      "step": 10418
    },
    {
      "epoch": 0.3039558900752669,
      "grad_norm": 0.7488362337002084,
      "learning_rate": 8.157805272028557e-06,
      "loss": 0.1687,
      "step": 10419
    },
    {
      "epoch": 0.3039850633059105,
      "grad_norm": 0.6807176760151589,
      "learning_rate": 8.157438967378697e-06,
      "loss": 0.1696,
      "step": 10420
    },
    {
      "epoch": 0.30401423653655407,
      "grad_norm": 0.8600650742579082,
      "learning_rate": 8.157072634539977e-06,
      "loss": 0.1498,
      "step": 10421
    },
    {
      "epoch": 0.3040434097671976,
      "grad_norm": 0.7670456470979753,
      "learning_rate": 8.156706273515667e-06,
      "loss": 0.1672,
      "step": 10422
    },
    {
      "epoch": 0.3040725829978412,
      "grad_norm": 0.7617168129050363,
      "learning_rate": 8.156339884309038e-06,
      "loss": 0.1683,
      "step": 10423
    },
    {
      "epoch": 0.30410175622848473,
      "grad_norm": 1.0888207429500198,
      "learning_rate": 8.155973466923359e-06,
      "loss": 0.1537,
      "step": 10424
    },
    {
      "epoch": 0.3041309294591283,
      "grad_norm": 0.9988337315375342,
      "learning_rate": 8.155607021361903e-06,
      "loss": 0.1798,
      "step": 10425
    },
    {
      "epoch": 0.30416010268977184,
      "grad_norm": 0.7287563782009567,
      "learning_rate": 8.155240547627938e-06,
      "loss": 0.1542,
      "step": 10426
    },
    {
      "epoch": 0.30418927592041545,
      "grad_norm": 0.755339563244242,
      "learning_rate": 8.15487404572474e-06,
      "loss": 0.1574,
      "step": 10427
    },
    {
      "epoch": 0.304218449151059,
      "grad_norm": 0.8623245463594157,
      "learning_rate": 8.154507515655581e-06,
      "loss": 0.1942,
      "step": 10428
    },
    {
      "epoch": 0.30424762238170255,
      "grad_norm": 0.7249600682792448,
      "learning_rate": 8.15414095742373e-06,
      "loss": 0.1322,
      "step": 10429
    },
    {
      "epoch": 0.3042767956123461,
      "grad_norm": 0.7004999686107767,
      "learning_rate": 8.153774371032464e-06,
      "loss": 0.1464,
      "step": 10430
    },
    {
      "epoch": 0.30430596884298966,
      "grad_norm": 0.8112538713363775,
      "learning_rate": 8.15340775648505e-06,
      "loss": 0.161,
      "step": 10431
    },
    {
      "epoch": 0.3043351420736332,
      "grad_norm": 0.8591419625633835,
      "learning_rate": 8.153041113784767e-06,
      "loss": 0.1521,
      "step": 10432
    },
    {
      "epoch": 0.3043643153042768,
      "grad_norm": 0.8700572414342698,
      "learning_rate": 8.152674442934885e-06,
      "loss": 0.155,
      "step": 10433
    },
    {
      "epoch": 0.3043934885349204,
      "grad_norm": 0.7908035965519012,
      "learning_rate": 8.152307743938677e-06,
      "loss": 0.1467,
      "step": 10434
    },
    {
      "epoch": 0.30442266176556393,
      "grad_norm": 0.8839608715563377,
      "learning_rate": 8.151941016799419e-06,
      "loss": 0.173,
      "step": 10435
    },
    {
      "epoch": 0.3044518349962075,
      "grad_norm": 0.7360087090301753,
      "learning_rate": 8.151574261520383e-06,
      "loss": 0.1415,
      "step": 10436
    },
    {
      "epoch": 0.30448100822685104,
      "grad_norm": 0.6797613757354378,
      "learning_rate": 8.151207478104845e-06,
      "loss": 0.1459,
      "step": 10437
    },
    {
      "epoch": 0.3045101814574946,
      "grad_norm": 0.9030865435243874,
      "learning_rate": 8.15084066655608e-06,
      "loss": 0.1582,
      "step": 10438
    },
    {
      "epoch": 0.30453935468813814,
      "grad_norm": 0.6909089962920174,
      "learning_rate": 8.150473826877362e-06,
      "loss": 0.1622,
      "step": 10439
    },
    {
      "epoch": 0.30456852791878175,
      "grad_norm": 0.6740257320563846,
      "learning_rate": 8.150106959071964e-06,
      "loss": 0.1495,
      "step": 10440
    },
    {
      "epoch": 0.3045977011494253,
      "grad_norm": 0.9058561789803754,
      "learning_rate": 8.149740063143164e-06,
      "loss": 0.1684,
      "step": 10441
    },
    {
      "epoch": 0.30462687438006886,
      "grad_norm": 0.7488456272778348,
      "learning_rate": 8.149373139094234e-06,
      "loss": 0.1491,
      "step": 10442
    },
    {
      "epoch": 0.3046560476107124,
      "grad_norm": 0.8715300170098201,
      "learning_rate": 8.149006186928456e-06,
      "loss": 0.1731,
      "step": 10443
    },
    {
      "epoch": 0.30468522084135596,
      "grad_norm": 0.6919558722174817,
      "learning_rate": 8.148639206649102e-06,
      "loss": 0.1624,
      "step": 10444
    },
    {
      "epoch": 0.3047143940719995,
      "grad_norm": 0.7923866396440833,
      "learning_rate": 8.148272198259447e-06,
      "loss": 0.1586,
      "step": 10445
    },
    {
      "epoch": 0.30474356730264307,
      "grad_norm": 0.8144296132624399,
      "learning_rate": 8.14790516176277e-06,
      "loss": 0.126,
      "step": 10446
    },
    {
      "epoch": 0.3047727405332867,
      "grad_norm": 0.7364901169217135,
      "learning_rate": 8.147538097162348e-06,
      "loss": 0.1407,
      "step": 10447
    },
    {
      "epoch": 0.30480191376393023,
      "grad_norm": 0.9646854588622465,
      "learning_rate": 8.147171004461456e-06,
      "loss": 0.157,
      "step": 10448
    },
    {
      "epoch": 0.3048310869945738,
      "grad_norm": 0.7595725187358502,
      "learning_rate": 8.146803883663374e-06,
      "loss": 0.1316,
      "step": 10449
    },
    {
      "epoch": 0.30486026022521734,
      "grad_norm": 0.8946622909608126,
      "learning_rate": 8.146436734771377e-06,
      "loss": 0.1509,
      "step": 10450
    },
    {
      "epoch": 0.3048894334558609,
      "grad_norm": 1.0007198457280269,
      "learning_rate": 8.146069557788745e-06,
      "loss": 0.1618,
      "step": 10451
    },
    {
      "epoch": 0.30491860668650445,
      "grad_norm": 0.9129991902060763,
      "learning_rate": 8.145702352718754e-06,
      "loss": 0.1501,
      "step": 10452
    },
    {
      "epoch": 0.304947779917148,
      "grad_norm": 0.8097005749941277,
      "learning_rate": 8.145335119564683e-06,
      "loss": 0.1555,
      "step": 10453
    },
    {
      "epoch": 0.3049769531477916,
      "grad_norm": 0.9170231266424669,
      "learning_rate": 8.144967858329813e-06,
      "loss": 0.1869,
      "step": 10454
    },
    {
      "epoch": 0.30500612637843516,
      "grad_norm": 0.9509365711142003,
      "learning_rate": 8.14460056901742e-06,
      "loss": 0.1622,
      "step": 10455
    },
    {
      "epoch": 0.3050352996090787,
      "grad_norm": 0.9542346725135301,
      "learning_rate": 8.144233251630782e-06,
      "loss": 0.1453,
      "step": 10456
    },
    {
      "epoch": 0.30506447283972227,
      "grad_norm": 0.7056139469173172,
      "learning_rate": 8.14386590617318e-06,
      "loss": 0.1483,
      "step": 10457
    },
    {
      "epoch": 0.3050936460703658,
      "grad_norm": 0.8549857988433522,
      "learning_rate": 8.143498532647897e-06,
      "loss": 0.1475,
      "step": 10458
    },
    {
      "epoch": 0.3051228193010094,
      "grad_norm": 1.0269668522883968,
      "learning_rate": 8.143131131058208e-06,
      "loss": 0.16,
      "step": 10459
    },
    {
      "epoch": 0.305151992531653,
      "grad_norm": 0.7689087023102948,
      "learning_rate": 8.142763701407392e-06,
      "loss": 0.1167,
      "step": 10460
    },
    {
      "epoch": 0.30518116576229654,
      "grad_norm": 1.0460279979785334,
      "learning_rate": 8.142396243698735e-06,
      "loss": 0.1608,
      "step": 10461
    },
    {
      "epoch": 0.3052103389929401,
      "grad_norm": 0.9350770804843989,
      "learning_rate": 8.142028757935512e-06,
      "loss": 0.1536,
      "step": 10462
    },
    {
      "epoch": 0.30523951222358364,
      "grad_norm": 1.0405166274056603,
      "learning_rate": 8.141661244121008e-06,
      "loss": 0.1754,
      "step": 10463
    },
    {
      "epoch": 0.3052686854542272,
      "grad_norm": 0.6683346329780173,
      "learning_rate": 8.141293702258503e-06,
      "loss": 0.1338,
      "step": 10464
    },
    {
      "epoch": 0.30529785868487075,
      "grad_norm": 0.6545227732087646,
      "learning_rate": 8.140926132351276e-06,
      "loss": 0.1357,
      "step": 10465
    },
    {
      "epoch": 0.3053270319155143,
      "grad_norm": 0.8560348701733025,
      "learning_rate": 8.140558534402612e-06,
      "loss": 0.1387,
      "step": 10466
    },
    {
      "epoch": 0.3053562051461579,
      "grad_norm": 0.6970725379754402,
      "learning_rate": 8.14019090841579e-06,
      "loss": 0.1431,
      "step": 10467
    },
    {
      "epoch": 0.30538537837680146,
      "grad_norm": 0.7710512787846989,
      "learning_rate": 8.139823254394093e-06,
      "loss": 0.1352,
      "step": 10468
    },
    {
      "epoch": 0.305414551607445,
      "grad_norm": 0.8962857019792468,
      "learning_rate": 8.139455572340805e-06,
      "loss": 0.1635,
      "step": 10469
    },
    {
      "epoch": 0.30544372483808857,
      "grad_norm": 0.905799204557261,
      "learning_rate": 8.139087862259207e-06,
      "loss": 0.1646,
      "step": 10470
    },
    {
      "epoch": 0.3054728980687321,
      "grad_norm": 0.8407657457303579,
      "learning_rate": 8.138720124152579e-06,
      "loss": 0.1542,
      "step": 10471
    },
    {
      "epoch": 0.3055020712993757,
      "grad_norm": 0.8183441760590756,
      "learning_rate": 8.13835235802421e-06,
      "loss": 0.1626,
      "step": 10472
    },
    {
      "epoch": 0.30553124453001923,
      "grad_norm": 0.763458797159557,
      "learning_rate": 8.137984563877379e-06,
      "loss": 0.1529,
      "step": 10473
    },
    {
      "epoch": 0.30556041776066284,
      "grad_norm": 0.937601898474738,
      "learning_rate": 8.137616741715371e-06,
      "loss": 0.1529,
      "step": 10474
    },
    {
      "epoch": 0.3055895909913064,
      "grad_norm": 0.9490012727975684,
      "learning_rate": 8.137248891541471e-06,
      "loss": 0.1453,
      "step": 10475
    },
    {
      "epoch": 0.30561876422194995,
      "grad_norm": 0.8531084592155752,
      "learning_rate": 8.136881013358961e-06,
      "loss": 0.1378,
      "step": 10476
    },
    {
      "epoch": 0.3056479374525935,
      "grad_norm": 0.8723797164563764,
      "learning_rate": 8.136513107171125e-06,
      "loss": 0.1579,
      "step": 10477
    },
    {
      "epoch": 0.30567711068323705,
      "grad_norm": 0.8086968325029635,
      "learning_rate": 8.13614517298125e-06,
      "loss": 0.161,
      "step": 10478
    },
    {
      "epoch": 0.3057062839138806,
      "grad_norm": 0.8715856525622632,
      "learning_rate": 8.13577721079262e-06,
      "loss": 0.146,
      "step": 10479
    },
    {
      "epoch": 0.30573545714452416,
      "grad_norm": 0.7591362339179039,
      "learning_rate": 8.13540922060852e-06,
      "loss": 0.1557,
      "step": 10480
    },
    {
      "epoch": 0.30576463037516777,
      "grad_norm": 0.8213478670060186,
      "learning_rate": 8.135041202432233e-06,
      "loss": 0.127,
      "step": 10481
    },
    {
      "epoch": 0.3057938036058113,
      "grad_norm": 0.8292046129731733,
      "learning_rate": 8.134673156267048e-06,
      "loss": 0.1474,
      "step": 10482
    },
    {
      "epoch": 0.3058229768364549,
      "grad_norm": 0.7640714590123188,
      "learning_rate": 8.134305082116247e-06,
      "loss": 0.1731,
      "step": 10483
    },
    {
      "epoch": 0.3058521500670984,
      "grad_norm": 0.7413570950614466,
      "learning_rate": 8.133936979983122e-06,
      "loss": 0.1592,
      "step": 10484
    },
    {
      "epoch": 0.305881323297742,
      "grad_norm": 0.9120628833778192,
      "learning_rate": 8.133568849870953e-06,
      "loss": 0.1588,
      "step": 10485
    },
    {
      "epoch": 0.30591049652838553,
      "grad_norm": 0.7695217672867839,
      "learning_rate": 8.13320069178303e-06,
      "loss": 0.1248,
      "step": 10486
    },
    {
      "epoch": 0.30593966975902914,
      "grad_norm": 0.999547933643516,
      "learning_rate": 8.13283250572264e-06,
      "loss": 0.1553,
      "step": 10487
    },
    {
      "epoch": 0.3059688429896727,
      "grad_norm": 0.8916076244179395,
      "learning_rate": 8.132464291693068e-06,
      "loss": 0.1727,
      "step": 10488
    },
    {
      "epoch": 0.30599801622031625,
      "grad_norm": 0.6844763225824567,
      "learning_rate": 8.132096049697604e-06,
      "loss": 0.1527,
      "step": 10489
    },
    {
      "epoch": 0.3060271894509598,
      "grad_norm": 0.9559178165337375,
      "learning_rate": 8.131727779739533e-06,
      "loss": 0.159,
      "step": 10490
    },
    {
      "epoch": 0.30605636268160336,
      "grad_norm": 0.9284463156598932,
      "learning_rate": 8.131359481822145e-06,
      "loss": 0.181,
      "step": 10491
    },
    {
      "epoch": 0.3060855359122469,
      "grad_norm": 0.781265055697093,
      "learning_rate": 8.130991155948726e-06,
      "loss": 0.1523,
      "step": 10492
    },
    {
      "epoch": 0.30611470914289046,
      "grad_norm": 0.7559726874149129,
      "learning_rate": 8.130622802122566e-06,
      "loss": 0.13,
      "step": 10493
    },
    {
      "epoch": 0.30614388237353407,
      "grad_norm": 0.9472351681259725,
      "learning_rate": 8.130254420346954e-06,
      "loss": 0.1477,
      "step": 10494
    },
    {
      "epoch": 0.3061730556041776,
      "grad_norm": 0.8739245150334625,
      "learning_rate": 8.129886010625176e-06,
      "loss": 0.1725,
      "step": 10495
    },
    {
      "epoch": 0.3062022288348212,
      "grad_norm": 0.9484586419264308,
      "learning_rate": 8.129517572960523e-06,
      "loss": 0.1354,
      "step": 10496
    },
    {
      "epoch": 0.30623140206546473,
      "grad_norm": 1.0090227898368496,
      "learning_rate": 8.129149107356285e-06,
      "loss": 0.1495,
      "step": 10497
    },
    {
      "epoch": 0.3062605752961083,
      "grad_norm": 0.9908528918921414,
      "learning_rate": 8.12878061381575e-06,
      "loss": 0.1388,
      "step": 10498
    },
    {
      "epoch": 0.30628974852675184,
      "grad_norm": 1.005759873694505,
      "learning_rate": 8.12841209234221e-06,
      "loss": 0.1626,
      "step": 10499
    },
    {
      "epoch": 0.3063189217573954,
      "grad_norm": 0.977183583589827,
      "learning_rate": 8.128043542938953e-06,
      "loss": 0.155,
      "step": 10500
    },
    {
      "epoch": 0.306348094988039,
      "grad_norm": 0.9762998814957362,
      "learning_rate": 8.12767496560927e-06,
      "loss": 0.1639,
      "step": 10501
    },
    {
      "epoch": 0.30637726821868255,
      "grad_norm": 0.8543228431308588,
      "learning_rate": 8.127306360356451e-06,
      "loss": 0.1751,
      "step": 10502
    },
    {
      "epoch": 0.3064064414493261,
      "grad_norm": 0.9352274648423903,
      "learning_rate": 8.126937727183789e-06,
      "loss": 0.1697,
      "step": 10503
    },
    {
      "epoch": 0.30643561467996966,
      "grad_norm": 1.011660987303486,
      "learning_rate": 8.12656906609457e-06,
      "loss": 0.1798,
      "step": 10504
    },
    {
      "epoch": 0.3064647879106132,
      "grad_norm": 0.8967067042704103,
      "learning_rate": 8.12620037709209e-06,
      "loss": 0.1365,
      "step": 10505
    },
    {
      "epoch": 0.30649396114125677,
      "grad_norm": 1.0385156300922904,
      "learning_rate": 8.125831660179642e-06,
      "loss": 0.171,
      "step": 10506
    },
    {
      "epoch": 0.3065231343719003,
      "grad_norm": 1.5670291613771283,
      "learning_rate": 8.125462915360511e-06,
      "loss": 0.18,
      "step": 10507
    },
    {
      "epoch": 0.3065523076025439,
      "grad_norm": 0.9472429438047364,
      "learning_rate": 8.125094142637997e-06,
      "loss": 0.163,
      "step": 10508
    },
    {
      "epoch": 0.3065814808331875,
      "grad_norm": 0.764220114055355,
      "learning_rate": 8.124725342015387e-06,
      "loss": 0.1461,
      "step": 10509
    },
    {
      "epoch": 0.30661065406383103,
      "grad_norm": 0.9179438054200285,
      "learning_rate": 8.124356513495975e-06,
      "loss": 0.1493,
      "step": 10510
    },
    {
      "epoch": 0.3066398272944746,
      "grad_norm": 0.8480395922489338,
      "learning_rate": 8.123987657083054e-06,
      "loss": 0.165,
      "step": 10511
    },
    {
      "epoch": 0.30666900052511814,
      "grad_norm": 0.6859944965232568,
      "learning_rate": 8.123618772779917e-06,
      "loss": 0.1409,
      "step": 10512
    },
    {
      "epoch": 0.3066981737557617,
      "grad_norm": 0.7930331392558869,
      "learning_rate": 8.123249860589856e-06,
      "loss": 0.1642,
      "step": 10513
    },
    {
      "epoch": 0.30672734698640525,
      "grad_norm": 0.8710376750806804,
      "learning_rate": 8.122880920516167e-06,
      "loss": 0.1315,
      "step": 10514
    },
    {
      "epoch": 0.30675652021704886,
      "grad_norm": 0.8011444341675579,
      "learning_rate": 8.122511952562143e-06,
      "loss": 0.1536,
      "step": 10515
    },
    {
      "epoch": 0.3067856934476924,
      "grad_norm": 1.2824621487969918,
      "learning_rate": 8.122142956731078e-06,
      "loss": 0.1644,
      "step": 10516
    },
    {
      "epoch": 0.30681486667833596,
      "grad_norm": 0.9464360122593795,
      "learning_rate": 8.121773933026265e-06,
      "loss": 0.1442,
      "step": 10517
    },
    {
      "epoch": 0.3068440399089795,
      "grad_norm": 0.9458128911218686,
      "learning_rate": 8.121404881451e-06,
      "loss": 0.1433,
      "step": 10518
    },
    {
      "epoch": 0.30687321313962307,
      "grad_norm": 0.9214215518317815,
      "learning_rate": 8.121035802008577e-06,
      "loss": 0.1413,
      "step": 10519
    },
    {
      "epoch": 0.3069023863702666,
      "grad_norm": 1.0435395184292795,
      "learning_rate": 8.120666694702292e-06,
      "loss": 0.1555,
      "step": 10520
    },
    {
      "epoch": 0.30693155960091023,
      "grad_norm": 0.9804105724680121,
      "learning_rate": 8.12029755953544e-06,
      "loss": 0.1479,
      "step": 10521
    },
    {
      "epoch": 0.3069607328315538,
      "grad_norm": 0.8337310791436562,
      "learning_rate": 8.119928396511315e-06,
      "loss": 0.1397,
      "step": 10522
    },
    {
      "epoch": 0.30698990606219734,
      "grad_norm": 0.8634544296137432,
      "learning_rate": 8.119559205633213e-06,
      "loss": 0.1714,
      "step": 10523
    },
    {
      "epoch": 0.3070190792928409,
      "grad_norm": 1.511782194331731,
      "learning_rate": 8.119189986904435e-06,
      "loss": 0.1663,
      "step": 10524
    },
    {
      "epoch": 0.30704825252348444,
      "grad_norm": 0.9658302254436144,
      "learning_rate": 8.11882074032827e-06,
      "loss": 0.1587,
      "step": 10525
    },
    {
      "epoch": 0.307077425754128,
      "grad_norm": 0.8059627139149879,
      "learning_rate": 8.11845146590802e-06,
      "loss": 0.155,
      "step": 10526
    },
    {
      "epoch": 0.30710659898477155,
      "grad_norm": 0.7921105237179331,
      "learning_rate": 8.118082163646979e-06,
      "loss": 0.1771,
      "step": 10527
    },
    {
      "epoch": 0.30713577221541516,
      "grad_norm": 0.8326608499445625,
      "learning_rate": 8.117712833548443e-06,
      "loss": 0.1532,
      "step": 10528
    },
    {
      "epoch": 0.3071649454460587,
      "grad_norm": 0.9911650912599619,
      "learning_rate": 8.117343475615714e-06,
      "loss": 0.179,
      "step": 10529
    },
    {
      "epoch": 0.30719411867670227,
      "grad_norm": 0.7040185556638828,
      "learning_rate": 8.116974089852085e-06,
      "loss": 0.1483,
      "step": 10530
    },
    {
      "epoch": 0.3072232919073458,
      "grad_norm": 1.1280098054724013,
      "learning_rate": 8.116604676260855e-06,
      "loss": 0.1892,
      "step": 10531
    },
    {
      "epoch": 0.3072524651379894,
      "grad_norm": 0.6794934403780349,
      "learning_rate": 8.116235234845324e-06,
      "loss": 0.1301,
      "step": 10532
    },
    {
      "epoch": 0.3072816383686329,
      "grad_norm": 0.7077044569622198,
      "learning_rate": 8.115865765608789e-06,
      "loss": 0.1644,
      "step": 10533
    },
    {
      "epoch": 0.3073108115992765,
      "grad_norm": 0.7466181617760245,
      "learning_rate": 8.115496268554545e-06,
      "loss": 0.1698,
      "step": 10534
    },
    {
      "epoch": 0.3073399848299201,
      "grad_norm": 0.6420098664136437,
      "learning_rate": 8.115126743685897e-06,
      "loss": 0.1364,
      "step": 10535
    },
    {
      "epoch": 0.30736915806056364,
      "grad_norm": 0.8882542711176362,
      "learning_rate": 8.114757191006141e-06,
      "loss": 0.1362,
      "step": 10536
    },
    {
      "epoch": 0.3073983312912072,
      "grad_norm": 1.1086496996804374,
      "learning_rate": 8.114387610518574e-06,
      "loss": 0.1501,
      "step": 10537
    },
    {
      "epoch": 0.30742750452185075,
      "grad_norm": 0.7485858463299588,
      "learning_rate": 8.1140180022265e-06,
      "loss": 0.1461,
      "step": 10538
    },
    {
      "epoch": 0.3074566777524943,
      "grad_norm": 1.022310161568353,
      "learning_rate": 8.113648366133218e-06,
      "loss": 0.1257,
      "step": 10539
    },
    {
      "epoch": 0.30748585098313785,
      "grad_norm": 0.8740042421782029,
      "learning_rate": 8.113278702242025e-06,
      "loss": 0.1386,
      "step": 10540
    },
    {
      "epoch": 0.3075150242137814,
      "grad_norm": 1.071707657078678,
      "learning_rate": 8.112909010556222e-06,
      "loss": 0.1383,
      "step": 10541
    },
    {
      "epoch": 0.307544197444425,
      "grad_norm": 1.1158674356285905,
      "learning_rate": 8.11253929107911e-06,
      "loss": 0.16,
      "step": 10542
    },
    {
      "epoch": 0.30757337067506857,
      "grad_norm": 0.8078489878682861,
      "learning_rate": 8.112169543813992e-06,
      "loss": 0.1356,
      "step": 10543
    },
    {
      "epoch": 0.3076025439057121,
      "grad_norm": 0.8692041404439562,
      "learning_rate": 8.111799768764169e-06,
      "loss": 0.1359,
      "step": 10544
    },
    {
      "epoch": 0.3076317171363557,
      "grad_norm": 1.1023630639028916,
      "learning_rate": 8.111429965932938e-06,
      "loss": 0.1697,
      "step": 10545
    },
    {
      "epoch": 0.30766089036699923,
      "grad_norm": 0.8951416247908781,
      "learning_rate": 8.111060135323601e-06,
      "loss": 0.1631,
      "step": 10546
    },
    {
      "epoch": 0.3076900635976428,
      "grad_norm": 1.138681929210952,
      "learning_rate": 8.110690276939466e-06,
      "loss": 0.1515,
      "step": 10547
    },
    {
      "epoch": 0.3077192368282864,
      "grad_norm": 1.1975022017271477,
      "learning_rate": 8.110320390783828e-06,
      "loss": 0.1498,
      "step": 10548
    },
    {
      "epoch": 0.30774841005892994,
      "grad_norm": 0.780744692482144,
      "learning_rate": 8.109950476859993e-06,
      "loss": 0.1731,
      "step": 10549
    },
    {
      "epoch": 0.3077775832895735,
      "grad_norm": 0.7934698933211289,
      "learning_rate": 8.109580535171262e-06,
      "loss": 0.137,
      "step": 10550
    },
    {
      "epoch": 0.30780675652021705,
      "grad_norm": 1.0296175217764811,
      "learning_rate": 8.10921056572094e-06,
      "loss": 0.1326,
      "step": 10551
    },
    {
      "epoch": 0.3078359297508606,
      "grad_norm": 0.7560402079317322,
      "learning_rate": 8.108840568512326e-06,
      "loss": 0.1429,
      "step": 10552
    },
    {
      "epoch": 0.30786510298150416,
      "grad_norm": 0.6237040766499032,
      "learning_rate": 8.108470543548728e-06,
      "loss": 0.1237,
      "step": 10553
    },
    {
      "epoch": 0.3078942762121477,
      "grad_norm": 1.0722082215091353,
      "learning_rate": 8.108100490833444e-06,
      "loss": 0.1441,
      "step": 10554
    },
    {
      "epoch": 0.3079234494427913,
      "grad_norm": 0.8508576037764862,
      "learning_rate": 8.107730410369783e-06,
      "loss": 0.1475,
      "step": 10555
    },
    {
      "epoch": 0.3079526226734349,
      "grad_norm": 0.7939273351996611,
      "learning_rate": 8.107360302161047e-06,
      "loss": 0.1687,
      "step": 10556
    },
    {
      "epoch": 0.3079817959040784,
      "grad_norm": 0.7899107972686913,
      "learning_rate": 8.106990166210539e-06,
      "loss": 0.1441,
      "step": 10557
    },
    {
      "epoch": 0.308010969134722,
      "grad_norm": 0.902306511065696,
      "learning_rate": 8.106620002521564e-06,
      "loss": 0.1463,
      "step": 10558
    },
    {
      "epoch": 0.30804014236536553,
      "grad_norm": 0.8155122241241145,
      "learning_rate": 8.106249811097428e-06,
      "loss": 0.1644,
      "step": 10559
    },
    {
      "epoch": 0.3080693155960091,
      "grad_norm": 0.8296850751204471,
      "learning_rate": 8.105879591941436e-06,
      "loss": 0.1453,
      "step": 10560
    },
    {
      "epoch": 0.30809848882665264,
      "grad_norm": 0.8270095028586417,
      "learning_rate": 8.10550934505689e-06,
      "loss": 0.181,
      "step": 10561
    },
    {
      "epoch": 0.30812766205729625,
      "grad_norm": 0.8338867825945535,
      "learning_rate": 8.1051390704471e-06,
      "loss": 0.1396,
      "step": 10562
    },
    {
      "epoch": 0.3081568352879398,
      "grad_norm": 0.8411356780114958,
      "learning_rate": 8.10476876811537e-06,
      "loss": 0.1719,
      "step": 10563
    },
    {
      "epoch": 0.30818600851858335,
      "grad_norm": 0.8384720164246909,
      "learning_rate": 8.104398438065004e-06,
      "loss": 0.1436,
      "step": 10564
    },
    {
      "epoch": 0.3082151817492269,
      "grad_norm": 1.586304716249349,
      "learning_rate": 8.10402808029931e-06,
      "loss": 0.1643,
      "step": 10565
    },
    {
      "epoch": 0.30824435497987046,
      "grad_norm": 0.7091876904890907,
      "learning_rate": 8.103657694821597e-06,
      "loss": 0.1438,
      "step": 10566
    },
    {
      "epoch": 0.308273528210514,
      "grad_norm": 0.6861675408497042,
      "learning_rate": 8.103287281635165e-06,
      "loss": 0.1282,
      "step": 10567
    },
    {
      "epoch": 0.30830270144115757,
      "grad_norm": 0.8024224508177042,
      "learning_rate": 8.102916840743327e-06,
      "loss": 0.142,
      "step": 10568
    },
    {
      "epoch": 0.3083318746718012,
      "grad_norm": 0.739856121913459,
      "learning_rate": 8.102546372149389e-06,
      "loss": 0.1523,
      "step": 10569
    },
    {
      "epoch": 0.30836104790244473,
      "grad_norm": 0.7441723765909202,
      "learning_rate": 8.102175875856655e-06,
      "loss": 0.156,
      "step": 10570
    },
    {
      "epoch": 0.3083902211330883,
      "grad_norm": 0.8616164805014292,
      "learning_rate": 8.101805351868438e-06,
      "loss": 0.1452,
      "step": 10571
    },
    {
      "epoch": 0.30841939436373184,
      "grad_norm": 0.7507438067787525,
      "learning_rate": 8.101434800188042e-06,
      "loss": 0.1618,
      "step": 10572
    },
    {
      "epoch": 0.3084485675943754,
      "grad_norm": 0.6780369471077534,
      "learning_rate": 8.101064220818776e-06,
      "loss": 0.1563,
      "step": 10573
    },
    {
      "epoch": 0.30847774082501894,
      "grad_norm": 0.6803403032015081,
      "learning_rate": 8.10069361376395e-06,
      "loss": 0.1392,
      "step": 10574
    },
    {
      "epoch": 0.30850691405566255,
      "grad_norm": 0.7246904738957859,
      "learning_rate": 8.100322979026872e-06,
      "loss": 0.1521,
      "step": 10575
    },
    {
      "epoch": 0.3085360872863061,
      "grad_norm": 0.781055731214032,
      "learning_rate": 8.099952316610849e-06,
      "loss": 0.1332,
      "step": 10576
    },
    {
      "epoch": 0.30856526051694966,
      "grad_norm": 0.7652344949530506,
      "learning_rate": 8.099581626519193e-06,
      "loss": 0.1525,
      "step": 10577
    },
    {
      "epoch": 0.3085944337475932,
      "grad_norm": 0.7697622437224937,
      "learning_rate": 8.099210908755213e-06,
      "loss": 0.1789,
      "step": 10578
    },
    {
      "epoch": 0.30862360697823676,
      "grad_norm": 1.137822249189753,
      "learning_rate": 8.098840163322215e-06,
      "loss": 0.1519,
      "step": 10579
    },
    {
      "epoch": 0.3086527802088803,
      "grad_norm": 0.9379822407843574,
      "learning_rate": 8.098469390223514e-06,
      "loss": 0.1594,
      "step": 10580
    },
    {
      "epoch": 0.30868195343952387,
      "grad_norm": 0.7880585471282775,
      "learning_rate": 8.098098589462416e-06,
      "loss": 0.1458,
      "step": 10581
    },
    {
      "epoch": 0.3087111266701675,
      "grad_norm": 1.0291747194947232,
      "learning_rate": 8.097727761042236e-06,
      "loss": 0.1373,
      "step": 10582
    },
    {
      "epoch": 0.30874029990081103,
      "grad_norm": 0.7812957602030183,
      "learning_rate": 8.09735690496628e-06,
      "loss": 0.1317,
      "step": 10583
    },
    {
      "epoch": 0.3087694731314546,
      "grad_norm": 0.8718213140293359,
      "learning_rate": 8.096986021237863e-06,
      "loss": 0.1727,
      "step": 10584
    },
    {
      "epoch": 0.30879864636209814,
      "grad_norm": 0.9414986040552127,
      "learning_rate": 8.096615109860291e-06,
      "loss": 0.1537,
      "step": 10585
    },
    {
      "epoch": 0.3088278195927417,
      "grad_norm": 0.7669372121470045,
      "learning_rate": 8.09624417083688e-06,
      "loss": 0.122,
      "step": 10586
    },
    {
      "epoch": 0.30885699282338525,
      "grad_norm": 0.762580240102507,
      "learning_rate": 8.09587320417094e-06,
      "loss": 0.166,
      "step": 10587
    },
    {
      "epoch": 0.3088861660540288,
      "grad_norm": 0.6681375948337287,
      "learning_rate": 8.095502209865785e-06,
      "loss": 0.1337,
      "step": 10588
    },
    {
      "epoch": 0.3089153392846724,
      "grad_norm": 0.8626854000460653,
      "learning_rate": 8.095131187924723e-06,
      "loss": 0.1658,
      "step": 10589
    },
    {
      "epoch": 0.30894451251531596,
      "grad_norm": 0.7997351499456614,
      "learning_rate": 8.09476013835107e-06,
      "loss": 0.1515,
      "step": 10590
    },
    {
      "epoch": 0.3089736857459595,
      "grad_norm": 0.9376171703942249,
      "learning_rate": 8.094389061148135e-06,
      "loss": 0.1401,
      "step": 10591
    },
    {
      "epoch": 0.30900285897660307,
      "grad_norm": 0.7558847608908864,
      "learning_rate": 8.094017956319236e-06,
      "loss": 0.1524,
      "step": 10592
    },
    {
      "epoch": 0.3090320322072466,
      "grad_norm": 0.8543530620818583,
      "learning_rate": 8.093646823867683e-06,
      "loss": 0.1326,
      "step": 10593
    },
    {
      "epoch": 0.3090612054378902,
      "grad_norm": 0.7740183128550507,
      "learning_rate": 8.093275663796787e-06,
      "loss": 0.1584,
      "step": 10594
    },
    {
      "epoch": 0.3090903786685337,
      "grad_norm": 0.8451521187831812,
      "learning_rate": 8.092904476109867e-06,
      "loss": 0.1485,
      "step": 10595
    },
    {
      "epoch": 0.30911955189917734,
      "grad_norm": 0.7527218799395663,
      "learning_rate": 8.092533260810234e-06,
      "loss": 0.1547,
      "step": 10596
    },
    {
      "epoch": 0.3091487251298209,
      "grad_norm": 0.8702787459635344,
      "learning_rate": 8.0921620179012e-06,
      "loss": 0.1334,
      "step": 10597
    },
    {
      "epoch": 0.30917789836046444,
      "grad_norm": 0.8224779270059287,
      "learning_rate": 8.091790747386084e-06,
      "loss": 0.1384,
      "step": 10598
    },
    {
      "epoch": 0.309207071591108,
      "grad_norm": 0.9221937293844968,
      "learning_rate": 8.091419449268197e-06,
      "loss": 0.1703,
      "step": 10599
    },
    {
      "epoch": 0.30923624482175155,
      "grad_norm": 0.9491475623337832,
      "learning_rate": 8.091048123550855e-06,
      "loss": 0.136,
      "step": 10600
    },
    {
      "epoch": 0.3092654180523951,
      "grad_norm": 0.7995688228938687,
      "learning_rate": 8.090676770237374e-06,
      "loss": 0.1482,
      "step": 10601
    },
    {
      "epoch": 0.3092945912830387,
      "grad_norm": 0.9582166128890751,
      "learning_rate": 8.090305389331069e-06,
      "loss": 0.1453,
      "step": 10602
    },
    {
      "epoch": 0.30932376451368226,
      "grad_norm": 0.9879063390849203,
      "learning_rate": 8.089933980835254e-06,
      "loss": 0.1476,
      "step": 10603
    },
    {
      "epoch": 0.3093529377443258,
      "grad_norm": 0.9051906040954101,
      "learning_rate": 8.089562544753247e-06,
      "loss": 0.1451,
      "step": 10604
    },
    {
      "epoch": 0.30938211097496937,
      "grad_norm": 1.0179094349743891,
      "learning_rate": 8.089191081088364e-06,
      "loss": 0.1574,
      "step": 10605
    },
    {
      "epoch": 0.3094112842056129,
      "grad_norm": 0.8367263083938471,
      "learning_rate": 8.088819589843919e-06,
      "loss": 0.1905,
      "step": 10606
    },
    {
      "epoch": 0.3094404574362565,
      "grad_norm": 1.0329858362778113,
      "learning_rate": 8.08844807102323e-06,
      "loss": 0.1341,
      "step": 10607
    },
    {
      "epoch": 0.30946963066690003,
      "grad_norm": 1.1484767834595642,
      "learning_rate": 8.088076524629613e-06,
      "loss": 0.1603,
      "step": 10608
    },
    {
      "epoch": 0.30949880389754364,
      "grad_norm": 1.0068689908140545,
      "learning_rate": 8.087704950666388e-06,
      "loss": 0.1458,
      "step": 10609
    },
    {
      "epoch": 0.3095279771281872,
      "grad_norm": 1.0035871570590194,
      "learning_rate": 8.08733334913687e-06,
      "loss": 0.1787,
      "step": 10610
    },
    {
      "epoch": 0.30955715035883075,
      "grad_norm": 0.841565240388496,
      "learning_rate": 8.086961720044374e-06,
      "loss": 0.1485,
      "step": 10611
    },
    {
      "epoch": 0.3095863235894743,
      "grad_norm": 1.01173294811488,
      "learning_rate": 8.086590063392224e-06,
      "loss": 0.1374,
      "step": 10612
    },
    {
      "epoch": 0.30961549682011785,
      "grad_norm": 1.083260824888384,
      "learning_rate": 8.086218379183735e-06,
      "loss": 0.1599,
      "step": 10613
    },
    {
      "epoch": 0.3096446700507614,
      "grad_norm": 0.8371162877497406,
      "learning_rate": 8.085846667422224e-06,
      "loss": 0.1554,
      "step": 10614
    },
    {
      "epoch": 0.30967384328140496,
      "grad_norm": 0.9618314941000575,
      "learning_rate": 8.08547492811101e-06,
      "loss": 0.1601,
      "step": 10615
    },
    {
      "epoch": 0.30970301651204857,
      "grad_norm": 0.8562001405810296,
      "learning_rate": 8.085103161253413e-06,
      "loss": 0.1595,
      "step": 10616
    },
    {
      "epoch": 0.3097321897426921,
      "grad_norm": 0.7229896698756507,
      "learning_rate": 8.084731366852752e-06,
      "loss": 0.1459,
      "step": 10617
    },
    {
      "epoch": 0.3097613629733357,
      "grad_norm": 0.7145254810818518,
      "learning_rate": 8.084359544912344e-06,
      "loss": 0.1332,
      "step": 10618
    },
    {
      "epoch": 0.3097905362039792,
      "grad_norm": 0.7985107534143561,
      "learning_rate": 8.08398769543551e-06,
      "loss": 0.1589,
      "step": 10619
    },
    {
      "epoch": 0.3098197094346228,
      "grad_norm": 0.8866769673396308,
      "learning_rate": 8.083615818425573e-06,
      "loss": 0.1632,
      "step": 10620
    },
    {
      "epoch": 0.30984888266526633,
      "grad_norm": 0.9127343915015304,
      "learning_rate": 8.083243913885848e-06,
      "loss": 0.1417,
      "step": 10621
    },
    {
      "epoch": 0.3098780558959099,
      "grad_norm": 0.6578977562893924,
      "learning_rate": 8.082871981819658e-06,
      "loss": 0.1552,
      "step": 10622
    },
    {
      "epoch": 0.3099072291265535,
      "grad_norm": 1.1127677504484195,
      "learning_rate": 8.082500022230323e-06,
      "loss": 0.1483,
      "step": 10623
    },
    {
      "epoch": 0.30993640235719705,
      "grad_norm": 0.8847830017787616,
      "learning_rate": 8.082128035121162e-06,
      "loss": 0.186,
      "step": 10624
    },
    {
      "epoch": 0.3099655755878406,
      "grad_norm": 0.6800510056270985,
      "learning_rate": 8.081756020495501e-06,
      "loss": 0.1247,
      "step": 10625
    },
    {
      "epoch": 0.30999474881848416,
      "grad_norm": 0.7799657725364644,
      "learning_rate": 8.081383978356655e-06,
      "loss": 0.1543,
      "step": 10626
    },
    {
      "epoch": 0.3100239220491277,
      "grad_norm": 0.8811745190164654,
      "learning_rate": 8.08101190870795e-06,
      "loss": 0.153,
      "step": 10627
    },
    {
      "epoch": 0.31005309527977126,
      "grad_norm": 0.7482189825762555,
      "learning_rate": 8.080639811552704e-06,
      "loss": 0.1504,
      "step": 10628
    },
    {
      "epoch": 0.3100822685104148,
      "grad_norm": 0.7595443605436023,
      "learning_rate": 8.080267686894244e-06,
      "loss": 0.1514,
      "step": 10629
    },
    {
      "epoch": 0.3101114417410584,
      "grad_norm": 0.7780154206427352,
      "learning_rate": 8.079895534735887e-06,
      "loss": 0.1475,
      "step": 10630
    },
    {
      "epoch": 0.310140614971702,
      "grad_norm": 0.7048107343833214,
      "learning_rate": 8.07952335508096e-06,
      "loss": 0.1656,
      "step": 10631
    },
    {
      "epoch": 0.31016978820234553,
      "grad_norm": 0.7852511955892731,
      "learning_rate": 8.079151147932783e-06,
      "loss": 0.1604,
      "step": 10632
    },
    {
      "epoch": 0.3101989614329891,
      "grad_norm": 0.8862456259772208,
      "learning_rate": 8.078778913294677e-06,
      "loss": 0.1579,
      "step": 10633
    },
    {
      "epoch": 0.31022813466363264,
      "grad_norm": 0.859633153948346,
      "learning_rate": 8.078406651169972e-06,
      "loss": 0.1588,
      "step": 10634
    },
    {
      "epoch": 0.3102573078942762,
      "grad_norm": 0.7830934120308404,
      "learning_rate": 8.078034361561986e-06,
      "loss": 0.1706,
      "step": 10635
    },
    {
      "epoch": 0.3102864811249198,
      "grad_norm": 0.6885264756111578,
      "learning_rate": 8.077662044474043e-06,
      "loss": 0.1322,
      "step": 10636
    },
    {
      "epoch": 0.31031565435556335,
      "grad_norm": 0.9129535942341924,
      "learning_rate": 8.077289699909467e-06,
      "loss": 0.1569,
      "step": 10637
    },
    {
      "epoch": 0.3103448275862069,
      "grad_norm": 1.5824290449546556,
      "learning_rate": 8.076917327871585e-06,
      "loss": 0.1634,
      "step": 10638
    },
    {
      "epoch": 0.31037400081685046,
      "grad_norm": 0.6993724461831868,
      "learning_rate": 8.07654492836372e-06,
      "loss": 0.1311,
      "step": 10639
    },
    {
      "epoch": 0.310403174047494,
      "grad_norm": 0.7090772943109829,
      "learning_rate": 8.076172501389194e-06,
      "loss": 0.1416,
      "step": 10640
    },
    {
      "epoch": 0.31043234727813757,
      "grad_norm": 0.7689088325897413,
      "learning_rate": 8.075800046951336e-06,
      "loss": 0.1214,
      "step": 10641
    },
    {
      "epoch": 0.3104615205087811,
      "grad_norm": 0.9255989646444487,
      "learning_rate": 8.075427565053471e-06,
      "loss": 0.1551,
      "step": 10642
    },
    {
      "epoch": 0.31049069373942473,
      "grad_norm": 0.9407014546893899,
      "learning_rate": 8.07505505569892e-06,
      "loss": 0.1643,
      "step": 10643
    },
    {
      "epoch": 0.3105198669700683,
      "grad_norm": 0.6631045486438311,
      "learning_rate": 8.074682518891013e-06,
      "loss": 0.1692,
      "step": 10644
    },
    {
      "epoch": 0.31054904020071183,
      "grad_norm": 0.7748464554000842,
      "learning_rate": 8.074309954633074e-06,
      "loss": 0.1507,
      "step": 10645
    },
    {
      "epoch": 0.3105782134313554,
      "grad_norm": 0.7765330736455465,
      "learning_rate": 8.07393736292843e-06,
      "loss": 0.1413,
      "step": 10646
    },
    {
      "epoch": 0.31060738666199894,
      "grad_norm": 0.8579947591245294,
      "learning_rate": 8.073564743780407e-06,
      "loss": 0.1758,
      "step": 10647
    },
    {
      "epoch": 0.3106365598926425,
      "grad_norm": 0.7410292246732326,
      "learning_rate": 8.07319209719233e-06,
      "loss": 0.137,
      "step": 10648
    },
    {
      "epoch": 0.31066573312328605,
      "grad_norm": 0.7330946753608799,
      "learning_rate": 8.072819423167529e-06,
      "loss": 0.1572,
      "step": 10649
    },
    {
      "epoch": 0.31069490635392966,
      "grad_norm": 0.9172111121028844,
      "learning_rate": 8.07244672170933e-06,
      "loss": 0.144,
      "step": 10650
    },
    {
      "epoch": 0.3107240795845732,
      "grad_norm": 0.8347018719207745,
      "learning_rate": 8.07207399282106e-06,
      "loss": 0.165,
      "step": 10651
    },
    {
      "epoch": 0.31075325281521676,
      "grad_norm": 0.9026895241321442,
      "learning_rate": 8.071701236506046e-06,
      "loss": 0.1516,
      "step": 10652
    },
    {
      "epoch": 0.3107824260458603,
      "grad_norm": 0.7653364453481548,
      "learning_rate": 8.071328452767616e-06,
      "loss": 0.1541,
      "step": 10653
    },
    {
      "epoch": 0.31081159927650387,
      "grad_norm": 0.842937097174414,
      "learning_rate": 8.0709556416091e-06,
      "loss": 0.1536,
      "step": 10654
    },
    {
      "epoch": 0.3108407725071474,
      "grad_norm": 0.7441741293056079,
      "learning_rate": 8.070582803033827e-06,
      "loss": 0.1593,
      "step": 10655
    },
    {
      "epoch": 0.310869945737791,
      "grad_norm": 0.8379730241885758,
      "learning_rate": 8.07020993704512e-06,
      "loss": 0.1361,
      "step": 10656
    },
    {
      "epoch": 0.3108991189684346,
      "grad_norm": 0.8639332739638844,
      "learning_rate": 8.069837043646313e-06,
      "loss": 0.1453,
      "step": 10657
    },
    {
      "epoch": 0.31092829219907814,
      "grad_norm": 0.8095227989886883,
      "learning_rate": 8.069464122840736e-06,
      "loss": 0.1624,
      "step": 10658
    },
    {
      "epoch": 0.3109574654297217,
      "grad_norm": 0.8009859399076495,
      "learning_rate": 8.069091174631713e-06,
      "loss": 0.1314,
      "step": 10659
    },
    {
      "epoch": 0.31098663866036524,
      "grad_norm": 0.7809940192959718,
      "learning_rate": 8.068718199022578e-06,
      "loss": 0.1241,
      "step": 10660
    },
    {
      "epoch": 0.3110158118910088,
      "grad_norm": 0.8787555741360006,
      "learning_rate": 8.06834519601666e-06,
      "loss": 0.1445,
      "step": 10661
    },
    {
      "epoch": 0.31104498512165235,
      "grad_norm": 0.8156662472298689,
      "learning_rate": 8.067972165617287e-06,
      "loss": 0.1576,
      "step": 10662
    },
    {
      "epoch": 0.31107415835229596,
      "grad_norm": 0.8065762398079646,
      "learning_rate": 8.067599107827793e-06,
      "loss": 0.1546,
      "step": 10663
    },
    {
      "epoch": 0.3111033315829395,
      "grad_norm": 0.7447673881203032,
      "learning_rate": 8.067226022651505e-06,
      "loss": 0.1406,
      "step": 10664
    },
    {
      "epoch": 0.31113250481358307,
      "grad_norm": 0.8428734686193561,
      "learning_rate": 8.066852910091754e-06,
      "loss": 0.1521,
      "step": 10665
    },
    {
      "epoch": 0.3111616780442266,
      "grad_norm": 0.7604425710000424,
      "learning_rate": 8.066479770151875e-06,
      "loss": 0.1539,
      "step": 10666
    },
    {
      "epoch": 0.3111908512748702,
      "grad_norm": 0.8009054114318241,
      "learning_rate": 8.066106602835195e-06,
      "loss": 0.1547,
      "step": 10667
    },
    {
      "epoch": 0.3112200245055137,
      "grad_norm": 1.0157049129411353,
      "learning_rate": 8.065733408145047e-06,
      "loss": 0.1528,
      "step": 10668
    },
    {
      "epoch": 0.3112491977361573,
      "grad_norm": 0.970607898681344,
      "learning_rate": 8.065360186084764e-06,
      "loss": 0.1535,
      "step": 10669
    },
    {
      "epoch": 0.3112783709668009,
      "grad_norm": 0.692521856646399,
      "learning_rate": 8.064986936657678e-06,
      "loss": 0.1637,
      "step": 10670
    },
    {
      "epoch": 0.31130754419744444,
      "grad_norm": 0.9277228764146348,
      "learning_rate": 8.064613659867117e-06,
      "loss": 0.18,
      "step": 10671
    },
    {
      "epoch": 0.311336717428088,
      "grad_norm": 1.0267082516333221,
      "learning_rate": 8.06424035571642e-06,
      "loss": 0.1336,
      "step": 10672
    },
    {
      "epoch": 0.31136589065873155,
      "grad_norm": 0.7567316283677745,
      "learning_rate": 8.063867024208915e-06,
      "loss": 0.1424,
      "step": 10673
    },
    {
      "epoch": 0.3113950638893751,
      "grad_norm": 0.8372580592986668,
      "learning_rate": 8.063493665347937e-06,
      "loss": 0.1521,
      "step": 10674
    },
    {
      "epoch": 0.31142423712001865,
      "grad_norm": 0.8257915263991669,
      "learning_rate": 8.063120279136818e-06,
      "loss": 0.1568,
      "step": 10675
    },
    {
      "epoch": 0.3114534103506622,
      "grad_norm": 0.6840641979015937,
      "learning_rate": 8.062746865578894e-06,
      "loss": 0.1512,
      "step": 10676
    },
    {
      "epoch": 0.3114825835813058,
      "grad_norm": 0.956027467205039,
      "learning_rate": 8.062373424677497e-06,
      "loss": 0.1588,
      "step": 10677
    },
    {
      "epoch": 0.31151175681194937,
      "grad_norm": 0.74587657090834,
      "learning_rate": 8.061999956435959e-06,
      "loss": 0.132,
      "step": 10678
    },
    {
      "epoch": 0.3115409300425929,
      "grad_norm": 0.7686836235919776,
      "learning_rate": 8.061626460857618e-06,
      "loss": 0.1306,
      "step": 10679
    },
    {
      "epoch": 0.3115701032732365,
      "grad_norm": 0.6952941128726813,
      "learning_rate": 8.061252937945807e-06,
      "loss": 0.1406,
      "step": 10680
    },
    {
      "epoch": 0.31159927650388003,
      "grad_norm": 0.7551025388799266,
      "learning_rate": 8.06087938770386e-06,
      "loss": 0.1355,
      "step": 10681
    },
    {
      "epoch": 0.3116284497345236,
      "grad_norm": 0.7968590949508415,
      "learning_rate": 8.060505810135113e-06,
      "loss": 0.123,
      "step": 10682
    },
    {
      "epoch": 0.31165762296516714,
      "grad_norm": 0.7104227466698682,
      "learning_rate": 8.0601322052429e-06,
      "loss": 0.1587,
      "step": 10683
    },
    {
      "epoch": 0.31168679619581074,
      "grad_norm": 0.770712191753551,
      "learning_rate": 8.059758573030559e-06,
      "loss": 0.137,
      "step": 10684
    },
    {
      "epoch": 0.3117159694264543,
      "grad_norm": 1.0207202104356439,
      "learning_rate": 8.059384913501422e-06,
      "loss": 0.1379,
      "step": 10685
    },
    {
      "epoch": 0.31174514265709785,
      "grad_norm": 0.9498034214744107,
      "learning_rate": 8.059011226658826e-06,
      "loss": 0.1337,
      "step": 10686
    },
    {
      "epoch": 0.3117743158877414,
      "grad_norm": 0.7442823311245105,
      "learning_rate": 8.05863751250611e-06,
      "loss": 0.1565,
      "step": 10687
    },
    {
      "epoch": 0.31180348911838496,
      "grad_norm": 0.8086803621633196,
      "learning_rate": 8.058263771046608e-06,
      "loss": 0.1645,
      "step": 10688
    },
    {
      "epoch": 0.3118326623490285,
      "grad_norm": 0.7835500579927944,
      "learning_rate": 8.057890002283657e-06,
      "loss": 0.13,
      "step": 10689
    },
    {
      "epoch": 0.3118618355796721,
      "grad_norm": 0.8949264288312289,
      "learning_rate": 8.057516206220594e-06,
      "loss": 0.1867,
      "step": 10690
    },
    {
      "epoch": 0.3118910088103157,
      "grad_norm": 0.763094435902557,
      "learning_rate": 8.057142382860757e-06,
      "loss": 0.1439,
      "step": 10691
    },
    {
      "epoch": 0.3119201820409592,
      "grad_norm": 1.1047382814335238,
      "learning_rate": 8.05676853220748e-06,
      "loss": 0.1707,
      "step": 10692
    },
    {
      "epoch": 0.3119493552716028,
      "grad_norm": 1.0420450594740067,
      "learning_rate": 8.056394654264107e-06,
      "loss": 0.1338,
      "step": 10693
    },
    {
      "epoch": 0.31197852850224633,
      "grad_norm": 1.1649076445090332,
      "learning_rate": 8.056020749033968e-06,
      "loss": 0.1528,
      "step": 10694
    },
    {
      "epoch": 0.3120077017328899,
      "grad_norm": 0.9895356237269789,
      "learning_rate": 8.055646816520409e-06,
      "loss": 0.1424,
      "step": 10695
    },
    {
      "epoch": 0.31203687496353344,
      "grad_norm": 1.155915915417647,
      "learning_rate": 8.05527285672676e-06,
      "loss": 0.1454,
      "step": 10696
    },
    {
      "epoch": 0.31206604819417705,
      "grad_norm": 0.8550922229798301,
      "learning_rate": 8.05489886965637e-06,
      "loss": 0.1343,
      "step": 10697
    },
    {
      "epoch": 0.3120952214248206,
      "grad_norm": 1.0372996907065362,
      "learning_rate": 8.054524855312568e-06,
      "loss": 0.1417,
      "step": 10698
    },
    {
      "epoch": 0.31212439465546415,
      "grad_norm": 0.8225030740209992,
      "learning_rate": 8.0541508136987e-06,
      "loss": 0.149,
      "step": 10699
    },
    {
      "epoch": 0.3121535678861077,
      "grad_norm": 1.0389846090648762,
      "learning_rate": 8.053776744818102e-06,
      "loss": 0.1408,
      "step": 10700
    },
    {
      "epoch": 0.31218274111675126,
      "grad_norm": 0.8972509564498101,
      "learning_rate": 8.053402648674113e-06,
      "loss": 0.1606,
      "step": 10701
    },
    {
      "epoch": 0.3122119143473948,
      "grad_norm": 0.7994333451254089,
      "learning_rate": 8.053028525270075e-06,
      "loss": 0.1475,
      "step": 10702
    },
    {
      "epoch": 0.31224108757803837,
      "grad_norm": 1.2633577308984363,
      "learning_rate": 8.052654374609326e-06,
      "loss": 0.1556,
      "step": 10703
    },
    {
      "epoch": 0.312270260808682,
      "grad_norm": 1.0440090002034648,
      "learning_rate": 8.052280196695209e-06,
      "loss": 0.1432,
      "step": 10704
    },
    {
      "epoch": 0.31229943403932553,
      "grad_norm": 0.7322160840033916,
      "learning_rate": 8.051905991531061e-06,
      "loss": 0.1416,
      "step": 10705
    },
    {
      "epoch": 0.3123286072699691,
      "grad_norm": 0.9900805434997059,
      "learning_rate": 8.051531759120228e-06,
      "loss": 0.1613,
      "step": 10706
    },
    {
      "epoch": 0.31235778050061264,
      "grad_norm": 0.9174698172214244,
      "learning_rate": 8.051157499466044e-06,
      "loss": 0.131,
      "step": 10707
    },
    {
      "epoch": 0.3123869537312562,
      "grad_norm": 0.7010178794851797,
      "learning_rate": 8.050783212571857e-06,
      "loss": 0.1499,
      "step": 10708
    },
    {
      "epoch": 0.31241612696189974,
      "grad_norm": 0.9298159267560002,
      "learning_rate": 8.050408898441005e-06,
      "loss": 0.1613,
      "step": 10709
    },
    {
      "epoch": 0.3124453001925433,
      "grad_norm": 0.8912738723340679,
      "learning_rate": 8.050034557076831e-06,
      "loss": 0.1572,
      "step": 10710
    },
    {
      "epoch": 0.3124744734231869,
      "grad_norm": 0.7617971180421135,
      "learning_rate": 8.049660188482677e-06,
      "loss": 0.1328,
      "step": 10711
    },
    {
      "epoch": 0.31250364665383046,
      "grad_norm": 0.6849817819796101,
      "learning_rate": 8.049285792661882e-06,
      "loss": 0.1429,
      "step": 10712
    },
    {
      "epoch": 0.312532819884474,
      "grad_norm": 0.912788931517718,
      "learning_rate": 8.048911369617794e-06,
      "loss": 0.1484,
      "step": 10713
    },
    {
      "epoch": 0.31256199311511756,
      "grad_norm": 0.8351000231911664,
      "learning_rate": 8.048536919353753e-06,
      "loss": 0.1559,
      "step": 10714
    },
    {
      "epoch": 0.3125911663457611,
      "grad_norm": 0.7026278300710169,
      "learning_rate": 8.048162441873102e-06,
      "loss": 0.1551,
      "step": 10715
    },
    {
      "epoch": 0.31262033957640467,
      "grad_norm": 0.6711410878079361,
      "learning_rate": 8.047787937179183e-06,
      "loss": 0.1316,
      "step": 10716
    },
    {
      "epoch": 0.3126495128070483,
      "grad_norm": 0.7666660355141117,
      "learning_rate": 8.047413405275344e-06,
      "loss": 0.1463,
      "step": 10717
    },
    {
      "epoch": 0.31267868603769183,
      "grad_norm": 0.8413731982089626,
      "learning_rate": 8.047038846164923e-06,
      "loss": 0.1822,
      "step": 10718
    },
    {
      "epoch": 0.3127078592683354,
      "grad_norm": 0.8177612017088078,
      "learning_rate": 8.046664259851267e-06,
      "loss": 0.1576,
      "step": 10719
    },
    {
      "epoch": 0.31273703249897894,
      "grad_norm": 0.9037697769111159,
      "learning_rate": 8.046289646337719e-06,
      "loss": 0.1275,
      "step": 10720
    },
    {
      "epoch": 0.3127662057296225,
      "grad_norm": 0.8146306173853757,
      "learning_rate": 8.045915005627626e-06,
      "loss": 0.1588,
      "step": 10721
    },
    {
      "epoch": 0.31279537896026605,
      "grad_norm": 0.8398277328802198,
      "learning_rate": 8.045540337724329e-06,
      "loss": 0.1623,
      "step": 10722
    },
    {
      "epoch": 0.3128245521909096,
      "grad_norm": 0.6977706410600809,
      "learning_rate": 8.045165642631176e-06,
      "loss": 0.1473,
      "step": 10723
    },
    {
      "epoch": 0.3128537254215532,
      "grad_norm": 0.8347512600324639,
      "learning_rate": 8.044790920351512e-06,
      "loss": 0.1282,
      "step": 10724
    },
    {
      "epoch": 0.31288289865219676,
      "grad_norm": 1.0121807184248806,
      "learning_rate": 8.044416170888681e-06,
      "loss": 0.1446,
      "step": 10725
    },
    {
      "epoch": 0.3129120718828403,
      "grad_norm": 0.6374140196195595,
      "learning_rate": 8.044041394246027e-06,
      "loss": 0.1518,
      "step": 10726
    },
    {
      "epoch": 0.31294124511348387,
      "grad_norm": 1.0939177543901855,
      "learning_rate": 8.0436665904269e-06,
      "loss": 0.1559,
      "step": 10727
    },
    {
      "epoch": 0.3129704183441274,
      "grad_norm": 1.0342650290269129,
      "learning_rate": 8.043291759434643e-06,
      "loss": 0.1361,
      "step": 10728
    },
    {
      "epoch": 0.312999591574771,
      "grad_norm": 0.906841026379668,
      "learning_rate": 8.042916901272606e-06,
      "loss": 0.16,
      "step": 10729
    },
    {
      "epoch": 0.3130287648054145,
      "grad_norm": 0.7827500777722924,
      "learning_rate": 8.042542015944133e-06,
      "loss": 0.1565,
      "step": 10730
    },
    {
      "epoch": 0.31305793803605814,
      "grad_norm": 0.9417677569325281,
      "learning_rate": 8.04216710345257e-06,
      "loss": 0.1701,
      "step": 10731
    },
    {
      "epoch": 0.3130871112667017,
      "grad_norm": 0.6544675059020303,
      "learning_rate": 8.041792163801266e-06,
      "loss": 0.1428,
      "step": 10732
    },
    {
      "epoch": 0.31311628449734524,
      "grad_norm": 0.7878177342915902,
      "learning_rate": 8.041417196993565e-06,
      "loss": 0.1716,
      "step": 10733
    },
    {
      "epoch": 0.3131454577279888,
      "grad_norm": 0.8376969242944549,
      "learning_rate": 8.041042203032821e-06,
      "loss": 0.1472,
      "step": 10734
    },
    {
      "epoch": 0.31317463095863235,
      "grad_norm": 0.9388497991176934,
      "learning_rate": 8.040667181922378e-06,
      "loss": 0.1382,
      "step": 10735
    },
    {
      "epoch": 0.3132038041892759,
      "grad_norm": 0.7996431337234259,
      "learning_rate": 8.040292133665582e-06,
      "loss": 0.1428,
      "step": 10736
    },
    {
      "epoch": 0.31323297741991946,
      "grad_norm": 0.9414314353418634,
      "learning_rate": 8.039917058265784e-06,
      "loss": 0.1388,
      "step": 10737
    },
    {
      "epoch": 0.31326215065056306,
      "grad_norm": 1.0338513178501352,
      "learning_rate": 8.039541955726333e-06,
      "loss": 0.1438,
      "step": 10738
    },
    {
      "epoch": 0.3132913238812066,
      "grad_norm": 0.9033452820449029,
      "learning_rate": 8.039166826050577e-06,
      "loss": 0.146,
      "step": 10739
    },
    {
      "epoch": 0.31332049711185017,
      "grad_norm": 0.9429621006855641,
      "learning_rate": 8.038791669241865e-06,
      "loss": 0.1303,
      "step": 10740
    },
    {
      "epoch": 0.3133496703424937,
      "grad_norm": 0.7545239946383687,
      "learning_rate": 8.038416485303546e-06,
      "loss": 0.1469,
      "step": 10741
    },
    {
      "epoch": 0.3133788435731373,
      "grad_norm": 0.8022933400590296,
      "learning_rate": 8.03804127423897e-06,
      "loss": 0.1571,
      "step": 10742
    },
    {
      "epoch": 0.31340801680378083,
      "grad_norm": 0.9467000448849283,
      "learning_rate": 8.037666036051489e-06,
      "loss": 0.19,
      "step": 10743
    },
    {
      "epoch": 0.3134371900344244,
      "grad_norm": 0.7750292485174162,
      "learning_rate": 8.037290770744448e-06,
      "loss": 0.1495,
      "step": 10744
    },
    {
      "epoch": 0.313466363265068,
      "grad_norm": 1.0258822155581384,
      "learning_rate": 8.036915478321201e-06,
      "loss": 0.1524,
      "step": 10745
    },
    {
      "epoch": 0.31349553649571155,
      "grad_norm": 0.8124866017833455,
      "learning_rate": 8.036540158785097e-06,
      "loss": 0.1536,
      "step": 10746
    },
    {
      "epoch": 0.3135247097263551,
      "grad_norm": 1.0427948302415047,
      "learning_rate": 8.036164812139487e-06,
      "loss": 0.132,
      "step": 10747
    },
    {
      "epoch": 0.31355388295699865,
      "grad_norm": 0.8445958957902322,
      "learning_rate": 8.035789438387724e-06,
      "loss": 0.1376,
      "step": 10748
    },
    {
      "epoch": 0.3135830561876422,
      "grad_norm": 1.2067884629560839,
      "learning_rate": 8.035414037533156e-06,
      "loss": 0.1911,
      "step": 10749
    },
    {
      "epoch": 0.31361222941828576,
      "grad_norm": 1.1192234183737655,
      "learning_rate": 8.035038609579138e-06,
      "loss": 0.1509,
      "step": 10750
    },
    {
      "epoch": 0.31364140264892937,
      "grad_norm": 0.8019585745421046,
      "learning_rate": 8.034663154529018e-06,
      "loss": 0.1631,
      "step": 10751
    },
    {
      "epoch": 0.3136705758795729,
      "grad_norm": 1.1192644625980521,
      "learning_rate": 8.03428767238615e-06,
      "loss": 0.1314,
      "step": 10752
    },
    {
      "epoch": 0.3136997491102165,
      "grad_norm": 0.8762285213604124,
      "learning_rate": 8.033912163153886e-06,
      "loss": 0.1622,
      "step": 10753
    },
    {
      "epoch": 0.31372892234086003,
      "grad_norm": 0.8767256040478457,
      "learning_rate": 8.03353662683558e-06,
      "loss": 0.1578,
      "step": 10754
    },
    {
      "epoch": 0.3137580955715036,
      "grad_norm": 0.999344346633624,
      "learning_rate": 8.033161063434582e-06,
      "loss": 0.1516,
      "step": 10755
    },
    {
      "epoch": 0.31378726880214713,
      "grad_norm": 0.7808600078335992,
      "learning_rate": 8.032785472954246e-06,
      "loss": 0.143,
      "step": 10756
    },
    {
      "epoch": 0.3138164420327907,
      "grad_norm": 0.8899712922793491,
      "learning_rate": 8.032409855397925e-06,
      "loss": 0.1265,
      "step": 10757
    },
    {
      "epoch": 0.3138456152634343,
      "grad_norm": 0.8781754079412691,
      "learning_rate": 8.032034210768973e-06,
      "loss": 0.1348,
      "step": 10758
    },
    {
      "epoch": 0.31387478849407785,
      "grad_norm": 0.9344278429889267,
      "learning_rate": 8.031658539070744e-06,
      "loss": 0.1515,
      "step": 10759
    },
    {
      "epoch": 0.3139039617247214,
      "grad_norm": 0.7558407745735243,
      "learning_rate": 8.03128284030659e-06,
      "loss": 0.1305,
      "step": 10760
    },
    {
      "epoch": 0.31393313495536496,
      "grad_norm": 0.9094098942735137,
      "learning_rate": 8.030907114479866e-06,
      "loss": 0.1572,
      "step": 10761
    },
    {
      "epoch": 0.3139623081860085,
      "grad_norm": 0.8515731402323798,
      "learning_rate": 8.03053136159393e-06,
      "loss": 0.1671,
      "step": 10762
    },
    {
      "epoch": 0.31399148141665206,
      "grad_norm": 0.8192516539648291,
      "learning_rate": 8.030155581652131e-06,
      "loss": 0.1429,
      "step": 10763
    },
    {
      "epoch": 0.3140206546472956,
      "grad_norm": 0.7003987349239982,
      "learning_rate": 8.029779774657827e-06,
      "loss": 0.1488,
      "step": 10764
    },
    {
      "epoch": 0.3140498278779392,
      "grad_norm": 0.6648401101509428,
      "learning_rate": 8.029403940614372e-06,
      "loss": 0.1438,
      "step": 10765
    },
    {
      "epoch": 0.3140790011085828,
      "grad_norm": 1.1170959064644317,
      "learning_rate": 8.029028079525124e-06,
      "loss": 0.1357,
      "step": 10766
    },
    {
      "epoch": 0.31410817433922633,
      "grad_norm": 0.8448134227537687,
      "learning_rate": 8.028652191393432e-06,
      "loss": 0.1645,
      "step": 10767
    },
    {
      "epoch": 0.3141373475698699,
      "grad_norm": 0.7307092293862033,
      "learning_rate": 8.028276276222658e-06,
      "loss": 0.1488,
      "step": 10768
    },
    {
      "epoch": 0.31416652080051344,
      "grad_norm": 0.9418361270201677,
      "learning_rate": 8.027900334016158e-06,
      "loss": 0.1589,
      "step": 10769
    },
    {
      "epoch": 0.314195694031157,
      "grad_norm": 0.9507383502112244,
      "learning_rate": 8.027524364777285e-06,
      "loss": 0.1523,
      "step": 10770
    },
    {
      "epoch": 0.31422486726180054,
      "grad_norm": 0.8114521425027372,
      "learning_rate": 8.027148368509398e-06,
      "loss": 0.1177,
      "step": 10771
    },
    {
      "epoch": 0.31425404049244415,
      "grad_norm": 0.9654237897383231,
      "learning_rate": 8.026772345215853e-06,
      "loss": 0.1436,
      "step": 10772
    },
    {
      "epoch": 0.3142832137230877,
      "grad_norm": 0.7326560639180407,
      "learning_rate": 8.026396294900007e-06,
      "loss": 0.1462,
      "step": 10773
    },
    {
      "epoch": 0.31431238695373126,
      "grad_norm": 0.9610973891969635,
      "learning_rate": 8.026020217565217e-06,
      "loss": 0.1469,
      "step": 10774
    },
    {
      "epoch": 0.3143415601843748,
      "grad_norm": 0.7081117753533893,
      "learning_rate": 8.02564411321484e-06,
      "loss": 0.1193,
      "step": 10775
    },
    {
      "epoch": 0.31437073341501837,
      "grad_norm": 0.8779128326353343,
      "learning_rate": 8.025267981852236e-06,
      "loss": 0.1724,
      "step": 10776
    },
    {
      "epoch": 0.3143999066456619,
      "grad_norm": 0.7002203640783967,
      "learning_rate": 8.024891823480763e-06,
      "loss": 0.139,
      "step": 10777
    },
    {
      "epoch": 0.31442907987630553,
      "grad_norm": 0.8890647628148157,
      "learning_rate": 8.024515638103775e-06,
      "loss": 0.1699,
      "step": 10778
    },
    {
      "epoch": 0.3144582531069491,
      "grad_norm": 0.7395894130367539,
      "learning_rate": 8.024139425724636e-06,
      "loss": 0.1491,
      "step": 10779
    },
    {
      "epoch": 0.31448742633759263,
      "grad_norm": 0.7167755306461832,
      "learning_rate": 8.023763186346701e-06,
      "loss": 0.1569,
      "step": 10780
    },
    {
      "epoch": 0.3145165995682362,
      "grad_norm": 0.8959477083050045,
      "learning_rate": 8.023386919973328e-06,
      "loss": 0.1428,
      "step": 10781
    },
    {
      "epoch": 0.31454577279887974,
      "grad_norm": 0.7926966857231422,
      "learning_rate": 8.023010626607881e-06,
      "loss": 0.1534,
      "step": 10782
    },
    {
      "epoch": 0.3145749460295233,
      "grad_norm": 0.7519276625120004,
      "learning_rate": 8.022634306253717e-06,
      "loss": 0.1517,
      "step": 10783
    },
    {
      "epoch": 0.31460411926016685,
      "grad_norm": 0.7797314713469649,
      "learning_rate": 8.022257958914194e-06,
      "loss": 0.1564,
      "step": 10784
    },
    {
      "epoch": 0.31463329249081046,
      "grad_norm": 1.3093424623422203,
      "learning_rate": 8.021881584592672e-06,
      "loss": 0.1461,
      "step": 10785
    },
    {
      "epoch": 0.314662465721454,
      "grad_norm": 1.0210827254759813,
      "learning_rate": 8.021505183292515e-06,
      "loss": 0.1674,
      "step": 10786
    },
    {
      "epoch": 0.31469163895209756,
      "grad_norm": 0.8662462238752835,
      "learning_rate": 8.02112875501708e-06,
      "loss": 0.1486,
      "step": 10787
    },
    {
      "epoch": 0.3147208121827411,
      "grad_norm": 0.8316937972387971,
      "learning_rate": 8.02075229976973e-06,
      "loss": 0.1325,
      "step": 10788
    },
    {
      "epoch": 0.31474998541338467,
      "grad_norm": 1.1354955370376583,
      "learning_rate": 8.020375817553824e-06,
      "loss": 0.1685,
      "step": 10789
    },
    {
      "epoch": 0.3147791586440282,
      "grad_norm": 0.9533332775851375,
      "learning_rate": 8.019999308372724e-06,
      "loss": 0.1638,
      "step": 10790
    },
    {
      "epoch": 0.3148083318746718,
      "grad_norm": 0.9078033304961471,
      "learning_rate": 8.01962277222979e-06,
      "loss": 0.1686,
      "step": 10791
    },
    {
      "epoch": 0.3148375051053154,
      "grad_norm": 0.9491410550554914,
      "learning_rate": 8.019246209128384e-06,
      "loss": 0.1622,
      "step": 10792
    },
    {
      "epoch": 0.31486667833595894,
      "grad_norm": 0.8494628393581969,
      "learning_rate": 8.01886961907187e-06,
      "loss": 0.1465,
      "step": 10793
    },
    {
      "epoch": 0.3148958515666025,
      "grad_norm": 1.2820199107453916,
      "learning_rate": 8.018493002063608e-06,
      "loss": 0.145,
      "step": 10794
    },
    {
      "epoch": 0.31492502479724604,
      "grad_norm": 1.037733081029843,
      "learning_rate": 8.018116358106962e-06,
      "loss": 0.1601,
      "step": 10795
    },
    {
      "epoch": 0.3149541980278896,
      "grad_norm": 0.9043489129817143,
      "learning_rate": 8.017739687205295e-06,
      "loss": 0.1668,
      "step": 10796
    },
    {
      "epoch": 0.31498337125853315,
      "grad_norm": 1.2169740694511075,
      "learning_rate": 8.017362989361965e-06,
      "loss": 0.1394,
      "step": 10797
    },
    {
      "epoch": 0.3150125444891767,
      "grad_norm": 0.875495406598159,
      "learning_rate": 8.016986264580341e-06,
      "loss": 0.141,
      "step": 10798
    },
    {
      "epoch": 0.3150417177198203,
      "grad_norm": 0.9965183835394941,
      "learning_rate": 8.016609512863784e-06,
      "loss": 0.1465,
      "step": 10799
    },
    {
      "epoch": 0.31507089095046387,
      "grad_norm": 0.7638952406978357,
      "learning_rate": 8.016232734215655e-06,
      "loss": 0.1268,
      "step": 10800
    },
    {
      "epoch": 0.3151000641811074,
      "grad_norm": 0.8975328949139931,
      "learning_rate": 8.015855928639323e-06,
      "loss": 0.1742,
      "step": 10801
    },
    {
      "epoch": 0.315129237411751,
      "grad_norm": 0.6719977806764448,
      "learning_rate": 8.015479096138149e-06,
      "loss": 0.1366,
      "step": 10802
    },
    {
      "epoch": 0.3151584106423945,
      "grad_norm": 0.9731258658017219,
      "learning_rate": 8.015102236715494e-06,
      "loss": 0.1734,
      "step": 10803
    },
    {
      "epoch": 0.3151875838730381,
      "grad_norm": 0.8365768415108314,
      "learning_rate": 8.01472535037473e-06,
      "loss": 0.1477,
      "step": 10804
    },
    {
      "epoch": 0.3152167571036817,
      "grad_norm": 0.8668622645569349,
      "learning_rate": 8.014348437119215e-06,
      "loss": 0.1504,
      "step": 10805
    },
    {
      "epoch": 0.31524593033432524,
      "grad_norm": 0.8114028199900455,
      "learning_rate": 8.013971496952318e-06,
      "loss": 0.1433,
      "step": 10806
    },
    {
      "epoch": 0.3152751035649688,
      "grad_norm": 0.7153840424955196,
      "learning_rate": 8.013594529877402e-06,
      "loss": 0.1324,
      "step": 10807
    },
    {
      "epoch": 0.31530427679561235,
      "grad_norm": 0.7751645304753552,
      "learning_rate": 8.013217535897835e-06,
      "loss": 0.1318,
      "step": 10808
    },
    {
      "epoch": 0.3153334500262559,
      "grad_norm": 1.0414413983644775,
      "learning_rate": 8.012840515016979e-06,
      "loss": 0.1358,
      "step": 10809
    },
    {
      "epoch": 0.31536262325689945,
      "grad_norm": 1.9637889379868687,
      "learning_rate": 8.012463467238206e-06,
      "loss": 0.1597,
      "step": 10810
    },
    {
      "epoch": 0.315391796487543,
      "grad_norm": 0.7796162263142161,
      "learning_rate": 8.012086392564876e-06,
      "loss": 0.1422,
      "step": 10811
    },
    {
      "epoch": 0.3154209697181866,
      "grad_norm": 0.8997929953235346,
      "learning_rate": 8.011709291000356e-06,
      "loss": 0.1832,
      "step": 10812
    },
    {
      "epoch": 0.31545014294883017,
      "grad_norm": 0.874184850708513,
      "learning_rate": 8.011332162548016e-06,
      "loss": 0.1351,
      "step": 10813
    },
    {
      "epoch": 0.3154793161794737,
      "grad_norm": 0.8213953251088818,
      "learning_rate": 8.01095500721122e-06,
      "loss": 0.1437,
      "step": 10814
    },
    {
      "epoch": 0.3155084894101173,
      "grad_norm": 0.7463334749308627,
      "learning_rate": 8.01057782499334e-06,
      "loss": 0.1755,
      "step": 10815
    },
    {
      "epoch": 0.31553766264076083,
      "grad_norm": 0.8353718007235376,
      "learning_rate": 8.010200615897736e-06,
      "loss": 0.1573,
      "step": 10816
    },
    {
      "epoch": 0.3155668358714044,
      "grad_norm": 0.7838193827414881,
      "learning_rate": 8.00982337992778e-06,
      "loss": 0.1535,
      "step": 10817
    },
    {
      "epoch": 0.31559600910204794,
      "grad_norm": 0.9705819795490795,
      "learning_rate": 8.009446117086842e-06,
      "loss": 0.1528,
      "step": 10818
    },
    {
      "epoch": 0.31562518233269155,
      "grad_norm": 0.6172604382681017,
      "learning_rate": 8.009068827378286e-06,
      "loss": 0.143,
      "step": 10819
    },
    {
      "epoch": 0.3156543555633351,
      "grad_norm": 0.9071173585735542,
      "learning_rate": 8.008691510805483e-06,
      "loss": 0.1779,
      "step": 10820
    },
    {
      "epoch": 0.31568352879397865,
      "grad_norm": 0.8501037531554967,
      "learning_rate": 8.008314167371799e-06,
      "loss": 0.1423,
      "step": 10821
    },
    {
      "epoch": 0.3157127020246222,
      "grad_norm": 0.783255751313327,
      "learning_rate": 8.007936797080604e-06,
      "loss": 0.1383,
      "step": 10822
    },
    {
      "epoch": 0.31574187525526576,
      "grad_norm": 0.7944849478688778,
      "learning_rate": 8.007559399935267e-06,
      "loss": 0.1503,
      "step": 10823
    },
    {
      "epoch": 0.3157710484859093,
      "grad_norm": 3.6894205757231644,
      "learning_rate": 8.007181975939158e-06,
      "loss": 0.2021,
      "step": 10824
    },
    {
      "epoch": 0.31580022171655286,
      "grad_norm": 0.8167854693751999,
      "learning_rate": 8.006804525095646e-06,
      "loss": 0.146,
      "step": 10825
    },
    {
      "epoch": 0.3158293949471965,
      "grad_norm": 1.3318246565123562,
      "learning_rate": 8.006427047408103e-06,
      "loss": 0.1619,
      "step": 10826
    },
    {
      "epoch": 0.31585856817784,
      "grad_norm": 0.8930266059831007,
      "learning_rate": 8.006049542879894e-06,
      "loss": 0.1658,
      "step": 10827
    },
    {
      "epoch": 0.3158877414084836,
      "grad_norm": 0.8459288008179908,
      "learning_rate": 8.005672011514395e-06,
      "loss": 0.151,
      "step": 10828
    },
    {
      "epoch": 0.31591691463912713,
      "grad_norm": 1.0865249739569145,
      "learning_rate": 8.005294453314974e-06,
      "loss": 0.1411,
      "step": 10829
    },
    {
      "epoch": 0.3159460878697707,
      "grad_norm": 1.2482569102171208,
      "learning_rate": 8.004916868285e-06,
      "loss": 0.1595,
      "step": 10830
    },
    {
      "epoch": 0.31597526110041424,
      "grad_norm": 0.7735840041017777,
      "learning_rate": 8.004539256427845e-06,
      "loss": 0.1571,
      "step": 10831
    },
    {
      "epoch": 0.31600443433105785,
      "grad_norm": 0.8111414793263528,
      "learning_rate": 8.004161617746882e-06,
      "loss": 0.1326,
      "step": 10832
    },
    {
      "epoch": 0.3160336075617014,
      "grad_norm": 0.6201908759795967,
      "learning_rate": 8.003783952245481e-06,
      "loss": 0.1562,
      "step": 10833
    },
    {
      "epoch": 0.31606278079234496,
      "grad_norm": 0.7690556915432534,
      "learning_rate": 8.003406259927012e-06,
      "loss": 0.1743,
      "step": 10834
    },
    {
      "epoch": 0.3160919540229885,
      "grad_norm": 0.8998700133027496,
      "learning_rate": 8.003028540794852e-06,
      "loss": 0.1518,
      "step": 10835
    },
    {
      "epoch": 0.31612112725363206,
      "grad_norm": 0.7703325383165246,
      "learning_rate": 8.002650794852367e-06,
      "loss": 0.1296,
      "step": 10836
    },
    {
      "epoch": 0.3161503004842756,
      "grad_norm": 0.8496010461891292,
      "learning_rate": 8.002273022102936e-06,
      "loss": 0.1336,
      "step": 10837
    },
    {
      "epoch": 0.31617947371491917,
      "grad_norm": 0.9303915219740783,
      "learning_rate": 8.001895222549925e-06,
      "loss": 0.124,
      "step": 10838
    },
    {
      "epoch": 0.3162086469455628,
      "grad_norm": 0.8372252034605704,
      "learning_rate": 8.001517396196711e-06,
      "loss": 0.1697,
      "step": 10839
    },
    {
      "epoch": 0.31623782017620633,
      "grad_norm": 0.7688007130811148,
      "learning_rate": 8.001139543046668e-06,
      "loss": 0.1374,
      "step": 10840
    },
    {
      "epoch": 0.3162669934068499,
      "grad_norm": 0.9295937439114904,
      "learning_rate": 8.000761663103164e-06,
      "loss": 0.1521,
      "step": 10841
    },
    {
      "epoch": 0.31629616663749344,
      "grad_norm": 0.9722704463867623,
      "learning_rate": 8.00038375636958e-06,
      "loss": 0.1632,
      "step": 10842
    },
    {
      "epoch": 0.316325339868137,
      "grad_norm": 0.9706299166019182,
      "learning_rate": 8.000005822849284e-06,
      "loss": 0.1528,
      "step": 10843
    },
    {
      "epoch": 0.31635451309878054,
      "grad_norm": 0.7881230146869718,
      "learning_rate": 7.999627862545652e-06,
      "loss": 0.153,
      "step": 10844
    },
    {
      "epoch": 0.3163836863294241,
      "grad_norm": 1.3278552700245558,
      "learning_rate": 7.999249875462058e-06,
      "loss": 0.1779,
      "step": 10845
    },
    {
      "epoch": 0.3164128595600677,
      "grad_norm": 0.7948740939947869,
      "learning_rate": 7.99887186160188e-06,
      "loss": 0.1446,
      "step": 10846
    },
    {
      "epoch": 0.31644203279071126,
      "grad_norm": 1.0025482089247173,
      "learning_rate": 7.998493820968487e-06,
      "loss": 0.1516,
      "step": 10847
    },
    {
      "epoch": 0.3164712060213548,
      "grad_norm": 0.8067955914748581,
      "learning_rate": 7.998115753565259e-06,
      "loss": 0.1374,
      "step": 10848
    },
    {
      "epoch": 0.31650037925199836,
      "grad_norm": 1.141590069966231,
      "learning_rate": 7.997737659395569e-06,
      "loss": 0.1324,
      "step": 10849
    },
    {
      "epoch": 0.3165295524826419,
      "grad_norm": 0.6753486870475083,
      "learning_rate": 7.99735953846279e-06,
      "loss": 0.1698,
      "step": 10850
    },
    {
      "epoch": 0.31655872571328547,
      "grad_norm": 0.6681018223927342,
      "learning_rate": 7.996981390770305e-06,
      "loss": 0.1406,
      "step": 10851
    },
    {
      "epoch": 0.316587898943929,
      "grad_norm": 0.9748489643663281,
      "learning_rate": 7.996603216321482e-06,
      "loss": 0.1493,
      "step": 10852
    },
    {
      "epoch": 0.31661707217457263,
      "grad_norm": 0.8698199547853437,
      "learning_rate": 7.996225015119702e-06,
      "loss": 0.1403,
      "step": 10853
    },
    {
      "epoch": 0.3166462454052162,
      "grad_norm": 0.810940875301132,
      "learning_rate": 7.99584678716834e-06,
      "loss": 0.1348,
      "step": 10854
    },
    {
      "epoch": 0.31667541863585974,
      "grad_norm": 0.9992900550347783,
      "learning_rate": 7.995468532470773e-06,
      "loss": 0.1532,
      "step": 10855
    },
    {
      "epoch": 0.3167045918665033,
      "grad_norm": 0.8919525298022424,
      "learning_rate": 7.995090251030379e-06,
      "loss": 0.1314,
      "step": 10856
    },
    {
      "epoch": 0.31673376509714685,
      "grad_norm": 1.0945965505220643,
      "learning_rate": 7.994711942850536e-06,
      "loss": 0.1277,
      "step": 10857
    },
    {
      "epoch": 0.3167629383277904,
      "grad_norm": 1.0945924397721185,
      "learning_rate": 7.994333607934616e-06,
      "loss": 0.1483,
      "step": 10858
    },
    {
      "epoch": 0.316792111558434,
      "grad_norm": 0.8033755381283553,
      "learning_rate": 7.993955246286e-06,
      "loss": 0.1609,
      "step": 10859
    },
    {
      "epoch": 0.31682128478907756,
      "grad_norm": 0.8190977917699773,
      "learning_rate": 7.99357685790807e-06,
      "loss": 0.1388,
      "step": 10860
    },
    {
      "epoch": 0.3168504580197211,
      "grad_norm": 0.8652544400655409,
      "learning_rate": 7.993198442804198e-06,
      "loss": 0.1426,
      "step": 10861
    },
    {
      "epoch": 0.31687963125036467,
      "grad_norm": 1.0621468836251697,
      "learning_rate": 7.992820000977765e-06,
      "loss": 0.1746,
      "step": 10862
    },
    {
      "epoch": 0.3169088044810082,
      "grad_norm": 0.7581573525477618,
      "learning_rate": 7.99244153243215e-06,
      "loss": 0.1535,
      "step": 10863
    },
    {
      "epoch": 0.3169379777116518,
      "grad_norm": 1.0495793236692208,
      "learning_rate": 7.992063037170731e-06,
      "loss": 0.143,
      "step": 10864
    },
    {
      "epoch": 0.31696715094229533,
      "grad_norm": 0.9951663002642008,
      "learning_rate": 7.991684515196887e-06,
      "loss": 0.1601,
      "step": 10865
    },
    {
      "epoch": 0.31699632417293894,
      "grad_norm": 0.8261304615673515,
      "learning_rate": 7.991305966513998e-06,
      "loss": 0.151,
      "step": 10866
    },
    {
      "epoch": 0.3170254974035825,
      "grad_norm": 0.9458205934801588,
      "learning_rate": 7.990927391125445e-06,
      "loss": 0.1427,
      "step": 10867
    },
    {
      "epoch": 0.31705467063422604,
      "grad_norm": 0.8106575000502098,
      "learning_rate": 7.990548789034605e-06,
      "loss": 0.1373,
      "step": 10868
    },
    {
      "epoch": 0.3170838438648696,
      "grad_norm": 0.7383692640181932,
      "learning_rate": 7.990170160244859e-06,
      "loss": 0.1622,
      "step": 10869
    },
    {
      "epoch": 0.31711301709551315,
      "grad_norm": 0.8348757818591909,
      "learning_rate": 7.989791504759588e-06,
      "loss": 0.1498,
      "step": 10870
    },
    {
      "epoch": 0.3171421903261567,
      "grad_norm": 0.8478373700925567,
      "learning_rate": 7.989412822582172e-06,
      "loss": 0.1322,
      "step": 10871
    },
    {
      "epoch": 0.31717136355680026,
      "grad_norm": 0.7925508960137072,
      "learning_rate": 7.989034113715991e-06,
      "loss": 0.1785,
      "step": 10872
    },
    {
      "epoch": 0.31720053678744387,
      "grad_norm": 0.9072265128149225,
      "learning_rate": 7.98865537816443e-06,
      "loss": 0.1493,
      "step": 10873
    },
    {
      "epoch": 0.3172297100180874,
      "grad_norm": 0.6618899521697119,
      "learning_rate": 7.988276615930864e-06,
      "loss": 0.1317,
      "step": 10874
    },
    {
      "epoch": 0.31725888324873097,
      "grad_norm": 0.856390210165339,
      "learning_rate": 7.98789782701868e-06,
      "loss": 0.14,
      "step": 10875
    },
    {
      "epoch": 0.3172880564793745,
      "grad_norm": 0.8282340027579668,
      "learning_rate": 7.987519011431256e-06,
      "loss": 0.1585,
      "step": 10876
    },
    {
      "epoch": 0.3173172297100181,
      "grad_norm": 0.8133317569681802,
      "learning_rate": 7.987140169171976e-06,
      "loss": 0.1446,
      "step": 10877
    },
    {
      "epoch": 0.31734640294066163,
      "grad_norm": 0.6426067356846086,
      "learning_rate": 7.986761300244221e-06,
      "loss": 0.1391,
      "step": 10878
    },
    {
      "epoch": 0.3173755761713052,
      "grad_norm": 0.7135164368604657,
      "learning_rate": 7.986382404651376e-06,
      "loss": 0.1456,
      "step": 10879
    },
    {
      "epoch": 0.3174047494019488,
      "grad_norm": 0.8220779592727057,
      "learning_rate": 7.98600348239682e-06,
      "loss": 0.1581,
      "step": 10880
    },
    {
      "epoch": 0.31743392263259235,
      "grad_norm": 0.8985655432169064,
      "learning_rate": 7.985624533483938e-06,
      "loss": 0.1603,
      "step": 10881
    },
    {
      "epoch": 0.3174630958632359,
      "grad_norm": 0.8243899362161067,
      "learning_rate": 7.985245557916114e-06,
      "loss": 0.1499,
      "step": 10882
    },
    {
      "epoch": 0.31749226909387945,
      "grad_norm": 0.7845523521009368,
      "learning_rate": 7.984866555696728e-06,
      "loss": 0.1355,
      "step": 10883
    },
    {
      "epoch": 0.317521442324523,
      "grad_norm": 0.9071479875801252,
      "learning_rate": 7.984487526829168e-06,
      "loss": 0.1446,
      "step": 10884
    },
    {
      "epoch": 0.31755061555516656,
      "grad_norm": 1.0093951628565614,
      "learning_rate": 7.984108471316815e-06,
      "loss": 0.1612,
      "step": 10885
    },
    {
      "epoch": 0.3175797887858101,
      "grad_norm": 0.6823686103470579,
      "learning_rate": 7.983729389163054e-06,
      "loss": 0.1421,
      "step": 10886
    },
    {
      "epoch": 0.3176089620164537,
      "grad_norm": 0.8161962542510086,
      "learning_rate": 7.983350280371269e-06,
      "loss": 0.1542,
      "step": 10887
    },
    {
      "epoch": 0.3176381352470973,
      "grad_norm": 0.7319529789211147,
      "learning_rate": 7.982971144944846e-06,
      "loss": 0.1255,
      "step": 10888
    },
    {
      "epoch": 0.31766730847774083,
      "grad_norm": 0.6978920309349856,
      "learning_rate": 7.982591982887168e-06,
      "loss": 0.162,
      "step": 10889
    },
    {
      "epoch": 0.3176964817083844,
      "grad_norm": 0.7974316615751017,
      "learning_rate": 7.982212794201621e-06,
      "loss": 0.1428,
      "step": 10890
    },
    {
      "epoch": 0.31772565493902793,
      "grad_norm": 0.7324167704909141,
      "learning_rate": 7.98183357889159e-06,
      "loss": 0.1363,
      "step": 10891
    },
    {
      "epoch": 0.3177548281696715,
      "grad_norm": 0.9590530925832134,
      "learning_rate": 7.98145433696046e-06,
      "loss": 0.1514,
      "step": 10892
    },
    {
      "epoch": 0.3177840014003151,
      "grad_norm": 0.8461320732724353,
      "learning_rate": 7.98107506841162e-06,
      "loss": 0.161,
      "step": 10893
    },
    {
      "epoch": 0.31781317463095865,
      "grad_norm": 0.8229166627709099,
      "learning_rate": 7.98069577324845e-06,
      "loss": 0.1564,
      "step": 10894
    },
    {
      "epoch": 0.3178423478616022,
      "grad_norm": 0.8833468948109152,
      "learning_rate": 7.980316451474339e-06,
      "loss": 0.1361,
      "step": 10895
    },
    {
      "epoch": 0.31787152109224576,
      "grad_norm": 1.0419301433172863,
      "learning_rate": 7.979937103092677e-06,
      "loss": 0.153,
      "step": 10896
    },
    {
      "epoch": 0.3179006943228893,
      "grad_norm": 0.7309515740337175,
      "learning_rate": 7.979557728106848e-06,
      "loss": 0.168,
      "step": 10897
    },
    {
      "epoch": 0.31792986755353286,
      "grad_norm": 1.0051947532936936,
      "learning_rate": 7.979178326520238e-06,
      "loss": 0.1501,
      "step": 10898
    },
    {
      "epoch": 0.3179590407841764,
      "grad_norm": 0.8832778360594972,
      "learning_rate": 7.978798898336235e-06,
      "loss": 0.1376,
      "step": 10899
    },
    {
      "epoch": 0.31798821401482,
      "grad_norm": 0.8412014254184449,
      "learning_rate": 7.978419443558227e-06,
      "loss": 0.1861,
      "step": 10900
    },
    {
      "epoch": 0.3180173872454636,
      "grad_norm": 0.9142482863988516,
      "learning_rate": 7.9780399621896e-06,
      "loss": 0.1645,
      "step": 10901
    },
    {
      "epoch": 0.31804656047610713,
      "grad_norm": 1.1139117786825425,
      "learning_rate": 7.977660454233744e-06,
      "loss": 0.1618,
      "step": 10902
    },
    {
      "epoch": 0.3180757337067507,
      "grad_norm": 0.8952417141100726,
      "learning_rate": 7.977280919694047e-06,
      "loss": 0.1453,
      "step": 10903
    },
    {
      "epoch": 0.31810490693739424,
      "grad_norm": 1.2342890758949616,
      "learning_rate": 7.976901358573896e-06,
      "loss": 0.1479,
      "step": 10904
    },
    {
      "epoch": 0.3181340801680378,
      "grad_norm": 1.2172665966323095,
      "learning_rate": 7.976521770876679e-06,
      "loss": 0.1504,
      "step": 10905
    },
    {
      "epoch": 0.31816325339868134,
      "grad_norm": 0.9820515451969672,
      "learning_rate": 7.976142156605788e-06,
      "loss": 0.1514,
      "step": 10906
    },
    {
      "epoch": 0.31819242662932495,
      "grad_norm": 0.7379730837343433,
      "learning_rate": 7.97576251576461e-06,
      "loss": 0.1243,
      "step": 10907
    },
    {
      "epoch": 0.3182215998599685,
      "grad_norm": 1.2874446005764328,
      "learning_rate": 7.975382848356533e-06,
      "loss": 0.1566,
      "step": 10908
    },
    {
      "epoch": 0.31825077309061206,
      "grad_norm": 0.9823168690057406,
      "learning_rate": 7.97500315438495e-06,
      "loss": 0.145,
      "step": 10909
    },
    {
      "epoch": 0.3182799463212556,
      "grad_norm": 0.976866401389679,
      "learning_rate": 7.974623433853248e-06,
      "loss": 0.1529,
      "step": 10910
    },
    {
      "epoch": 0.31830911955189917,
      "grad_norm": 1.0329106099311451,
      "learning_rate": 7.97424368676482e-06,
      "loss": 0.1701,
      "step": 10911
    },
    {
      "epoch": 0.3183382927825427,
      "grad_norm": 1.238083789297518,
      "learning_rate": 7.973863913123053e-06,
      "loss": 0.1494,
      "step": 10912
    },
    {
      "epoch": 0.3183674660131863,
      "grad_norm": 0.8916058755722975,
      "learning_rate": 7.973484112931337e-06,
      "loss": 0.1447,
      "step": 10913
    },
    {
      "epoch": 0.3183966392438299,
      "grad_norm": 0.7613436905631191,
      "learning_rate": 7.973104286193067e-06,
      "loss": 0.1718,
      "step": 10914
    },
    {
      "epoch": 0.31842581247447344,
      "grad_norm": 1.0944601201705713,
      "learning_rate": 7.972724432911632e-06,
      "loss": 0.1562,
      "step": 10915
    },
    {
      "epoch": 0.318454985705117,
      "grad_norm": 0.8782577023184184,
      "learning_rate": 7.972344553090422e-06,
      "loss": 0.1608,
      "step": 10916
    },
    {
      "epoch": 0.31848415893576054,
      "grad_norm": 0.8529555948164445,
      "learning_rate": 7.97196464673283e-06,
      "loss": 0.1446,
      "step": 10917
    },
    {
      "epoch": 0.3185133321664041,
      "grad_norm": 0.8768972001228,
      "learning_rate": 7.971584713842247e-06,
      "loss": 0.141,
      "step": 10918
    },
    {
      "epoch": 0.31854250539704765,
      "grad_norm": 0.8389996599149626,
      "learning_rate": 7.971204754422063e-06,
      "loss": 0.1521,
      "step": 10919
    },
    {
      "epoch": 0.31857167862769126,
      "grad_norm": 0.939397250400976,
      "learning_rate": 7.970824768475675e-06,
      "loss": 0.1667,
      "step": 10920
    },
    {
      "epoch": 0.3186008518583348,
      "grad_norm": 0.7930986520373468,
      "learning_rate": 7.970444756006473e-06,
      "loss": 0.1543,
      "step": 10921
    },
    {
      "epoch": 0.31863002508897836,
      "grad_norm": 0.8159388536947406,
      "learning_rate": 7.970064717017847e-06,
      "loss": 0.141,
      "step": 10922
    },
    {
      "epoch": 0.3186591983196219,
      "grad_norm": 0.766377693268765,
      "learning_rate": 7.969684651513193e-06,
      "loss": 0.1422,
      "step": 10923
    },
    {
      "epoch": 0.31868837155026547,
      "grad_norm": 0.8639135229695858,
      "learning_rate": 7.969304559495903e-06,
      "loss": 0.1842,
      "step": 10924
    },
    {
      "epoch": 0.318717544780909,
      "grad_norm": 0.7901964245547788,
      "learning_rate": 7.968924440969372e-06,
      "loss": 0.1442,
      "step": 10925
    },
    {
      "epoch": 0.3187467180115526,
      "grad_norm": 0.7547189508705513,
      "learning_rate": 7.968544295936992e-06,
      "loss": 0.1261,
      "step": 10926
    },
    {
      "epoch": 0.3187758912421962,
      "grad_norm": 0.8062241964346836,
      "learning_rate": 7.968164124402156e-06,
      "loss": 0.1627,
      "step": 10927
    },
    {
      "epoch": 0.31880506447283974,
      "grad_norm": 0.7902701470212061,
      "learning_rate": 7.967783926368259e-06,
      "loss": 0.1312,
      "step": 10928
    },
    {
      "epoch": 0.3188342377034833,
      "grad_norm": 0.7790390263454149,
      "learning_rate": 7.967403701838697e-06,
      "loss": 0.1509,
      "step": 10929
    },
    {
      "epoch": 0.31886341093412685,
      "grad_norm": 0.8192497541244029,
      "learning_rate": 7.967023450816864e-06,
      "loss": 0.1816,
      "step": 10930
    },
    {
      "epoch": 0.3188925841647704,
      "grad_norm": 0.7206749785485109,
      "learning_rate": 7.966643173306151e-06,
      "loss": 0.1573,
      "step": 10931
    },
    {
      "epoch": 0.31892175739541395,
      "grad_norm": 0.7553026242956746,
      "learning_rate": 7.96626286930996e-06,
      "loss": 0.1279,
      "step": 10932
    },
    {
      "epoch": 0.3189509306260575,
      "grad_norm": 0.7900128280464674,
      "learning_rate": 7.965882538831678e-06,
      "loss": 0.1378,
      "step": 10933
    },
    {
      "epoch": 0.3189801038567011,
      "grad_norm": 0.781617927870265,
      "learning_rate": 7.965502181874707e-06,
      "loss": 0.1466,
      "step": 10934
    },
    {
      "epoch": 0.31900927708734467,
      "grad_norm": 0.7226488317673317,
      "learning_rate": 7.965121798442438e-06,
      "loss": 0.1346,
      "step": 10935
    },
    {
      "epoch": 0.3190384503179882,
      "grad_norm": 0.6723334210407697,
      "learning_rate": 7.964741388538272e-06,
      "loss": 0.1321,
      "step": 10936
    },
    {
      "epoch": 0.3190676235486318,
      "grad_norm": 0.8684507089063188,
      "learning_rate": 7.964360952165603e-06,
      "loss": 0.1424,
      "step": 10937
    },
    {
      "epoch": 0.3190967967792753,
      "grad_norm": 0.9239067511311433,
      "learning_rate": 7.963980489327826e-06,
      "loss": 0.1736,
      "step": 10938
    },
    {
      "epoch": 0.3191259700099189,
      "grad_norm": 0.8309033312700427,
      "learning_rate": 7.96360000002834e-06,
      "loss": 0.1342,
      "step": 10939
    },
    {
      "epoch": 0.31915514324056243,
      "grad_norm": 0.8864879593290103,
      "learning_rate": 7.963219484270537e-06,
      "loss": 0.151,
      "step": 10940
    },
    {
      "epoch": 0.31918431647120604,
      "grad_norm": 0.9543105795961997,
      "learning_rate": 7.962838942057821e-06,
      "loss": 0.1625,
      "step": 10941
    },
    {
      "epoch": 0.3192134897018496,
      "grad_norm": 0.7911350191859794,
      "learning_rate": 7.962458373393587e-06,
      "loss": 0.1401,
      "step": 10942
    },
    {
      "epoch": 0.31924266293249315,
      "grad_norm": 0.8265873061121952,
      "learning_rate": 7.96207777828123e-06,
      "loss": 0.1403,
      "step": 10943
    },
    {
      "epoch": 0.3192718361631367,
      "grad_norm": 1.0898136522483184,
      "learning_rate": 7.961697156724149e-06,
      "loss": 0.1652,
      "step": 10944
    },
    {
      "epoch": 0.31930100939378026,
      "grad_norm": 0.7436830795595055,
      "learning_rate": 7.961316508725745e-06,
      "loss": 0.1411,
      "step": 10945
    },
    {
      "epoch": 0.3193301826244238,
      "grad_norm": 1.0525989627328995,
      "learning_rate": 7.960935834289412e-06,
      "loss": 0.1467,
      "step": 10946
    },
    {
      "epoch": 0.3193593558550674,
      "grad_norm": 0.6856700623005294,
      "learning_rate": 7.960555133418551e-06,
      "loss": 0.1375,
      "step": 10947
    },
    {
      "epoch": 0.31938852908571097,
      "grad_norm": 0.7745108663757383,
      "learning_rate": 7.960174406116561e-06,
      "loss": 0.1351,
      "step": 10948
    },
    {
      "epoch": 0.3194177023163545,
      "grad_norm": 0.8947273049284142,
      "learning_rate": 7.959793652386841e-06,
      "loss": 0.1486,
      "step": 10949
    },
    {
      "epoch": 0.3194468755469981,
      "grad_norm": 0.8237067717890412,
      "learning_rate": 7.95941287223279e-06,
      "loss": 0.1483,
      "step": 10950
    },
    {
      "epoch": 0.31947604877764163,
      "grad_norm": 0.9335537446322002,
      "learning_rate": 7.959032065657807e-06,
      "loss": 0.1458,
      "step": 10951
    },
    {
      "epoch": 0.3195052220082852,
      "grad_norm": 0.7338829389501422,
      "learning_rate": 7.958651232665292e-06,
      "loss": 0.1611,
      "step": 10952
    },
    {
      "epoch": 0.31953439523892874,
      "grad_norm": 0.7767700317831092,
      "learning_rate": 7.958270373258645e-06,
      "loss": 0.1398,
      "step": 10953
    },
    {
      "epoch": 0.31956356846957235,
      "grad_norm": 0.6514863483326759,
      "learning_rate": 7.957889487441266e-06,
      "loss": 0.1501,
      "step": 10954
    },
    {
      "epoch": 0.3195927417002159,
      "grad_norm": 0.8655125459821137,
      "learning_rate": 7.957508575216556e-06,
      "loss": 0.1396,
      "step": 10955
    },
    {
      "epoch": 0.31962191493085945,
      "grad_norm": 0.7380382683302142,
      "learning_rate": 7.957127636587916e-06,
      "loss": 0.1596,
      "step": 10956
    },
    {
      "epoch": 0.319651088161503,
      "grad_norm": 0.832772327199341,
      "learning_rate": 7.956746671558746e-06,
      "loss": 0.1354,
      "step": 10957
    },
    {
      "epoch": 0.31968026139214656,
      "grad_norm": 0.8846179502107682,
      "learning_rate": 7.956365680132447e-06,
      "loss": 0.1505,
      "step": 10958
    },
    {
      "epoch": 0.3197094346227901,
      "grad_norm": 0.766310363080684,
      "learning_rate": 7.955984662312422e-06,
      "loss": 0.1433,
      "step": 10959
    },
    {
      "epoch": 0.31973860785343367,
      "grad_norm": 0.8378369436549159,
      "learning_rate": 7.955603618102072e-06,
      "loss": 0.1391,
      "step": 10960
    },
    {
      "epoch": 0.3197677810840773,
      "grad_norm": 0.9344404355154655,
      "learning_rate": 7.955222547504795e-06,
      "loss": 0.1541,
      "step": 10961
    },
    {
      "epoch": 0.3197969543147208,
      "grad_norm": 0.8942332460010611,
      "learning_rate": 7.954841450524e-06,
      "loss": 0.1649,
      "step": 10962
    },
    {
      "epoch": 0.3198261275453644,
      "grad_norm": 0.9588201479934079,
      "learning_rate": 7.954460327163085e-06,
      "loss": 0.1615,
      "step": 10963
    },
    {
      "epoch": 0.31985530077600793,
      "grad_norm": 1.065150453822113,
      "learning_rate": 7.954079177425454e-06,
      "loss": 0.1623,
      "step": 10964
    },
    {
      "epoch": 0.3198844740066515,
      "grad_norm": 1.104971740385175,
      "learning_rate": 7.953698001314508e-06,
      "loss": 0.1605,
      "step": 10965
    },
    {
      "epoch": 0.31991364723729504,
      "grad_norm": 0.8466155555025482,
      "learning_rate": 7.953316798833653e-06,
      "loss": 0.1789,
      "step": 10966
    },
    {
      "epoch": 0.3199428204679386,
      "grad_norm": 0.8382534225784651,
      "learning_rate": 7.952935569986289e-06,
      "loss": 0.1475,
      "step": 10967
    },
    {
      "epoch": 0.3199719936985822,
      "grad_norm": 0.8249852671925271,
      "learning_rate": 7.952554314775822e-06,
      "loss": 0.1221,
      "step": 10968
    },
    {
      "epoch": 0.32000116692922576,
      "grad_norm": 0.8597897758081215,
      "learning_rate": 7.952173033205654e-06,
      "loss": 0.1617,
      "step": 10969
    },
    {
      "epoch": 0.3200303401598693,
      "grad_norm": 0.7663886683609561,
      "learning_rate": 7.951791725279192e-06,
      "loss": 0.1851,
      "step": 10970
    },
    {
      "epoch": 0.32005951339051286,
      "grad_norm": 0.7510502612254416,
      "learning_rate": 7.951410390999836e-06,
      "loss": 0.1285,
      "step": 10971
    },
    {
      "epoch": 0.3200886866211564,
      "grad_norm": 0.84640212145857,
      "learning_rate": 7.951029030370993e-06,
      "loss": 0.1336,
      "step": 10972
    },
    {
      "epoch": 0.32011785985179997,
      "grad_norm": 0.6752485779770158,
      "learning_rate": 7.950647643396069e-06,
      "loss": 0.1377,
      "step": 10973
    },
    {
      "epoch": 0.3201470330824436,
      "grad_norm": 0.8721891486643623,
      "learning_rate": 7.950266230078465e-06,
      "loss": 0.157,
      "step": 10974
    },
    {
      "epoch": 0.32017620631308713,
      "grad_norm": 0.8238194749835336,
      "learning_rate": 7.949884790421591e-06,
      "loss": 0.1556,
      "step": 10975
    },
    {
      "epoch": 0.3202053795437307,
      "grad_norm": 0.6238059548766347,
      "learning_rate": 7.949503324428847e-06,
      "loss": 0.1603,
      "step": 10976
    },
    {
      "epoch": 0.32023455277437424,
      "grad_norm": 0.8611891416617158,
      "learning_rate": 7.949121832103643e-06,
      "loss": 0.1675,
      "step": 10977
    },
    {
      "epoch": 0.3202637260050178,
      "grad_norm": 0.8696323812718035,
      "learning_rate": 7.948740313449382e-06,
      "loss": 0.1566,
      "step": 10978
    },
    {
      "epoch": 0.32029289923566134,
      "grad_norm": 0.7918960502139011,
      "learning_rate": 7.948358768469473e-06,
      "loss": 0.1565,
      "step": 10979
    },
    {
      "epoch": 0.3203220724663049,
      "grad_norm": 0.8701276574822827,
      "learning_rate": 7.94797719716732e-06,
      "loss": 0.1347,
      "step": 10980
    },
    {
      "epoch": 0.3203512456969485,
      "grad_norm": 0.9816902745359031,
      "learning_rate": 7.94759559954633e-06,
      "loss": 0.1339,
      "step": 10981
    },
    {
      "epoch": 0.32038041892759206,
      "grad_norm": 0.9397271221685616,
      "learning_rate": 7.94721397560991e-06,
      "loss": 0.1261,
      "step": 10982
    },
    {
      "epoch": 0.3204095921582356,
      "grad_norm": 1.0055992255206205,
      "learning_rate": 7.946832325361468e-06,
      "loss": 0.1323,
      "step": 10983
    },
    {
      "epoch": 0.32043876538887917,
      "grad_norm": 0.7657455244757513,
      "learning_rate": 7.94645064880441e-06,
      "loss": 0.1447,
      "step": 10984
    },
    {
      "epoch": 0.3204679386195227,
      "grad_norm": 0.879737735234662,
      "learning_rate": 7.946068945942144e-06,
      "loss": 0.1293,
      "step": 10985
    },
    {
      "epoch": 0.32049711185016627,
      "grad_norm": 0.8606244572622053,
      "learning_rate": 7.945687216778078e-06,
      "loss": 0.1403,
      "step": 10986
    },
    {
      "epoch": 0.3205262850808098,
      "grad_norm": 0.8821171060557782,
      "learning_rate": 7.94530546131562e-06,
      "loss": 0.1705,
      "step": 10987
    },
    {
      "epoch": 0.32055545831145343,
      "grad_norm": 0.9948303760975354,
      "learning_rate": 7.944923679558176e-06,
      "loss": 0.1404,
      "step": 10988
    },
    {
      "epoch": 0.320584631542097,
      "grad_norm": 0.8341498066454957,
      "learning_rate": 7.944541871509159e-06,
      "loss": 0.1322,
      "step": 10989
    },
    {
      "epoch": 0.32061380477274054,
      "grad_norm": 0.7838082232009056,
      "learning_rate": 7.944160037171973e-06,
      "loss": 0.1305,
      "step": 10990
    },
    {
      "epoch": 0.3206429780033841,
      "grad_norm": 0.7141231750828113,
      "learning_rate": 7.94377817655003e-06,
      "loss": 0.1544,
      "step": 10991
    },
    {
      "epoch": 0.32067215123402765,
      "grad_norm": 0.8018060602628841,
      "learning_rate": 7.943396289646738e-06,
      "loss": 0.1526,
      "step": 10992
    },
    {
      "epoch": 0.3207013244646712,
      "grad_norm": 0.7334660975972801,
      "learning_rate": 7.943014376465508e-06,
      "loss": 0.1424,
      "step": 10993
    },
    {
      "epoch": 0.32073049769531475,
      "grad_norm": 0.704839933028303,
      "learning_rate": 7.942632437009746e-06,
      "loss": 0.1454,
      "step": 10994
    },
    {
      "epoch": 0.32075967092595836,
      "grad_norm": 0.8012705308577012,
      "learning_rate": 7.942250471282864e-06,
      "loss": 0.1636,
      "step": 10995
    },
    {
      "epoch": 0.3207888441566019,
      "grad_norm": 0.7911693022338174,
      "learning_rate": 7.941868479288276e-06,
      "loss": 0.1471,
      "step": 10996
    },
    {
      "epoch": 0.32081801738724547,
      "grad_norm": 0.7842991048808973,
      "learning_rate": 7.941486461029384e-06,
      "loss": 0.1414,
      "step": 10997
    },
    {
      "epoch": 0.320847190617889,
      "grad_norm": 0.719470077422427,
      "learning_rate": 7.941104416509604e-06,
      "loss": 0.158,
      "step": 10998
    },
    {
      "epoch": 0.3208763638485326,
      "grad_norm": 0.8242210322970973,
      "learning_rate": 7.940722345732347e-06,
      "loss": 0.1439,
      "step": 10999
    },
    {
      "epoch": 0.32090553707917613,
      "grad_norm": 0.8489609518286017,
      "learning_rate": 7.940340248701022e-06,
      "loss": 0.1579,
      "step": 11000
    },
    {
      "epoch": 0.3209347103098197,
      "grad_norm": 0.8551238030790933,
      "learning_rate": 7.939958125419042e-06,
      "loss": 0.1464,
      "step": 11001
    },
    {
      "epoch": 0.3209638835404633,
      "grad_norm": 0.7940226839705632,
      "learning_rate": 7.939575975889817e-06,
      "loss": 0.1482,
      "step": 11002
    },
    {
      "epoch": 0.32099305677110684,
      "grad_norm": 1.1013519935521343,
      "learning_rate": 7.93919380011676e-06,
      "loss": 0.126,
      "step": 11003
    },
    {
      "epoch": 0.3210222300017504,
      "grad_norm": 0.9228344579150133,
      "learning_rate": 7.938811598103282e-06,
      "loss": 0.1517,
      "step": 11004
    },
    {
      "epoch": 0.32105140323239395,
      "grad_norm": 1.227069686520391,
      "learning_rate": 7.938429369852796e-06,
      "loss": 0.1824,
      "step": 11005
    },
    {
      "epoch": 0.3210805764630375,
      "grad_norm": 0.761631095083584,
      "learning_rate": 7.938047115368713e-06,
      "loss": 0.1245,
      "step": 11006
    },
    {
      "epoch": 0.32110974969368106,
      "grad_norm": 1.0703644652778053,
      "learning_rate": 7.937664834654449e-06,
      "loss": 0.1436,
      "step": 11007
    },
    {
      "epoch": 0.32113892292432467,
      "grad_norm": 0.9210100666132759,
      "learning_rate": 7.937282527713412e-06,
      "loss": 0.1511,
      "step": 11008
    },
    {
      "epoch": 0.3211680961549682,
      "grad_norm": 0.8369537948127395,
      "learning_rate": 7.93690019454902e-06,
      "loss": 0.1563,
      "step": 11009
    },
    {
      "epoch": 0.32119726938561177,
      "grad_norm": 1.1750987802114254,
      "learning_rate": 7.936517835164682e-06,
      "loss": 0.1387,
      "step": 11010
    },
    {
      "epoch": 0.3212264426162553,
      "grad_norm": 1.078128425755581,
      "learning_rate": 7.936135449563815e-06,
      "loss": 0.1705,
      "step": 11011
    },
    {
      "epoch": 0.3212556158468989,
      "grad_norm": 1.0341634095727026,
      "learning_rate": 7.935753037749832e-06,
      "loss": 0.1482,
      "step": 11012
    },
    {
      "epoch": 0.32128478907754243,
      "grad_norm": 1.003273278719217,
      "learning_rate": 7.935370599726147e-06,
      "loss": 0.165,
      "step": 11013
    },
    {
      "epoch": 0.321313962308186,
      "grad_norm": 0.8501567233419036,
      "learning_rate": 7.93498813549617e-06,
      "loss": 0.1592,
      "step": 11014
    },
    {
      "epoch": 0.3213431355388296,
      "grad_norm": 1.1143881601142083,
      "learning_rate": 7.934605645063325e-06,
      "loss": 0.1757,
      "step": 11015
    },
    {
      "epoch": 0.32137230876947315,
      "grad_norm": 0.7427794374614133,
      "learning_rate": 7.934223128431017e-06,
      "loss": 0.1674,
      "step": 11016
    },
    {
      "epoch": 0.3214014820001167,
      "grad_norm": 0.9468726487888137,
      "learning_rate": 7.93384058560267e-06,
      "loss": 0.144,
      "step": 11017
    },
    {
      "epoch": 0.32143065523076025,
      "grad_norm": 0.7587061259930971,
      "learning_rate": 7.933458016581691e-06,
      "loss": 0.1369,
      "step": 11018
    },
    {
      "epoch": 0.3214598284614038,
      "grad_norm": 0.8365927342007075,
      "learning_rate": 7.9330754213715e-06,
      "loss": 0.161,
      "step": 11019
    },
    {
      "epoch": 0.32148900169204736,
      "grad_norm": 0.7843770152117566,
      "learning_rate": 7.932692799975513e-06,
      "loss": 0.1702,
      "step": 11020
    },
    {
      "epoch": 0.3215181749226909,
      "grad_norm": 0.8703596720960927,
      "learning_rate": 7.932310152397142e-06,
      "loss": 0.1456,
      "step": 11021
    },
    {
      "epoch": 0.3215473481533345,
      "grad_norm": 1.0118251000382694,
      "learning_rate": 7.931927478639809e-06,
      "loss": 0.156,
      "step": 11022
    },
    {
      "epoch": 0.3215765213839781,
      "grad_norm": 0.6723312241330154,
      "learning_rate": 7.931544778706925e-06,
      "loss": 0.1544,
      "step": 11023
    },
    {
      "epoch": 0.32160569461462163,
      "grad_norm": 0.6346271035640576,
      "learning_rate": 7.93116205260191e-06,
      "loss": 0.157,
      "step": 11024
    },
    {
      "epoch": 0.3216348678452652,
      "grad_norm": 0.6450277752550774,
      "learning_rate": 7.93077930032818e-06,
      "loss": 0.1293,
      "step": 11025
    },
    {
      "epoch": 0.32166404107590874,
      "grad_norm": 0.7727609392974565,
      "learning_rate": 7.930396521889152e-06,
      "loss": 0.164,
      "step": 11026
    },
    {
      "epoch": 0.3216932143065523,
      "grad_norm": 0.8947902710093486,
      "learning_rate": 7.930013717288244e-06,
      "loss": 0.1368,
      "step": 11027
    },
    {
      "epoch": 0.32172238753719584,
      "grad_norm": 0.7523660713209434,
      "learning_rate": 7.929630886528874e-06,
      "loss": 0.1491,
      "step": 11028
    },
    {
      "epoch": 0.32175156076783945,
      "grad_norm": 1.0094263864221134,
      "learning_rate": 7.929248029614455e-06,
      "loss": 0.1517,
      "step": 11029
    },
    {
      "epoch": 0.321780733998483,
      "grad_norm": 0.8769132684330191,
      "learning_rate": 7.928865146548411e-06,
      "loss": 0.156,
      "step": 11030
    },
    {
      "epoch": 0.32180990722912656,
      "grad_norm": 0.8737246949168511,
      "learning_rate": 7.928482237334159e-06,
      "loss": 0.1731,
      "step": 11031
    },
    {
      "epoch": 0.3218390804597701,
      "grad_norm": 0.8785322342147993,
      "learning_rate": 7.928099301975116e-06,
      "loss": 0.1587,
      "step": 11032
    },
    {
      "epoch": 0.32186825369041366,
      "grad_norm": 0.9112884315348686,
      "learning_rate": 7.927716340474701e-06,
      "loss": 0.1502,
      "step": 11033
    },
    {
      "epoch": 0.3218974269210572,
      "grad_norm": 0.7559231869687969,
      "learning_rate": 7.927333352836334e-06,
      "loss": 0.1507,
      "step": 11034
    },
    {
      "epoch": 0.3219266001517008,
      "grad_norm": 0.8276737793134261,
      "learning_rate": 7.926950339063435e-06,
      "loss": 0.134,
      "step": 11035
    },
    {
      "epoch": 0.3219557733823444,
      "grad_norm": 1.1149869744579006,
      "learning_rate": 7.92656729915942e-06,
      "loss": 0.1249,
      "step": 11036
    },
    {
      "epoch": 0.32198494661298793,
      "grad_norm": 0.9181834269609487,
      "learning_rate": 7.926184233127711e-06,
      "loss": 0.1415,
      "step": 11037
    },
    {
      "epoch": 0.3220141198436315,
      "grad_norm": 0.7895080741571071,
      "learning_rate": 7.925801140971728e-06,
      "loss": 0.1581,
      "step": 11038
    },
    {
      "epoch": 0.32204329307427504,
      "grad_norm": 0.8239369710506671,
      "learning_rate": 7.92541802269489e-06,
      "loss": 0.1574,
      "step": 11039
    },
    {
      "epoch": 0.3220724663049186,
      "grad_norm": 1.0015644202281127,
      "learning_rate": 7.925034878300619e-06,
      "loss": 0.1439,
      "step": 11040
    },
    {
      "epoch": 0.32210163953556215,
      "grad_norm": 0.8838377900826695,
      "learning_rate": 7.924651707792337e-06,
      "loss": 0.1629,
      "step": 11041
    },
    {
      "epoch": 0.32213081276620575,
      "grad_norm": 0.9723878533223169,
      "learning_rate": 7.924268511173459e-06,
      "loss": 0.1552,
      "step": 11042
    },
    {
      "epoch": 0.3221599859968493,
      "grad_norm": 0.8082210243148918,
      "learning_rate": 7.923885288447413e-06,
      "loss": 0.1437,
      "step": 11043
    },
    {
      "epoch": 0.32218915922749286,
      "grad_norm": 1.0104745462052036,
      "learning_rate": 7.923502039617615e-06,
      "loss": 0.1501,
      "step": 11044
    },
    {
      "epoch": 0.3222183324581364,
      "grad_norm": 0.9943546720506817,
      "learning_rate": 7.923118764687489e-06,
      "loss": 0.1871,
      "step": 11045
    },
    {
      "epoch": 0.32224750568877997,
      "grad_norm": 0.9721145965729276,
      "learning_rate": 7.922735463660455e-06,
      "loss": 0.1912,
      "step": 11046
    },
    {
      "epoch": 0.3222766789194235,
      "grad_norm": 0.7190358049891815,
      "learning_rate": 7.922352136539938e-06,
      "loss": 0.1494,
      "step": 11047
    },
    {
      "epoch": 0.3223058521500671,
      "grad_norm": 0.9486864584518475,
      "learning_rate": 7.921968783329362e-06,
      "loss": 0.1492,
      "step": 11048
    },
    {
      "epoch": 0.3223350253807107,
      "grad_norm": 1.1308214754558508,
      "learning_rate": 7.921585404032142e-06,
      "loss": 0.1556,
      "step": 11049
    },
    {
      "epoch": 0.32236419861135424,
      "grad_norm": 0.8976733890933533,
      "learning_rate": 7.921201998651707e-06,
      "loss": 0.1849,
      "step": 11050
    },
    {
      "epoch": 0.3223933718419978,
      "grad_norm": 0.7042217237944887,
      "learning_rate": 7.920818567191476e-06,
      "loss": 0.163,
      "step": 11051
    },
    {
      "epoch": 0.32242254507264134,
      "grad_norm": 0.9241326609308739,
      "learning_rate": 7.920435109654877e-06,
      "loss": 0.1505,
      "step": 11052
    },
    {
      "epoch": 0.3224517183032849,
      "grad_norm": 0.7560781844894718,
      "learning_rate": 7.920051626045326e-06,
      "loss": 0.1374,
      "step": 11053
    },
    {
      "epoch": 0.32248089153392845,
      "grad_norm": 0.8588511064051029,
      "learning_rate": 7.919668116366254e-06,
      "loss": 0.1693,
      "step": 11054
    },
    {
      "epoch": 0.322510064764572,
      "grad_norm": 0.7864607662435174,
      "learning_rate": 7.919284580621082e-06,
      "loss": 0.1531,
      "step": 11055
    },
    {
      "epoch": 0.3225392379952156,
      "grad_norm": 0.7873508435241552,
      "learning_rate": 7.918901018813234e-06,
      "loss": 0.1386,
      "step": 11056
    },
    {
      "epoch": 0.32256841122585916,
      "grad_norm": 0.7088069197804802,
      "learning_rate": 7.918517430946135e-06,
      "loss": 0.1156,
      "step": 11057
    },
    {
      "epoch": 0.3225975844565027,
      "grad_norm": 0.7059647387036411,
      "learning_rate": 7.91813381702321e-06,
      "loss": 0.1333,
      "step": 11058
    },
    {
      "epoch": 0.32262675768714627,
      "grad_norm": 0.666454605594955,
      "learning_rate": 7.917750177047881e-06,
      "loss": 0.1361,
      "step": 11059
    },
    {
      "epoch": 0.3226559309177898,
      "grad_norm": 1.2113628974280057,
      "learning_rate": 7.917366511023575e-06,
      "loss": 0.1457,
      "step": 11060
    },
    {
      "epoch": 0.3226851041484334,
      "grad_norm": 0.842500261956832,
      "learning_rate": 7.916982818953718e-06,
      "loss": 0.1524,
      "step": 11061
    },
    {
      "epoch": 0.322714277379077,
      "grad_norm": 0.8034266029849304,
      "learning_rate": 7.916599100841734e-06,
      "loss": 0.1482,
      "step": 11062
    },
    {
      "epoch": 0.32274345060972054,
      "grad_norm": 0.9418898273074237,
      "learning_rate": 7.916215356691051e-06,
      "loss": 0.1457,
      "step": 11063
    },
    {
      "epoch": 0.3227726238403641,
      "grad_norm": 0.8991728969743075,
      "learning_rate": 7.915831586505092e-06,
      "loss": 0.1658,
      "step": 11064
    },
    {
      "epoch": 0.32280179707100765,
      "grad_norm": 0.7920795098995894,
      "learning_rate": 7.915447790287285e-06,
      "loss": 0.1569,
      "step": 11065
    },
    {
      "epoch": 0.3228309703016512,
      "grad_norm": 0.8132999221667033,
      "learning_rate": 7.915063968041055e-06,
      "loss": 0.1798,
      "step": 11066
    },
    {
      "epoch": 0.32286014353229475,
      "grad_norm": 1.0027852714766794,
      "learning_rate": 7.914680119769831e-06,
      "loss": 0.1545,
      "step": 11067
    },
    {
      "epoch": 0.3228893167629383,
      "grad_norm": 0.6407065559864056,
      "learning_rate": 7.91429624547704e-06,
      "loss": 0.1309,
      "step": 11068
    },
    {
      "epoch": 0.3229184899935819,
      "grad_norm": 0.8188800605209654,
      "learning_rate": 7.913912345166106e-06,
      "loss": 0.1461,
      "step": 11069
    },
    {
      "epoch": 0.32294766322422547,
      "grad_norm": 0.8840368480843117,
      "learning_rate": 7.91352841884046e-06,
      "loss": 0.1815,
      "step": 11070
    },
    {
      "epoch": 0.322976836454869,
      "grad_norm": 0.963373660572464,
      "learning_rate": 7.913144466503524e-06,
      "loss": 0.1514,
      "step": 11071
    },
    {
      "epoch": 0.3230060096855126,
      "grad_norm": 0.847454837134214,
      "learning_rate": 7.912760488158732e-06,
      "loss": 0.1659,
      "step": 11072
    },
    {
      "epoch": 0.3230351829161561,
      "grad_norm": 0.8394342933315185,
      "learning_rate": 7.91237648380951e-06,
      "loss": 0.1512,
      "step": 11073
    },
    {
      "epoch": 0.3230643561467997,
      "grad_norm": 0.988287441046759,
      "learning_rate": 7.911992453459286e-06,
      "loss": 0.1424,
      "step": 11074
    },
    {
      "epoch": 0.32309352937744323,
      "grad_norm": 0.8230831543372418,
      "learning_rate": 7.911608397111488e-06,
      "loss": 0.1503,
      "step": 11075
    },
    {
      "epoch": 0.32312270260808684,
      "grad_norm": 0.8519448947315523,
      "learning_rate": 7.911224314769546e-06,
      "loss": 0.1641,
      "step": 11076
    },
    {
      "epoch": 0.3231518758387304,
      "grad_norm": 0.8377550743800041,
      "learning_rate": 7.910840206436888e-06,
      "loss": 0.1523,
      "step": 11077
    },
    {
      "epoch": 0.32318104906937395,
      "grad_norm": 0.610019788807047,
      "learning_rate": 7.910456072116944e-06,
      "loss": 0.1445,
      "step": 11078
    },
    {
      "epoch": 0.3232102223000175,
      "grad_norm": 0.8807595995263549,
      "learning_rate": 7.910071911813142e-06,
      "loss": 0.1551,
      "step": 11079
    },
    {
      "epoch": 0.32323939553066106,
      "grad_norm": 0.7677249527011757,
      "learning_rate": 7.909687725528911e-06,
      "loss": 0.1506,
      "step": 11080
    },
    {
      "epoch": 0.3232685687613046,
      "grad_norm": 0.7463840512551806,
      "learning_rate": 7.909303513267685e-06,
      "loss": 0.1361,
      "step": 11081
    },
    {
      "epoch": 0.32329774199194816,
      "grad_norm": 1.12408880891437,
      "learning_rate": 7.908919275032892e-06,
      "loss": 0.1363,
      "step": 11082
    },
    {
      "epoch": 0.32332691522259177,
      "grad_norm": 0.7552353405072342,
      "learning_rate": 7.90853501082796e-06,
      "loss": 0.1705,
      "step": 11083
    },
    {
      "epoch": 0.3233560884532353,
      "grad_norm": 0.7364333369784888,
      "learning_rate": 7.908150720656324e-06,
      "loss": 0.1795,
      "step": 11084
    },
    {
      "epoch": 0.3233852616838789,
      "grad_norm": 0.9176463827324679,
      "learning_rate": 7.907766404521414e-06,
      "loss": 0.1496,
      "step": 11085
    },
    {
      "epoch": 0.32341443491452243,
      "grad_norm": 0.8070896589297977,
      "learning_rate": 7.907382062426656e-06,
      "loss": 0.1337,
      "step": 11086
    },
    {
      "epoch": 0.323443608145166,
      "grad_norm": 0.8071384790599688,
      "learning_rate": 7.906997694375486e-06,
      "loss": 0.1577,
      "step": 11087
    },
    {
      "epoch": 0.32347278137580954,
      "grad_norm": 0.8882272638805864,
      "learning_rate": 7.906613300371336e-06,
      "loss": 0.1642,
      "step": 11088
    },
    {
      "epoch": 0.32350195460645315,
      "grad_norm": 0.9243385594022651,
      "learning_rate": 7.906228880417635e-06,
      "loss": 0.1372,
      "step": 11089
    },
    {
      "epoch": 0.3235311278370967,
      "grad_norm": 0.7258597190739321,
      "learning_rate": 7.905844434517816e-06,
      "loss": 0.1654,
      "step": 11090
    },
    {
      "epoch": 0.32356030106774025,
      "grad_norm": 0.7576725544155756,
      "learning_rate": 7.905459962675313e-06,
      "loss": 0.1519,
      "step": 11091
    },
    {
      "epoch": 0.3235894742983838,
      "grad_norm": 0.8156263374114935,
      "learning_rate": 7.905075464893555e-06,
      "loss": 0.1644,
      "step": 11092
    },
    {
      "epoch": 0.32361864752902736,
      "grad_norm": 0.9372645601270756,
      "learning_rate": 7.904690941175979e-06,
      "loss": 0.1293,
      "step": 11093
    },
    {
      "epoch": 0.3236478207596709,
      "grad_norm": 0.6610553053673265,
      "learning_rate": 7.904306391526012e-06,
      "loss": 0.125,
      "step": 11094
    },
    {
      "epoch": 0.32367699399031447,
      "grad_norm": 0.7163195473998811,
      "learning_rate": 7.903921815947095e-06,
      "loss": 0.1426,
      "step": 11095
    },
    {
      "epoch": 0.3237061672209581,
      "grad_norm": 1.1733843832401145,
      "learning_rate": 7.903537214442656e-06,
      "loss": 0.1623,
      "step": 11096
    },
    {
      "epoch": 0.3237353404516016,
      "grad_norm": 1.0320280774661348,
      "learning_rate": 7.90315258701613e-06,
      "loss": 0.1539,
      "step": 11097
    },
    {
      "epoch": 0.3237645136822452,
      "grad_norm": 1.084036003611658,
      "learning_rate": 7.90276793367095e-06,
      "loss": 0.1355,
      "step": 11098
    },
    {
      "epoch": 0.32379368691288873,
      "grad_norm": 0.8556990140036362,
      "learning_rate": 7.902383254410551e-06,
      "loss": 0.164,
      "step": 11099
    },
    {
      "epoch": 0.3238228601435323,
      "grad_norm": 0.8520628457765326,
      "learning_rate": 7.901998549238368e-06,
      "loss": 0.1372,
      "step": 11100
    },
    {
      "epoch": 0.32385203337417584,
      "grad_norm": 1.0005421369998007,
      "learning_rate": 7.901613818157834e-06,
      "loss": 0.1467,
      "step": 11101
    },
    {
      "epoch": 0.3238812066048194,
      "grad_norm": 1.1171458073610518,
      "learning_rate": 7.901229061172385e-06,
      "loss": 0.1362,
      "step": 11102
    },
    {
      "epoch": 0.323910379835463,
      "grad_norm": 0.8278324943556908,
      "learning_rate": 7.900844278285456e-06,
      "loss": 0.1517,
      "step": 11103
    },
    {
      "epoch": 0.32393955306610656,
      "grad_norm": 0.8315488441333372,
      "learning_rate": 7.900459469500479e-06,
      "loss": 0.1347,
      "step": 11104
    },
    {
      "epoch": 0.3239687262967501,
      "grad_norm": 1.2264931770295593,
      "learning_rate": 7.900074634820895e-06,
      "loss": 0.159,
      "step": 11105
    },
    {
      "epoch": 0.32399789952739366,
      "grad_norm": 0.8233679230374752,
      "learning_rate": 7.899689774250135e-06,
      "loss": 0.1328,
      "step": 11106
    },
    {
      "epoch": 0.3240270727580372,
      "grad_norm": 0.8688961977315816,
      "learning_rate": 7.899304887791639e-06,
      "loss": 0.1586,
      "step": 11107
    },
    {
      "epoch": 0.32405624598868077,
      "grad_norm": 0.9345347455118921,
      "learning_rate": 7.89891997544884e-06,
      "loss": 0.1552,
      "step": 11108
    },
    {
      "epoch": 0.3240854192193243,
      "grad_norm": 0.9791120219413809,
      "learning_rate": 7.898535037225175e-06,
      "loss": 0.1317,
      "step": 11109
    },
    {
      "epoch": 0.32411459244996793,
      "grad_norm": 0.9586664401398748,
      "learning_rate": 7.898150073124082e-06,
      "loss": 0.1489,
      "step": 11110
    },
    {
      "epoch": 0.3241437656806115,
      "grad_norm": 0.9522432142225266,
      "learning_rate": 7.897765083148996e-06,
      "loss": 0.1444,
      "step": 11111
    },
    {
      "epoch": 0.32417293891125504,
      "grad_norm": 0.8064721428674649,
      "learning_rate": 7.897380067303358e-06,
      "loss": 0.1707,
      "step": 11112
    },
    {
      "epoch": 0.3242021121418986,
      "grad_norm": 1.0066213801822907,
      "learning_rate": 7.896995025590599e-06,
      "loss": 0.1688,
      "step": 11113
    },
    {
      "epoch": 0.32423128537254214,
      "grad_norm": 0.998585894294648,
      "learning_rate": 7.896609958014161e-06,
      "loss": 0.1603,
      "step": 11114
    },
    {
      "epoch": 0.3242604586031857,
      "grad_norm": 0.7110054043003348,
      "learning_rate": 7.896224864577481e-06,
      "loss": 0.1472,
      "step": 11115
    },
    {
      "epoch": 0.32428963183382925,
      "grad_norm": 1.0472696116758133,
      "learning_rate": 7.895839745283995e-06,
      "loss": 0.159,
      "step": 11116
    },
    {
      "epoch": 0.32431880506447286,
      "grad_norm": 1.0111319698013854,
      "learning_rate": 7.895454600137146e-06,
      "loss": 0.1261,
      "step": 11117
    },
    {
      "epoch": 0.3243479782951164,
      "grad_norm": 0.9057512398346269,
      "learning_rate": 7.895069429140368e-06,
      "loss": 0.1294,
      "step": 11118
    },
    {
      "epoch": 0.32437715152575997,
      "grad_norm": 0.7968962884674599,
      "learning_rate": 7.894684232297102e-06,
      "loss": 0.1548,
      "step": 11119
    },
    {
      "epoch": 0.3244063247564035,
      "grad_norm": 0.9322557724599413,
      "learning_rate": 7.894299009610785e-06,
      "loss": 0.146,
      "step": 11120
    },
    {
      "epoch": 0.3244354979870471,
      "grad_norm": 0.8162066814814293,
      "learning_rate": 7.89391376108486e-06,
      "loss": 0.1448,
      "step": 11121
    },
    {
      "epoch": 0.3244646712176906,
      "grad_norm": 0.880392781658322,
      "learning_rate": 7.89352848672276e-06,
      "loss": 0.1897,
      "step": 11122
    },
    {
      "epoch": 0.32449384444833423,
      "grad_norm": 0.9233284810847567,
      "learning_rate": 7.893143186527932e-06,
      "loss": 0.1256,
      "step": 11123
    },
    {
      "epoch": 0.3245230176789778,
      "grad_norm": 0.7914260869194623,
      "learning_rate": 7.892757860503811e-06,
      "loss": 0.1541,
      "step": 11124
    },
    {
      "epoch": 0.32455219090962134,
      "grad_norm": 1.0147958123374983,
      "learning_rate": 7.892372508653836e-06,
      "loss": 0.1603,
      "step": 11125
    },
    {
      "epoch": 0.3245813641402649,
      "grad_norm": 0.778283953114621,
      "learning_rate": 7.891987130981453e-06,
      "loss": 0.1393,
      "step": 11126
    },
    {
      "epoch": 0.32461053737090845,
      "grad_norm": 0.9710467584953915,
      "learning_rate": 7.891601727490097e-06,
      "loss": 0.1487,
      "step": 11127
    },
    {
      "epoch": 0.324639710601552,
      "grad_norm": 0.7689397872836617,
      "learning_rate": 7.891216298183211e-06,
      "loss": 0.1341,
      "step": 11128
    },
    {
      "epoch": 0.32466888383219555,
      "grad_norm": 0.8023403353438116,
      "learning_rate": 7.890830843064238e-06,
      "loss": 0.1435,
      "step": 11129
    },
    {
      "epoch": 0.32469805706283916,
      "grad_norm": 0.8413398113707,
      "learning_rate": 7.890445362136617e-06,
      "loss": 0.1591,
      "step": 11130
    },
    {
      "epoch": 0.3247272302934827,
      "grad_norm": 0.6539052901783666,
      "learning_rate": 7.890059855403788e-06,
      "loss": 0.1716,
      "step": 11131
    },
    {
      "epoch": 0.32475640352412627,
      "grad_norm": 0.963041856513202,
      "learning_rate": 7.889674322869197e-06,
      "loss": 0.14,
      "step": 11132
    },
    {
      "epoch": 0.3247855767547698,
      "grad_norm": 0.7747670818321699,
      "learning_rate": 7.889288764536283e-06,
      "loss": 0.1558,
      "step": 11133
    },
    {
      "epoch": 0.3248147499854134,
      "grad_norm": 0.7081080602780688,
      "learning_rate": 7.888903180408487e-06,
      "loss": 0.1487,
      "step": 11134
    },
    {
      "epoch": 0.32484392321605693,
      "grad_norm": 0.7767953386113995,
      "learning_rate": 7.888517570489254e-06,
      "loss": 0.1484,
      "step": 11135
    },
    {
      "epoch": 0.3248730964467005,
      "grad_norm": 0.9086030933297429,
      "learning_rate": 7.888131934782025e-06,
      "loss": 0.16,
      "step": 11136
    },
    {
      "epoch": 0.3249022696773441,
      "grad_norm": 0.8442980583402525,
      "learning_rate": 7.887746273290244e-06,
      "loss": 0.1743,
      "step": 11137
    },
    {
      "epoch": 0.32493144290798764,
      "grad_norm": 0.781528325486156,
      "learning_rate": 7.887360586017355e-06,
      "loss": 0.1633,
      "step": 11138
    },
    {
      "epoch": 0.3249606161386312,
      "grad_norm": 0.8827801295303677,
      "learning_rate": 7.886974872966797e-06,
      "loss": 0.1407,
      "step": 11139
    },
    {
      "epoch": 0.32498978936927475,
      "grad_norm": 0.861913956074121,
      "learning_rate": 7.88658913414202e-06,
      "loss": 0.152,
      "step": 11140
    },
    {
      "epoch": 0.3250189625999183,
      "grad_norm": 0.7879894827965718,
      "learning_rate": 7.88620336954646e-06,
      "loss": 0.1577,
      "step": 11141
    },
    {
      "epoch": 0.32504813583056186,
      "grad_norm": 0.9862164150006236,
      "learning_rate": 7.885817579183568e-06,
      "loss": 0.1499,
      "step": 11142
    },
    {
      "epoch": 0.3250773090612054,
      "grad_norm": 0.7817851699096838,
      "learning_rate": 7.885431763056785e-06,
      "loss": 0.1492,
      "step": 11143
    },
    {
      "epoch": 0.325106482291849,
      "grad_norm": 0.7994504919131414,
      "learning_rate": 7.885045921169558e-06,
      "loss": 0.1532,
      "step": 11144
    },
    {
      "epoch": 0.3251356555224926,
      "grad_norm": 0.8545913647459151,
      "learning_rate": 7.884660053525328e-06,
      "loss": 0.1661,
      "step": 11145
    },
    {
      "epoch": 0.3251648287531361,
      "grad_norm": 0.7178098624736637,
      "learning_rate": 7.88427416012754e-06,
      "loss": 0.1312,
      "step": 11146
    },
    {
      "epoch": 0.3251940019837797,
      "grad_norm": 0.9187691784701346,
      "learning_rate": 7.883888240979645e-06,
      "loss": 0.1677,
      "step": 11147
    },
    {
      "epoch": 0.32522317521442323,
      "grad_norm": 0.8330749505255329,
      "learning_rate": 7.883502296085082e-06,
      "loss": 0.1505,
      "step": 11148
    },
    {
      "epoch": 0.3252523484450668,
      "grad_norm": 0.9117442196393197,
      "learning_rate": 7.883116325447297e-06,
      "loss": 0.1354,
      "step": 11149
    },
    {
      "epoch": 0.3252815216757104,
      "grad_norm": 0.8664809256906031,
      "learning_rate": 7.88273032906974e-06,
      "loss": 0.1235,
      "step": 11150
    },
    {
      "epoch": 0.32531069490635395,
      "grad_norm": 0.8885645752805146,
      "learning_rate": 7.882344306955854e-06,
      "loss": 0.1585,
      "step": 11151
    },
    {
      "epoch": 0.3253398681369975,
      "grad_norm": 1.043108426199016,
      "learning_rate": 7.881958259109086e-06,
      "loss": 0.1667,
      "step": 11152
    },
    {
      "epoch": 0.32536904136764105,
      "grad_norm": 0.797157519242401,
      "learning_rate": 7.881572185532883e-06,
      "loss": 0.1454,
      "step": 11153
    },
    {
      "epoch": 0.3253982145982846,
      "grad_norm": 0.7372472441294787,
      "learning_rate": 7.881186086230692e-06,
      "loss": 0.1498,
      "step": 11154
    },
    {
      "epoch": 0.32542738782892816,
      "grad_norm": 0.8927634683470496,
      "learning_rate": 7.880799961205958e-06,
      "loss": 0.1338,
      "step": 11155
    },
    {
      "epoch": 0.3254565610595717,
      "grad_norm": 0.9679172359122158,
      "learning_rate": 7.880413810462131e-06,
      "loss": 0.1573,
      "step": 11156
    },
    {
      "epoch": 0.3254857342902153,
      "grad_norm": 0.8530429571627943,
      "learning_rate": 7.880027634002656e-06,
      "loss": 0.1401,
      "step": 11157
    },
    {
      "epoch": 0.3255149075208589,
      "grad_norm": 1.0792902672644435,
      "learning_rate": 7.879641431830982e-06,
      "loss": 0.1489,
      "step": 11158
    },
    {
      "epoch": 0.32554408075150243,
      "grad_norm": 0.8090864583869836,
      "learning_rate": 7.879255203950558e-06,
      "loss": 0.151,
      "step": 11159
    },
    {
      "epoch": 0.325573253982146,
      "grad_norm": 0.8242460238232276,
      "learning_rate": 7.87886895036483e-06,
      "loss": 0.1628,
      "step": 11160
    },
    {
      "epoch": 0.32560242721278954,
      "grad_norm": 0.955244334399679,
      "learning_rate": 7.878482671077245e-06,
      "loss": 0.1436,
      "step": 11161
    },
    {
      "epoch": 0.3256316004434331,
      "grad_norm": 0.8717935363950352,
      "learning_rate": 7.878096366091257e-06,
      "loss": 0.1548,
      "step": 11162
    },
    {
      "epoch": 0.32566077367407664,
      "grad_norm": 0.7861828283605923,
      "learning_rate": 7.87771003541031e-06,
      "loss": 0.1532,
      "step": 11163
    },
    {
      "epoch": 0.32568994690472025,
      "grad_norm": 1.0991238720235803,
      "learning_rate": 7.877323679037856e-06,
      "loss": 0.1376,
      "step": 11164
    },
    {
      "epoch": 0.3257191201353638,
      "grad_norm": 0.8782449589718162,
      "learning_rate": 7.876937296977343e-06,
      "loss": 0.1605,
      "step": 11165
    },
    {
      "epoch": 0.32574829336600736,
      "grad_norm": 0.9904516402225627,
      "learning_rate": 7.87655088923222e-06,
      "loss": 0.1545,
      "step": 11166
    },
    {
      "epoch": 0.3257774665966509,
      "grad_norm": 0.926925223891559,
      "learning_rate": 7.876164455805936e-06,
      "loss": 0.1526,
      "step": 11167
    },
    {
      "epoch": 0.32580663982729446,
      "grad_norm": 1.0697371220281324,
      "learning_rate": 7.875777996701945e-06,
      "loss": 0.1456,
      "step": 11168
    },
    {
      "epoch": 0.325835813057938,
      "grad_norm": 0.8192916929295959,
      "learning_rate": 7.875391511923694e-06,
      "loss": 0.1539,
      "step": 11169
    },
    {
      "epoch": 0.32586498628858157,
      "grad_norm": 0.9150330513863495,
      "learning_rate": 7.875005001474634e-06,
      "loss": 0.1766,
      "step": 11170
    },
    {
      "epoch": 0.3258941595192252,
      "grad_norm": 0.9828826048360307,
      "learning_rate": 7.874618465358214e-06,
      "loss": 0.1422,
      "step": 11171
    },
    {
      "epoch": 0.32592333274986873,
      "grad_norm": 0.6383145733164322,
      "learning_rate": 7.874231903577888e-06,
      "loss": 0.1409,
      "step": 11172
    },
    {
      "epoch": 0.3259525059805123,
      "grad_norm": 0.7695161636554368,
      "learning_rate": 7.873845316137105e-06,
      "loss": 0.1356,
      "step": 11173
    },
    {
      "epoch": 0.32598167921115584,
      "grad_norm": 1.0856298287608799,
      "learning_rate": 7.873458703039318e-06,
      "loss": 0.1471,
      "step": 11174
    },
    {
      "epoch": 0.3260108524417994,
      "grad_norm": 0.9055813711090364,
      "learning_rate": 7.873072064287977e-06,
      "loss": 0.1394,
      "step": 11175
    },
    {
      "epoch": 0.32604002567244295,
      "grad_norm": 0.7847444403890237,
      "learning_rate": 7.872685399886534e-06,
      "loss": 0.1183,
      "step": 11176
    },
    {
      "epoch": 0.32606919890308655,
      "grad_norm": 0.906607351781194,
      "learning_rate": 7.872298709838442e-06,
      "loss": 0.1475,
      "step": 11177
    },
    {
      "epoch": 0.3260983721337301,
      "grad_norm": 0.7337407212504137,
      "learning_rate": 7.871911994147153e-06,
      "loss": 0.1605,
      "step": 11178
    },
    {
      "epoch": 0.32612754536437366,
      "grad_norm": 0.9478752233020101,
      "learning_rate": 7.871525252816118e-06,
      "loss": 0.1473,
      "step": 11179
    },
    {
      "epoch": 0.3261567185950172,
      "grad_norm": 1.0254850132373712,
      "learning_rate": 7.871138485848792e-06,
      "loss": 0.1634,
      "step": 11180
    },
    {
      "epoch": 0.32618589182566077,
      "grad_norm": 0.8695701462858811,
      "learning_rate": 7.870751693248629e-06,
      "loss": 0.1402,
      "step": 11181
    },
    {
      "epoch": 0.3262150650563043,
      "grad_norm": 0.8789685242400097,
      "learning_rate": 7.870364875019077e-06,
      "loss": 0.1551,
      "step": 11182
    },
    {
      "epoch": 0.3262442382869479,
      "grad_norm": 1.056578533196887,
      "learning_rate": 7.869978031163595e-06,
      "loss": 0.1532,
      "step": 11183
    },
    {
      "epoch": 0.3262734115175915,
      "grad_norm": 0.8102537330696241,
      "learning_rate": 7.869591161685632e-06,
      "loss": 0.1528,
      "step": 11184
    },
    {
      "epoch": 0.32630258474823504,
      "grad_norm": 0.676497318379484,
      "learning_rate": 7.869204266588646e-06,
      "loss": 0.137,
      "step": 11185
    },
    {
      "epoch": 0.3263317579788786,
      "grad_norm": 0.7706067624671409,
      "learning_rate": 7.868817345876087e-06,
      "loss": 0.1326,
      "step": 11186
    },
    {
      "epoch": 0.32636093120952214,
      "grad_norm": 0.9601753302633754,
      "learning_rate": 7.868430399551414e-06,
      "loss": 0.1532,
      "step": 11187
    },
    {
      "epoch": 0.3263901044401657,
      "grad_norm": 0.8017299868137221,
      "learning_rate": 7.868043427618079e-06,
      "loss": 0.1731,
      "step": 11188
    },
    {
      "epoch": 0.32641927767080925,
      "grad_norm": 0.9979207894961692,
      "learning_rate": 7.867656430079536e-06,
      "loss": 0.1564,
      "step": 11189
    },
    {
      "epoch": 0.3264484509014528,
      "grad_norm": 0.8489733284107609,
      "learning_rate": 7.867269406939241e-06,
      "loss": 0.1531,
      "step": 11190
    },
    {
      "epoch": 0.3264776241320964,
      "grad_norm": 1.2871244862954452,
      "learning_rate": 7.86688235820065e-06,
      "loss": 0.1477,
      "step": 11191
    },
    {
      "epoch": 0.32650679736273996,
      "grad_norm": 0.8434115513925409,
      "learning_rate": 7.866495283867217e-06,
      "loss": 0.1592,
      "step": 11192
    },
    {
      "epoch": 0.3265359705933835,
      "grad_norm": 0.7868800112584352,
      "learning_rate": 7.866108183942398e-06,
      "loss": 0.1339,
      "step": 11193
    },
    {
      "epoch": 0.32656514382402707,
      "grad_norm": 0.7322802973463202,
      "learning_rate": 7.86572105842965e-06,
      "loss": 0.145,
      "step": 11194
    },
    {
      "epoch": 0.3265943170546706,
      "grad_norm": 0.7045795705883792,
      "learning_rate": 7.865333907332428e-06,
      "loss": 0.1633,
      "step": 11195
    },
    {
      "epoch": 0.3266234902853142,
      "grad_norm": 0.8515529749835823,
      "learning_rate": 7.864946730654189e-06,
      "loss": 0.1372,
      "step": 11196
    },
    {
      "epoch": 0.32665266351595773,
      "grad_norm": 0.8143587191431548,
      "learning_rate": 7.864559528398389e-06,
      "loss": 0.1445,
      "step": 11197
    },
    {
      "epoch": 0.32668183674660134,
      "grad_norm": 0.7803727200147857,
      "learning_rate": 7.864172300568486e-06,
      "loss": 0.1749,
      "step": 11198
    },
    {
      "epoch": 0.3267110099772449,
      "grad_norm": 0.8890237745265267,
      "learning_rate": 7.863785047167937e-06,
      "loss": 0.1546,
      "step": 11199
    },
    {
      "epoch": 0.32674018320788845,
      "grad_norm": 0.8192458116294992,
      "learning_rate": 7.863397768200199e-06,
      "loss": 0.1536,
      "step": 11200
    },
    {
      "epoch": 0.326769356438532,
      "grad_norm": 0.8109583909799909,
      "learning_rate": 7.863010463668727e-06,
      "loss": 0.166,
      "step": 11201
    },
    {
      "epoch": 0.32679852966917555,
      "grad_norm": 0.7512780401103101,
      "learning_rate": 7.862623133576983e-06,
      "loss": 0.1344,
      "step": 11202
    },
    {
      "epoch": 0.3268277028998191,
      "grad_norm": 0.8751608725724604,
      "learning_rate": 7.862235777928421e-06,
      "loss": 0.1426,
      "step": 11203
    },
    {
      "epoch": 0.3268568761304627,
      "grad_norm": 0.7561860223338784,
      "learning_rate": 7.861848396726503e-06,
      "loss": 0.1424,
      "step": 11204
    },
    {
      "epoch": 0.32688604936110627,
      "grad_norm": 1.0115884453789517,
      "learning_rate": 7.861460989974687e-06,
      "loss": 0.1652,
      "step": 11205
    },
    {
      "epoch": 0.3269152225917498,
      "grad_norm": 0.7903954517382313,
      "learning_rate": 7.86107355767643e-06,
      "loss": 0.1373,
      "step": 11206
    },
    {
      "epoch": 0.3269443958223934,
      "grad_norm": 0.7381217575098457,
      "learning_rate": 7.860686099835189e-06,
      "loss": 0.1402,
      "step": 11207
    },
    {
      "epoch": 0.3269735690530369,
      "grad_norm": 0.8546309919055587,
      "learning_rate": 7.860298616454427e-06,
      "loss": 0.1529,
      "step": 11208
    },
    {
      "epoch": 0.3270027422836805,
      "grad_norm": 0.7078441242209368,
      "learning_rate": 7.8599111075376e-06,
      "loss": 0.154,
      "step": 11209
    },
    {
      "epoch": 0.32703191551432403,
      "grad_norm": 1.0809135431051031,
      "learning_rate": 7.85952357308817e-06,
      "loss": 0.1312,
      "step": 11210
    },
    {
      "epoch": 0.32706108874496764,
      "grad_norm": 0.9314678237368279,
      "learning_rate": 7.8591360131096e-06,
      "loss": 0.1228,
      "step": 11211
    },
    {
      "epoch": 0.3270902619756112,
      "grad_norm": 0.9749281045999717,
      "learning_rate": 7.85874842760534e-06,
      "loss": 0.1342,
      "step": 11212
    },
    {
      "epoch": 0.32711943520625475,
      "grad_norm": 0.8503717148202816,
      "learning_rate": 7.85836081657886e-06,
      "loss": 0.14,
      "step": 11213
    },
    {
      "epoch": 0.3271486084368983,
      "grad_norm": 0.7811678816801917,
      "learning_rate": 7.857973180033615e-06,
      "loss": 0.1725,
      "step": 11214
    },
    {
      "epoch": 0.32717778166754186,
      "grad_norm": 0.8289429545831913,
      "learning_rate": 7.85758551797307e-06,
      "loss": 0.1597,
      "step": 11215
    },
    {
      "epoch": 0.3272069548981854,
      "grad_norm": 0.8870308805479863,
      "learning_rate": 7.857197830400683e-06,
      "loss": 0.1655,
      "step": 11216
    },
    {
      "epoch": 0.32723612812882896,
      "grad_norm": 0.6846381544502131,
      "learning_rate": 7.856810117319916e-06,
      "loss": 0.1552,
      "step": 11217
    },
    {
      "epoch": 0.32726530135947257,
      "grad_norm": 0.778537156230549,
      "learning_rate": 7.85642237873423e-06,
      "loss": 0.1317,
      "step": 11218
    },
    {
      "epoch": 0.3272944745901161,
      "grad_norm": 0.741098038821696,
      "learning_rate": 7.856034614647087e-06,
      "loss": 0.1648,
      "step": 11219
    },
    {
      "epoch": 0.3273236478207597,
      "grad_norm": 0.7653311032549058,
      "learning_rate": 7.855646825061948e-06,
      "loss": 0.1472,
      "step": 11220
    },
    {
      "epoch": 0.32735282105140323,
      "grad_norm": 0.5955863209548142,
      "learning_rate": 7.855259009982275e-06,
      "loss": 0.1336,
      "step": 11221
    },
    {
      "epoch": 0.3273819942820468,
      "grad_norm": 0.7148012467713774,
      "learning_rate": 7.854871169411533e-06,
      "loss": 0.1552,
      "step": 11222
    },
    {
      "epoch": 0.32741116751269034,
      "grad_norm": 0.8127493975852811,
      "learning_rate": 7.854483303353182e-06,
      "loss": 0.1856,
      "step": 11223
    },
    {
      "epoch": 0.3274403407433339,
      "grad_norm": 0.6241468024838582,
      "learning_rate": 7.854095411810688e-06,
      "loss": 0.1233,
      "step": 11224
    },
    {
      "epoch": 0.3274695139739775,
      "grad_norm": 0.8904881057328751,
      "learning_rate": 7.853707494787508e-06,
      "loss": 0.1457,
      "step": 11225
    },
    {
      "epoch": 0.32749868720462105,
      "grad_norm": 0.9114078043253493,
      "learning_rate": 7.85331955228711e-06,
      "loss": 0.1591,
      "step": 11226
    },
    {
      "epoch": 0.3275278604352646,
      "grad_norm": 0.8735414792196101,
      "learning_rate": 7.852931584312955e-06,
      "loss": 0.1543,
      "step": 11227
    },
    {
      "epoch": 0.32755703366590816,
      "grad_norm": 1.187150524167911,
      "learning_rate": 7.85254359086851e-06,
      "loss": 0.1556,
      "step": 11228
    },
    {
      "epoch": 0.3275862068965517,
      "grad_norm": 0.896711636972266,
      "learning_rate": 7.852155571957237e-06,
      "loss": 0.1389,
      "step": 11229
    },
    {
      "epoch": 0.32761538012719527,
      "grad_norm": 1.2077028882700518,
      "learning_rate": 7.851767527582597e-06,
      "loss": 0.1527,
      "step": 11230
    },
    {
      "epoch": 0.3276445533578389,
      "grad_norm": 0.7263188814374241,
      "learning_rate": 7.851379457748058e-06,
      "loss": 0.1503,
      "step": 11231
    },
    {
      "epoch": 0.32767372658848243,
      "grad_norm": 0.7322433282618662,
      "learning_rate": 7.850991362457086e-06,
      "loss": 0.1095,
      "step": 11232
    },
    {
      "epoch": 0.327702899819126,
      "grad_norm": 0.8758994807569604,
      "learning_rate": 7.850603241713143e-06,
      "loss": 0.1504,
      "step": 11233
    },
    {
      "epoch": 0.32773207304976953,
      "grad_norm": 0.8231619583819102,
      "learning_rate": 7.850215095519693e-06,
      "loss": 0.1395,
      "step": 11234
    },
    {
      "epoch": 0.3277612462804131,
      "grad_norm": 0.7710888808662312,
      "learning_rate": 7.849826923880205e-06,
      "loss": 0.1651,
      "step": 11235
    },
    {
      "epoch": 0.32779041951105664,
      "grad_norm": 0.8255779225335644,
      "learning_rate": 7.849438726798142e-06,
      "loss": 0.1653,
      "step": 11236
    },
    {
      "epoch": 0.3278195927417002,
      "grad_norm": 1.2851151130354712,
      "learning_rate": 7.84905050427697e-06,
      "loss": 0.1373,
      "step": 11237
    },
    {
      "epoch": 0.3278487659723438,
      "grad_norm": 0.9665058607869698,
      "learning_rate": 7.848662256320155e-06,
      "loss": 0.1725,
      "step": 11238
    },
    {
      "epoch": 0.32787793920298736,
      "grad_norm": 0.7286947726251944,
      "learning_rate": 7.848273982931164e-06,
      "loss": 0.1347,
      "step": 11239
    },
    {
      "epoch": 0.3279071124336309,
      "grad_norm": 0.9619457127205155,
      "learning_rate": 7.847885684113463e-06,
      "loss": 0.1782,
      "step": 11240
    },
    {
      "epoch": 0.32793628566427446,
      "grad_norm": 1.3098646305027817,
      "learning_rate": 7.847497359870517e-06,
      "loss": 0.1584,
      "step": 11241
    },
    {
      "epoch": 0.327965458894918,
      "grad_norm": 0.7374484823036807,
      "learning_rate": 7.847109010205796e-06,
      "loss": 0.1514,
      "step": 11242
    },
    {
      "epoch": 0.32799463212556157,
      "grad_norm": 0.847237314622174,
      "learning_rate": 7.846720635122765e-06,
      "loss": 0.1322,
      "step": 11243
    },
    {
      "epoch": 0.3280238053562051,
      "grad_norm": 0.9292241527720175,
      "learning_rate": 7.84633223462489e-06,
      "loss": 0.16,
      "step": 11244
    },
    {
      "epoch": 0.32805297858684873,
      "grad_norm": 0.8562620323172302,
      "learning_rate": 7.845943808715643e-06,
      "loss": 0.1466,
      "step": 11245
    },
    {
      "epoch": 0.3280821518174923,
      "grad_norm": 0.7034178594607732,
      "learning_rate": 7.845555357398488e-06,
      "loss": 0.1357,
      "step": 11246
    },
    {
      "epoch": 0.32811132504813584,
      "grad_norm": 0.8738043470643679,
      "learning_rate": 7.845166880676894e-06,
      "loss": 0.1457,
      "step": 11247
    },
    {
      "epoch": 0.3281404982787794,
      "grad_norm": 0.7889693949055656,
      "learning_rate": 7.844778378554328e-06,
      "loss": 0.1391,
      "step": 11248
    },
    {
      "epoch": 0.32816967150942294,
      "grad_norm": 0.9570973099032345,
      "learning_rate": 7.844389851034262e-06,
      "loss": 0.1621,
      "step": 11249
    },
    {
      "epoch": 0.3281988447400665,
      "grad_norm": 1.0272368292421759,
      "learning_rate": 7.84400129812016e-06,
      "loss": 0.1403,
      "step": 11250
    },
    {
      "epoch": 0.32822801797071005,
      "grad_norm": 0.7281150320970144,
      "learning_rate": 7.843612719815495e-06,
      "loss": 0.1505,
      "step": 11251
    },
    {
      "epoch": 0.32825719120135366,
      "grad_norm": 0.9004117672538315,
      "learning_rate": 7.843224116123735e-06,
      "loss": 0.1709,
      "step": 11252
    },
    {
      "epoch": 0.3282863644319972,
      "grad_norm": 0.9160051166247537,
      "learning_rate": 7.842835487048347e-06,
      "loss": 0.1388,
      "step": 11253
    },
    {
      "epoch": 0.32831553766264077,
      "grad_norm": 0.6360534837243493,
      "learning_rate": 7.842446832592805e-06,
      "loss": 0.1462,
      "step": 11254
    },
    {
      "epoch": 0.3283447108932843,
      "grad_norm": 0.8936577309356724,
      "learning_rate": 7.842058152760573e-06,
      "loss": 0.1816,
      "step": 11255
    },
    {
      "epoch": 0.3283738841239279,
      "grad_norm": 0.8079007857039887,
      "learning_rate": 7.841669447555126e-06,
      "loss": 0.1429,
      "step": 11256
    },
    {
      "epoch": 0.3284030573545714,
      "grad_norm": 0.8079294421546867,
      "learning_rate": 7.841280716979933e-06,
      "loss": 0.1311,
      "step": 11257
    },
    {
      "epoch": 0.328432230585215,
      "grad_norm": 0.6798051594740071,
      "learning_rate": 7.840891961038464e-06,
      "loss": 0.1484,
      "step": 11258
    },
    {
      "epoch": 0.3284614038158586,
      "grad_norm": 0.8981734630181988,
      "learning_rate": 7.840503179734188e-06,
      "loss": 0.1597,
      "step": 11259
    },
    {
      "epoch": 0.32849057704650214,
      "grad_norm": 0.79375380572684,
      "learning_rate": 7.840114373070579e-06,
      "loss": 0.1344,
      "step": 11260
    },
    {
      "epoch": 0.3285197502771457,
      "grad_norm": 0.8088605455674179,
      "learning_rate": 7.839725541051106e-06,
      "loss": 0.1217,
      "step": 11261
    },
    {
      "epoch": 0.32854892350778925,
      "grad_norm": 0.7509650738130004,
      "learning_rate": 7.839336683679241e-06,
      "loss": 0.1379,
      "step": 11262
    },
    {
      "epoch": 0.3285780967384328,
      "grad_norm": 0.9784735787095662,
      "learning_rate": 7.838947800958459e-06,
      "loss": 0.1506,
      "step": 11263
    },
    {
      "epoch": 0.32860726996907635,
      "grad_norm": 0.9860942217770315,
      "learning_rate": 7.838558892892226e-06,
      "loss": 0.1491,
      "step": 11264
    },
    {
      "epoch": 0.32863644319971996,
      "grad_norm": 0.8076819378761262,
      "learning_rate": 7.838169959484017e-06,
      "loss": 0.1514,
      "step": 11265
    },
    {
      "epoch": 0.3286656164303635,
      "grad_norm": 0.9269881626634534,
      "learning_rate": 7.837781000737306e-06,
      "loss": 0.1586,
      "step": 11266
    },
    {
      "epoch": 0.32869478966100707,
      "grad_norm": 0.7327365327819917,
      "learning_rate": 7.837392016655562e-06,
      "loss": 0.1646,
      "step": 11267
    },
    {
      "epoch": 0.3287239628916506,
      "grad_norm": 0.831318890265991,
      "learning_rate": 7.837003007242258e-06,
      "loss": 0.1245,
      "step": 11268
    },
    {
      "epoch": 0.3287531361222942,
      "grad_norm": 0.8241302883260383,
      "learning_rate": 7.83661397250087e-06,
      "loss": 0.1414,
      "step": 11269
    },
    {
      "epoch": 0.32878230935293773,
      "grad_norm": 0.9117858533668711,
      "learning_rate": 7.83622491243487e-06,
      "loss": 0.1463,
      "step": 11270
    },
    {
      "epoch": 0.3288114825835813,
      "grad_norm": 0.9879726116733071,
      "learning_rate": 7.835835827047731e-06,
      "loss": 0.1549,
      "step": 11271
    },
    {
      "epoch": 0.3288406558142249,
      "grad_norm": 0.8261101970410129,
      "learning_rate": 7.835446716342926e-06,
      "loss": 0.1554,
      "step": 11272
    },
    {
      "epoch": 0.32886982904486844,
      "grad_norm": 0.6414598425966459,
      "learning_rate": 7.83505758032393e-06,
      "loss": 0.1415,
      "step": 11273
    },
    {
      "epoch": 0.328899002275512,
      "grad_norm": 0.7659185417449016,
      "learning_rate": 7.834668418994216e-06,
      "loss": 0.1532,
      "step": 11274
    },
    {
      "epoch": 0.32892817550615555,
      "grad_norm": 1.0189263475884969,
      "learning_rate": 7.834279232357261e-06,
      "loss": 0.1509,
      "step": 11275
    },
    {
      "epoch": 0.3289573487367991,
      "grad_norm": 0.6077407312814666,
      "learning_rate": 7.833890020416537e-06,
      "loss": 0.1503,
      "step": 11276
    },
    {
      "epoch": 0.32898652196744266,
      "grad_norm": 0.8050501152685532,
      "learning_rate": 7.833500783175518e-06,
      "loss": 0.1685,
      "step": 11277
    },
    {
      "epoch": 0.3290156951980862,
      "grad_norm": 0.7442021567327869,
      "learning_rate": 7.833111520637681e-06,
      "loss": 0.135,
      "step": 11278
    },
    {
      "epoch": 0.3290448684287298,
      "grad_norm": 0.7083448176879351,
      "learning_rate": 7.832722232806503e-06,
      "loss": 0.1499,
      "step": 11279
    },
    {
      "epoch": 0.3290740416593734,
      "grad_norm": 0.715504077425142,
      "learning_rate": 7.832332919685452e-06,
      "loss": 0.1408,
      "step": 11280
    },
    {
      "epoch": 0.3291032148900169,
      "grad_norm": 0.9422272669677965,
      "learning_rate": 7.831943581278011e-06,
      "loss": 0.142,
      "step": 11281
    },
    {
      "epoch": 0.3291323881206605,
      "grad_norm": 0.704431642332087,
      "learning_rate": 7.831554217587655e-06,
      "loss": 0.1437,
      "step": 11282
    },
    {
      "epoch": 0.32916156135130403,
      "grad_norm": 0.6947801321932113,
      "learning_rate": 7.831164828617858e-06,
      "loss": 0.1325,
      "step": 11283
    },
    {
      "epoch": 0.3291907345819476,
      "grad_norm": 0.7415241606652632,
      "learning_rate": 7.830775414372099e-06,
      "loss": 0.1795,
      "step": 11284
    },
    {
      "epoch": 0.32921990781259114,
      "grad_norm": 0.8856096077059094,
      "learning_rate": 7.830385974853852e-06,
      "loss": 0.1756,
      "step": 11285
    },
    {
      "epoch": 0.32924908104323475,
      "grad_norm": 1.024118628830476,
      "learning_rate": 7.829996510066594e-06,
      "loss": 0.1392,
      "step": 11286
    },
    {
      "epoch": 0.3292782542738783,
      "grad_norm": 0.9095232382551075,
      "learning_rate": 7.829607020013802e-06,
      "loss": 0.1477,
      "step": 11287
    },
    {
      "epoch": 0.32930742750452185,
      "grad_norm": 0.9476127453849916,
      "learning_rate": 7.829217504698957e-06,
      "loss": 0.1559,
      "step": 11288
    },
    {
      "epoch": 0.3293366007351654,
      "grad_norm": 0.8061722285929731,
      "learning_rate": 7.82882796412553e-06,
      "loss": 0.1477,
      "step": 11289
    },
    {
      "epoch": 0.32936577396580896,
      "grad_norm": 0.6870770038574902,
      "learning_rate": 7.828438398297005e-06,
      "loss": 0.1429,
      "step": 11290
    },
    {
      "epoch": 0.3293949471964525,
      "grad_norm": 0.9215153764572268,
      "learning_rate": 7.828048807216854e-06,
      "loss": 0.1488,
      "step": 11291
    },
    {
      "epoch": 0.3294241204270961,
      "grad_norm": 0.8951236392736993,
      "learning_rate": 7.827659190888562e-06,
      "loss": 0.1558,
      "step": 11292
    },
    {
      "epoch": 0.3294532936577397,
      "grad_norm": 0.9029770747599283,
      "learning_rate": 7.827269549315602e-06,
      "loss": 0.1737,
      "step": 11293
    },
    {
      "epoch": 0.32948246688838323,
      "grad_norm": 0.926310766870844,
      "learning_rate": 7.826879882501455e-06,
      "loss": 0.134,
      "step": 11294
    },
    {
      "epoch": 0.3295116401190268,
      "grad_norm": 0.8409863761331066,
      "learning_rate": 7.826490190449596e-06,
      "loss": 0.1479,
      "step": 11295
    },
    {
      "epoch": 0.32954081334967034,
      "grad_norm": 0.8437637679566358,
      "learning_rate": 7.826100473163512e-06,
      "loss": 0.1388,
      "step": 11296
    },
    {
      "epoch": 0.3295699865803139,
      "grad_norm": 0.891714974199195,
      "learning_rate": 7.825710730646676e-06,
      "loss": 0.156,
      "step": 11297
    },
    {
      "epoch": 0.32959915981095744,
      "grad_norm": 0.897468335129447,
      "learning_rate": 7.825320962902568e-06,
      "loss": 0.1492,
      "step": 11298
    },
    {
      "epoch": 0.32962833304160105,
      "grad_norm": 0.6932552940129606,
      "learning_rate": 7.82493116993467e-06,
      "loss": 0.126,
      "step": 11299
    },
    {
      "epoch": 0.3296575062722446,
      "grad_norm": 0.8449570914458303,
      "learning_rate": 7.82454135174646e-06,
      "loss": 0.144,
      "step": 11300
    },
    {
      "epoch": 0.32968667950288816,
      "grad_norm": 0.9087588850523538,
      "learning_rate": 7.82415150834142e-06,
      "loss": 0.1432,
      "step": 11301
    },
    {
      "epoch": 0.3297158527335317,
      "grad_norm": 0.7557663548751011,
      "learning_rate": 7.823761639723029e-06,
      "loss": 0.1441,
      "step": 11302
    },
    {
      "epoch": 0.32974502596417526,
      "grad_norm": 0.887443115216238,
      "learning_rate": 7.823371745894768e-06,
      "loss": 0.143,
      "step": 11303
    },
    {
      "epoch": 0.3297741991948188,
      "grad_norm": 0.7834573258451816,
      "learning_rate": 7.822981826860118e-06,
      "loss": 0.1434,
      "step": 11304
    },
    {
      "epoch": 0.32980337242546237,
      "grad_norm": 0.8623294560083001,
      "learning_rate": 7.822591882622562e-06,
      "loss": 0.1396,
      "step": 11305
    },
    {
      "epoch": 0.329832545656106,
      "grad_norm": 0.9454631376020142,
      "learning_rate": 7.822201913185577e-06,
      "loss": 0.1551,
      "step": 11306
    },
    {
      "epoch": 0.32986171888674953,
      "grad_norm": 0.8654563307718793,
      "learning_rate": 7.821811918552647e-06,
      "loss": 0.1896,
      "step": 11307
    },
    {
      "epoch": 0.3298908921173931,
      "grad_norm": 0.953560744709995,
      "learning_rate": 7.821421898727255e-06,
      "loss": 0.1817,
      "step": 11308
    },
    {
      "epoch": 0.32992006534803664,
      "grad_norm": 0.8154254191760685,
      "learning_rate": 7.821031853712881e-06,
      "loss": 0.133,
      "step": 11309
    },
    {
      "epoch": 0.3299492385786802,
      "grad_norm": 0.9257272792921443,
      "learning_rate": 7.82064178351301e-06,
      "loss": 0.1518,
      "step": 11310
    },
    {
      "epoch": 0.32997841180932375,
      "grad_norm": 0.803301284297927,
      "learning_rate": 7.820251688131121e-06,
      "loss": 0.1299,
      "step": 11311
    },
    {
      "epoch": 0.3300075850399673,
      "grad_norm": 0.818206556440267,
      "learning_rate": 7.819861567570699e-06,
      "loss": 0.1424,
      "step": 11312
    },
    {
      "epoch": 0.3300367582706109,
      "grad_norm": 0.9667275436284948,
      "learning_rate": 7.819471421835224e-06,
      "loss": 0.1454,
      "step": 11313
    },
    {
      "epoch": 0.33006593150125446,
      "grad_norm": 0.9182018915794946,
      "learning_rate": 7.819081250928184e-06,
      "loss": 0.132,
      "step": 11314
    },
    {
      "epoch": 0.330095104731898,
      "grad_norm": 0.9555021564657732,
      "learning_rate": 7.818691054853056e-06,
      "loss": 0.1572,
      "step": 11315
    },
    {
      "epoch": 0.33012427796254157,
      "grad_norm": 0.7809252659139025,
      "learning_rate": 7.81830083361333e-06,
      "loss": 0.1366,
      "step": 11316
    },
    {
      "epoch": 0.3301534511931851,
      "grad_norm": 0.9081021460098465,
      "learning_rate": 7.817910587212486e-06,
      "loss": 0.1532,
      "step": 11317
    },
    {
      "epoch": 0.3301826244238287,
      "grad_norm": 0.8547658909787655,
      "learning_rate": 7.81752031565401e-06,
      "loss": 0.1382,
      "step": 11318
    },
    {
      "epoch": 0.3302117976544723,
      "grad_norm": 0.8022174461139985,
      "learning_rate": 7.817130018941383e-06,
      "loss": 0.1544,
      "step": 11319
    },
    {
      "epoch": 0.33024097088511584,
      "grad_norm": 0.97159226609784,
      "learning_rate": 7.816739697078094e-06,
      "loss": 0.127,
      "step": 11320
    },
    {
      "epoch": 0.3302701441157594,
      "grad_norm": 0.853486364731287,
      "learning_rate": 7.816349350067625e-06,
      "loss": 0.1342,
      "step": 11321
    },
    {
      "epoch": 0.33029931734640294,
      "grad_norm": 1.0613319647337767,
      "learning_rate": 7.81595897791346e-06,
      "loss": 0.1628,
      "step": 11322
    },
    {
      "epoch": 0.3303284905770465,
      "grad_norm": 0.9607553484038386,
      "learning_rate": 7.815568580619087e-06,
      "loss": 0.1689,
      "step": 11323
    },
    {
      "epoch": 0.33035766380769005,
      "grad_norm": 0.9772394208745946,
      "learning_rate": 7.815178158187991e-06,
      "loss": 0.1398,
      "step": 11324
    },
    {
      "epoch": 0.3303868370383336,
      "grad_norm": 1.2193309788058466,
      "learning_rate": 7.814787710623652e-06,
      "loss": 0.1434,
      "step": 11325
    },
    {
      "epoch": 0.3304160102689772,
      "grad_norm": 1.0430780770787516,
      "learning_rate": 7.814397237929564e-06,
      "loss": 0.1672,
      "step": 11326
    },
    {
      "epoch": 0.33044518349962076,
      "grad_norm": 0.9829767299141772,
      "learning_rate": 7.814006740109208e-06,
      "loss": 0.14,
      "step": 11327
    },
    {
      "epoch": 0.3304743567302643,
      "grad_norm": 1.1205465339136425,
      "learning_rate": 7.813616217166071e-06,
      "loss": 0.1486,
      "step": 11328
    },
    {
      "epoch": 0.33050352996090787,
      "grad_norm": 0.8423887911422739,
      "learning_rate": 7.813225669103641e-06,
      "loss": 0.1601,
      "step": 11329
    },
    {
      "epoch": 0.3305327031915514,
      "grad_norm": 0.7399347113271597,
      "learning_rate": 7.812835095925404e-06,
      "loss": 0.1799,
      "step": 11330
    },
    {
      "epoch": 0.330561876422195,
      "grad_norm": 1.0475515746343944,
      "learning_rate": 7.812444497634847e-06,
      "loss": 0.1327,
      "step": 11331
    },
    {
      "epoch": 0.33059104965283853,
      "grad_norm": 0.7769497032823693,
      "learning_rate": 7.812053874235455e-06,
      "loss": 0.1253,
      "step": 11332
    },
    {
      "epoch": 0.33062022288348214,
      "grad_norm": 0.8137021183200754,
      "learning_rate": 7.811663225730718e-06,
      "loss": 0.1522,
      "step": 11333
    },
    {
      "epoch": 0.3306493961141257,
      "grad_norm": 0.6840079985260334,
      "learning_rate": 7.811272552124125e-06,
      "loss": 0.1333,
      "step": 11334
    },
    {
      "epoch": 0.33067856934476925,
      "grad_norm": 0.8757888851495554,
      "learning_rate": 7.81088185341916e-06,
      "loss": 0.1583,
      "step": 11335
    },
    {
      "epoch": 0.3307077425754128,
      "grad_norm": 0.9502181614649482,
      "learning_rate": 7.810491129619314e-06,
      "loss": 0.152,
      "step": 11336
    },
    {
      "epoch": 0.33073691580605635,
      "grad_norm": 1.0521417441197338,
      "learning_rate": 7.810100380728072e-06,
      "loss": 0.1318,
      "step": 11337
    },
    {
      "epoch": 0.3307660890366999,
      "grad_norm": 0.7521275234108433,
      "learning_rate": 7.809709606748926e-06,
      "loss": 0.159,
      "step": 11338
    },
    {
      "epoch": 0.33079526226734346,
      "grad_norm": 0.9068589984650683,
      "learning_rate": 7.809318807685364e-06,
      "loss": 0.1591,
      "step": 11339
    },
    {
      "epoch": 0.33082443549798707,
      "grad_norm": 0.9063226841119603,
      "learning_rate": 7.808927983540873e-06,
      "loss": 0.141,
      "step": 11340
    },
    {
      "epoch": 0.3308536087286306,
      "grad_norm": 1.023090238082688,
      "learning_rate": 7.808537134318944e-06,
      "loss": 0.1406,
      "step": 11341
    },
    {
      "epoch": 0.3308827819592742,
      "grad_norm": 0.9189415234848043,
      "learning_rate": 7.808146260023067e-06,
      "loss": 0.1472,
      "step": 11342
    },
    {
      "epoch": 0.33091195518991773,
      "grad_norm": 0.9280011152793862,
      "learning_rate": 7.807755360656727e-06,
      "loss": 0.1763,
      "step": 11343
    },
    {
      "epoch": 0.3309411284205613,
      "grad_norm": 1.0233840700040582,
      "learning_rate": 7.807364436223422e-06,
      "loss": 0.1477,
      "step": 11344
    },
    {
      "epoch": 0.33097030165120483,
      "grad_norm": 1.161617052132787,
      "learning_rate": 7.806973486726634e-06,
      "loss": 0.1357,
      "step": 11345
    },
    {
      "epoch": 0.33099947488184844,
      "grad_norm": 1.083374635455969,
      "learning_rate": 7.806582512169859e-06,
      "loss": 0.1408,
      "step": 11346
    },
    {
      "epoch": 0.331028648112492,
      "grad_norm": 0.9179876388683236,
      "learning_rate": 7.806191512556584e-06,
      "loss": 0.1467,
      "step": 11347
    },
    {
      "epoch": 0.33105782134313555,
      "grad_norm": 0.9674249558014094,
      "learning_rate": 7.805800487890302e-06,
      "loss": 0.1539,
      "step": 11348
    },
    {
      "epoch": 0.3310869945737791,
      "grad_norm": 1.1368088176756828,
      "learning_rate": 7.805409438174502e-06,
      "loss": 0.1404,
      "step": 11349
    },
    {
      "epoch": 0.33111616780442266,
      "grad_norm": 0.8900375496043085,
      "learning_rate": 7.805018363412677e-06,
      "loss": 0.1312,
      "step": 11350
    },
    {
      "epoch": 0.3311453410350662,
      "grad_norm": 0.7199386376933307,
      "learning_rate": 7.804627263608317e-06,
      "loss": 0.155,
      "step": 11351
    },
    {
      "epoch": 0.33117451426570976,
      "grad_norm": 1.0488604656936606,
      "learning_rate": 7.804236138764916e-06,
      "loss": 0.1585,
      "step": 11352
    },
    {
      "epoch": 0.33120368749635337,
      "grad_norm": 0.9974434078440045,
      "learning_rate": 7.803844988885962e-06,
      "loss": 0.1404,
      "step": 11353
    },
    {
      "epoch": 0.3312328607269969,
      "grad_norm": 0.8802585061595912,
      "learning_rate": 7.803453813974951e-06,
      "loss": 0.1821,
      "step": 11354
    },
    {
      "epoch": 0.3312620339576405,
      "grad_norm": 0.9447214851208002,
      "learning_rate": 7.803062614035372e-06,
      "loss": 0.1803,
      "step": 11355
    },
    {
      "epoch": 0.33129120718828403,
      "grad_norm": 1.341498285531634,
      "learning_rate": 7.802671389070721e-06,
      "loss": 0.1629,
      "step": 11356
    },
    {
      "epoch": 0.3313203804189276,
      "grad_norm": 0.8542320307106278,
      "learning_rate": 7.802280139084489e-06,
      "loss": 0.1138,
      "step": 11357
    },
    {
      "epoch": 0.33134955364957114,
      "grad_norm": 0.8783499068564741,
      "learning_rate": 7.801888864080166e-06,
      "loss": 0.1682,
      "step": 11358
    },
    {
      "epoch": 0.3313787268802147,
      "grad_norm": 0.9882950313021468,
      "learning_rate": 7.80149756406125e-06,
      "loss": 0.1627,
      "step": 11359
    },
    {
      "epoch": 0.3314079001108583,
      "grad_norm": 0.7591211774130825,
      "learning_rate": 7.801106239031233e-06,
      "loss": 0.1412,
      "step": 11360
    },
    {
      "epoch": 0.33143707334150185,
      "grad_norm": 0.7556389361556377,
      "learning_rate": 7.800714888993607e-06,
      "loss": 0.1457,
      "step": 11361
    },
    {
      "epoch": 0.3314662465721454,
      "grad_norm": 0.9666078241016921,
      "learning_rate": 7.800323513951867e-06,
      "loss": 0.1707,
      "step": 11362
    },
    {
      "epoch": 0.33149541980278896,
      "grad_norm": 0.8416880894335402,
      "learning_rate": 7.799932113909508e-06,
      "loss": 0.1347,
      "step": 11363
    },
    {
      "epoch": 0.3315245930334325,
      "grad_norm": 0.7060471191911536,
      "learning_rate": 7.799540688870024e-06,
      "loss": 0.1154,
      "step": 11364
    },
    {
      "epoch": 0.33155376626407607,
      "grad_norm": 0.9235554422842465,
      "learning_rate": 7.799149238836908e-06,
      "loss": 0.1428,
      "step": 11365
    },
    {
      "epoch": 0.3315829394947196,
      "grad_norm": 0.794240013814919,
      "learning_rate": 7.798757763813656e-06,
      "loss": 0.1502,
      "step": 11366
    },
    {
      "epoch": 0.33161211272536323,
      "grad_norm": 0.7457371350575628,
      "learning_rate": 7.798366263803763e-06,
      "loss": 0.1691,
      "step": 11367
    },
    {
      "epoch": 0.3316412859560068,
      "grad_norm": 0.8171198498176778,
      "learning_rate": 7.797974738810723e-06,
      "loss": 0.1499,
      "step": 11368
    },
    {
      "epoch": 0.33167045918665033,
      "grad_norm": 0.7518992320645509,
      "learning_rate": 7.797583188838033e-06,
      "loss": 0.1799,
      "step": 11369
    },
    {
      "epoch": 0.3316996324172939,
      "grad_norm": 0.9249122309323343,
      "learning_rate": 7.79719161388919e-06,
      "loss": 0.1528,
      "step": 11370
    },
    {
      "epoch": 0.33172880564793744,
      "grad_norm": 0.908845387575393,
      "learning_rate": 7.796800013967685e-06,
      "loss": 0.1468,
      "step": 11371
    },
    {
      "epoch": 0.331757978878581,
      "grad_norm": 0.7230178464134541,
      "learning_rate": 7.79640838907702e-06,
      "loss": 0.164,
      "step": 11372
    },
    {
      "epoch": 0.33178715210922455,
      "grad_norm": 0.8837016122612601,
      "learning_rate": 7.796016739220686e-06,
      "loss": 0.1611,
      "step": 11373
    },
    {
      "epoch": 0.33181632533986816,
      "grad_norm": 1.1638407280343497,
      "learning_rate": 7.795625064402184e-06,
      "loss": 0.1473,
      "step": 11374
    },
    {
      "epoch": 0.3318454985705117,
      "grad_norm": 1.0006773533680604,
      "learning_rate": 7.795233364625008e-06,
      "loss": 0.1458,
      "step": 11375
    },
    {
      "epoch": 0.33187467180115526,
      "grad_norm": 0.87323873384313,
      "learning_rate": 7.794841639892655e-06,
      "loss": 0.1576,
      "step": 11376
    },
    {
      "epoch": 0.3319038450317988,
      "grad_norm": 0.7583736870201176,
      "learning_rate": 7.794449890208624e-06,
      "loss": 0.1535,
      "step": 11377
    },
    {
      "epoch": 0.33193301826244237,
      "grad_norm": 0.907097595137542,
      "learning_rate": 7.794058115576411e-06,
      "loss": 0.1434,
      "step": 11378
    },
    {
      "epoch": 0.3319621914930859,
      "grad_norm": 1.078460085956634,
      "learning_rate": 7.793666315999514e-06,
      "loss": 0.1501,
      "step": 11379
    },
    {
      "epoch": 0.33199136472372953,
      "grad_norm": 0.7874561281958534,
      "learning_rate": 7.793274491481431e-06,
      "loss": 0.1457,
      "step": 11380
    },
    {
      "epoch": 0.3320205379543731,
      "grad_norm": 0.9717033244524153,
      "learning_rate": 7.792882642025662e-06,
      "loss": 0.1608,
      "step": 11381
    },
    {
      "epoch": 0.33204971118501664,
      "grad_norm": 1.1234856164189455,
      "learning_rate": 7.7924907676357e-06,
      "loss": 0.1338,
      "step": 11382
    },
    {
      "epoch": 0.3320788844156602,
      "grad_norm": 0.6348800428920081,
      "learning_rate": 7.79209886831505e-06,
      "loss": 0.1479,
      "step": 11383
    },
    {
      "epoch": 0.33210805764630374,
      "grad_norm": 0.9411558584280161,
      "learning_rate": 7.791706944067207e-06,
      "loss": 0.1586,
      "step": 11384
    },
    {
      "epoch": 0.3321372308769473,
      "grad_norm": 0.9933133911246549,
      "learning_rate": 7.79131499489567e-06,
      "loss": 0.1839,
      "step": 11385
    },
    {
      "epoch": 0.33216640410759085,
      "grad_norm": 0.8544700110182338,
      "learning_rate": 7.79092302080394e-06,
      "loss": 0.1702,
      "step": 11386
    },
    {
      "epoch": 0.33219557733823446,
      "grad_norm": 0.8343955580679631,
      "learning_rate": 7.790531021795516e-06,
      "loss": 0.1392,
      "step": 11387
    },
    {
      "epoch": 0.332224750568878,
      "grad_norm": 0.8935637225075622,
      "learning_rate": 7.790138997873895e-06,
      "loss": 0.1613,
      "step": 11388
    },
    {
      "epoch": 0.33225392379952157,
      "grad_norm": 0.7621996166956655,
      "learning_rate": 7.789746949042582e-06,
      "loss": 0.1307,
      "step": 11389
    },
    {
      "epoch": 0.3322830970301651,
      "grad_norm": 0.8073744013821377,
      "learning_rate": 7.789354875305074e-06,
      "loss": 0.1692,
      "step": 11390
    },
    {
      "epoch": 0.3323122702608087,
      "grad_norm": 0.8965316595742587,
      "learning_rate": 7.788962776664867e-06,
      "loss": 0.165,
      "step": 11391
    },
    {
      "epoch": 0.3323414434914522,
      "grad_norm": 0.7934239369806525,
      "learning_rate": 7.78857065312547e-06,
      "loss": 0.1411,
      "step": 11392
    },
    {
      "epoch": 0.3323706167220958,
      "grad_norm": 0.8277492835564576,
      "learning_rate": 7.78817850469038e-06,
      "loss": 0.1432,
      "step": 11393
    },
    {
      "epoch": 0.3323997899527394,
      "grad_norm": 0.7552515074734151,
      "learning_rate": 7.787786331363097e-06,
      "loss": 0.1502,
      "step": 11394
    },
    {
      "epoch": 0.33242896318338294,
      "grad_norm": 0.9938117671261162,
      "learning_rate": 7.787394133147125e-06,
      "loss": 0.1521,
      "step": 11395
    },
    {
      "epoch": 0.3324581364140265,
      "grad_norm": 0.8481254081862278,
      "learning_rate": 7.787001910045962e-06,
      "loss": 0.1471,
      "step": 11396
    },
    {
      "epoch": 0.33248730964467005,
      "grad_norm": 0.8684717639587272,
      "learning_rate": 7.786609662063109e-06,
      "loss": 0.1685,
      "step": 11397
    },
    {
      "epoch": 0.3325164828753136,
      "grad_norm": 0.9166406583040055,
      "learning_rate": 7.786217389202073e-06,
      "loss": 0.1614,
      "step": 11398
    },
    {
      "epoch": 0.33254565610595715,
      "grad_norm": 0.7848013886076809,
      "learning_rate": 7.785825091466352e-06,
      "loss": 0.1466,
      "step": 11399
    },
    {
      "epoch": 0.3325748293366007,
      "grad_norm": 0.6776690801310096,
      "learning_rate": 7.78543276885945e-06,
      "loss": 0.1657,
      "step": 11400
    },
    {
      "epoch": 0.3326040025672443,
      "grad_norm": 1.0952363863083787,
      "learning_rate": 7.785040421384871e-06,
      "loss": 0.1528,
      "step": 11401
    },
    {
      "epoch": 0.33263317579788787,
      "grad_norm": 0.7588286368887359,
      "learning_rate": 7.784648049046114e-06,
      "loss": 0.1511,
      "step": 11402
    },
    {
      "epoch": 0.3326623490285314,
      "grad_norm": 0.8805903717398548,
      "learning_rate": 7.784255651846684e-06,
      "loss": 0.1444,
      "step": 11403
    },
    {
      "epoch": 0.332691522259175,
      "grad_norm": 0.7726353599224017,
      "learning_rate": 7.783863229790085e-06,
      "loss": 0.134,
      "step": 11404
    },
    {
      "epoch": 0.33272069548981853,
      "grad_norm": 0.7227797656123466,
      "learning_rate": 7.783470782879818e-06,
      "loss": 0.137,
      "step": 11405
    },
    {
      "epoch": 0.3327498687204621,
      "grad_norm": 0.6219612835543467,
      "learning_rate": 7.783078311119389e-06,
      "loss": 0.1406,
      "step": 11406
    },
    {
      "epoch": 0.3327790419511057,
      "grad_norm": 0.7399841750316177,
      "learning_rate": 7.782685814512303e-06,
      "loss": 0.1339,
      "step": 11407
    },
    {
      "epoch": 0.33280821518174925,
      "grad_norm": 0.5929710896739792,
      "learning_rate": 7.782293293062062e-06,
      "loss": 0.1421,
      "step": 11408
    },
    {
      "epoch": 0.3328373884123928,
      "grad_norm": 0.6908336840206548,
      "learning_rate": 7.781900746772169e-06,
      "loss": 0.1606,
      "step": 11409
    },
    {
      "epoch": 0.33286656164303635,
      "grad_norm": 0.7495708806481434,
      "learning_rate": 7.78150817564613e-06,
      "loss": 0.1692,
      "step": 11410
    },
    {
      "epoch": 0.3328957348736799,
      "grad_norm": 0.8252047910093573,
      "learning_rate": 7.781115579687452e-06,
      "loss": 0.1226,
      "step": 11411
    },
    {
      "epoch": 0.33292490810432346,
      "grad_norm": 0.8055027083945296,
      "learning_rate": 7.780722958899637e-06,
      "loss": 0.168,
      "step": 11412
    },
    {
      "epoch": 0.332954081334967,
      "grad_norm": 0.7067676823503098,
      "learning_rate": 7.78033031328619e-06,
      "loss": 0.1467,
      "step": 11413
    },
    {
      "epoch": 0.3329832545656106,
      "grad_norm": 0.9506405613626454,
      "learning_rate": 7.779937642850618e-06,
      "loss": 0.1352,
      "step": 11414
    },
    {
      "epoch": 0.3330124277962542,
      "grad_norm": 1.0165299854068452,
      "learning_rate": 7.779544947596428e-06,
      "loss": 0.1617,
      "step": 11415
    },
    {
      "epoch": 0.3330416010268977,
      "grad_norm": 0.7459070939394024,
      "learning_rate": 7.779152227527124e-06,
      "loss": 0.1506,
      "step": 11416
    },
    {
      "epoch": 0.3330707742575413,
      "grad_norm": 0.9564467771215571,
      "learning_rate": 7.778759482646213e-06,
      "loss": 0.1498,
      "step": 11417
    },
    {
      "epoch": 0.33309994748818483,
      "grad_norm": 0.8113753157089846,
      "learning_rate": 7.778366712957198e-06,
      "loss": 0.1232,
      "step": 11418
    },
    {
      "epoch": 0.3331291207188284,
      "grad_norm": 0.8962795501339426,
      "learning_rate": 7.77797391846359e-06,
      "loss": 0.1415,
      "step": 11419
    },
    {
      "epoch": 0.33315829394947194,
      "grad_norm": 0.9959166871234717,
      "learning_rate": 7.777581099168894e-06,
      "loss": 0.1635,
      "step": 11420
    },
    {
      "epoch": 0.33318746718011555,
      "grad_norm": 0.8844159556240272,
      "learning_rate": 7.777188255076616e-06,
      "loss": 0.1437,
      "step": 11421
    },
    {
      "epoch": 0.3332166404107591,
      "grad_norm": 0.8218999750495313,
      "learning_rate": 7.776795386190265e-06,
      "loss": 0.1365,
      "step": 11422
    },
    {
      "epoch": 0.33324581364140266,
      "grad_norm": 0.8954471981358914,
      "learning_rate": 7.77640249251335e-06,
      "loss": 0.1316,
      "step": 11423
    },
    {
      "epoch": 0.3332749868720462,
      "grad_norm": 0.8534036207605681,
      "learning_rate": 7.776009574049373e-06,
      "loss": 0.1469,
      "step": 11424
    },
    {
      "epoch": 0.33330416010268976,
      "grad_norm": 0.8490377272865366,
      "learning_rate": 7.775616630801846e-06,
      "loss": 0.1331,
      "step": 11425
    },
    {
      "epoch": 0.3333333333333333,
      "grad_norm": 0.888002051237779,
      "learning_rate": 7.775223662774276e-06,
      "loss": 0.1599,
      "step": 11426
    },
    {
      "epoch": 0.33336250656397687,
      "grad_norm": 0.7401795024990379,
      "learning_rate": 7.774830669970172e-06,
      "loss": 0.1525,
      "step": 11427
    },
    {
      "epoch": 0.3333916797946205,
      "grad_norm": 1.0355651212089865,
      "learning_rate": 7.774437652393042e-06,
      "loss": 0.1568,
      "step": 11428
    },
    {
      "epoch": 0.33342085302526403,
      "grad_norm": 0.7684071493659119,
      "learning_rate": 7.774044610046396e-06,
      "loss": 0.1581,
      "step": 11429
    },
    {
      "epoch": 0.3334500262559076,
      "grad_norm": 0.7654692111072509,
      "learning_rate": 7.77365154293374e-06,
      "loss": 0.1529,
      "step": 11430
    },
    {
      "epoch": 0.33347919948655114,
      "grad_norm": 0.8563814345870975,
      "learning_rate": 7.773258451058587e-06,
      "loss": 0.1444,
      "step": 11431
    },
    {
      "epoch": 0.3335083727171947,
      "grad_norm": 0.7146638452884608,
      "learning_rate": 7.772865334424444e-06,
      "loss": 0.1467,
      "step": 11432
    },
    {
      "epoch": 0.33353754594783824,
      "grad_norm": 0.8164867364796478,
      "learning_rate": 7.772472193034821e-06,
      "loss": 0.1572,
      "step": 11433
    },
    {
      "epoch": 0.33356671917848185,
      "grad_norm": 0.9721989059492175,
      "learning_rate": 7.772079026893229e-06,
      "loss": 0.1618,
      "step": 11434
    },
    {
      "epoch": 0.3335958924091254,
      "grad_norm": 0.9092877986747195,
      "learning_rate": 7.771685836003175e-06,
      "loss": 0.123,
      "step": 11435
    },
    {
      "epoch": 0.33362506563976896,
      "grad_norm": 0.8869602588924006,
      "learning_rate": 7.771292620368173e-06,
      "loss": 0.1313,
      "step": 11436
    },
    {
      "epoch": 0.3336542388704125,
      "grad_norm": 0.8444640841614083,
      "learning_rate": 7.770899379991732e-06,
      "loss": 0.1567,
      "step": 11437
    },
    {
      "epoch": 0.33368341210105606,
      "grad_norm": 1.1219937013782346,
      "learning_rate": 7.770506114877364e-06,
      "loss": 0.181,
      "step": 11438
    },
    {
      "epoch": 0.3337125853316996,
      "grad_norm": 0.9832192985802704,
      "learning_rate": 7.770112825028578e-06,
      "loss": 0.1154,
      "step": 11439
    },
    {
      "epoch": 0.33374175856234317,
      "grad_norm": 0.7641647964742511,
      "learning_rate": 7.769719510448886e-06,
      "loss": 0.1601,
      "step": 11440
    },
    {
      "epoch": 0.3337709317929868,
      "grad_norm": 1.0673713528525672,
      "learning_rate": 7.769326171141797e-06,
      "loss": 0.147,
      "step": 11441
    },
    {
      "epoch": 0.33380010502363033,
      "grad_norm": 1.0112107621321438,
      "learning_rate": 7.768932807110828e-06,
      "loss": 0.1354,
      "step": 11442
    },
    {
      "epoch": 0.3338292782542739,
      "grad_norm": 0.5999351158126687,
      "learning_rate": 7.768539418359487e-06,
      "loss": 0.1398,
      "step": 11443
    },
    {
      "epoch": 0.33385845148491744,
      "grad_norm": 0.7862293484002059,
      "learning_rate": 7.768146004891287e-06,
      "loss": 0.1294,
      "step": 11444
    },
    {
      "epoch": 0.333887624715561,
      "grad_norm": 0.7798420221815294,
      "learning_rate": 7.767752566709739e-06,
      "loss": 0.1478,
      "step": 11445
    },
    {
      "epoch": 0.33391679794620455,
      "grad_norm": 1.0460569583065815,
      "learning_rate": 7.767359103818357e-06,
      "loss": 0.1767,
      "step": 11446
    },
    {
      "epoch": 0.3339459711768481,
      "grad_norm": 0.7185436296585129,
      "learning_rate": 7.766965616220655e-06,
      "loss": 0.1661,
      "step": 11447
    },
    {
      "epoch": 0.3339751444074917,
      "grad_norm": 1.0710920071104186,
      "learning_rate": 7.766572103920144e-06,
      "loss": 0.1636,
      "step": 11448
    },
    {
      "epoch": 0.33400431763813526,
      "grad_norm": 0.8645450446415387,
      "learning_rate": 7.766178566920338e-06,
      "loss": 0.1599,
      "step": 11449
    },
    {
      "epoch": 0.3340334908687788,
      "grad_norm": 0.8817345169230677,
      "learning_rate": 7.76578500522475e-06,
      "loss": 0.1291,
      "step": 11450
    },
    {
      "epoch": 0.33406266409942237,
      "grad_norm": 0.9304675892587817,
      "learning_rate": 7.765391418836893e-06,
      "loss": 0.1468,
      "step": 11451
    },
    {
      "epoch": 0.3340918373300659,
      "grad_norm": 0.7462799203420152,
      "learning_rate": 7.764997807760283e-06,
      "loss": 0.1332,
      "step": 11452
    },
    {
      "epoch": 0.3341210105607095,
      "grad_norm": 0.8722627404862797,
      "learning_rate": 7.764604171998432e-06,
      "loss": 0.1615,
      "step": 11453
    },
    {
      "epoch": 0.33415018379135303,
      "grad_norm": 0.937752211240302,
      "learning_rate": 7.764210511554854e-06,
      "loss": 0.1536,
      "step": 11454
    },
    {
      "epoch": 0.33417935702199664,
      "grad_norm": 0.777923375392146,
      "learning_rate": 7.763816826433066e-06,
      "loss": 0.1522,
      "step": 11455
    },
    {
      "epoch": 0.3342085302526402,
      "grad_norm": 1.0454405741657025,
      "learning_rate": 7.76342311663658e-06,
      "loss": 0.155,
      "step": 11456
    },
    {
      "epoch": 0.33423770348328374,
      "grad_norm": 0.9664911830553712,
      "learning_rate": 7.763029382168912e-06,
      "loss": 0.1381,
      "step": 11457
    },
    {
      "epoch": 0.3342668767139273,
      "grad_norm": 0.9646019848903161,
      "learning_rate": 7.762635623033577e-06,
      "loss": 0.1464,
      "step": 11458
    },
    {
      "epoch": 0.33429604994457085,
      "grad_norm": 0.959057745452574,
      "learning_rate": 7.76224183923409e-06,
      "loss": 0.1415,
      "step": 11459
    },
    {
      "epoch": 0.3343252231752144,
      "grad_norm": 0.815788890676571,
      "learning_rate": 7.76184803077397e-06,
      "loss": 0.1317,
      "step": 11460
    },
    {
      "epoch": 0.334354396405858,
      "grad_norm": 0.6938665614662191,
      "learning_rate": 7.761454197656728e-06,
      "loss": 0.1191,
      "step": 11461
    },
    {
      "epoch": 0.33438356963650157,
      "grad_norm": 0.8584014957548999,
      "learning_rate": 7.761060339885882e-06,
      "loss": 0.1425,
      "step": 11462
    },
    {
      "epoch": 0.3344127428671451,
      "grad_norm": 0.9439635147918899,
      "learning_rate": 7.76066645746495e-06,
      "loss": 0.1584,
      "step": 11463
    },
    {
      "epoch": 0.33444191609778867,
      "grad_norm": 0.7275435427161401,
      "learning_rate": 7.760272550397446e-06,
      "loss": 0.1487,
      "step": 11464
    },
    {
      "epoch": 0.3344710893284322,
      "grad_norm": 0.8284264484764152,
      "learning_rate": 7.759878618686886e-06,
      "loss": 0.1596,
      "step": 11465
    },
    {
      "epoch": 0.3345002625590758,
      "grad_norm": 0.9728561120807099,
      "learning_rate": 7.759484662336792e-06,
      "loss": 0.1761,
      "step": 11466
    },
    {
      "epoch": 0.33452943578971933,
      "grad_norm": 0.7633814891351184,
      "learning_rate": 7.759090681350676e-06,
      "loss": 0.1312,
      "step": 11467
    },
    {
      "epoch": 0.33455860902036294,
      "grad_norm": 0.6563076696473864,
      "learning_rate": 7.758696675732057e-06,
      "loss": 0.153,
      "step": 11468
    },
    {
      "epoch": 0.3345877822510065,
      "grad_norm": 0.907111826757,
      "learning_rate": 7.758302645484451e-06,
      "loss": 0.1594,
      "step": 11469
    },
    {
      "epoch": 0.33461695548165005,
      "grad_norm": 0.7887719592653158,
      "learning_rate": 7.75790859061138e-06,
      "loss": 0.1394,
      "step": 11470
    },
    {
      "epoch": 0.3346461287122936,
      "grad_norm": 0.6941328921783568,
      "learning_rate": 7.757514511116358e-06,
      "loss": 0.1366,
      "step": 11471
    },
    {
      "epoch": 0.33467530194293715,
      "grad_norm": 0.7424400834815343,
      "learning_rate": 7.757120407002904e-06,
      "loss": 0.1467,
      "step": 11472
    },
    {
      "epoch": 0.3347044751735807,
      "grad_norm": 0.7666029824850172,
      "learning_rate": 7.75672627827454e-06,
      "loss": 0.1444,
      "step": 11473
    },
    {
      "epoch": 0.33473364840422426,
      "grad_norm": 0.8542415574993383,
      "learning_rate": 7.75633212493478e-06,
      "loss": 0.1442,
      "step": 11474
    },
    {
      "epoch": 0.33476282163486787,
      "grad_norm": 0.8128021005906819,
      "learning_rate": 7.755937946987144e-06,
      "loss": 0.1456,
      "step": 11475
    },
    {
      "epoch": 0.3347919948655114,
      "grad_norm": 0.882984283684404,
      "learning_rate": 7.755543744435153e-06,
      "loss": 0.1631,
      "step": 11476
    },
    {
      "epoch": 0.334821168096155,
      "grad_norm": 0.8129933914221654,
      "learning_rate": 7.755149517282325e-06,
      "loss": 0.1695,
      "step": 11477
    },
    {
      "epoch": 0.33485034132679853,
      "grad_norm": 0.8860049986178933,
      "learning_rate": 7.75475526553218e-06,
      "loss": 0.144,
      "step": 11478
    },
    {
      "epoch": 0.3348795145574421,
      "grad_norm": 0.8283335356209518,
      "learning_rate": 7.754360989188237e-06,
      "loss": 0.1469,
      "step": 11479
    },
    {
      "epoch": 0.33490868778808563,
      "grad_norm": 0.635544225279093,
      "learning_rate": 7.753966688254018e-06,
      "loss": 0.1456,
      "step": 11480
    },
    {
      "epoch": 0.3349378610187292,
      "grad_norm": 1.1415888434866515,
      "learning_rate": 7.75357236273304e-06,
      "loss": 0.1827,
      "step": 11481
    },
    {
      "epoch": 0.3349670342493728,
      "grad_norm": 0.9347864709686792,
      "learning_rate": 7.753178012628826e-06,
      "loss": 0.1437,
      "step": 11482
    },
    {
      "epoch": 0.33499620748001635,
      "grad_norm": 0.6804694142483877,
      "learning_rate": 7.752783637944897e-06,
      "loss": 0.1429,
      "step": 11483
    },
    {
      "epoch": 0.3350253807106599,
      "grad_norm": 1.253721368214493,
      "learning_rate": 7.752389238684773e-06,
      "loss": 0.1703,
      "step": 11484
    },
    {
      "epoch": 0.33505455394130346,
      "grad_norm": 1.0616431022335604,
      "learning_rate": 7.751994814851973e-06,
      "loss": 0.1159,
      "step": 11485
    },
    {
      "epoch": 0.335083727171947,
      "grad_norm": 0.8984131766963803,
      "learning_rate": 7.751600366450021e-06,
      "loss": 0.158,
      "step": 11486
    },
    {
      "epoch": 0.33511290040259056,
      "grad_norm": 0.9184852145861464,
      "learning_rate": 7.751205893482438e-06,
      "loss": 0.1545,
      "step": 11487
    },
    {
      "epoch": 0.33514207363323417,
      "grad_norm": 0.7671535794109193,
      "learning_rate": 7.750811395952745e-06,
      "loss": 0.1354,
      "step": 11488
    },
    {
      "epoch": 0.3351712468638777,
      "grad_norm": 0.8323228137742448,
      "learning_rate": 7.750416873864464e-06,
      "loss": 0.1406,
      "step": 11489
    },
    {
      "epoch": 0.3352004200945213,
      "grad_norm": 0.777114115614653,
      "learning_rate": 7.75002232722112e-06,
      "loss": 0.1619,
      "step": 11490
    },
    {
      "epoch": 0.33522959332516483,
      "grad_norm": 0.8828507213690756,
      "learning_rate": 7.749627756026232e-06,
      "loss": 0.1614,
      "step": 11491
    },
    {
      "epoch": 0.3352587665558084,
      "grad_norm": 0.790047056780413,
      "learning_rate": 7.749233160283323e-06,
      "loss": 0.1747,
      "step": 11492
    },
    {
      "epoch": 0.33528793978645194,
      "grad_norm": 0.9592747218110751,
      "learning_rate": 7.748838539995918e-06,
      "loss": 0.1744,
      "step": 11493
    },
    {
      "epoch": 0.3353171130170955,
      "grad_norm": 0.7547276464424753,
      "learning_rate": 7.748443895167539e-06,
      "loss": 0.1469,
      "step": 11494
    },
    {
      "epoch": 0.3353462862477391,
      "grad_norm": 0.7952485905164294,
      "learning_rate": 7.748049225801706e-06,
      "loss": 0.1523,
      "step": 11495
    },
    {
      "epoch": 0.33537545947838265,
      "grad_norm": 1.1703727583077554,
      "learning_rate": 7.747654531901949e-06,
      "loss": 0.1455,
      "step": 11496
    },
    {
      "epoch": 0.3354046327090262,
      "grad_norm": 0.7693174535264024,
      "learning_rate": 7.747259813471786e-06,
      "loss": 0.1323,
      "step": 11497
    },
    {
      "epoch": 0.33543380593966976,
      "grad_norm": 0.7369482111349072,
      "learning_rate": 7.746865070514744e-06,
      "loss": 0.1522,
      "step": 11498
    },
    {
      "epoch": 0.3354629791703133,
      "grad_norm": 0.9564162516945413,
      "learning_rate": 7.746470303034347e-06,
      "loss": 0.1771,
      "step": 11499
    },
    {
      "epoch": 0.33549215240095687,
      "grad_norm": 0.8108868752021353,
      "learning_rate": 7.746075511034119e-06,
      "loss": 0.1578,
      "step": 11500
    },
    {
      "epoch": 0.3355213256316004,
      "grad_norm": 0.7785537166136959,
      "learning_rate": 7.745680694517582e-06,
      "loss": 0.1497,
      "step": 11501
    },
    {
      "epoch": 0.33555049886224403,
      "grad_norm": 0.958205389114564,
      "learning_rate": 7.745285853488264e-06,
      "loss": 0.1813,
      "step": 11502
    },
    {
      "epoch": 0.3355796720928876,
      "grad_norm": 0.9537185309746866,
      "learning_rate": 7.74489098794969e-06,
      "loss": 0.1475,
      "step": 11503
    },
    {
      "epoch": 0.33560884532353114,
      "grad_norm": 0.6389980035757383,
      "learning_rate": 7.744496097905385e-06,
      "loss": 0.1361,
      "step": 11504
    },
    {
      "epoch": 0.3356380185541747,
      "grad_norm": 0.800956536064156,
      "learning_rate": 7.744101183358874e-06,
      "loss": 0.1943,
      "step": 11505
    },
    {
      "epoch": 0.33566719178481824,
      "grad_norm": 0.923807856964023,
      "learning_rate": 7.743706244313682e-06,
      "loss": 0.1681,
      "step": 11506
    },
    {
      "epoch": 0.3356963650154618,
      "grad_norm": 0.7330873874583181,
      "learning_rate": 7.743311280773335e-06,
      "loss": 0.1356,
      "step": 11507
    },
    {
      "epoch": 0.33572553824610535,
      "grad_norm": 0.7937806466044719,
      "learning_rate": 7.742916292741363e-06,
      "loss": 0.1694,
      "step": 11508
    },
    {
      "epoch": 0.33575471147674896,
      "grad_norm": 1.0402736819907141,
      "learning_rate": 7.742521280221286e-06,
      "loss": 0.1381,
      "step": 11509
    },
    {
      "epoch": 0.3357838847073925,
      "grad_norm": 0.6845691795976966,
      "learning_rate": 7.742126243216635e-06,
      "loss": 0.1355,
      "step": 11510
    },
    {
      "epoch": 0.33581305793803606,
      "grad_norm": 0.9618473390404858,
      "learning_rate": 7.741731181730933e-06,
      "loss": 0.1573,
      "step": 11511
    },
    {
      "epoch": 0.3358422311686796,
      "grad_norm": 0.9770323611181168,
      "learning_rate": 7.741336095767713e-06,
      "loss": 0.1521,
      "step": 11512
    },
    {
      "epoch": 0.33587140439932317,
      "grad_norm": 0.7055360652263694,
      "learning_rate": 7.740940985330497e-06,
      "loss": 0.1747,
      "step": 11513
    },
    {
      "epoch": 0.3359005776299667,
      "grad_norm": 0.9706015849531999,
      "learning_rate": 7.740545850422813e-06,
      "loss": 0.1533,
      "step": 11514
    },
    {
      "epoch": 0.3359297508606103,
      "grad_norm": 0.8807825946758363,
      "learning_rate": 7.740150691048192e-06,
      "loss": 0.1536,
      "step": 11515
    },
    {
      "epoch": 0.3359589240912539,
      "grad_norm": 0.8525706200236469,
      "learning_rate": 7.73975550721016e-06,
      "loss": 0.1347,
      "step": 11516
    },
    {
      "epoch": 0.33598809732189744,
      "grad_norm": 0.7138250781620418,
      "learning_rate": 7.739360298912243e-06,
      "loss": 0.1617,
      "step": 11517
    },
    {
      "epoch": 0.336017270552541,
      "grad_norm": 0.8366224157733974,
      "learning_rate": 7.738965066157973e-06,
      "loss": 0.1487,
      "step": 11518
    },
    {
      "epoch": 0.33604644378318455,
      "grad_norm": 0.9277490964824728,
      "learning_rate": 7.738569808950875e-06,
      "loss": 0.1802,
      "step": 11519
    },
    {
      "epoch": 0.3360756170138281,
      "grad_norm": 0.8420714910620926,
      "learning_rate": 7.738174527294481e-06,
      "loss": 0.1388,
      "step": 11520
    },
    {
      "epoch": 0.33610479024447165,
      "grad_norm": 0.8219743641842782,
      "learning_rate": 7.737779221192317e-06,
      "loss": 0.1344,
      "step": 11521
    },
    {
      "epoch": 0.33613396347511526,
      "grad_norm": 0.8191873161261526,
      "learning_rate": 7.737383890647915e-06,
      "loss": 0.1721,
      "step": 11522
    },
    {
      "epoch": 0.3361631367057588,
      "grad_norm": 0.7910467427290071,
      "learning_rate": 7.736988535664803e-06,
      "loss": 0.1642,
      "step": 11523
    },
    {
      "epoch": 0.33619230993640237,
      "grad_norm": 0.9137471482546433,
      "learning_rate": 7.73659315624651e-06,
      "loss": 0.1749,
      "step": 11524
    },
    {
      "epoch": 0.3362214831670459,
      "grad_norm": 0.8395428256052949,
      "learning_rate": 7.736197752396566e-06,
      "loss": 0.1739,
      "step": 11525
    },
    {
      "epoch": 0.3362506563976895,
      "grad_norm": 0.7931969987369323,
      "learning_rate": 7.735802324118503e-06,
      "loss": 0.1773,
      "step": 11526
    },
    {
      "epoch": 0.336279829628333,
      "grad_norm": 0.760285127003303,
      "learning_rate": 7.73540687141585e-06,
      "loss": 0.162,
      "step": 11527
    },
    {
      "epoch": 0.3363090028589766,
      "grad_norm": 0.7667508518337527,
      "learning_rate": 7.735011394292136e-06,
      "loss": 0.1632,
      "step": 11528
    },
    {
      "epoch": 0.3363381760896202,
      "grad_norm": 0.659323906184719,
      "learning_rate": 7.734615892750895e-06,
      "loss": 0.1462,
      "step": 11529
    },
    {
      "epoch": 0.33636734932026374,
      "grad_norm": 0.7444712343622968,
      "learning_rate": 7.734220366795655e-06,
      "loss": 0.1387,
      "step": 11530
    },
    {
      "epoch": 0.3363965225509073,
      "grad_norm": 0.9170292521706552,
      "learning_rate": 7.733824816429948e-06,
      "loss": 0.1288,
      "step": 11531
    },
    {
      "epoch": 0.33642569578155085,
      "grad_norm": 0.8803357947564434,
      "learning_rate": 7.733429241657306e-06,
      "loss": 0.1354,
      "step": 11532
    },
    {
      "epoch": 0.3364548690121944,
      "grad_norm": 0.8195762284716492,
      "learning_rate": 7.73303364248126e-06,
      "loss": 0.1425,
      "step": 11533
    },
    {
      "epoch": 0.33648404224283796,
      "grad_norm": 0.778225354573916,
      "learning_rate": 7.732638018905343e-06,
      "loss": 0.1501,
      "step": 11534
    },
    {
      "epoch": 0.3365132154734815,
      "grad_norm": 0.8929304391087508,
      "learning_rate": 7.732242370933085e-06,
      "loss": 0.1262,
      "step": 11535
    },
    {
      "epoch": 0.3365423887041251,
      "grad_norm": 0.7725661324199767,
      "learning_rate": 7.731846698568021e-06,
      "loss": 0.1244,
      "step": 11536
    },
    {
      "epoch": 0.33657156193476867,
      "grad_norm": 0.8611246450752171,
      "learning_rate": 7.73145100181368e-06,
      "loss": 0.1502,
      "step": 11537
    },
    {
      "epoch": 0.3366007351654122,
      "grad_norm": 0.9232382153814258,
      "learning_rate": 7.731055280673598e-06,
      "loss": 0.1326,
      "step": 11538
    },
    {
      "epoch": 0.3366299083960558,
      "grad_norm": 0.80485965733028,
      "learning_rate": 7.730659535151306e-06,
      "loss": 0.1585,
      "step": 11539
    },
    {
      "epoch": 0.33665908162669933,
      "grad_norm": 0.7852042836798314,
      "learning_rate": 7.730263765250337e-06,
      "loss": 0.1658,
      "step": 11540
    },
    {
      "epoch": 0.3366882548573429,
      "grad_norm": 0.8940431546891692,
      "learning_rate": 7.729867970974223e-06,
      "loss": 0.1592,
      "step": 11541
    },
    {
      "epoch": 0.33671742808798644,
      "grad_norm": 0.8670945497825935,
      "learning_rate": 7.729472152326503e-06,
      "loss": 0.117,
      "step": 11542
    },
    {
      "epoch": 0.33674660131863005,
      "grad_norm": 0.8225740785398498,
      "learning_rate": 7.729076309310704e-06,
      "loss": 0.1563,
      "step": 11543
    },
    {
      "epoch": 0.3367757745492736,
      "grad_norm": 1.127576641006544,
      "learning_rate": 7.728680441930366e-06,
      "loss": 0.1558,
      "step": 11544
    },
    {
      "epoch": 0.33680494777991715,
      "grad_norm": 0.8866329896476555,
      "learning_rate": 7.72828455018902e-06,
      "loss": 0.1561,
      "step": 11545
    },
    {
      "epoch": 0.3368341210105607,
      "grad_norm": 0.8556623051275783,
      "learning_rate": 7.727888634090199e-06,
      "loss": 0.1528,
      "step": 11546
    },
    {
      "epoch": 0.33686329424120426,
      "grad_norm": 0.9138167291437244,
      "learning_rate": 7.72749269363744e-06,
      "loss": 0.142,
      "step": 11547
    },
    {
      "epoch": 0.3368924674718478,
      "grad_norm": 0.9106381062019776,
      "learning_rate": 7.727096728834278e-06,
      "loss": 0.1659,
      "step": 11548
    },
    {
      "epoch": 0.3369216407024914,
      "grad_norm": 0.9148809918401275,
      "learning_rate": 7.726700739684247e-06,
      "loss": 0.159,
      "step": 11549
    },
    {
      "epoch": 0.336950813933135,
      "grad_norm": 0.9319620451235113,
      "learning_rate": 7.726304726190884e-06,
      "loss": 0.1596,
      "step": 11550
    },
    {
      "epoch": 0.3369799871637785,
      "grad_norm": 0.9805622581121656,
      "learning_rate": 7.725908688357722e-06,
      "loss": 0.1625,
      "step": 11551
    },
    {
      "epoch": 0.3370091603944221,
      "grad_norm": 0.7912166294319537,
      "learning_rate": 7.725512626188299e-06,
      "loss": 0.1566,
      "step": 11552
    },
    {
      "epoch": 0.33703833362506563,
      "grad_norm": 0.9273319767245617,
      "learning_rate": 7.725116539686148e-06,
      "loss": 0.1409,
      "step": 11553
    },
    {
      "epoch": 0.3370675068557092,
      "grad_norm": 0.7738242382317029,
      "learning_rate": 7.72472042885481e-06,
      "loss": 0.1334,
      "step": 11554
    },
    {
      "epoch": 0.33709668008635274,
      "grad_norm": 0.9688612676545774,
      "learning_rate": 7.724324293697816e-06,
      "loss": 0.1564,
      "step": 11555
    },
    {
      "epoch": 0.33712585331699635,
      "grad_norm": 0.9992497320618804,
      "learning_rate": 7.723928134218705e-06,
      "loss": 0.1633,
      "step": 11556
    },
    {
      "epoch": 0.3371550265476399,
      "grad_norm": 0.8453637314931073,
      "learning_rate": 7.723531950421014e-06,
      "loss": 0.158,
      "step": 11557
    },
    {
      "epoch": 0.33718419977828346,
      "grad_norm": 0.9307297852232389,
      "learning_rate": 7.72313574230828e-06,
      "loss": 0.1397,
      "step": 11558
    },
    {
      "epoch": 0.337213373008927,
      "grad_norm": 0.8159144809842509,
      "learning_rate": 7.722739509884042e-06,
      "loss": 0.1594,
      "step": 11559
    },
    {
      "epoch": 0.33724254623957056,
      "grad_norm": 0.8272914173842891,
      "learning_rate": 7.722343253151834e-06,
      "loss": 0.1831,
      "step": 11560
    },
    {
      "epoch": 0.3372717194702141,
      "grad_norm": 0.8692708278798568,
      "learning_rate": 7.721946972115196e-06,
      "loss": 0.1327,
      "step": 11561
    },
    {
      "epoch": 0.33730089270085767,
      "grad_norm": 1.0081766452662235,
      "learning_rate": 7.721550666777664e-06,
      "loss": 0.1526,
      "step": 11562
    },
    {
      "epoch": 0.3373300659315013,
      "grad_norm": 0.8039508529224917,
      "learning_rate": 7.721154337142778e-06,
      "loss": 0.1307,
      "step": 11563
    },
    {
      "epoch": 0.33735923916214483,
      "grad_norm": 0.8050979085130708,
      "learning_rate": 7.720757983214076e-06,
      "loss": 0.1278,
      "step": 11564
    },
    {
      "epoch": 0.3373884123927884,
      "grad_norm": 0.6830516599371543,
      "learning_rate": 7.720361604995097e-06,
      "loss": 0.1462,
      "step": 11565
    },
    {
      "epoch": 0.33741758562343194,
      "grad_norm": 0.8294060227749831,
      "learning_rate": 7.719965202489377e-06,
      "loss": 0.1356,
      "step": 11566
    },
    {
      "epoch": 0.3374467588540755,
      "grad_norm": 0.8635004014556787,
      "learning_rate": 7.71956877570046e-06,
      "loss": 0.1421,
      "step": 11567
    },
    {
      "epoch": 0.33747593208471904,
      "grad_norm": 0.7720892884340611,
      "learning_rate": 7.719172324631878e-06,
      "loss": 0.131,
      "step": 11568
    },
    {
      "epoch": 0.3375051053153626,
      "grad_norm": 0.7483209597107527,
      "learning_rate": 7.718775849287178e-06,
      "loss": 0.1339,
      "step": 11569
    },
    {
      "epoch": 0.3375342785460062,
      "grad_norm": 1.015752418732121,
      "learning_rate": 7.718379349669893e-06,
      "loss": 0.1766,
      "step": 11570
    },
    {
      "epoch": 0.33756345177664976,
      "grad_norm": 1.0326672410961844,
      "learning_rate": 7.71798282578357e-06,
      "loss": 0.1597,
      "step": 11571
    },
    {
      "epoch": 0.3375926250072933,
      "grad_norm": 0.9156662847768858,
      "learning_rate": 7.717586277631744e-06,
      "loss": 0.1536,
      "step": 11572
    },
    {
      "epoch": 0.33762179823793687,
      "grad_norm": 0.9008508855862385,
      "learning_rate": 7.717189705217954e-06,
      "loss": 0.1333,
      "step": 11573
    },
    {
      "epoch": 0.3376509714685804,
      "grad_norm": 1.0395423797248333,
      "learning_rate": 7.716793108545745e-06,
      "loss": 0.1627,
      "step": 11574
    },
    {
      "epoch": 0.33768014469922397,
      "grad_norm": 0.9719764033060374,
      "learning_rate": 7.716396487618655e-06,
      "loss": 0.1613,
      "step": 11575
    },
    {
      "epoch": 0.3377093179298676,
      "grad_norm": 0.9309991623526762,
      "learning_rate": 7.715999842440225e-06,
      "loss": 0.1527,
      "step": 11576
    },
    {
      "epoch": 0.33773849116051113,
      "grad_norm": 0.9842918455596753,
      "learning_rate": 7.715603173013999e-06,
      "loss": 0.198,
      "step": 11577
    },
    {
      "epoch": 0.3377676643911547,
      "grad_norm": 1.4516828031751667,
      "learning_rate": 7.715206479343516e-06,
      "loss": 0.1587,
      "step": 11578
    },
    {
      "epoch": 0.33779683762179824,
      "grad_norm": 0.9777438508037208,
      "learning_rate": 7.714809761432317e-06,
      "loss": 0.1418,
      "step": 11579
    },
    {
      "epoch": 0.3378260108524418,
      "grad_norm": 0.9632788395810549,
      "learning_rate": 7.714413019283942e-06,
      "loss": 0.1748,
      "step": 11580
    },
    {
      "epoch": 0.33785518408308535,
      "grad_norm": 0.8574549089734549,
      "learning_rate": 7.714016252901939e-06,
      "loss": 0.1432,
      "step": 11581
    },
    {
      "epoch": 0.3378843573137289,
      "grad_norm": 0.9000887676744005,
      "learning_rate": 7.713619462289846e-06,
      "loss": 0.1449,
      "step": 11582
    },
    {
      "epoch": 0.3379135305443725,
      "grad_norm": 0.9491616826860819,
      "learning_rate": 7.713222647451203e-06,
      "loss": 0.1383,
      "step": 11583
    },
    {
      "epoch": 0.33794270377501606,
      "grad_norm": 0.8886926381518871,
      "learning_rate": 7.71282580838956e-06,
      "loss": 0.1482,
      "step": 11584
    },
    {
      "epoch": 0.3379718770056596,
      "grad_norm": 0.7507167077278053,
      "learning_rate": 7.712428945108454e-06,
      "loss": 0.1478,
      "step": 11585
    },
    {
      "epoch": 0.33800105023630317,
      "grad_norm": 0.9189021330503667,
      "learning_rate": 7.712032057611431e-06,
      "loss": 0.1477,
      "step": 11586
    },
    {
      "epoch": 0.3380302234669467,
      "grad_norm": 1.0128260734012735,
      "learning_rate": 7.711635145902032e-06,
      "loss": 0.1583,
      "step": 11587
    },
    {
      "epoch": 0.3380593966975903,
      "grad_norm": 0.8575179334345707,
      "learning_rate": 7.711238209983802e-06,
      "loss": 0.1577,
      "step": 11588
    },
    {
      "epoch": 0.33808856992823383,
      "grad_norm": 0.9998616785880221,
      "learning_rate": 7.710841249860286e-06,
      "loss": 0.1535,
      "step": 11589
    },
    {
      "epoch": 0.33811774315887744,
      "grad_norm": 0.9767503548015507,
      "learning_rate": 7.710444265535024e-06,
      "loss": 0.1438,
      "step": 11590
    },
    {
      "epoch": 0.338146916389521,
      "grad_norm": 0.7505459740062331,
      "learning_rate": 7.710047257011564e-06,
      "loss": 0.1472,
      "step": 11591
    },
    {
      "epoch": 0.33817608962016454,
      "grad_norm": 0.989067891001346,
      "learning_rate": 7.709650224293449e-06,
      "loss": 0.1196,
      "step": 11592
    },
    {
      "epoch": 0.3382052628508081,
      "grad_norm": 0.8147623034459972,
      "learning_rate": 7.709253167384223e-06,
      "loss": 0.1322,
      "step": 11593
    },
    {
      "epoch": 0.33823443608145165,
      "grad_norm": 0.7033094865173153,
      "learning_rate": 7.708856086287432e-06,
      "loss": 0.145,
      "step": 11594
    },
    {
      "epoch": 0.3382636093120952,
      "grad_norm": 1.0309814820693728,
      "learning_rate": 7.708458981006621e-06,
      "loss": 0.1682,
      "step": 11595
    },
    {
      "epoch": 0.33829278254273876,
      "grad_norm": 0.775960565931343,
      "learning_rate": 7.708061851545334e-06,
      "loss": 0.1753,
      "step": 11596
    },
    {
      "epoch": 0.33832195577338237,
      "grad_norm": 0.8237789546111267,
      "learning_rate": 7.707664697907117e-06,
      "loss": 0.1498,
      "step": 11597
    },
    {
      "epoch": 0.3383511290040259,
      "grad_norm": 0.9349919105975405,
      "learning_rate": 7.707267520095515e-06,
      "loss": 0.1742,
      "step": 11598
    },
    {
      "epoch": 0.33838030223466947,
      "grad_norm": 1.0083016500343505,
      "learning_rate": 7.70687031811408e-06,
      "loss": 0.1268,
      "step": 11599
    },
    {
      "epoch": 0.338409475465313,
      "grad_norm": 0.697713882631908,
      "learning_rate": 7.706473091966347e-06,
      "loss": 0.118,
      "step": 11600
    },
    {
      "epoch": 0.3384386486959566,
      "grad_norm": 0.7643882171336065,
      "learning_rate": 7.706075841655871e-06,
      "loss": 0.1489,
      "step": 11601
    },
    {
      "epoch": 0.33846782192660013,
      "grad_norm": 1.0678200592313596,
      "learning_rate": 7.705678567186195e-06,
      "loss": 0.1532,
      "step": 11602
    },
    {
      "epoch": 0.33849699515724374,
      "grad_norm": 0.8535030518138836,
      "learning_rate": 7.705281268560866e-06,
      "loss": 0.1302,
      "step": 11603
    },
    {
      "epoch": 0.3385261683878873,
      "grad_norm": 0.7600060163055182,
      "learning_rate": 7.704883945783435e-06,
      "loss": 0.1661,
      "step": 11604
    },
    {
      "epoch": 0.33855534161853085,
      "grad_norm": 1.0692788141133338,
      "learning_rate": 7.704486598857444e-06,
      "loss": 0.147,
      "step": 11605
    },
    {
      "epoch": 0.3385845148491744,
      "grad_norm": 0.9700778543064547,
      "learning_rate": 7.70408922778644e-06,
      "loss": 0.1467,
      "step": 11606
    },
    {
      "epoch": 0.33861368807981795,
      "grad_norm": 0.9525649354280223,
      "learning_rate": 7.703691832573975e-06,
      "loss": 0.135,
      "step": 11607
    },
    {
      "epoch": 0.3386428613104615,
      "grad_norm": 0.8816075139805843,
      "learning_rate": 7.703294413223595e-06,
      "loss": 0.1381,
      "step": 11608
    },
    {
      "epoch": 0.33867203454110506,
      "grad_norm": 0.7983909998112839,
      "learning_rate": 7.702896969738847e-06,
      "loss": 0.1508,
      "step": 11609
    },
    {
      "epoch": 0.33870120777174867,
      "grad_norm": 0.9203609233880439,
      "learning_rate": 7.702499502123281e-06,
      "loss": 0.1558,
      "step": 11610
    },
    {
      "epoch": 0.3387303810023922,
      "grad_norm": 0.6744051911156428,
      "learning_rate": 7.702102010380444e-06,
      "loss": 0.1439,
      "step": 11611
    },
    {
      "epoch": 0.3387595542330358,
      "grad_norm": 0.7199704773594848,
      "learning_rate": 7.701704494513885e-06,
      "loss": 0.1645,
      "step": 11612
    },
    {
      "epoch": 0.33878872746367933,
      "grad_norm": 0.8217566485644069,
      "learning_rate": 7.701306954527153e-06,
      "loss": 0.1357,
      "step": 11613
    },
    {
      "epoch": 0.3388179006943229,
      "grad_norm": 0.8422794302581642,
      "learning_rate": 7.700909390423798e-06,
      "loss": 0.1558,
      "step": 11614
    },
    {
      "epoch": 0.33884707392496644,
      "grad_norm": 0.7310318262358183,
      "learning_rate": 7.70051180220737e-06,
      "loss": 0.1409,
      "step": 11615
    },
    {
      "epoch": 0.33887624715561,
      "grad_norm": 3.798914361404054,
      "learning_rate": 7.700114189881413e-06,
      "loss": 0.1392,
      "step": 11616
    },
    {
      "epoch": 0.3389054203862536,
      "grad_norm": 0.8806185465121645,
      "learning_rate": 7.699716553449485e-06,
      "loss": 0.1639,
      "step": 11617
    },
    {
      "epoch": 0.33893459361689715,
      "grad_norm": 0.8395571390425997,
      "learning_rate": 7.699318892915131e-06,
      "loss": 0.1384,
      "step": 11618
    },
    {
      "epoch": 0.3389637668475407,
      "grad_norm": 0.8303897802809496,
      "learning_rate": 7.698921208281903e-06,
      "loss": 0.159,
      "step": 11619
    },
    {
      "epoch": 0.33899294007818426,
      "grad_norm": 0.8165403889974459,
      "learning_rate": 7.69852349955335e-06,
      "loss": 0.146,
      "step": 11620
    },
    {
      "epoch": 0.3390221133088278,
      "grad_norm": 1.092760362693043,
      "learning_rate": 7.698125766733023e-06,
      "loss": 0.1588,
      "step": 11621
    },
    {
      "epoch": 0.33905128653947136,
      "grad_norm": 0.6902754686469906,
      "learning_rate": 7.697728009824475e-06,
      "loss": 0.1317,
      "step": 11622
    },
    {
      "epoch": 0.3390804597701149,
      "grad_norm": 0.7312169230775266,
      "learning_rate": 7.697330228831254e-06,
      "loss": 0.1541,
      "step": 11623
    },
    {
      "epoch": 0.3391096330007585,
      "grad_norm": 0.8231684244030799,
      "learning_rate": 7.696932423756912e-06,
      "loss": 0.1491,
      "step": 11624
    },
    {
      "epoch": 0.3391388062314021,
      "grad_norm": 0.9415065153653359,
      "learning_rate": 7.696534594605e-06,
      "loss": 0.1329,
      "step": 11625
    },
    {
      "epoch": 0.33916797946204563,
      "grad_norm": 0.792732553868953,
      "learning_rate": 7.696136741379073e-06,
      "loss": 0.1679,
      "step": 11626
    },
    {
      "epoch": 0.3391971526926892,
      "grad_norm": 0.7594829618379453,
      "learning_rate": 7.69573886408268e-06,
      "loss": 0.1394,
      "step": 11627
    },
    {
      "epoch": 0.33922632592333274,
      "grad_norm": 0.9546364425628039,
      "learning_rate": 7.695340962719376e-06,
      "loss": 0.1308,
      "step": 11628
    },
    {
      "epoch": 0.3392554991539763,
      "grad_norm": 0.7239429493125663,
      "learning_rate": 7.69494303729271e-06,
      "loss": 0.1607,
      "step": 11629
    },
    {
      "epoch": 0.33928467238461985,
      "grad_norm": 0.7648747255782211,
      "learning_rate": 7.694545087806236e-06,
      "loss": 0.1538,
      "step": 11630
    },
    {
      "epoch": 0.33931384561526345,
      "grad_norm": 1.0398673908202867,
      "learning_rate": 7.694147114263505e-06,
      "loss": 0.1606,
      "step": 11631
    },
    {
      "epoch": 0.339343018845907,
      "grad_norm": 0.8509038630853565,
      "learning_rate": 7.693749116668073e-06,
      "loss": 0.134,
      "step": 11632
    },
    {
      "epoch": 0.33937219207655056,
      "grad_norm": 0.6808845366630925,
      "learning_rate": 7.69335109502349e-06,
      "loss": 0.1486,
      "step": 11633
    },
    {
      "epoch": 0.3394013653071941,
      "grad_norm": 0.9596622071022206,
      "learning_rate": 7.692953049333315e-06,
      "loss": 0.157,
      "step": 11634
    },
    {
      "epoch": 0.33943053853783767,
      "grad_norm": 0.9780170561775249,
      "learning_rate": 7.692554979601097e-06,
      "loss": 0.1492,
      "step": 11635
    },
    {
      "epoch": 0.3394597117684812,
      "grad_norm": 0.7947887071631937,
      "learning_rate": 7.69215688583039e-06,
      "loss": 0.1423,
      "step": 11636
    },
    {
      "epoch": 0.33948888499912483,
      "grad_norm": 0.7950864769771145,
      "learning_rate": 7.69175876802475e-06,
      "loss": 0.1439,
      "step": 11637
    },
    {
      "epoch": 0.3395180582297684,
      "grad_norm": 0.852636176725188,
      "learning_rate": 7.691360626187729e-06,
      "loss": 0.1345,
      "step": 11638
    },
    {
      "epoch": 0.33954723146041194,
      "grad_norm": 0.8749306756916018,
      "learning_rate": 7.690962460322883e-06,
      "loss": 0.1546,
      "step": 11639
    },
    {
      "epoch": 0.3395764046910555,
      "grad_norm": 0.7573621451009913,
      "learning_rate": 7.690564270433766e-06,
      "loss": 0.1427,
      "step": 11640
    },
    {
      "epoch": 0.33960557792169904,
      "grad_norm": 0.6883258193192641,
      "learning_rate": 7.690166056523935e-06,
      "loss": 0.1364,
      "step": 11641
    },
    {
      "epoch": 0.3396347511523426,
      "grad_norm": 0.8212558475232992,
      "learning_rate": 7.689767818596943e-06,
      "loss": 0.1739,
      "step": 11642
    },
    {
      "epoch": 0.33966392438298615,
      "grad_norm": 1.0178685133479788,
      "learning_rate": 7.689369556656346e-06,
      "loss": 0.1603,
      "step": 11643
    },
    {
      "epoch": 0.33969309761362976,
      "grad_norm": 0.7929640467630086,
      "learning_rate": 7.6889712707057e-06,
      "loss": 0.1419,
      "step": 11644
    },
    {
      "epoch": 0.3397222708442733,
      "grad_norm": 0.8285062651424151,
      "learning_rate": 7.68857296074856e-06,
      "loss": 0.1648,
      "step": 11645
    },
    {
      "epoch": 0.33975144407491686,
      "grad_norm": 0.7637143786134057,
      "learning_rate": 7.688174626788483e-06,
      "loss": 0.1465,
      "step": 11646
    },
    {
      "epoch": 0.3397806173055604,
      "grad_norm": 0.8038705124572267,
      "learning_rate": 7.687776268829024e-06,
      "loss": 0.1341,
      "step": 11647
    },
    {
      "epoch": 0.33980979053620397,
      "grad_norm": 0.6997760841537921,
      "learning_rate": 7.687377886873739e-06,
      "loss": 0.1316,
      "step": 11648
    },
    {
      "epoch": 0.3398389637668475,
      "grad_norm": 0.8494265532655626,
      "learning_rate": 7.686979480926189e-06,
      "loss": 0.1584,
      "step": 11649
    },
    {
      "epoch": 0.3398681369974911,
      "grad_norm": 0.8115325270981985,
      "learning_rate": 7.686581050989925e-06,
      "loss": 0.153,
      "step": 11650
    },
    {
      "epoch": 0.3398973102281347,
      "grad_norm": 0.6196469535696476,
      "learning_rate": 7.686182597068505e-06,
      "loss": 0.1444,
      "step": 11651
    },
    {
      "epoch": 0.33992648345877824,
      "grad_norm": 0.8742196977812604,
      "learning_rate": 7.685784119165492e-06,
      "loss": 0.1488,
      "step": 11652
    },
    {
      "epoch": 0.3399556566894218,
      "grad_norm": 0.9336099521443803,
      "learning_rate": 7.685385617284437e-06,
      "loss": 0.149,
      "step": 11653
    },
    {
      "epoch": 0.33998482992006535,
      "grad_norm": 0.8556779058735722,
      "learning_rate": 7.684987091428902e-06,
      "loss": 0.1323,
      "step": 11654
    },
    {
      "epoch": 0.3400140031507089,
      "grad_norm": 0.9249586535401686,
      "learning_rate": 7.684588541602443e-06,
      "loss": 0.1431,
      "step": 11655
    },
    {
      "epoch": 0.34004317638135245,
      "grad_norm": 1.0598896664425277,
      "learning_rate": 7.684189967808616e-06,
      "loss": 0.1416,
      "step": 11656
    },
    {
      "epoch": 0.340072349611996,
      "grad_norm": 0.813634062026962,
      "learning_rate": 7.683791370050984e-06,
      "loss": 0.1554,
      "step": 11657
    },
    {
      "epoch": 0.3401015228426396,
      "grad_norm": 1.0084842884915317,
      "learning_rate": 7.683392748333102e-06,
      "loss": 0.1618,
      "step": 11658
    },
    {
      "epoch": 0.34013069607328317,
      "grad_norm": 1.2608013321156923,
      "learning_rate": 7.682994102658532e-06,
      "loss": 0.1681,
      "step": 11659
    },
    {
      "epoch": 0.3401598693039267,
      "grad_norm": 0.7856307772707307,
      "learning_rate": 7.68259543303083e-06,
      "loss": 0.1473,
      "step": 11660
    },
    {
      "epoch": 0.3401890425345703,
      "grad_norm": 1.024086214458485,
      "learning_rate": 7.682196739453556e-06,
      "loss": 0.1544,
      "step": 11661
    },
    {
      "epoch": 0.3402182157652138,
      "grad_norm": 1.2069008199184548,
      "learning_rate": 7.68179802193027e-06,
      "loss": 0.1893,
      "step": 11662
    },
    {
      "epoch": 0.3402473889958574,
      "grad_norm": 0.872775111768538,
      "learning_rate": 7.681399280464531e-06,
      "loss": 0.1641,
      "step": 11663
    },
    {
      "epoch": 0.340276562226501,
      "grad_norm": 0.8029842648616166,
      "learning_rate": 7.681000515059901e-06,
      "loss": 0.144,
      "step": 11664
    },
    {
      "epoch": 0.34030573545714454,
      "grad_norm": 1.0516458477893995,
      "learning_rate": 7.680601725719937e-06,
      "loss": 0.1396,
      "step": 11665
    },
    {
      "epoch": 0.3403349086877881,
      "grad_norm": 0.9434842839220801,
      "learning_rate": 7.680202912448201e-06,
      "loss": 0.1601,
      "step": 11666
    },
    {
      "epoch": 0.34036408191843165,
      "grad_norm": 0.737427446621569,
      "learning_rate": 7.679804075248254e-06,
      "loss": 0.1498,
      "step": 11667
    },
    {
      "epoch": 0.3403932551490752,
      "grad_norm": 0.7840859302324004,
      "learning_rate": 7.679405214123654e-06,
      "loss": 0.1346,
      "step": 11668
    },
    {
      "epoch": 0.34042242837971876,
      "grad_norm": 0.8400931942200677,
      "learning_rate": 7.679006329077965e-06,
      "loss": 0.1316,
      "step": 11669
    },
    {
      "epoch": 0.3404516016103623,
      "grad_norm": 0.758598806223022,
      "learning_rate": 7.678607420114747e-06,
      "loss": 0.1637,
      "step": 11670
    },
    {
      "epoch": 0.3404807748410059,
      "grad_norm": 0.8306524681387498,
      "learning_rate": 7.678208487237562e-06,
      "loss": 0.1394,
      "step": 11671
    },
    {
      "epoch": 0.34050994807164947,
      "grad_norm": 0.7991370783091845,
      "learning_rate": 7.677809530449971e-06,
      "loss": 0.1478,
      "step": 11672
    },
    {
      "epoch": 0.340539121302293,
      "grad_norm": 0.6720491765664105,
      "learning_rate": 7.677410549755534e-06,
      "loss": 0.1399,
      "step": 11673
    },
    {
      "epoch": 0.3405682945329366,
      "grad_norm": 0.765124001772518,
      "learning_rate": 7.677011545157818e-06,
      "loss": 0.1362,
      "step": 11674
    },
    {
      "epoch": 0.34059746776358013,
      "grad_norm": 1.0082223645553332,
      "learning_rate": 7.676612516660379e-06,
      "loss": 0.1415,
      "step": 11675
    },
    {
      "epoch": 0.3406266409942237,
      "grad_norm": 0.6765023902159439,
      "learning_rate": 7.676213464266783e-06,
      "loss": 0.1751,
      "step": 11676
    },
    {
      "epoch": 0.34065581422486724,
      "grad_norm": 0.8235423491168042,
      "learning_rate": 7.675814387980592e-06,
      "loss": 0.1436,
      "step": 11677
    },
    {
      "epoch": 0.34068498745551085,
      "grad_norm": 0.8448033539821688,
      "learning_rate": 7.67541528780537e-06,
      "loss": 0.1483,
      "step": 11678
    },
    {
      "epoch": 0.3407141606861544,
      "grad_norm": 0.8426377749357006,
      "learning_rate": 7.67501616374468e-06,
      "loss": 0.1615,
      "step": 11679
    },
    {
      "epoch": 0.34074333391679795,
      "grad_norm": 0.8540467357121467,
      "learning_rate": 7.67461701580208e-06,
      "loss": 0.16,
      "step": 11680
    },
    {
      "epoch": 0.3407725071474415,
      "grad_norm": 0.8699308629755351,
      "learning_rate": 7.674217843981142e-06,
      "loss": 0.1469,
      "step": 11681
    },
    {
      "epoch": 0.34080168037808506,
      "grad_norm": 1.067218892242633,
      "learning_rate": 7.673818648285423e-06,
      "loss": 0.1409,
      "step": 11682
    },
    {
      "epoch": 0.3408308536087286,
      "grad_norm": 0.9097203156946464,
      "learning_rate": 7.67341942871849e-06,
      "loss": 0.1502,
      "step": 11683
    },
    {
      "epoch": 0.34086002683937217,
      "grad_norm": 0.7491748931649446,
      "learning_rate": 7.673020185283908e-06,
      "loss": 0.1156,
      "step": 11684
    },
    {
      "epoch": 0.3408892000700158,
      "grad_norm": 0.7259473015696555,
      "learning_rate": 7.672620917985238e-06,
      "loss": 0.1329,
      "step": 11685
    },
    {
      "epoch": 0.3409183733006593,
      "grad_norm": 0.8042559195369055,
      "learning_rate": 7.672221626826046e-06,
      "loss": 0.1607,
      "step": 11686
    },
    {
      "epoch": 0.3409475465313029,
      "grad_norm": 0.8026800332869615,
      "learning_rate": 7.671822311809899e-06,
      "loss": 0.1669,
      "step": 11687
    },
    {
      "epoch": 0.34097671976194643,
      "grad_norm": 0.7856470094621225,
      "learning_rate": 7.671422972940359e-06,
      "loss": 0.1606,
      "step": 11688
    },
    {
      "epoch": 0.34100589299259,
      "grad_norm": 0.6992651121871641,
      "learning_rate": 7.671023610220993e-06,
      "loss": 0.1456,
      "step": 11689
    },
    {
      "epoch": 0.34103506622323354,
      "grad_norm": 0.8107099706788065,
      "learning_rate": 7.670624223655367e-06,
      "loss": 0.1348,
      "step": 11690
    },
    {
      "epoch": 0.34106423945387715,
      "grad_norm": 0.598836061017337,
      "learning_rate": 7.670224813247043e-06,
      "loss": 0.1322,
      "step": 11691
    },
    {
      "epoch": 0.3410934126845207,
      "grad_norm": 0.8186424373152729,
      "learning_rate": 7.66982537899959e-06,
      "loss": 0.1429,
      "step": 11692
    },
    {
      "epoch": 0.34112258591516426,
      "grad_norm": 1.0788267547690011,
      "learning_rate": 7.669425920916575e-06,
      "loss": 0.1651,
      "step": 11693
    },
    {
      "epoch": 0.3411517591458078,
      "grad_norm": 0.7094197262715586,
      "learning_rate": 7.669026439001562e-06,
      "loss": 0.1215,
      "step": 11694
    },
    {
      "epoch": 0.34118093237645136,
      "grad_norm": 0.8876359519809253,
      "learning_rate": 7.668626933258117e-06,
      "loss": 0.1395,
      "step": 11695
    },
    {
      "epoch": 0.3412101056070949,
      "grad_norm": 1.0110935606247058,
      "learning_rate": 7.668227403689807e-06,
      "loss": 0.1361,
      "step": 11696
    },
    {
      "epoch": 0.34123927883773847,
      "grad_norm": 0.6144065129193836,
      "learning_rate": 7.667827850300203e-06,
      "loss": 0.1535,
      "step": 11697
    },
    {
      "epoch": 0.3412684520683821,
      "grad_norm": 0.8677026963363644,
      "learning_rate": 7.667428273092867e-06,
      "loss": 0.16,
      "step": 11698
    },
    {
      "epoch": 0.34129762529902563,
      "grad_norm": 0.8629451257224124,
      "learning_rate": 7.667028672071368e-06,
      "loss": 0.1571,
      "step": 11699
    },
    {
      "epoch": 0.3413267985296692,
      "grad_norm": 0.8449410464990044,
      "learning_rate": 7.666629047239273e-06,
      "loss": 0.1829,
      "step": 11700
    },
    {
      "epoch": 0.34135597176031274,
      "grad_norm": 0.8818627338487651,
      "learning_rate": 7.666229398600151e-06,
      "loss": 0.1771,
      "step": 11701
    },
    {
      "epoch": 0.3413851449909563,
      "grad_norm": 1.0868901220828913,
      "learning_rate": 7.66582972615757e-06,
      "loss": 0.1712,
      "step": 11702
    },
    {
      "epoch": 0.34141431822159984,
      "grad_norm": 0.836277865875183,
      "learning_rate": 7.665430029915098e-06,
      "loss": 0.1461,
      "step": 11703
    },
    {
      "epoch": 0.3414434914522434,
      "grad_norm": 0.9585987858847443,
      "learning_rate": 7.665030309876303e-06,
      "loss": 0.1406,
      "step": 11704
    },
    {
      "epoch": 0.341472664682887,
      "grad_norm": 0.9428673267000058,
      "learning_rate": 7.664630566044751e-06,
      "loss": 0.1582,
      "step": 11705
    },
    {
      "epoch": 0.34150183791353056,
      "grad_norm": 0.8198290317941089,
      "learning_rate": 7.664230798424016e-06,
      "loss": 0.1325,
      "step": 11706
    },
    {
      "epoch": 0.3415310111441741,
      "grad_norm": 0.9226597031973783,
      "learning_rate": 7.663831007017664e-06,
      "loss": 0.1335,
      "step": 11707
    },
    {
      "epoch": 0.34156018437481767,
      "grad_norm": 0.8968307048251073,
      "learning_rate": 7.663431191829263e-06,
      "loss": 0.1528,
      "step": 11708
    },
    {
      "epoch": 0.3415893576054612,
      "grad_norm": 0.9982291680294357,
      "learning_rate": 7.663031352862387e-06,
      "loss": 0.165,
      "step": 11709
    },
    {
      "epoch": 0.3416185308361048,
      "grad_norm": 0.8121613049726315,
      "learning_rate": 7.6626314901206e-06,
      "loss": 0.1399,
      "step": 11710
    },
    {
      "epoch": 0.3416477040667483,
      "grad_norm": 0.8579351196202621,
      "learning_rate": 7.662231603607475e-06,
      "loss": 0.1857,
      "step": 11711
    },
    {
      "epoch": 0.34167687729739193,
      "grad_norm": 0.9538851908123045,
      "learning_rate": 7.661831693326584e-06,
      "loss": 0.1689,
      "step": 11712
    },
    {
      "epoch": 0.3417060505280355,
      "grad_norm": 0.7149473699449731,
      "learning_rate": 7.661431759281492e-06,
      "loss": 0.1455,
      "step": 11713
    },
    {
      "epoch": 0.34173522375867904,
      "grad_norm": 1.1132623450476096,
      "learning_rate": 7.661031801475776e-06,
      "loss": 0.1673,
      "step": 11714
    },
    {
      "epoch": 0.3417643969893226,
      "grad_norm": 0.721488298031133,
      "learning_rate": 7.660631819913001e-06,
      "loss": 0.1537,
      "step": 11715
    },
    {
      "epoch": 0.34179357021996615,
      "grad_norm": 0.6243591437984605,
      "learning_rate": 7.66023181459674e-06,
      "loss": 0.1426,
      "step": 11716
    },
    {
      "epoch": 0.3418227434506097,
      "grad_norm": 0.7133498745963897,
      "learning_rate": 7.659831785530567e-06,
      "loss": 0.1305,
      "step": 11717
    },
    {
      "epoch": 0.3418519166812533,
      "grad_norm": 0.8109652739888527,
      "learning_rate": 7.659431732718048e-06,
      "loss": 0.1555,
      "step": 11718
    },
    {
      "epoch": 0.34188108991189686,
      "grad_norm": 0.8429149586885538,
      "learning_rate": 7.659031656162759e-06,
      "loss": 0.1695,
      "step": 11719
    },
    {
      "epoch": 0.3419102631425404,
      "grad_norm": 0.9783021579542683,
      "learning_rate": 7.65863155586827e-06,
      "loss": 0.175,
      "step": 11720
    },
    {
      "epoch": 0.34193943637318397,
      "grad_norm": 0.7455761986059449,
      "learning_rate": 7.658231431838153e-06,
      "loss": 0.1139,
      "step": 11721
    },
    {
      "epoch": 0.3419686096038275,
      "grad_norm": 0.8614572722605297,
      "learning_rate": 7.657831284075978e-06,
      "loss": 0.1547,
      "step": 11722
    },
    {
      "epoch": 0.3419977828344711,
      "grad_norm": 0.8069408183015282,
      "learning_rate": 7.657431112585323e-06,
      "loss": 0.1469,
      "step": 11723
    },
    {
      "epoch": 0.34202695606511463,
      "grad_norm": 1.0893315245729782,
      "learning_rate": 7.657030917369757e-06,
      "loss": 0.1525,
      "step": 11724
    },
    {
      "epoch": 0.34205612929575824,
      "grad_norm": 0.9923218410567376,
      "learning_rate": 7.656630698432852e-06,
      "loss": 0.1449,
      "step": 11725
    },
    {
      "epoch": 0.3420853025264018,
      "grad_norm": 0.8452843397484929,
      "learning_rate": 7.656230455778182e-06,
      "loss": 0.1511,
      "step": 11726
    },
    {
      "epoch": 0.34211447575704534,
      "grad_norm": 0.7588875919560713,
      "learning_rate": 7.655830189409322e-06,
      "loss": 0.163,
      "step": 11727
    },
    {
      "epoch": 0.3421436489876889,
      "grad_norm": 0.8346468307359247,
      "learning_rate": 7.655429899329843e-06,
      "loss": 0.1403,
      "step": 11728
    },
    {
      "epoch": 0.34217282221833245,
      "grad_norm": 0.9571316650537509,
      "learning_rate": 7.65502958554332e-06,
      "loss": 0.1581,
      "step": 11729
    },
    {
      "epoch": 0.342201995448976,
      "grad_norm": 0.9824069479427872,
      "learning_rate": 7.654629248053326e-06,
      "loss": 0.1318,
      "step": 11730
    },
    {
      "epoch": 0.34223116867961956,
      "grad_norm": 0.9142646902186613,
      "learning_rate": 7.654228886863437e-06,
      "loss": 0.1619,
      "step": 11731
    },
    {
      "epoch": 0.34226034191026317,
      "grad_norm": 1.007233342140085,
      "learning_rate": 7.653828501977228e-06,
      "loss": 0.1552,
      "step": 11732
    },
    {
      "epoch": 0.3422895151409067,
      "grad_norm": 0.7545373944008383,
      "learning_rate": 7.653428093398268e-06,
      "loss": 0.1476,
      "step": 11733
    },
    {
      "epoch": 0.3423186883715503,
      "grad_norm": 1.0079163395605315,
      "learning_rate": 7.653027661130137e-06,
      "loss": 0.1419,
      "step": 11734
    },
    {
      "epoch": 0.3423478616021938,
      "grad_norm": 1.177233676016444,
      "learning_rate": 7.652627205176409e-06,
      "loss": 0.1355,
      "step": 11735
    },
    {
      "epoch": 0.3423770348328374,
      "grad_norm": 0.8320629231908665,
      "learning_rate": 7.652226725540657e-06,
      "loss": 0.1316,
      "step": 11736
    },
    {
      "epoch": 0.34240620806348093,
      "grad_norm": 1.0215268112603635,
      "learning_rate": 7.651826222226459e-06,
      "loss": 0.1291,
      "step": 11737
    },
    {
      "epoch": 0.3424353812941245,
      "grad_norm": 0.7397867723421131,
      "learning_rate": 7.651425695237388e-06,
      "loss": 0.1575,
      "step": 11738
    },
    {
      "epoch": 0.3424645545247681,
      "grad_norm": 0.8591103044847006,
      "learning_rate": 7.651025144577025e-06,
      "loss": 0.1633,
      "step": 11739
    },
    {
      "epoch": 0.34249372775541165,
      "grad_norm": 1.1377176191831462,
      "learning_rate": 7.650624570248938e-06,
      "loss": 0.1551,
      "step": 11740
    },
    {
      "epoch": 0.3425229009860552,
      "grad_norm": 0.5858570252888864,
      "learning_rate": 7.650223972256709e-06,
      "loss": 0.1306,
      "step": 11741
    },
    {
      "epoch": 0.34255207421669875,
      "grad_norm": 1.1229020819776705,
      "learning_rate": 7.649823350603915e-06,
      "loss": 0.139,
      "step": 11742
    },
    {
      "epoch": 0.3425812474473423,
      "grad_norm": 0.9165859861890355,
      "learning_rate": 7.649422705294127e-06,
      "loss": 0.1496,
      "step": 11743
    },
    {
      "epoch": 0.34261042067798586,
      "grad_norm": 0.7792939136773086,
      "learning_rate": 7.64902203633093e-06,
      "loss": 0.1327,
      "step": 11744
    },
    {
      "epoch": 0.3426395939086294,
      "grad_norm": 0.8641812268876443,
      "learning_rate": 7.648621343717895e-06,
      "loss": 0.1557,
      "step": 11745
    },
    {
      "epoch": 0.342668767139273,
      "grad_norm": 0.9967717067789429,
      "learning_rate": 7.648220627458597e-06,
      "loss": 0.1321,
      "step": 11746
    },
    {
      "epoch": 0.3426979403699166,
      "grad_norm": 0.7088341018010219,
      "learning_rate": 7.647819887556621e-06,
      "loss": 0.1454,
      "step": 11747
    },
    {
      "epoch": 0.34272711360056013,
      "grad_norm": 0.9249612003418474,
      "learning_rate": 7.647419124015543e-06,
      "loss": 0.1451,
      "step": 11748
    },
    {
      "epoch": 0.3427562868312037,
      "grad_norm": 0.8937051285563443,
      "learning_rate": 7.647018336838936e-06,
      "loss": 0.1641,
      "step": 11749
    },
    {
      "epoch": 0.34278546006184724,
      "grad_norm": 0.9790850300978163,
      "learning_rate": 7.646617526030381e-06,
      "loss": 0.1453,
      "step": 11750
    },
    {
      "epoch": 0.3428146332924908,
      "grad_norm": 0.7835615237928236,
      "learning_rate": 7.64621669159346e-06,
      "loss": 0.1433,
      "step": 11751
    },
    {
      "epoch": 0.3428438065231344,
      "grad_norm": 0.9354865998205882,
      "learning_rate": 7.645815833531745e-06,
      "loss": 0.1608,
      "step": 11752
    },
    {
      "epoch": 0.34287297975377795,
      "grad_norm": 0.97072884507175,
      "learning_rate": 7.645414951848817e-06,
      "loss": 0.1621,
      "step": 11753
    },
    {
      "epoch": 0.3429021529844215,
      "grad_norm": 0.810817602561764,
      "learning_rate": 7.64501404654826e-06,
      "loss": 0.1611,
      "step": 11754
    },
    {
      "epoch": 0.34293132621506506,
      "grad_norm": 0.8599322383836334,
      "learning_rate": 7.644613117633644e-06,
      "loss": 0.1463,
      "step": 11755
    },
    {
      "epoch": 0.3429604994457086,
      "grad_norm": 0.9766877972562575,
      "learning_rate": 7.644212165108556e-06,
      "loss": 0.1782,
      "step": 11756
    },
    {
      "epoch": 0.34298967267635216,
      "grad_norm": 1.0184043242109055,
      "learning_rate": 7.643811188976574e-06,
      "loss": 0.1624,
      "step": 11757
    },
    {
      "epoch": 0.3430188459069957,
      "grad_norm": 0.9744970603175473,
      "learning_rate": 7.643410189241275e-06,
      "loss": 0.1539,
      "step": 11758
    },
    {
      "epoch": 0.3430480191376393,
      "grad_norm": 0.8742113603224083,
      "learning_rate": 7.643009165906242e-06,
      "loss": 0.1264,
      "step": 11759
    },
    {
      "epoch": 0.3430771923682829,
      "grad_norm": 1.0238609436396693,
      "learning_rate": 7.642608118975055e-06,
      "loss": 0.1735,
      "step": 11760
    },
    {
      "epoch": 0.34310636559892643,
      "grad_norm": 0.9282281248682216,
      "learning_rate": 7.64220704845129e-06,
      "loss": 0.1312,
      "step": 11761
    },
    {
      "epoch": 0.34313553882957,
      "grad_norm": 0.6502956815705189,
      "learning_rate": 7.641805954338534e-06,
      "loss": 0.1294,
      "step": 11762
    },
    {
      "epoch": 0.34316471206021354,
      "grad_norm": 0.9077142918053643,
      "learning_rate": 7.641404836640365e-06,
      "loss": 0.1338,
      "step": 11763
    },
    {
      "epoch": 0.3431938852908571,
      "grad_norm": 0.8776814631969013,
      "learning_rate": 7.641003695360363e-06,
      "loss": 0.1656,
      "step": 11764
    },
    {
      "epoch": 0.34322305852150065,
      "grad_norm": 0.6837570423818243,
      "learning_rate": 7.640602530502112e-06,
      "loss": 0.1539,
      "step": 11765
    },
    {
      "epoch": 0.34325223175214425,
      "grad_norm": 1.0231023425151644,
      "learning_rate": 7.64020134206919e-06,
      "loss": 0.1592,
      "step": 11766
    },
    {
      "epoch": 0.3432814049827878,
      "grad_norm": 0.7795675692279443,
      "learning_rate": 7.639800130065183e-06,
      "loss": 0.1513,
      "step": 11767
    },
    {
      "epoch": 0.34331057821343136,
      "grad_norm": 0.9129465443806678,
      "learning_rate": 7.639398894493668e-06,
      "loss": 0.1579,
      "step": 11768
    },
    {
      "epoch": 0.3433397514440749,
      "grad_norm": 0.740825249618372,
      "learning_rate": 7.638997635358232e-06,
      "loss": 0.1726,
      "step": 11769
    },
    {
      "epoch": 0.34336892467471847,
      "grad_norm": 0.7092008977067692,
      "learning_rate": 7.638596352662453e-06,
      "loss": 0.1361,
      "step": 11770
    },
    {
      "epoch": 0.343398097905362,
      "grad_norm": 0.8115283013936152,
      "learning_rate": 7.638195046409918e-06,
      "loss": 0.1497,
      "step": 11771
    },
    {
      "epoch": 0.3434272711360056,
      "grad_norm": 0.7232949261510621,
      "learning_rate": 7.637793716604208e-06,
      "loss": 0.1359,
      "step": 11772
    },
    {
      "epoch": 0.3434564443666492,
      "grad_norm": 0.7038991828709446,
      "learning_rate": 7.637392363248901e-06,
      "loss": 0.1502,
      "step": 11773
    },
    {
      "epoch": 0.34348561759729274,
      "grad_norm": 0.8078874550307183,
      "learning_rate": 7.636990986347588e-06,
      "loss": 0.1607,
      "step": 11774
    },
    {
      "epoch": 0.3435147908279363,
      "grad_norm": 0.7599301363984657,
      "learning_rate": 7.636589585903849e-06,
      "loss": 0.1629,
      "step": 11775
    },
    {
      "epoch": 0.34354396405857984,
      "grad_norm": 0.7400366455527537,
      "learning_rate": 7.636188161921265e-06,
      "loss": 0.1298,
      "step": 11776
    },
    {
      "epoch": 0.3435731372892234,
      "grad_norm": 0.8050150329011148,
      "learning_rate": 7.635786714403426e-06,
      "loss": 0.1799,
      "step": 11777
    },
    {
      "epoch": 0.34360231051986695,
      "grad_norm": 0.8460019406267215,
      "learning_rate": 7.63538524335391e-06,
      "loss": 0.1495,
      "step": 11778
    },
    {
      "epoch": 0.34363148375051056,
      "grad_norm": 0.7025849164842259,
      "learning_rate": 7.634983748776303e-06,
      "loss": 0.1403,
      "step": 11779
    },
    {
      "epoch": 0.3436606569811541,
      "grad_norm": 0.7814189906227804,
      "learning_rate": 7.634582230674192e-06,
      "loss": 0.159,
      "step": 11780
    },
    {
      "epoch": 0.34368983021179766,
      "grad_norm": 0.9038548466785211,
      "learning_rate": 7.63418068905116e-06,
      "loss": 0.1195,
      "step": 11781
    },
    {
      "epoch": 0.3437190034424412,
      "grad_norm": 0.7281775028636094,
      "learning_rate": 7.63377912391079e-06,
      "loss": 0.1377,
      "step": 11782
    },
    {
      "epoch": 0.34374817667308477,
      "grad_norm": 0.7515869071367169,
      "learning_rate": 7.63337753525667e-06,
      "loss": 0.1449,
      "step": 11783
    },
    {
      "epoch": 0.3437773499037283,
      "grad_norm": 1.0078354578427962,
      "learning_rate": 7.632975923092384e-06,
      "loss": 0.1326,
      "step": 11784
    },
    {
      "epoch": 0.3438065231343719,
      "grad_norm": 0.7075062353104524,
      "learning_rate": 7.632574287421516e-06,
      "loss": 0.1323,
      "step": 11785
    },
    {
      "epoch": 0.3438356963650155,
      "grad_norm": 1.0379271170357378,
      "learning_rate": 7.632172628247654e-06,
      "loss": 0.1375,
      "step": 11786
    },
    {
      "epoch": 0.34386486959565904,
      "grad_norm": 0.7948980531470224,
      "learning_rate": 7.631770945574384e-06,
      "loss": 0.1518,
      "step": 11787
    },
    {
      "epoch": 0.3438940428263026,
      "grad_norm": 0.9554102668718361,
      "learning_rate": 7.63136923940529e-06,
      "loss": 0.1317,
      "step": 11788
    },
    {
      "epoch": 0.34392321605694615,
      "grad_norm": 0.7869855229794803,
      "learning_rate": 7.63096750974396e-06,
      "loss": 0.1472,
      "step": 11789
    },
    {
      "epoch": 0.3439523892875897,
      "grad_norm": 0.8554123739345731,
      "learning_rate": 7.630565756593981e-06,
      "loss": 0.156,
      "step": 11790
    },
    {
      "epoch": 0.34398156251823325,
      "grad_norm": 0.8192941417051375,
      "learning_rate": 7.630163979958938e-06,
      "loss": 0.1298,
      "step": 11791
    },
    {
      "epoch": 0.3440107357488768,
      "grad_norm": 0.7793168006203326,
      "learning_rate": 7.629762179842419e-06,
      "loss": 0.1516,
      "step": 11792
    },
    {
      "epoch": 0.3440399089795204,
      "grad_norm": 0.9064907156623201,
      "learning_rate": 7.629360356248012e-06,
      "loss": 0.1571,
      "step": 11793
    },
    {
      "epoch": 0.34406908221016397,
      "grad_norm": 0.8137239435787438,
      "learning_rate": 7.628958509179303e-06,
      "loss": 0.139,
      "step": 11794
    },
    {
      "epoch": 0.3440982554408075,
      "grad_norm": 1.104659530404001,
      "learning_rate": 7.628556638639879e-06,
      "loss": 0.1451,
      "step": 11795
    },
    {
      "epoch": 0.3441274286714511,
      "grad_norm": 0.8586459575034965,
      "learning_rate": 7.628154744633329e-06,
      "loss": 0.1328,
      "step": 11796
    },
    {
      "epoch": 0.3441566019020946,
      "grad_norm": 0.7810139291249382,
      "learning_rate": 7.627752827163242e-06,
      "loss": 0.1368,
      "step": 11797
    },
    {
      "epoch": 0.3441857751327382,
      "grad_norm": 0.9871821266778147,
      "learning_rate": 7.627350886233203e-06,
      "loss": 0.1574,
      "step": 11798
    },
    {
      "epoch": 0.34421494836338173,
      "grad_norm": 0.7171893193030499,
      "learning_rate": 7.626948921846805e-06,
      "loss": 0.1251,
      "step": 11799
    },
    {
      "epoch": 0.34424412159402534,
      "grad_norm": 0.8632420151489926,
      "learning_rate": 7.6265469340076326e-06,
      "loss": 0.1502,
      "step": 11800
    },
    {
      "epoch": 0.3442732948246689,
      "grad_norm": 0.8329398766342871,
      "learning_rate": 7.6261449227192765e-06,
      "loss": 0.1432,
      "step": 11801
    },
    {
      "epoch": 0.34430246805531245,
      "grad_norm": 0.7117900534539481,
      "learning_rate": 7.625742887985325e-06,
      "loss": 0.1604,
      "step": 11802
    },
    {
      "epoch": 0.344331641285956,
      "grad_norm": 0.7999454698812005,
      "learning_rate": 7.6253408298093665e-06,
      "loss": 0.1484,
      "step": 11803
    },
    {
      "epoch": 0.34436081451659956,
      "grad_norm": 0.9287764574396962,
      "learning_rate": 7.6249387481949954e-06,
      "loss": 0.1777,
      "step": 11804
    },
    {
      "epoch": 0.3443899877472431,
      "grad_norm": 0.6733447502490658,
      "learning_rate": 7.624536643145796e-06,
      "loss": 0.1488,
      "step": 11805
    },
    {
      "epoch": 0.3444191609778867,
      "grad_norm": 0.7819878196454474,
      "learning_rate": 7.624134514665359e-06,
      "loss": 0.1527,
      "step": 11806
    },
    {
      "epoch": 0.34444833420853027,
      "grad_norm": 0.7754314146898684,
      "learning_rate": 7.623732362757277e-06,
      "loss": 0.1405,
      "step": 11807
    },
    {
      "epoch": 0.3444775074391738,
      "grad_norm": 0.9180180567783497,
      "learning_rate": 7.6233301874251375e-06,
      "loss": 0.1505,
      "step": 11808
    },
    {
      "epoch": 0.3445066806698174,
      "grad_norm": 0.7774195654853466,
      "learning_rate": 7.622927988672533e-06,
      "loss": 0.1502,
      "step": 11809
    },
    {
      "epoch": 0.34453585390046093,
      "grad_norm": 0.829149529881248,
      "learning_rate": 7.622525766503054e-06,
      "loss": 0.1508,
      "step": 11810
    },
    {
      "epoch": 0.3445650271311045,
      "grad_norm": 0.7174062064908221,
      "learning_rate": 7.62212352092029e-06,
      "loss": 0.1468,
      "step": 11811
    },
    {
      "epoch": 0.34459420036174804,
      "grad_norm": 0.6797694666968168,
      "learning_rate": 7.6217212519278335e-06,
      "loss": 0.1492,
      "step": 11812
    },
    {
      "epoch": 0.34462337359239165,
      "grad_norm": 0.7676823786348645,
      "learning_rate": 7.621318959529276e-06,
      "loss": 0.1412,
      "step": 11813
    },
    {
      "epoch": 0.3446525468230352,
      "grad_norm": 0.8530918494192405,
      "learning_rate": 7.620916643728209e-06,
      "loss": 0.1587,
      "step": 11814
    },
    {
      "epoch": 0.34468172005367875,
      "grad_norm": 0.8594938308878028,
      "learning_rate": 7.620514304528223e-06,
      "loss": 0.164,
      "step": 11815
    },
    {
      "epoch": 0.3447108932843223,
      "grad_norm": 0.7989164744621123,
      "learning_rate": 7.62011194193291e-06,
      "loss": 0.1455,
      "step": 11816
    },
    {
      "epoch": 0.34474006651496586,
      "grad_norm": 1.052688992387018,
      "learning_rate": 7.619709555945865e-06,
      "loss": 0.185,
      "step": 11817
    },
    {
      "epoch": 0.3447692397456094,
      "grad_norm": 0.6429489131274733,
      "learning_rate": 7.619307146570677e-06,
      "loss": 0.122,
      "step": 11818
    },
    {
      "epoch": 0.34479841297625297,
      "grad_norm": 0.803905314838356,
      "learning_rate": 7.618904713810941e-06,
      "loss": 0.1481,
      "step": 11819
    },
    {
      "epoch": 0.3448275862068966,
      "grad_norm": 1.0731413585649892,
      "learning_rate": 7.618502257670249e-06,
      "loss": 0.1467,
      "step": 11820
    },
    {
      "epoch": 0.34485675943754013,
      "grad_norm": 0.639650152004615,
      "learning_rate": 7.618099778152193e-06,
      "loss": 0.1305,
      "step": 11821
    },
    {
      "epoch": 0.3448859326681837,
      "grad_norm": 0.7677202402257982,
      "learning_rate": 7.617697275260367e-06,
      "loss": 0.1682,
      "step": 11822
    },
    {
      "epoch": 0.34491510589882723,
      "grad_norm": 1.022652750176267,
      "learning_rate": 7.6172947489983655e-06,
      "loss": 0.1397,
      "step": 11823
    },
    {
      "epoch": 0.3449442791294708,
      "grad_norm": 0.7521072658515839,
      "learning_rate": 7.616892199369781e-06,
      "loss": 0.1339,
      "step": 11824
    },
    {
      "epoch": 0.34497345236011434,
      "grad_norm": 0.7864432038689395,
      "learning_rate": 7.616489626378207e-06,
      "loss": 0.1631,
      "step": 11825
    },
    {
      "epoch": 0.3450026255907579,
      "grad_norm": 0.7365787358449442,
      "learning_rate": 7.616087030027239e-06,
      "loss": 0.1544,
      "step": 11826
    },
    {
      "epoch": 0.3450317988214015,
      "grad_norm": 0.9077261418591541,
      "learning_rate": 7.6156844103204704e-06,
      "loss": 0.1677,
      "step": 11827
    },
    {
      "epoch": 0.34506097205204506,
      "grad_norm": 0.7962660675824412,
      "learning_rate": 7.615281767261495e-06,
      "loss": 0.1502,
      "step": 11828
    },
    {
      "epoch": 0.3450901452826886,
      "grad_norm": 0.8082546928000954,
      "learning_rate": 7.6148791008539106e-06,
      "loss": 0.1372,
      "step": 11829
    },
    {
      "epoch": 0.34511931851333216,
      "grad_norm": 0.8922996356975215,
      "learning_rate": 7.614476411101308e-06,
      "loss": 0.1753,
      "step": 11830
    },
    {
      "epoch": 0.3451484917439757,
      "grad_norm": 0.8510290452679441,
      "learning_rate": 7.614073698007285e-06,
      "loss": 0.1533,
      "step": 11831
    },
    {
      "epoch": 0.34517766497461927,
      "grad_norm": 0.8544577967758579,
      "learning_rate": 7.613670961575435e-06,
      "loss": 0.1509,
      "step": 11832
    },
    {
      "epoch": 0.3452068382052629,
      "grad_norm": 1.0018614761194795,
      "learning_rate": 7.613268201809354e-06,
      "loss": 0.1526,
      "step": 11833
    },
    {
      "epoch": 0.34523601143590643,
      "grad_norm": 1.0104117854874037,
      "learning_rate": 7.61286541871264e-06,
      "loss": 0.1476,
      "step": 11834
    },
    {
      "epoch": 0.34526518466655,
      "grad_norm": 0.6568999429253323,
      "learning_rate": 7.612462612288886e-06,
      "loss": 0.1153,
      "step": 11835
    },
    {
      "epoch": 0.34529435789719354,
      "grad_norm": 0.9294501296868387,
      "learning_rate": 7.61205978254169e-06,
      "loss": 0.1575,
      "step": 11836
    },
    {
      "epoch": 0.3453235311278371,
      "grad_norm": 1.1657132330027167,
      "learning_rate": 7.611656929474649e-06,
      "loss": 0.1731,
      "step": 11837
    },
    {
      "epoch": 0.34535270435848064,
      "grad_norm": 0.9563649950542031,
      "learning_rate": 7.611254053091357e-06,
      "loss": 0.1704,
      "step": 11838
    },
    {
      "epoch": 0.3453818775891242,
      "grad_norm": 1.2000026011577534,
      "learning_rate": 7.610851153395413e-06,
      "loss": 0.1684,
      "step": 11839
    },
    {
      "epoch": 0.3454110508197678,
      "grad_norm": 0.7921110737620093,
      "learning_rate": 7.6104482303904126e-06,
      "loss": 0.1356,
      "step": 11840
    },
    {
      "epoch": 0.34544022405041136,
      "grad_norm": 0.9640731752079257,
      "learning_rate": 7.610045284079954e-06,
      "loss": 0.1611,
      "step": 11841
    },
    {
      "epoch": 0.3454693972810549,
      "grad_norm": 0.9162487112195041,
      "learning_rate": 7.609642314467633e-06,
      "loss": 0.1499,
      "step": 11842
    },
    {
      "epoch": 0.34549857051169847,
      "grad_norm": 0.8670030361238367,
      "learning_rate": 7.609239321557049e-06,
      "loss": 0.1494,
      "step": 11843
    },
    {
      "epoch": 0.345527743742342,
      "grad_norm": 0.6570273748431265,
      "learning_rate": 7.608836305351799e-06,
      "loss": 0.1318,
      "step": 11844
    },
    {
      "epoch": 0.3455569169729856,
      "grad_norm": 0.8564481053229913,
      "learning_rate": 7.608433265855482e-06,
      "loss": 0.1615,
      "step": 11845
    },
    {
      "epoch": 0.3455860902036291,
      "grad_norm": 0.9063894688631816,
      "learning_rate": 7.608030203071695e-06,
      "loss": 0.1593,
      "step": 11846
    },
    {
      "epoch": 0.34561526343427273,
      "grad_norm": 0.7773547283597553,
      "learning_rate": 7.607627117004038e-06,
      "loss": 0.1485,
      "step": 11847
    },
    {
      "epoch": 0.3456444366649163,
      "grad_norm": 0.7893258556823853,
      "learning_rate": 7.607224007656107e-06,
      "loss": 0.1692,
      "step": 11848
    },
    {
      "epoch": 0.34567360989555984,
      "grad_norm": 0.8479660892855598,
      "learning_rate": 7.606820875031504e-06,
      "loss": 0.1544,
      "step": 11849
    },
    {
      "epoch": 0.3457027831262034,
      "grad_norm": 0.803579369190686,
      "learning_rate": 7.606417719133825e-06,
      "loss": 0.1518,
      "step": 11850
    },
    {
      "epoch": 0.34573195635684695,
      "grad_norm": 0.8354596794096185,
      "learning_rate": 7.6060145399666704e-06,
      "loss": 0.1504,
      "step": 11851
    },
    {
      "epoch": 0.3457611295874905,
      "grad_norm": 0.8353572247515243,
      "learning_rate": 7.605611337533643e-06,
      "loss": 0.1693,
      "step": 11852
    },
    {
      "epoch": 0.34579030281813405,
      "grad_norm": 0.7725720728359323,
      "learning_rate": 7.6052081118383355e-06,
      "loss": 0.1195,
      "step": 11853
    },
    {
      "epoch": 0.34581947604877766,
      "grad_norm": 0.8027182717158562,
      "learning_rate": 7.604804862884356e-06,
      "loss": 0.1787,
      "step": 11854
    },
    {
      "epoch": 0.3458486492794212,
      "grad_norm": 0.7192842937875643,
      "learning_rate": 7.604401590675299e-06,
      "loss": 0.1328,
      "step": 11855
    },
    {
      "epoch": 0.34587782251006477,
      "grad_norm": 0.6950695808910768,
      "learning_rate": 7.603998295214765e-06,
      "loss": 0.1352,
      "step": 11856
    },
    {
      "epoch": 0.3459069957407083,
      "grad_norm": 0.7661365595616834,
      "learning_rate": 7.603594976506356e-06,
      "loss": 0.1266,
      "step": 11857
    },
    {
      "epoch": 0.3459361689713519,
      "grad_norm": 0.6651939436178301,
      "learning_rate": 7.6031916345536735e-06,
      "loss": 0.1726,
      "step": 11858
    },
    {
      "epoch": 0.34596534220199543,
      "grad_norm": 0.8730659487962338,
      "learning_rate": 7.602788269360318e-06,
      "loss": 0.1596,
      "step": 11859
    },
    {
      "epoch": 0.34599451543263904,
      "grad_norm": 0.8727372307097581,
      "learning_rate": 7.602384880929889e-06,
      "loss": 0.1703,
      "step": 11860
    },
    {
      "epoch": 0.3460236886632826,
      "grad_norm": 0.8503142625307699,
      "learning_rate": 7.6019814692659885e-06,
      "loss": 0.1466,
      "step": 11861
    },
    {
      "epoch": 0.34605286189392614,
      "grad_norm": 0.9260690869444146,
      "learning_rate": 7.601578034372221e-06,
      "loss": 0.1417,
      "step": 11862
    },
    {
      "epoch": 0.3460820351245697,
      "grad_norm": 0.9450116606750869,
      "learning_rate": 7.601174576252184e-06,
      "loss": 0.1424,
      "step": 11863
    },
    {
      "epoch": 0.34611120835521325,
      "grad_norm": 0.9138215423529065,
      "learning_rate": 7.600771094909483e-06,
      "loss": 0.1172,
      "step": 11864
    },
    {
      "epoch": 0.3461403815858568,
      "grad_norm": 0.7713805072391263,
      "learning_rate": 7.600367590347716e-06,
      "loss": 0.1481,
      "step": 11865
    },
    {
      "epoch": 0.34616955481650036,
      "grad_norm": 0.879120531850407,
      "learning_rate": 7.59996406257049e-06,
      "loss": 0.1858,
      "step": 11866
    },
    {
      "epoch": 0.34619872804714397,
      "grad_norm": 0.7535421844174716,
      "learning_rate": 7.599560511581406e-06,
      "loss": 0.1262,
      "step": 11867
    },
    {
      "epoch": 0.3462279012777875,
      "grad_norm": 0.8378352398830269,
      "learning_rate": 7.5991569373840625e-06,
      "loss": 0.1309,
      "step": 11868
    },
    {
      "epoch": 0.3462570745084311,
      "grad_norm": 0.8339734934011489,
      "learning_rate": 7.59875333998207e-06,
      "loss": 0.1248,
      "step": 11869
    },
    {
      "epoch": 0.3462862477390746,
      "grad_norm": 0.7902759412143556,
      "learning_rate": 7.598349719379028e-06,
      "loss": 0.1437,
      "step": 11870
    },
    {
      "epoch": 0.3463154209697182,
      "grad_norm": 0.8554018337484433,
      "learning_rate": 7.597946075578538e-06,
      "loss": 0.1287,
      "step": 11871
    },
    {
      "epoch": 0.34634459420036173,
      "grad_norm": 0.9772872600690051,
      "learning_rate": 7.5975424085842064e-06,
      "loss": 0.1411,
      "step": 11872
    },
    {
      "epoch": 0.3463737674310053,
      "grad_norm": 0.8488484589746388,
      "learning_rate": 7.597138718399637e-06,
      "loss": 0.1531,
      "step": 11873
    },
    {
      "epoch": 0.3464029406616489,
      "grad_norm": 0.8447135368301548,
      "learning_rate": 7.596735005028433e-06,
      "loss": 0.1408,
      "step": 11874
    },
    {
      "epoch": 0.34643211389229245,
      "grad_norm": 0.9235467300621414,
      "learning_rate": 7.596331268474198e-06,
      "loss": 0.1532,
      "step": 11875
    },
    {
      "epoch": 0.346461287122936,
      "grad_norm": 0.7567157832898448,
      "learning_rate": 7.595927508740537e-06,
      "loss": 0.1313,
      "step": 11876
    },
    {
      "epoch": 0.34649046035357955,
      "grad_norm": 1.0210908962802436,
      "learning_rate": 7.595523725831055e-06,
      "loss": 0.1479,
      "step": 11877
    },
    {
      "epoch": 0.3465196335842231,
      "grad_norm": 0.9050302294750344,
      "learning_rate": 7.595119919749358e-06,
      "loss": 0.1532,
      "step": 11878
    },
    {
      "epoch": 0.34654880681486666,
      "grad_norm": 0.8667936149681241,
      "learning_rate": 7.594716090499049e-06,
      "loss": 0.147,
      "step": 11879
    },
    {
      "epoch": 0.3465779800455102,
      "grad_norm": 1.254160825988959,
      "learning_rate": 7.5943122380837334e-06,
      "loss": 0.1464,
      "step": 11880
    },
    {
      "epoch": 0.3466071532761538,
      "grad_norm": 1.0749824320900403,
      "learning_rate": 7.5939083625070186e-06,
      "loss": 0.161,
      "step": 11881
    },
    {
      "epoch": 0.3466363265067974,
      "grad_norm": 0.8301352679499988,
      "learning_rate": 7.593504463772509e-06,
      "loss": 0.1449,
      "step": 11882
    },
    {
      "epoch": 0.34666549973744093,
      "grad_norm": 0.7502731131233293,
      "learning_rate": 7.59310054188381e-06,
      "loss": 0.1406,
      "step": 11883
    },
    {
      "epoch": 0.3466946729680845,
      "grad_norm": 1.1800001635301305,
      "learning_rate": 7.592696596844528e-06,
      "loss": 0.1627,
      "step": 11884
    },
    {
      "epoch": 0.34672384619872804,
      "grad_norm": 0.9820867231760797,
      "learning_rate": 7.592292628658272e-06,
      "loss": 0.1413,
      "step": 11885
    },
    {
      "epoch": 0.3467530194293716,
      "grad_norm": 0.6702341534760974,
      "learning_rate": 7.591888637328645e-06,
      "loss": 0.126,
      "step": 11886
    },
    {
      "epoch": 0.34678219266001514,
      "grad_norm": 1.0147294314014843,
      "learning_rate": 7.591484622859254e-06,
      "loss": 0.1646,
      "step": 11887
    },
    {
      "epoch": 0.34681136589065875,
      "grad_norm": 1.0580514292145637,
      "learning_rate": 7.591080585253709e-06,
      "loss": 0.1637,
      "step": 11888
    },
    {
      "epoch": 0.3468405391213023,
      "grad_norm": 0.7188400047709667,
      "learning_rate": 7.590676524515612e-06,
      "loss": 0.1382,
      "step": 11889
    },
    {
      "epoch": 0.34686971235194586,
      "grad_norm": 0.8577118374671773,
      "learning_rate": 7.5902724406485765e-06,
      "loss": 0.1584,
      "step": 11890
    },
    {
      "epoch": 0.3468988855825894,
      "grad_norm": 0.8155399706740338,
      "learning_rate": 7.589868333656205e-06,
      "loss": 0.1305,
      "step": 11891
    },
    {
      "epoch": 0.34692805881323296,
      "grad_norm": 0.9305157557534992,
      "learning_rate": 7.5894642035421085e-06,
      "loss": 0.1433,
      "step": 11892
    },
    {
      "epoch": 0.3469572320438765,
      "grad_norm": 0.6943071711003896,
      "learning_rate": 7.589060050309893e-06,
      "loss": 0.1369,
      "step": 11893
    },
    {
      "epoch": 0.3469864052745201,
      "grad_norm": 0.7759315503155931,
      "learning_rate": 7.588655873963169e-06,
      "loss": 0.1411,
      "step": 11894
    },
    {
      "epoch": 0.3470155785051637,
      "grad_norm": 0.7894836418023452,
      "learning_rate": 7.58825167450554e-06,
      "loss": 0.1446,
      "step": 11895
    },
    {
      "epoch": 0.34704475173580723,
      "grad_norm": 0.8018921402564896,
      "learning_rate": 7.58784745194062e-06,
      "loss": 0.1408,
      "step": 11896
    },
    {
      "epoch": 0.3470739249664508,
      "grad_norm": 0.7823097961516359,
      "learning_rate": 7.587443206272016e-06,
      "loss": 0.1768,
      "step": 11897
    },
    {
      "epoch": 0.34710309819709434,
      "grad_norm": 0.5954033579328916,
      "learning_rate": 7.587038937503336e-06,
      "loss": 0.1427,
      "step": 11898
    },
    {
      "epoch": 0.3471322714277379,
      "grad_norm": 0.8447046836073026,
      "learning_rate": 7.586634645638192e-06,
      "loss": 0.1302,
      "step": 11899
    },
    {
      "epoch": 0.34716144465838145,
      "grad_norm": 1.320328461967567,
      "learning_rate": 7.586230330680189e-06,
      "loss": 0.1341,
      "step": 11900
    },
    {
      "epoch": 0.34719061788902505,
      "grad_norm": 0.8787874159944483,
      "learning_rate": 7.58582599263294e-06,
      "loss": 0.1734,
      "step": 11901
    },
    {
      "epoch": 0.3472197911196686,
      "grad_norm": 0.8941425430177624,
      "learning_rate": 7.585421631500053e-06,
      "loss": 0.1544,
      "step": 11902
    },
    {
      "epoch": 0.34724896435031216,
      "grad_norm": 0.901562631007388,
      "learning_rate": 7.585017247285139e-06,
      "loss": 0.1424,
      "step": 11903
    },
    {
      "epoch": 0.3472781375809557,
      "grad_norm": 0.86713749606017,
      "learning_rate": 7.5846128399918085e-06,
      "loss": 0.1381,
      "step": 11904
    },
    {
      "epoch": 0.34730731081159927,
      "grad_norm": 0.7800106219324874,
      "learning_rate": 7.5842084096236725e-06,
      "loss": 0.1874,
      "step": 11905
    },
    {
      "epoch": 0.3473364840422428,
      "grad_norm": 0.8805537185990067,
      "learning_rate": 7.5838039561843394e-06,
      "loss": 0.1495,
      "step": 11906
    },
    {
      "epoch": 0.3473656572728864,
      "grad_norm": 1.0926782261149017,
      "learning_rate": 7.58339947967742e-06,
      "loss": 0.1582,
      "step": 11907
    },
    {
      "epoch": 0.34739483050353,
      "grad_norm": 0.7949222814397244,
      "learning_rate": 7.58299498010653e-06,
      "loss": 0.1602,
      "step": 11908
    },
    {
      "epoch": 0.34742400373417354,
      "grad_norm": 1.0383029204101395,
      "learning_rate": 7.582590457475277e-06,
      "loss": 0.1401,
      "step": 11909
    },
    {
      "epoch": 0.3474531769648171,
      "grad_norm": 0.9646818690641682,
      "learning_rate": 7.58218591178727e-06,
      "loss": 0.157,
      "step": 11910
    },
    {
      "epoch": 0.34748235019546064,
      "grad_norm": 0.6976252489915159,
      "learning_rate": 7.581781343046125e-06,
      "loss": 0.1291,
      "step": 11911
    },
    {
      "epoch": 0.3475115234261042,
      "grad_norm": 0.7660069983827438,
      "learning_rate": 7.581376751255453e-06,
      "loss": 0.1608,
      "step": 11912
    },
    {
      "epoch": 0.34754069665674775,
      "grad_norm": 1.0084938181599934,
      "learning_rate": 7.580972136418865e-06,
      "loss": 0.1423,
      "step": 11913
    },
    {
      "epoch": 0.3475698698873913,
      "grad_norm": 0.8818337085252713,
      "learning_rate": 7.580567498539975e-06,
      "loss": 0.1482,
      "step": 11914
    },
    {
      "epoch": 0.3475990431180349,
      "grad_norm": 0.8588129928066195,
      "learning_rate": 7.580162837622394e-06,
      "loss": 0.1395,
      "step": 11915
    },
    {
      "epoch": 0.34762821634867846,
      "grad_norm": 0.7396995712715371,
      "learning_rate": 7.579758153669736e-06,
      "loss": 0.1566,
      "step": 11916
    },
    {
      "epoch": 0.347657389579322,
      "grad_norm": 1.061775140027179,
      "learning_rate": 7.579353446685611e-06,
      "loss": 0.1488,
      "step": 11917
    },
    {
      "epoch": 0.34768656280996557,
      "grad_norm": 1.0504367619558557,
      "learning_rate": 7.578948716673636e-06,
      "loss": 0.1277,
      "step": 11918
    },
    {
      "epoch": 0.3477157360406091,
      "grad_norm": 0.6785031052925815,
      "learning_rate": 7.578543963637422e-06,
      "loss": 0.1393,
      "step": 11919
    },
    {
      "epoch": 0.3477449092712527,
      "grad_norm": 0.9635945082363817,
      "learning_rate": 7.578139187580582e-06,
      "loss": 0.1549,
      "step": 11920
    },
    {
      "epoch": 0.3477740825018963,
      "grad_norm": 0.8271091528319552,
      "learning_rate": 7.57773438850673e-06,
      "loss": 0.1622,
      "step": 11921
    },
    {
      "epoch": 0.34780325573253984,
      "grad_norm": 0.8516236273741031,
      "learning_rate": 7.577329566419482e-06,
      "loss": 0.1502,
      "step": 11922
    },
    {
      "epoch": 0.3478324289631834,
      "grad_norm": 1.3892241652855426,
      "learning_rate": 7.5769247213224515e-06,
      "loss": 0.1534,
      "step": 11923
    },
    {
      "epoch": 0.34786160219382695,
      "grad_norm": 0.8022308581565714,
      "learning_rate": 7.576519853219253e-06,
      "loss": 0.1528,
      "step": 11924
    },
    {
      "epoch": 0.3478907754244705,
      "grad_norm": 0.9506101986804958,
      "learning_rate": 7.576114962113499e-06,
      "loss": 0.1344,
      "step": 11925
    },
    {
      "epoch": 0.34791994865511405,
      "grad_norm": 0.6805201034378913,
      "learning_rate": 7.575710048008804e-06,
      "loss": 0.1411,
      "step": 11926
    },
    {
      "epoch": 0.3479491218857576,
      "grad_norm": 0.9126539739122491,
      "learning_rate": 7.575305110908789e-06,
      "loss": 0.1488,
      "step": 11927
    },
    {
      "epoch": 0.3479782951164012,
      "grad_norm": 0.7806815575921464,
      "learning_rate": 7.57490015081706e-06,
      "loss": 0.1382,
      "step": 11928
    },
    {
      "epoch": 0.34800746834704477,
      "grad_norm": 1.022223679229018,
      "learning_rate": 7.5744951677372405e-06,
      "loss": 0.1357,
      "step": 11929
    },
    {
      "epoch": 0.3480366415776883,
      "grad_norm": 1.0693245096665438,
      "learning_rate": 7.574090161672941e-06,
      "loss": 0.1598,
      "step": 11930
    },
    {
      "epoch": 0.3480658148083319,
      "grad_norm": 1.0079428372937314,
      "learning_rate": 7.573685132627779e-06,
      "loss": 0.1458,
      "step": 11931
    },
    {
      "epoch": 0.34809498803897543,
      "grad_norm": 0.8614096510807508,
      "learning_rate": 7.573280080605372e-06,
      "loss": 0.1711,
      "step": 11932
    },
    {
      "epoch": 0.348124161269619,
      "grad_norm": 0.8788365902587741,
      "learning_rate": 7.5728750056093324e-06,
      "loss": 0.1649,
      "step": 11933
    },
    {
      "epoch": 0.34815333450026253,
      "grad_norm": 1.0518207279438658,
      "learning_rate": 7.572469907643281e-06,
      "loss": 0.154,
      "step": 11934
    },
    {
      "epoch": 0.34818250773090614,
      "grad_norm": 0.8490076901076699,
      "learning_rate": 7.572064786710831e-06,
      "loss": 0.1525,
      "step": 11935
    },
    {
      "epoch": 0.3482116809615497,
      "grad_norm": 1.1160961260912805,
      "learning_rate": 7.571659642815601e-06,
      "loss": 0.1479,
      "step": 11936
    },
    {
      "epoch": 0.34824085419219325,
      "grad_norm": 0.9834525667146082,
      "learning_rate": 7.571254475961207e-06,
      "loss": 0.1856,
      "step": 11937
    },
    {
      "epoch": 0.3482700274228368,
      "grad_norm": 0.9704653302409625,
      "learning_rate": 7.570849286151268e-06,
      "loss": 0.1222,
      "step": 11938
    },
    {
      "epoch": 0.34829920065348036,
      "grad_norm": 0.7723015389239746,
      "learning_rate": 7.570444073389401e-06,
      "loss": 0.1481,
      "step": 11939
    },
    {
      "epoch": 0.3483283738841239,
      "grad_norm": 1.0652574036750242,
      "learning_rate": 7.570038837679221e-06,
      "loss": 0.1449,
      "step": 11940
    },
    {
      "epoch": 0.34835754711476746,
      "grad_norm": 0.9229097044709855,
      "learning_rate": 7.569633579024349e-06,
      "loss": 0.1347,
      "step": 11941
    },
    {
      "epoch": 0.34838672034541107,
      "grad_norm": 1.0344299975079618,
      "learning_rate": 7.569228297428401e-06,
      "loss": 0.1501,
      "step": 11942
    },
    {
      "epoch": 0.3484158935760546,
      "grad_norm": 1.0146508843935946,
      "learning_rate": 7.568822992894996e-06,
      "loss": 0.1346,
      "step": 11943
    },
    {
      "epoch": 0.3484450668066982,
      "grad_norm": 0.7724812330039791,
      "learning_rate": 7.5684176654277544e-06,
      "loss": 0.1345,
      "step": 11944
    },
    {
      "epoch": 0.34847424003734173,
      "grad_norm": 0.7955342858813292,
      "learning_rate": 7.568012315030291e-06,
      "loss": 0.1524,
      "step": 11945
    },
    {
      "epoch": 0.3485034132679853,
      "grad_norm": 0.8702249736454624,
      "learning_rate": 7.567606941706227e-06,
      "loss": 0.1633,
      "step": 11946
    },
    {
      "epoch": 0.34853258649862884,
      "grad_norm": 0.7052427597011488,
      "learning_rate": 7.567201545459182e-06,
      "loss": 0.1411,
      "step": 11947
    },
    {
      "epoch": 0.34856175972927245,
      "grad_norm": 0.7754105467385218,
      "learning_rate": 7.566796126292775e-06,
      "loss": 0.136,
      "step": 11948
    },
    {
      "epoch": 0.348590932959916,
      "grad_norm": 0.9373317031533087,
      "learning_rate": 7.566390684210623e-06,
      "loss": 0.1383,
      "step": 11949
    },
    {
      "epoch": 0.34862010619055955,
      "grad_norm": 0.750597646362002,
      "learning_rate": 7.565985219216348e-06,
      "loss": 0.1305,
      "step": 11950
    },
    {
      "epoch": 0.3486492794212031,
      "grad_norm": 0.7445725181939357,
      "learning_rate": 7.56557973131357e-06,
      "loss": 0.1549,
      "step": 11951
    },
    {
      "epoch": 0.34867845265184666,
      "grad_norm": 1.1197940421993258,
      "learning_rate": 7.565174220505908e-06,
      "loss": 0.1535,
      "step": 11952
    },
    {
      "epoch": 0.3487076258824902,
      "grad_norm": 0.8255686489900675,
      "learning_rate": 7.5647686867969836e-06,
      "loss": 0.1635,
      "step": 11953
    },
    {
      "epoch": 0.34873679911313377,
      "grad_norm": 0.8335188400060184,
      "learning_rate": 7.564363130190417e-06,
      "loss": 0.153,
      "step": 11954
    },
    {
      "epoch": 0.3487659723437774,
      "grad_norm": 0.9209087781811074,
      "learning_rate": 7.563957550689829e-06,
      "loss": 0.1337,
      "step": 11955
    },
    {
      "epoch": 0.34879514557442093,
      "grad_norm": 0.8560534645406798,
      "learning_rate": 7.56355194829884e-06,
      "loss": 0.165,
      "step": 11956
    },
    {
      "epoch": 0.3488243188050645,
      "grad_norm": 0.8093598768193273,
      "learning_rate": 7.563146323021069e-06,
      "loss": 0.1406,
      "step": 11957
    },
    {
      "epoch": 0.34885349203570803,
      "grad_norm": 0.6959558351912577,
      "learning_rate": 7.56274067486014e-06,
      "loss": 0.1449,
      "step": 11958
    },
    {
      "epoch": 0.3488826652663516,
      "grad_norm": 0.7469802476050259,
      "learning_rate": 7.562335003819676e-06,
      "loss": 0.1488,
      "step": 11959
    },
    {
      "epoch": 0.34891183849699514,
      "grad_norm": 0.7686951957740379,
      "learning_rate": 7.561929309903295e-06,
      "loss": 0.1309,
      "step": 11960
    },
    {
      "epoch": 0.3489410117276387,
      "grad_norm": 0.6687390943009407,
      "learning_rate": 7.561523593114621e-06,
      "loss": 0.1376,
      "step": 11961
    },
    {
      "epoch": 0.3489701849582823,
      "grad_norm": 0.7062388428726224,
      "learning_rate": 7.561117853457277e-06,
      "loss": 0.136,
      "step": 11962
    },
    {
      "epoch": 0.34899935818892586,
      "grad_norm": 0.8304008654549321,
      "learning_rate": 7.560712090934883e-06,
      "loss": 0.1302,
      "step": 11963
    },
    {
      "epoch": 0.3490285314195694,
      "grad_norm": 1.1233598241761529,
      "learning_rate": 7.560306305551064e-06,
      "loss": 0.1174,
      "step": 11964
    },
    {
      "epoch": 0.34905770465021296,
      "grad_norm": 0.6913879268733832,
      "learning_rate": 7.5599004973094404e-06,
      "loss": 0.1574,
      "step": 11965
    },
    {
      "epoch": 0.3490868778808565,
      "grad_norm": 1.0013549561427575,
      "learning_rate": 7.559494666213636e-06,
      "loss": 0.153,
      "step": 11966
    },
    {
      "epoch": 0.34911605111150007,
      "grad_norm": 0.732861297331376,
      "learning_rate": 7.559088812267274e-06,
      "loss": 0.1576,
      "step": 11967
    },
    {
      "epoch": 0.3491452243421436,
      "grad_norm": 0.8105003688198452,
      "learning_rate": 7.55868293547398e-06,
      "loss": 0.1535,
      "step": 11968
    },
    {
      "epoch": 0.34917439757278723,
      "grad_norm": 0.8230910520647171,
      "learning_rate": 7.558277035837373e-06,
      "loss": 0.141,
      "step": 11969
    },
    {
      "epoch": 0.3492035708034308,
      "grad_norm": 0.7551049755951319,
      "learning_rate": 7.5578711133610815e-06,
      "loss": 0.1311,
      "step": 11970
    },
    {
      "epoch": 0.34923274403407434,
      "grad_norm": 0.8307429867540659,
      "learning_rate": 7.557465168048726e-06,
      "loss": 0.1257,
      "step": 11971
    },
    {
      "epoch": 0.3492619172647179,
      "grad_norm": 0.8900528984135553,
      "learning_rate": 7.557059199903933e-06,
      "loss": 0.1416,
      "step": 11972
    },
    {
      "epoch": 0.34929109049536144,
      "grad_norm": 0.7253443237601457,
      "learning_rate": 7.556653208930325e-06,
      "loss": 0.1301,
      "step": 11973
    },
    {
      "epoch": 0.349320263726005,
      "grad_norm": 0.9300315031950055,
      "learning_rate": 7.556247195131527e-06,
      "loss": 0.1566,
      "step": 11974
    },
    {
      "epoch": 0.3493494369566486,
      "grad_norm": 0.7510839853240795,
      "learning_rate": 7.555841158511166e-06,
      "loss": 0.1435,
      "step": 11975
    },
    {
      "epoch": 0.34937861018729216,
      "grad_norm": 0.7661954174756337,
      "learning_rate": 7.555435099072864e-06,
      "loss": 0.1615,
      "step": 11976
    },
    {
      "epoch": 0.3494077834179357,
      "grad_norm": 0.9991595133539672,
      "learning_rate": 7.555029016820248e-06,
      "loss": 0.1311,
      "step": 11977
    },
    {
      "epoch": 0.34943695664857927,
      "grad_norm": 0.8549913344929823,
      "learning_rate": 7.554622911756943e-06,
      "loss": 0.127,
      "step": 11978
    },
    {
      "epoch": 0.3494661298792228,
      "grad_norm": 1.4073594749639478,
      "learning_rate": 7.554216783886573e-06,
      "loss": 0.154,
      "step": 11979
    },
    {
      "epoch": 0.3494953031098664,
      "grad_norm": 0.8859617198424669,
      "learning_rate": 7.553810633212766e-06,
      "loss": 0.1617,
      "step": 11980
    },
    {
      "epoch": 0.3495244763405099,
      "grad_norm": 0.8206997272813257,
      "learning_rate": 7.553404459739149e-06,
      "loss": 0.1383,
      "step": 11981
    },
    {
      "epoch": 0.34955364957115354,
      "grad_norm": 0.9161949749226932,
      "learning_rate": 7.552998263469344e-06,
      "loss": 0.1281,
      "step": 11982
    },
    {
      "epoch": 0.3495828228017971,
      "grad_norm": 1.0145605532643704,
      "learning_rate": 7.552592044406981e-06,
      "loss": 0.1389,
      "step": 11983
    },
    {
      "epoch": 0.34961199603244064,
      "grad_norm": 1.0319637108006643,
      "learning_rate": 7.552185802555687e-06,
      "loss": 0.1581,
      "step": 11984
    },
    {
      "epoch": 0.3496411692630842,
      "grad_norm": 0.8741925693978755,
      "learning_rate": 7.551779537919086e-06,
      "loss": 0.1383,
      "step": 11985
    },
    {
      "epoch": 0.34967034249372775,
      "grad_norm": 1.0485253410979751,
      "learning_rate": 7.551373250500806e-06,
      "loss": 0.1572,
      "step": 11986
    },
    {
      "epoch": 0.3496995157243713,
      "grad_norm": 0.8319919877014683,
      "learning_rate": 7.550966940304476e-06,
      "loss": 0.1624,
      "step": 11987
    },
    {
      "epoch": 0.34972868895501485,
      "grad_norm": 1.2454719213792826,
      "learning_rate": 7.550560607333721e-06,
      "loss": 0.1885,
      "step": 11988
    },
    {
      "epoch": 0.34975786218565846,
      "grad_norm": 1.0859185053467137,
      "learning_rate": 7.55015425159217e-06,
      "loss": 0.1508,
      "step": 11989
    },
    {
      "epoch": 0.349787035416302,
      "grad_norm": 0.8276987795363928,
      "learning_rate": 7.549747873083451e-06,
      "loss": 0.1772,
      "step": 11990
    },
    {
      "epoch": 0.34981620864694557,
      "grad_norm": 0.7641150272930679,
      "learning_rate": 7.549341471811192e-06,
      "loss": 0.1423,
      "step": 11991
    },
    {
      "epoch": 0.3498453818775891,
      "grad_norm": 0.9790484771841722,
      "learning_rate": 7.54893504777902e-06,
      "loss": 0.1496,
      "step": 11992
    },
    {
      "epoch": 0.3498745551082327,
      "grad_norm": 0.6392963206767323,
      "learning_rate": 7.548528600990565e-06,
      "loss": 0.1474,
      "step": 11993
    },
    {
      "epoch": 0.34990372833887623,
      "grad_norm": 0.7092662469286853,
      "learning_rate": 7.548122131449455e-06,
      "loss": 0.1316,
      "step": 11994
    },
    {
      "epoch": 0.3499329015695198,
      "grad_norm": 0.7921631611319702,
      "learning_rate": 7.547715639159319e-06,
      "loss": 0.1359,
      "step": 11995
    },
    {
      "epoch": 0.3499620748001634,
      "grad_norm": 0.7064379363786392,
      "learning_rate": 7.547309124123785e-06,
      "loss": 0.1493,
      "step": 11996
    },
    {
      "epoch": 0.34999124803080695,
      "grad_norm": 0.7506060493415203,
      "learning_rate": 7.546902586346483e-06,
      "loss": 0.1337,
      "step": 11997
    },
    {
      "epoch": 0.3500204212614505,
      "grad_norm": 0.6260097911179058,
      "learning_rate": 7.5464960258310435e-06,
      "loss": 0.1353,
      "step": 11998
    },
    {
      "epoch": 0.35004959449209405,
      "grad_norm": 0.8763805662986133,
      "learning_rate": 7.546089442581097e-06,
      "loss": 0.1387,
      "step": 11999
    },
    {
      "epoch": 0.3500787677227376,
      "grad_norm": 0.8494966847252503,
      "learning_rate": 7.545682836600269e-06,
      "loss": 0.1679,
      "step": 12000
    },
    {
      "epoch": 0.35010794095338116,
      "grad_norm": 0.7976569160323592,
      "learning_rate": 7.5452762078921935e-06,
      "loss": 0.1616,
      "step": 12001
    },
    {
      "epoch": 0.3501371141840247,
      "grad_norm": 0.9691712783063527,
      "learning_rate": 7.544869556460501e-06,
      "loss": 0.1499,
      "step": 12002
    },
    {
      "epoch": 0.3501662874146683,
      "grad_norm": 0.7424149450252084,
      "learning_rate": 7.544462882308818e-06,
      "loss": 0.1234,
      "step": 12003
    },
    {
      "epoch": 0.3501954606453119,
      "grad_norm": 0.7619736975676252,
      "learning_rate": 7.54405618544078e-06,
      "loss": 0.1315,
      "step": 12004
    },
    {
      "epoch": 0.3502246338759554,
      "grad_norm": 0.809507942534479,
      "learning_rate": 7.543649465860015e-06,
      "loss": 0.1285,
      "step": 12005
    },
    {
      "epoch": 0.350253807106599,
      "grad_norm": 0.939740138966658,
      "learning_rate": 7.543242723570154e-06,
      "loss": 0.1695,
      "step": 12006
    },
    {
      "epoch": 0.35028298033724253,
      "grad_norm": 0.9345311448229102,
      "learning_rate": 7.54283595857483e-06,
      "loss": 0.1522,
      "step": 12007
    },
    {
      "epoch": 0.3503121535678861,
      "grad_norm": 0.8543695874272043,
      "learning_rate": 7.542429170877672e-06,
      "loss": 0.1683,
      "step": 12008
    },
    {
      "epoch": 0.3503413267985297,
      "grad_norm": 1.0671623136526542,
      "learning_rate": 7.542022360482315e-06,
      "loss": 0.1223,
      "step": 12009
    },
    {
      "epoch": 0.35037050002917325,
      "grad_norm": 0.7888800643287854,
      "learning_rate": 7.54161552739239e-06,
      "loss": 0.1378,
      "step": 12010
    },
    {
      "epoch": 0.3503996732598168,
      "grad_norm": 0.8173120407487832,
      "learning_rate": 7.541208671611526e-06,
      "loss": 0.1497,
      "step": 12011
    },
    {
      "epoch": 0.35042884649046036,
      "grad_norm": 1.160749866646878,
      "learning_rate": 7.5408017931433585e-06,
      "loss": 0.1572,
      "step": 12012
    },
    {
      "epoch": 0.3504580197211039,
      "grad_norm": 0.9610673319523814,
      "learning_rate": 7.540394891991519e-06,
      "loss": 0.1611,
      "step": 12013
    },
    {
      "epoch": 0.35048719295174746,
      "grad_norm": 0.8450580634548185,
      "learning_rate": 7.539987968159641e-06,
      "loss": 0.1463,
      "step": 12014
    },
    {
      "epoch": 0.350516366182391,
      "grad_norm": 0.9142269293585659,
      "learning_rate": 7.539581021651357e-06,
      "loss": 0.1259,
      "step": 12015
    },
    {
      "epoch": 0.3505455394130346,
      "grad_norm": 0.923217182014225,
      "learning_rate": 7.539174052470299e-06,
      "loss": 0.1502,
      "step": 12016
    },
    {
      "epoch": 0.3505747126436782,
      "grad_norm": 0.9057778560682554,
      "learning_rate": 7.5387670606201e-06,
      "loss": 0.1492,
      "step": 12017
    },
    {
      "epoch": 0.35060388587432173,
      "grad_norm": 1.0498472866033246,
      "learning_rate": 7.538360046104396e-06,
      "loss": 0.1529,
      "step": 12018
    },
    {
      "epoch": 0.3506330591049653,
      "grad_norm": 0.8125394527005271,
      "learning_rate": 7.537953008926821e-06,
      "loss": 0.1377,
      "step": 12019
    },
    {
      "epoch": 0.35066223233560884,
      "grad_norm": 0.8165615823996295,
      "learning_rate": 7.537545949091005e-06,
      "loss": 0.1473,
      "step": 12020
    },
    {
      "epoch": 0.3506914055662524,
      "grad_norm": 0.7409799015249711,
      "learning_rate": 7.5371388666005866e-06,
      "loss": 0.1346,
      "step": 12021
    },
    {
      "epoch": 0.35072057879689594,
      "grad_norm": 0.8383167096198173,
      "learning_rate": 7.536731761459197e-06,
      "loss": 0.1342,
      "step": 12022
    },
    {
      "epoch": 0.35074975202753955,
      "grad_norm": 0.7865520515596042,
      "learning_rate": 7.536324633670471e-06,
      "loss": 0.1586,
      "step": 12023
    },
    {
      "epoch": 0.3507789252581831,
      "grad_norm": 0.757824135026902,
      "learning_rate": 7.535917483238047e-06,
      "loss": 0.1237,
      "step": 12024
    },
    {
      "epoch": 0.35080809848882666,
      "grad_norm": 0.8755553636142435,
      "learning_rate": 7.535510310165555e-06,
      "loss": 0.144,
      "step": 12025
    },
    {
      "epoch": 0.3508372717194702,
      "grad_norm": 0.755078719919549,
      "learning_rate": 7.535103114456631e-06,
      "loss": 0.1465,
      "step": 12026
    },
    {
      "epoch": 0.35086644495011376,
      "grad_norm": 0.9402033884650173,
      "learning_rate": 7.534695896114913e-06,
      "loss": 0.1559,
      "step": 12027
    },
    {
      "epoch": 0.3508956181807573,
      "grad_norm": 0.8540738477485491,
      "learning_rate": 7.5342886551440355e-06,
      "loss": 0.1448,
      "step": 12028
    },
    {
      "epoch": 0.35092479141140087,
      "grad_norm": 0.6425799279128513,
      "learning_rate": 7.533881391547633e-06,
      "loss": 0.154,
      "step": 12029
    },
    {
      "epoch": 0.3509539646420445,
      "grad_norm": 0.8609842832208886,
      "learning_rate": 7.533474105329343e-06,
      "loss": 0.1432,
      "step": 12030
    },
    {
      "epoch": 0.35098313787268803,
      "grad_norm": 0.718611385625933,
      "learning_rate": 7.5330667964928006e-06,
      "loss": 0.1636,
      "step": 12031
    },
    {
      "epoch": 0.3510123111033316,
      "grad_norm": 0.7689415807473821,
      "learning_rate": 7.5326594650416415e-06,
      "loss": 0.145,
      "step": 12032
    },
    {
      "epoch": 0.35104148433397514,
      "grad_norm": 0.6523245428096088,
      "learning_rate": 7.532252110979505e-06,
      "loss": 0.1612,
      "step": 12033
    },
    {
      "epoch": 0.3510706575646187,
      "grad_norm": 0.8062477362030178,
      "learning_rate": 7.531844734310025e-06,
      "loss": 0.1475,
      "step": 12034
    },
    {
      "epoch": 0.35109983079526225,
      "grad_norm": 0.7632641767439492,
      "learning_rate": 7.53143733503684e-06,
      "loss": 0.1475,
      "step": 12035
    },
    {
      "epoch": 0.35112900402590586,
      "grad_norm": 0.8321838797573419,
      "learning_rate": 7.5310299131635874e-06,
      "loss": 0.1442,
      "step": 12036
    },
    {
      "epoch": 0.3511581772565494,
      "grad_norm": 0.753205481611474,
      "learning_rate": 7.530622468693905e-06,
      "loss": 0.1425,
      "step": 12037
    },
    {
      "epoch": 0.35118735048719296,
      "grad_norm": 0.8974771231836078,
      "learning_rate": 7.530215001631426e-06,
      "loss": 0.1732,
      "step": 12038
    },
    {
      "epoch": 0.3512165237178365,
      "grad_norm": 0.6152625349741981,
      "learning_rate": 7.5298075119797945e-06,
      "loss": 0.1299,
      "step": 12039
    },
    {
      "epoch": 0.35124569694848007,
      "grad_norm": 0.7619754294519129,
      "learning_rate": 7.529399999742644e-06,
      "loss": 0.1226,
      "step": 12040
    },
    {
      "epoch": 0.3512748701791236,
      "grad_norm": 0.7399284271059957,
      "learning_rate": 7.528992464923615e-06,
      "loss": 0.1615,
      "step": 12041
    },
    {
      "epoch": 0.3513040434097672,
      "grad_norm": 0.963118361864318,
      "learning_rate": 7.528584907526343e-06,
      "loss": 0.1632,
      "step": 12042
    },
    {
      "epoch": 0.3513332166404108,
      "grad_norm": 0.8250067029946331,
      "learning_rate": 7.52817732755447e-06,
      "loss": 0.1532,
      "step": 12043
    },
    {
      "epoch": 0.35136238987105434,
      "grad_norm": 0.8625709749218062,
      "learning_rate": 7.5277697250116335e-06,
      "loss": 0.168,
      "step": 12044
    },
    {
      "epoch": 0.3513915631016979,
      "grad_norm": 0.8102563031029681,
      "learning_rate": 7.527362099901472e-06,
      "loss": 0.1555,
      "step": 12045
    },
    {
      "epoch": 0.35142073633234144,
      "grad_norm": 0.8905492280962982,
      "learning_rate": 7.526954452227626e-06,
      "loss": 0.1348,
      "step": 12046
    },
    {
      "epoch": 0.351449909562985,
      "grad_norm": 0.8863970891763994,
      "learning_rate": 7.526546781993731e-06,
      "loss": 0.1553,
      "step": 12047
    },
    {
      "epoch": 0.35147908279362855,
      "grad_norm": 0.7320287163447757,
      "learning_rate": 7.5261390892034315e-06,
      "loss": 0.1264,
      "step": 12048
    },
    {
      "epoch": 0.3515082560242721,
      "grad_norm": 0.8328094008963891,
      "learning_rate": 7.525731373860365e-06,
      "loss": 0.1497,
      "step": 12049
    },
    {
      "epoch": 0.3515374292549157,
      "grad_norm": 0.8290748429832105,
      "learning_rate": 7.525323635968171e-06,
      "loss": 0.1501,
      "step": 12050
    },
    {
      "epoch": 0.35156660248555927,
      "grad_norm": 0.7785605742897119,
      "learning_rate": 7.524915875530493e-06,
      "loss": 0.1314,
      "step": 12051
    },
    {
      "epoch": 0.3515957757162028,
      "grad_norm": 0.7153148581576247,
      "learning_rate": 7.524508092550968e-06,
      "loss": 0.1189,
      "step": 12052
    },
    {
      "epoch": 0.35162494894684637,
      "grad_norm": 1.00055863901744,
      "learning_rate": 7.524100287033235e-06,
      "loss": 0.1462,
      "step": 12053
    },
    {
      "epoch": 0.3516541221774899,
      "grad_norm": 0.7853007709733817,
      "learning_rate": 7.52369245898094e-06,
      "loss": 0.1563,
      "step": 12054
    },
    {
      "epoch": 0.3516832954081335,
      "grad_norm": 0.7423218991480369,
      "learning_rate": 7.523284608397718e-06,
      "loss": 0.1374,
      "step": 12055
    },
    {
      "epoch": 0.35171246863877703,
      "grad_norm": 0.9201515724969784,
      "learning_rate": 7.522876735287217e-06,
      "loss": 0.1408,
      "step": 12056
    },
    {
      "epoch": 0.35174164186942064,
      "grad_norm": 0.7792873540258134,
      "learning_rate": 7.522468839653072e-06,
      "loss": 0.1389,
      "step": 12057
    },
    {
      "epoch": 0.3517708151000642,
      "grad_norm": 0.7504657217148921,
      "learning_rate": 7.522060921498928e-06,
      "loss": 0.1244,
      "step": 12058
    },
    {
      "epoch": 0.35179998833070775,
      "grad_norm": 0.7462839313186306,
      "learning_rate": 7.521652980828427e-06,
      "loss": 0.1385,
      "step": 12059
    },
    {
      "epoch": 0.3518291615613513,
      "grad_norm": 0.7949877922378598,
      "learning_rate": 7.521245017645209e-06,
      "loss": 0.1484,
      "step": 12060
    },
    {
      "epoch": 0.35185833479199485,
      "grad_norm": 0.7792433715977214,
      "learning_rate": 7.520837031952919e-06,
      "loss": 0.1352,
      "step": 12061
    },
    {
      "epoch": 0.3518875080226384,
      "grad_norm": 0.7798477650475799,
      "learning_rate": 7.520429023755196e-06,
      "loss": 0.1487,
      "step": 12062
    },
    {
      "epoch": 0.351916681253282,
      "grad_norm": 0.7851479605761774,
      "learning_rate": 7.520020993055686e-06,
      "loss": 0.1606,
      "step": 12063
    },
    {
      "epoch": 0.35194585448392557,
      "grad_norm": 0.7797851748101124,
      "learning_rate": 7.5196129398580296e-06,
      "loss": 0.1318,
      "step": 12064
    },
    {
      "epoch": 0.3519750277145691,
      "grad_norm": 0.9523367546966441,
      "learning_rate": 7.51920486416587e-06,
      "loss": 0.1762,
      "step": 12065
    },
    {
      "epoch": 0.3520042009452127,
      "grad_norm": 0.7943619545766352,
      "learning_rate": 7.518796765982851e-06,
      "loss": 0.1298,
      "step": 12066
    },
    {
      "epoch": 0.35203337417585623,
      "grad_norm": 0.8896828727948359,
      "learning_rate": 7.518388645312615e-06,
      "loss": 0.1368,
      "step": 12067
    },
    {
      "epoch": 0.3520625474064998,
      "grad_norm": 0.7809993755085402,
      "learning_rate": 7.517980502158806e-06,
      "loss": 0.1214,
      "step": 12068
    },
    {
      "epoch": 0.35209172063714334,
      "grad_norm": 0.9034489189629171,
      "learning_rate": 7.51757233652507e-06,
      "loss": 0.1715,
      "step": 12069
    },
    {
      "epoch": 0.35212089386778694,
      "grad_norm": 0.7698354845054213,
      "learning_rate": 7.5171641484150484e-06,
      "loss": 0.1403,
      "step": 12070
    },
    {
      "epoch": 0.3521500670984305,
      "grad_norm": 0.9194387834832348,
      "learning_rate": 7.516755937832386e-06,
      "loss": 0.1399,
      "step": 12071
    },
    {
      "epoch": 0.35217924032907405,
      "grad_norm": 2.4641941778338987,
      "learning_rate": 7.516347704780726e-06,
      "loss": 0.1295,
      "step": 12072
    },
    {
      "epoch": 0.3522084135597176,
      "grad_norm": 0.6255735670416824,
      "learning_rate": 7.5159394492637175e-06,
      "loss": 0.1486,
      "step": 12073
    },
    {
      "epoch": 0.35223758679036116,
      "grad_norm": 0.815201660035071,
      "learning_rate": 7.5155311712849995e-06,
      "loss": 0.1256,
      "step": 12074
    },
    {
      "epoch": 0.3522667600210047,
      "grad_norm": 1.3017787395294462,
      "learning_rate": 7.515122870848222e-06,
      "loss": 0.1495,
      "step": 12075
    },
    {
      "epoch": 0.35229593325164826,
      "grad_norm": 0.8425767756467087,
      "learning_rate": 7.5147145479570275e-06,
      "loss": 0.1836,
      "step": 12076
    },
    {
      "epoch": 0.35232510648229187,
      "grad_norm": 0.8544688733768933,
      "learning_rate": 7.514306202615059e-06,
      "loss": 0.1378,
      "step": 12077
    },
    {
      "epoch": 0.3523542797129354,
      "grad_norm": 0.8529122529537075,
      "learning_rate": 7.513897834825967e-06,
      "loss": 0.1688,
      "step": 12078
    },
    {
      "epoch": 0.352383452943579,
      "grad_norm": 0.8849542775247677,
      "learning_rate": 7.513489444593396e-06,
      "loss": 0.1547,
      "step": 12079
    },
    {
      "epoch": 0.35241262617422253,
      "grad_norm": 0.7622060557294766,
      "learning_rate": 7.51308103192099e-06,
      "loss": 0.1611,
      "step": 12080
    },
    {
      "epoch": 0.3524417994048661,
      "grad_norm": 0.6766312717303754,
      "learning_rate": 7.512672596812397e-06,
      "loss": 0.1543,
      "step": 12081
    },
    {
      "epoch": 0.35247097263550964,
      "grad_norm": 0.7355151435826048,
      "learning_rate": 7.512264139271264e-06,
      "loss": 0.1495,
      "step": 12082
    },
    {
      "epoch": 0.3525001458661532,
      "grad_norm": 0.8755780711695437,
      "learning_rate": 7.511855659301232e-06,
      "loss": 0.1688,
      "step": 12083
    },
    {
      "epoch": 0.3525293190967968,
      "grad_norm": 0.9116905314087498,
      "learning_rate": 7.511447156905958e-06,
      "loss": 0.1417,
      "step": 12084
    },
    {
      "epoch": 0.35255849232744035,
      "grad_norm": 0.859793818173384,
      "learning_rate": 7.511038632089081e-06,
      "loss": 0.1485,
      "step": 12085
    },
    {
      "epoch": 0.3525876655580839,
      "grad_norm": 0.8647161525227471,
      "learning_rate": 7.510630084854249e-06,
      "loss": 0.1417,
      "step": 12086
    },
    {
      "epoch": 0.35261683878872746,
      "grad_norm": 0.8573088874550703,
      "learning_rate": 7.510221515205113e-06,
      "loss": 0.1461,
      "step": 12087
    },
    {
      "epoch": 0.352646012019371,
      "grad_norm": 0.8025920440243713,
      "learning_rate": 7.509812923145318e-06,
      "loss": 0.1323,
      "step": 12088
    },
    {
      "epoch": 0.35267518525001457,
      "grad_norm": 0.9133724786435535,
      "learning_rate": 7.509404308678512e-06,
      "loss": 0.1675,
      "step": 12089
    },
    {
      "epoch": 0.3527043584806582,
      "grad_norm": 1.091990138478178,
      "learning_rate": 7.5089956718083435e-06,
      "loss": 0.1413,
      "step": 12090
    },
    {
      "epoch": 0.35273353171130173,
      "grad_norm": 0.8776644025683882,
      "learning_rate": 7.508587012538462e-06,
      "loss": 0.1661,
      "step": 12091
    },
    {
      "epoch": 0.3527627049419453,
      "grad_norm": 0.931144277232217,
      "learning_rate": 7.508178330872512e-06,
      "loss": 0.1553,
      "step": 12092
    },
    {
      "epoch": 0.35279187817258884,
      "grad_norm": 0.9601895583663163,
      "learning_rate": 7.507769626814145e-06,
      "loss": 0.1362,
      "step": 12093
    },
    {
      "epoch": 0.3528210514032324,
      "grad_norm": 0.8446935053892277,
      "learning_rate": 7.507360900367011e-06,
      "loss": 0.1468,
      "step": 12094
    },
    {
      "epoch": 0.35285022463387594,
      "grad_norm": 0.8883699535563186,
      "learning_rate": 7.5069521515347565e-06,
      "loss": 0.1702,
      "step": 12095
    },
    {
      "epoch": 0.3528793978645195,
      "grad_norm": 0.9515818571757315,
      "learning_rate": 7.506543380321032e-06,
      "loss": 0.1325,
      "step": 12096
    },
    {
      "epoch": 0.3529085710951631,
      "grad_norm": 0.7833744001885308,
      "learning_rate": 7.5061345867294875e-06,
      "loss": 0.1505,
      "step": 12097
    },
    {
      "epoch": 0.35293774432580666,
      "grad_norm": 0.8904929112833724,
      "learning_rate": 7.505725770763769e-06,
      "loss": 0.1545,
      "step": 12098
    },
    {
      "epoch": 0.3529669175564502,
      "grad_norm": 0.7462988140846417,
      "learning_rate": 7.505316932427531e-06,
      "loss": 0.1457,
      "step": 12099
    },
    {
      "epoch": 0.35299609078709376,
      "grad_norm": 0.8340269984700273,
      "learning_rate": 7.504908071724422e-06,
      "loss": 0.1401,
      "step": 12100
    },
    {
      "epoch": 0.3530252640177373,
      "grad_norm": 0.9812345267265725,
      "learning_rate": 7.5044991886580895e-06,
      "loss": 0.1387,
      "step": 12101
    },
    {
      "epoch": 0.35305443724838087,
      "grad_norm": 0.7395035014847263,
      "learning_rate": 7.504090283232188e-06,
      "loss": 0.1306,
      "step": 12102
    },
    {
      "epoch": 0.3530836104790244,
      "grad_norm": 0.9028502130789045,
      "learning_rate": 7.503681355450365e-06,
      "loss": 0.1692,
      "step": 12103
    },
    {
      "epoch": 0.35311278370966803,
      "grad_norm": 0.7143190591113868,
      "learning_rate": 7.503272405316273e-06,
      "loss": 0.1315,
      "step": 12104
    },
    {
      "epoch": 0.3531419569403116,
      "grad_norm": 0.6872319562248489,
      "learning_rate": 7.502863432833563e-06,
      "loss": 0.1384,
      "step": 12105
    },
    {
      "epoch": 0.35317113017095514,
      "grad_norm": 0.9539852257019577,
      "learning_rate": 7.502454438005886e-06,
      "loss": 0.1786,
      "step": 12106
    },
    {
      "epoch": 0.3532003034015987,
      "grad_norm": 0.9964917694248208,
      "learning_rate": 7.502045420836892e-06,
      "loss": 0.1495,
      "step": 12107
    },
    {
      "epoch": 0.35322947663224225,
      "grad_norm": 0.990353130855816,
      "learning_rate": 7.501636381330234e-06,
      "loss": 0.157,
      "step": 12108
    },
    {
      "epoch": 0.3532586498628858,
      "grad_norm": 1.0891538696380902,
      "learning_rate": 7.5012273194895655e-06,
      "loss": 0.156,
      "step": 12109
    },
    {
      "epoch": 0.35328782309352935,
      "grad_norm": 1.0180733110929292,
      "learning_rate": 7.500818235318533e-06,
      "loss": 0.1466,
      "step": 12110
    },
    {
      "epoch": 0.35331699632417296,
      "grad_norm": 1.0325203116977864,
      "learning_rate": 7.5004091288207956e-06,
      "loss": 0.1498,
      "step": 12111
    },
    {
      "epoch": 0.3533461695548165,
      "grad_norm": 0.6384166936673309,
      "learning_rate": 7.500000000000001e-06,
      "loss": 0.1304,
      "step": 12112
    },
    {
      "epoch": 0.35337534278546007,
      "grad_norm": 0.8528566954497445,
      "learning_rate": 7.499590848859802e-06,
      "loss": 0.1439,
      "step": 12113
    },
    {
      "epoch": 0.3534045160161036,
      "grad_norm": 0.8353246956365478,
      "learning_rate": 7.499181675403855e-06,
      "loss": 0.1413,
      "step": 12114
    },
    {
      "epoch": 0.3534336892467472,
      "grad_norm": 0.782769096074687,
      "learning_rate": 7.49877247963581e-06,
      "loss": 0.1422,
      "step": 12115
    },
    {
      "epoch": 0.3534628624773907,
      "grad_norm": 0.748058712896468,
      "learning_rate": 7.49836326155932e-06,
      "loss": 0.1436,
      "step": 12116
    },
    {
      "epoch": 0.3534920357080343,
      "grad_norm": 0.8964088295486463,
      "learning_rate": 7.4979540211780396e-06,
      "loss": 0.1508,
      "step": 12117
    },
    {
      "epoch": 0.3535212089386779,
      "grad_norm": 0.7038898495325068,
      "learning_rate": 7.497544758495622e-06,
      "loss": 0.1356,
      "step": 12118
    },
    {
      "epoch": 0.35355038216932144,
      "grad_norm": 0.6770117758968943,
      "learning_rate": 7.497135473515719e-06,
      "loss": 0.1367,
      "step": 12119
    },
    {
      "epoch": 0.353579555399965,
      "grad_norm": 0.9081026508989675,
      "learning_rate": 7.49672616624199e-06,
      "loss": 0.1538,
      "step": 12120
    },
    {
      "epoch": 0.35360872863060855,
      "grad_norm": 0.9105090322175228,
      "learning_rate": 7.496316836678085e-06,
      "loss": 0.1468,
      "step": 12121
    },
    {
      "epoch": 0.3536379018612521,
      "grad_norm": 1.0005440326874793,
      "learning_rate": 7.495907484827658e-06,
      "loss": 0.1595,
      "step": 12122
    },
    {
      "epoch": 0.35366707509189566,
      "grad_norm": 0.943506731773164,
      "learning_rate": 7.495498110694364e-06,
      "loss": 0.1313,
      "step": 12123
    },
    {
      "epoch": 0.35369624832253926,
      "grad_norm": 0.9540944309194603,
      "learning_rate": 7.495088714281862e-06,
      "loss": 0.1451,
      "step": 12124
    },
    {
      "epoch": 0.3537254215531828,
      "grad_norm": 1.1399722373912111,
      "learning_rate": 7.494679295593801e-06,
      "loss": 0.1469,
      "step": 12125
    },
    {
      "epoch": 0.35375459478382637,
      "grad_norm": 0.8379482755652226,
      "learning_rate": 7.49426985463384e-06,
      "loss": 0.1518,
      "step": 12126
    },
    {
      "epoch": 0.3537837680144699,
      "grad_norm": 0.889161350385342,
      "learning_rate": 7.493860391405632e-06,
      "loss": 0.1539,
      "step": 12127
    },
    {
      "epoch": 0.3538129412451135,
      "grad_norm": 1.0597870595233896,
      "learning_rate": 7.4934509059128334e-06,
      "loss": 0.1491,
      "step": 12128
    },
    {
      "epoch": 0.35384211447575703,
      "grad_norm": 0.7716787255958263,
      "learning_rate": 7.493041398159102e-06,
      "loss": 0.1492,
      "step": 12129
    },
    {
      "epoch": 0.3538712877064006,
      "grad_norm": 0.7270262602920922,
      "learning_rate": 7.49263186814809e-06,
      "loss": 0.1298,
      "step": 12130
    },
    {
      "epoch": 0.3539004609370442,
      "grad_norm": 1.020961829125233,
      "learning_rate": 7.492222315883458e-06,
      "loss": 0.149,
      "step": 12131
    },
    {
      "epoch": 0.35392963416768775,
      "grad_norm": 0.8551619094051521,
      "learning_rate": 7.491812741368859e-06,
      "loss": 0.1781,
      "step": 12132
    },
    {
      "epoch": 0.3539588073983313,
      "grad_norm": 0.7148504836663766,
      "learning_rate": 7.491403144607951e-06,
      "loss": 0.1584,
      "step": 12133
    },
    {
      "epoch": 0.35398798062897485,
      "grad_norm": 0.7310860409671713,
      "learning_rate": 7.490993525604389e-06,
      "loss": 0.1528,
      "step": 12134
    },
    {
      "epoch": 0.3540171538596184,
      "grad_norm": 0.8109727413891358,
      "learning_rate": 7.490583884361834e-06,
      "loss": 0.142,
      "step": 12135
    },
    {
      "epoch": 0.35404632709026196,
      "grad_norm": 0.719566074690827,
      "learning_rate": 7.49017422088394e-06,
      "loss": 0.1671,
      "step": 12136
    },
    {
      "epoch": 0.3540755003209055,
      "grad_norm": 0.7792648338872127,
      "learning_rate": 7.489764535174363e-06,
      "loss": 0.15,
      "step": 12137
    },
    {
      "epoch": 0.3541046735515491,
      "grad_norm": 1.1983618739593251,
      "learning_rate": 7.489354827236765e-06,
      "loss": 0.1703,
      "step": 12138
    },
    {
      "epoch": 0.3541338467821927,
      "grad_norm": 0.8303239451422616,
      "learning_rate": 7.4889450970748e-06,
      "loss": 0.133,
      "step": 12139
    },
    {
      "epoch": 0.3541630200128362,
      "grad_norm": 0.9016886227944668,
      "learning_rate": 7.488535344692127e-06,
      "loss": 0.1457,
      "step": 12140
    },
    {
      "epoch": 0.3541921932434798,
      "grad_norm": 0.8093533800280821,
      "learning_rate": 7.488125570092406e-06,
      "loss": 0.1544,
      "step": 12141
    },
    {
      "epoch": 0.35422136647412333,
      "grad_norm": 0.8239954533166405,
      "learning_rate": 7.487715773279293e-06,
      "loss": 0.1475,
      "step": 12142
    },
    {
      "epoch": 0.3542505397047669,
      "grad_norm": 1.099868567715278,
      "learning_rate": 7.4873059542564465e-06,
      "loss": 0.1468,
      "step": 12143
    },
    {
      "epoch": 0.35427971293541044,
      "grad_norm": 1.0263479613663975,
      "learning_rate": 7.486896113027528e-06,
      "loss": 0.1479,
      "step": 12144
    },
    {
      "epoch": 0.35430888616605405,
      "grad_norm": 0.7351293633729539,
      "learning_rate": 7.486486249596194e-06,
      "loss": 0.1572,
      "step": 12145
    },
    {
      "epoch": 0.3543380593966976,
      "grad_norm": 0.7289195572148732,
      "learning_rate": 7.486076363966104e-06,
      "loss": 0.1359,
      "step": 12146
    },
    {
      "epoch": 0.35436723262734116,
      "grad_norm": 0.9182815224095634,
      "learning_rate": 7.485666456140918e-06,
      "loss": 0.1309,
      "step": 12147
    },
    {
      "epoch": 0.3543964058579847,
      "grad_norm": 0.8835430736644039,
      "learning_rate": 7.485256526124295e-06,
      "loss": 0.1657,
      "step": 12148
    },
    {
      "epoch": 0.35442557908862826,
      "grad_norm": 0.8613889013926775,
      "learning_rate": 7.484846573919895e-06,
      "loss": 0.1398,
      "step": 12149
    },
    {
      "epoch": 0.3544547523192718,
      "grad_norm": 0.9474985870617246,
      "learning_rate": 7.484436599531377e-06,
      "loss": 0.1483,
      "step": 12150
    },
    {
      "epoch": 0.3544839255499154,
      "grad_norm": 0.8719680967907784,
      "learning_rate": 7.484026602962405e-06,
      "loss": 0.156,
      "step": 12151
    },
    {
      "epoch": 0.354513098780559,
      "grad_norm": 0.9192150275876452,
      "learning_rate": 7.483616584216633e-06,
      "loss": 0.1311,
      "step": 12152
    },
    {
      "epoch": 0.35454227201120253,
      "grad_norm": 0.8408136692254884,
      "learning_rate": 7.483206543297727e-06,
      "loss": 0.1274,
      "step": 12153
    },
    {
      "epoch": 0.3545714452418461,
      "grad_norm": 1.3566829517458583,
      "learning_rate": 7.482796480209346e-06,
      "loss": 0.1455,
      "step": 12154
    },
    {
      "epoch": 0.35460061847248964,
      "grad_norm": 0.9014132017831294,
      "learning_rate": 7.48238639495515e-06,
      "loss": 0.1445,
      "step": 12155
    },
    {
      "epoch": 0.3546297917031332,
      "grad_norm": 0.8463275021589733,
      "learning_rate": 7.481976287538802e-06,
      "loss": 0.1471,
      "step": 12156
    },
    {
      "epoch": 0.35465896493377674,
      "grad_norm": 0.9980583664623567,
      "learning_rate": 7.481566157963961e-06,
      "loss": 0.1442,
      "step": 12157
    },
    {
      "epoch": 0.35468813816442035,
      "grad_norm": 0.8458981667551823,
      "learning_rate": 7.481156006234289e-06,
      "loss": 0.1308,
      "step": 12158
    },
    {
      "epoch": 0.3547173113950639,
      "grad_norm": 0.8842798179502824,
      "learning_rate": 7.480745832353451e-06,
      "loss": 0.1355,
      "step": 12159
    },
    {
      "epoch": 0.35474648462570746,
      "grad_norm": 0.9249057662349253,
      "learning_rate": 7.480335636325104e-06,
      "loss": 0.1516,
      "step": 12160
    },
    {
      "epoch": 0.354775657856351,
      "grad_norm": 0.9525575932168225,
      "learning_rate": 7.479925418152914e-06,
      "loss": 0.1372,
      "step": 12161
    },
    {
      "epoch": 0.35480483108699457,
      "grad_norm": 0.9058629692852751,
      "learning_rate": 7.479515177840542e-06,
      "loss": 0.1445,
      "step": 12162
    },
    {
      "epoch": 0.3548340043176381,
      "grad_norm": 0.93263703609085,
      "learning_rate": 7.479104915391649e-06,
      "loss": 0.1602,
      "step": 12163
    },
    {
      "epoch": 0.35486317754828167,
      "grad_norm": 0.8515894085441673,
      "learning_rate": 7.478694630809899e-06,
      "loss": 0.1168,
      "step": 12164
    },
    {
      "epoch": 0.3548923507789253,
      "grad_norm": 0.8805126690114949,
      "learning_rate": 7.478284324098957e-06,
      "loss": 0.1408,
      "step": 12165
    },
    {
      "epoch": 0.35492152400956883,
      "grad_norm": 0.7426349012566554,
      "learning_rate": 7.4778739952624835e-06,
      "loss": 0.1591,
      "step": 12166
    },
    {
      "epoch": 0.3549506972402124,
      "grad_norm": 0.6397230883071294,
      "learning_rate": 7.477463644304141e-06,
      "loss": 0.1356,
      "step": 12167
    },
    {
      "epoch": 0.35497987047085594,
      "grad_norm": 0.8982364452423646,
      "learning_rate": 7.477053271227596e-06,
      "loss": 0.1417,
      "step": 12168
    },
    {
      "epoch": 0.3550090437014995,
      "grad_norm": 0.8447914658551411,
      "learning_rate": 7.47664287603651e-06,
      "loss": 0.1737,
      "step": 12169
    },
    {
      "epoch": 0.35503821693214305,
      "grad_norm": 0.8370376273579871,
      "learning_rate": 7.476232458734547e-06,
      "loss": 0.1503,
      "step": 12170
    },
    {
      "epoch": 0.3550673901627866,
      "grad_norm": 0.8019143284953446,
      "learning_rate": 7.475822019325374e-06,
      "loss": 0.129,
      "step": 12171
    },
    {
      "epoch": 0.3550965633934302,
      "grad_norm": 0.9026727682417437,
      "learning_rate": 7.475411557812652e-06,
      "loss": 0.1345,
      "step": 12172
    },
    {
      "epoch": 0.35512573662407376,
      "grad_norm": 0.8508773884433103,
      "learning_rate": 7.4750010742000445e-06,
      "loss": 0.1414,
      "step": 12173
    },
    {
      "epoch": 0.3551549098547173,
      "grad_norm": 0.9824204270516677,
      "learning_rate": 7.474590568491222e-06,
      "loss": 0.1418,
      "step": 12174
    },
    {
      "epoch": 0.35518408308536087,
      "grad_norm": 0.8934801781268447,
      "learning_rate": 7.474180040689842e-06,
      "loss": 0.1409,
      "step": 12175
    },
    {
      "epoch": 0.3552132563160044,
      "grad_norm": 0.9221120609784726,
      "learning_rate": 7.473769490799575e-06,
      "loss": 0.1524,
      "step": 12176
    },
    {
      "epoch": 0.355242429546648,
      "grad_norm": 0.9789817786148268,
      "learning_rate": 7.473358918824085e-06,
      "loss": 0.1619,
      "step": 12177
    },
    {
      "epoch": 0.3552716027772916,
      "grad_norm": 0.7818843977047737,
      "learning_rate": 7.472948324767035e-06,
      "loss": 0.1468,
      "step": 12178
    },
    {
      "epoch": 0.35530077600793514,
      "grad_norm": 0.8606870497211695,
      "learning_rate": 7.472537708632095e-06,
      "loss": 0.1323,
      "step": 12179
    },
    {
      "epoch": 0.3553299492385787,
      "grad_norm": 0.8906836695819145,
      "learning_rate": 7.472127070422928e-06,
      "loss": 0.1628,
      "step": 12180
    },
    {
      "epoch": 0.35535912246922224,
      "grad_norm": 0.8088359794507107,
      "learning_rate": 7.4717164101432e-06,
      "loss": 0.122,
      "step": 12181
    },
    {
      "epoch": 0.3553882956998658,
      "grad_norm": 0.8566020698157975,
      "learning_rate": 7.471305727796579e-06,
      "loss": 0.1158,
      "step": 12182
    },
    {
      "epoch": 0.35541746893050935,
      "grad_norm": 0.8750096258289879,
      "learning_rate": 7.470895023386728e-06,
      "loss": 0.1742,
      "step": 12183
    },
    {
      "epoch": 0.3554466421611529,
      "grad_norm": 0.8400584190518494,
      "learning_rate": 7.470484296917319e-06,
      "loss": 0.1523,
      "step": 12184
    },
    {
      "epoch": 0.3554758153917965,
      "grad_norm": 1.110400780793465,
      "learning_rate": 7.470073548392014e-06,
      "loss": 0.146,
      "step": 12185
    },
    {
      "epoch": 0.35550498862244007,
      "grad_norm": 0.8514545642807553,
      "learning_rate": 7.469662777814484e-06,
      "loss": 0.1428,
      "step": 12186
    },
    {
      "epoch": 0.3555341618530836,
      "grad_norm": 0.8435930628979348,
      "learning_rate": 7.469251985188392e-06,
      "loss": 0.1775,
      "step": 12187
    },
    {
      "epoch": 0.35556333508372717,
      "grad_norm": 0.727108203502303,
      "learning_rate": 7.468841170517408e-06,
      "loss": 0.1388,
      "step": 12188
    },
    {
      "epoch": 0.3555925083143707,
      "grad_norm": 1.5890076218904514,
      "learning_rate": 7.468430333805201e-06,
      "loss": 0.1307,
      "step": 12189
    },
    {
      "epoch": 0.3556216815450143,
      "grad_norm": 0.8050188464842346,
      "learning_rate": 7.468019475055436e-06,
      "loss": 0.1615,
      "step": 12190
    },
    {
      "epoch": 0.35565085477565783,
      "grad_norm": 0.6650859130140865,
      "learning_rate": 7.467608594271782e-06,
      "loss": 0.1486,
      "step": 12191
    },
    {
      "epoch": 0.35568002800630144,
      "grad_norm": 0.8896317278270026,
      "learning_rate": 7.467197691457908e-06,
      "loss": 0.1446,
      "step": 12192
    },
    {
      "epoch": 0.355709201236945,
      "grad_norm": 0.7923808324329165,
      "learning_rate": 7.466786766617482e-06,
      "loss": 0.1321,
      "step": 12193
    },
    {
      "epoch": 0.35573837446758855,
      "grad_norm": 0.8962565546218314,
      "learning_rate": 7.466375819754173e-06,
      "loss": 0.1333,
      "step": 12194
    },
    {
      "epoch": 0.3557675476982321,
      "grad_norm": 0.8146336102797297,
      "learning_rate": 7.46596485087165e-06,
      "loss": 0.1294,
      "step": 12195
    },
    {
      "epoch": 0.35579672092887565,
      "grad_norm": 0.8661790351439472,
      "learning_rate": 7.465553859973581e-06,
      "loss": 0.1432,
      "step": 12196
    },
    {
      "epoch": 0.3558258941595192,
      "grad_norm": 0.8719026272812894,
      "learning_rate": 7.465142847063634e-06,
      "loss": 0.154,
      "step": 12197
    },
    {
      "epoch": 0.35585506739016276,
      "grad_norm": 0.6680003649006871,
      "learning_rate": 7.464731812145483e-06,
      "loss": 0.1184,
      "step": 12198
    },
    {
      "epoch": 0.35588424062080637,
      "grad_norm": 0.7488816345308577,
      "learning_rate": 7.464320755222793e-06,
      "loss": 0.1653,
      "step": 12199
    },
    {
      "epoch": 0.3559134138514499,
      "grad_norm": 0.8290221349289797,
      "learning_rate": 7.4639096762992345e-06,
      "loss": 0.1359,
      "step": 12200
    },
    {
      "epoch": 0.3559425870820935,
      "grad_norm": 1.0037579553709008,
      "learning_rate": 7.463498575378482e-06,
      "loss": 0.1589,
      "step": 12201
    },
    {
      "epoch": 0.35597176031273703,
      "grad_norm": 0.7962303807839208,
      "learning_rate": 7.463087452464199e-06,
      "loss": 0.1403,
      "step": 12202
    },
    {
      "epoch": 0.3560009335433806,
      "grad_norm": 0.8988662046657548,
      "learning_rate": 7.462676307560059e-06,
      "loss": 0.1664,
      "step": 12203
    },
    {
      "epoch": 0.35603010677402414,
      "grad_norm": 0.8946106540425235,
      "learning_rate": 7.462265140669735e-06,
      "loss": 0.1488,
      "step": 12204
    },
    {
      "epoch": 0.35605928000466774,
      "grad_norm": 0.866532976364385,
      "learning_rate": 7.461853951796895e-06,
      "loss": 0.1566,
      "step": 12205
    },
    {
      "epoch": 0.3560884532353113,
      "grad_norm": 0.8344186923159563,
      "learning_rate": 7.4614427409452116e-06,
      "loss": 0.1466,
      "step": 12206
    },
    {
      "epoch": 0.35611762646595485,
      "grad_norm": 1.290096770545583,
      "learning_rate": 7.461031508118354e-06,
      "loss": 0.1577,
      "step": 12207
    },
    {
      "epoch": 0.3561467996965984,
      "grad_norm": 0.8999158766283113,
      "learning_rate": 7.4606202533199945e-06,
      "loss": 0.1697,
      "step": 12208
    },
    {
      "epoch": 0.35617597292724196,
      "grad_norm": 0.9195358674976805,
      "learning_rate": 7.460208976553804e-06,
      "loss": 0.1698,
      "step": 12209
    },
    {
      "epoch": 0.3562051461578855,
      "grad_norm": 1.03287815865787,
      "learning_rate": 7.459797677823456e-06,
      "loss": 0.1179,
      "step": 12210
    },
    {
      "epoch": 0.35623431938852906,
      "grad_norm": 0.8481792372747801,
      "learning_rate": 7.4593863571326204e-06,
      "loss": 0.1521,
      "step": 12211
    },
    {
      "epoch": 0.3562634926191727,
      "grad_norm": 0.905091407798776,
      "learning_rate": 7.458975014484972e-06,
      "loss": 0.1374,
      "step": 12212
    },
    {
      "epoch": 0.3562926658498162,
      "grad_norm": 0.9903840552254256,
      "learning_rate": 7.458563649884182e-06,
      "loss": 0.1383,
      "step": 12213
    },
    {
      "epoch": 0.3563218390804598,
      "grad_norm": 0.8566119220499994,
      "learning_rate": 7.458152263333921e-06,
      "loss": 0.188,
      "step": 12214
    },
    {
      "epoch": 0.35635101231110333,
      "grad_norm": 0.806007458321855,
      "learning_rate": 7.457740854837865e-06,
      "loss": 0.1359,
      "step": 12215
    },
    {
      "epoch": 0.3563801855417469,
      "grad_norm": 0.846336649082963,
      "learning_rate": 7.457329424399685e-06,
      "loss": 0.1636,
      "step": 12216
    },
    {
      "epoch": 0.35640935877239044,
      "grad_norm": 0.9295740844894287,
      "learning_rate": 7.456917972023052e-06,
      "loss": 0.1261,
      "step": 12217
    },
    {
      "epoch": 0.356438532003034,
      "grad_norm": 0.6394264915549989,
      "learning_rate": 7.456506497711644e-06,
      "loss": 0.1388,
      "step": 12218
    },
    {
      "epoch": 0.3564677052336776,
      "grad_norm": 0.8709909836795148,
      "learning_rate": 7.456095001469135e-06,
      "loss": 0.1593,
      "step": 12219
    },
    {
      "epoch": 0.35649687846432115,
      "grad_norm": 0.9421989226538512,
      "learning_rate": 7.455683483299192e-06,
      "loss": 0.1728,
      "step": 12220
    },
    {
      "epoch": 0.3565260516949647,
      "grad_norm": 0.8111037998968257,
      "learning_rate": 7.455271943205495e-06,
      "loss": 0.1569,
      "step": 12221
    },
    {
      "epoch": 0.35655522492560826,
      "grad_norm": 0.8724484550037249,
      "learning_rate": 7.4548603811917155e-06,
      "loss": 0.1377,
      "step": 12222
    },
    {
      "epoch": 0.3565843981562518,
      "grad_norm": 0.7079865680622673,
      "learning_rate": 7.454448797261529e-06,
      "loss": 0.1614,
      "step": 12223
    },
    {
      "epoch": 0.35661357138689537,
      "grad_norm": 0.7457006911986671,
      "learning_rate": 7.45403719141861e-06,
      "loss": 0.1599,
      "step": 12224
    },
    {
      "epoch": 0.3566427446175389,
      "grad_norm": 0.7775430973464645,
      "learning_rate": 7.453625563666631e-06,
      "loss": 0.1179,
      "step": 12225
    },
    {
      "epoch": 0.35667191784818253,
      "grad_norm": 0.70813607212652,
      "learning_rate": 7.4532139140092694e-06,
      "loss": 0.136,
      "step": 12226
    },
    {
      "epoch": 0.3567010910788261,
      "grad_norm": 0.757494936279595,
      "learning_rate": 7.452802242450201e-06,
      "loss": 0.1359,
      "step": 12227
    },
    {
      "epoch": 0.35673026430946964,
      "grad_norm": 0.7858158925237995,
      "learning_rate": 7.452390548993098e-06,
      "loss": 0.1293,
      "step": 12228
    },
    {
      "epoch": 0.3567594375401132,
      "grad_norm": 0.7511279403357667,
      "learning_rate": 7.451978833641639e-06,
      "loss": 0.1476,
      "step": 12229
    },
    {
      "epoch": 0.35678861077075674,
      "grad_norm": 0.7953282518094266,
      "learning_rate": 7.451567096399497e-06,
      "loss": 0.1462,
      "step": 12230
    },
    {
      "epoch": 0.3568177840014003,
      "grad_norm": 0.7967242197118525,
      "learning_rate": 7.45115533727035e-06,
      "loss": 0.1433,
      "step": 12231
    },
    {
      "epoch": 0.3568469572320439,
      "grad_norm": 0.9466414202769508,
      "learning_rate": 7.450743556257874e-06,
      "loss": 0.1494,
      "step": 12232
    },
    {
      "epoch": 0.35687613046268746,
      "grad_norm": 0.7434133201655584,
      "learning_rate": 7.450331753365743e-06,
      "loss": 0.1479,
      "step": 12233
    },
    {
      "epoch": 0.356905303693331,
      "grad_norm": 1.0130427033764855,
      "learning_rate": 7.449919928597637e-06,
      "loss": 0.1489,
      "step": 12234
    },
    {
      "epoch": 0.35693447692397456,
      "grad_norm": 1.0454899133306563,
      "learning_rate": 7.449508081957228e-06,
      "loss": 0.1357,
      "step": 12235
    },
    {
      "epoch": 0.3569636501546181,
      "grad_norm": 1.060813850156249,
      "learning_rate": 7.449096213448198e-06,
      "loss": 0.1651,
      "step": 12236
    },
    {
      "epoch": 0.35699282338526167,
      "grad_norm": 0.9166046971015583,
      "learning_rate": 7.44868432307422e-06,
      "loss": 0.1484,
      "step": 12237
    },
    {
      "epoch": 0.3570219966159052,
      "grad_norm": 1.0708409711345588,
      "learning_rate": 7.448272410838975e-06,
      "loss": 0.1642,
      "step": 12238
    },
    {
      "epoch": 0.35705116984654883,
      "grad_norm": 0.9934534541385831,
      "learning_rate": 7.447860476746136e-06,
      "loss": 0.1529,
      "step": 12239
    },
    {
      "epoch": 0.3570803430771924,
      "grad_norm": 0.8925425957204665,
      "learning_rate": 7.447448520799384e-06,
      "loss": 0.1325,
      "step": 12240
    },
    {
      "epoch": 0.35710951630783594,
      "grad_norm": 0.8122120542844333,
      "learning_rate": 7.447036543002396e-06,
      "loss": 0.1586,
      "step": 12241
    },
    {
      "epoch": 0.3571386895384795,
      "grad_norm": 0.7963038408521792,
      "learning_rate": 7.4466245433588495e-06,
      "loss": 0.1258,
      "step": 12242
    },
    {
      "epoch": 0.35716786276912305,
      "grad_norm": 0.8484007574660379,
      "learning_rate": 7.4462125218724236e-06,
      "loss": 0.1231,
      "step": 12243
    },
    {
      "epoch": 0.3571970359997666,
      "grad_norm": 0.71054399049409,
      "learning_rate": 7.445800478546796e-06,
      "loss": 0.1538,
      "step": 12244
    },
    {
      "epoch": 0.35722620923041015,
      "grad_norm": 0.8263265182463267,
      "learning_rate": 7.445388413385646e-06,
      "loss": 0.1727,
      "step": 12245
    },
    {
      "epoch": 0.35725538246105376,
      "grad_norm": 0.7054479997653809,
      "learning_rate": 7.444976326392652e-06,
      "loss": 0.1351,
      "step": 12246
    },
    {
      "epoch": 0.3572845556916973,
      "grad_norm": 0.8755809091412908,
      "learning_rate": 7.444564217571491e-06,
      "loss": 0.1744,
      "step": 12247
    },
    {
      "epoch": 0.35731372892234087,
      "grad_norm": 1.0175376639742526,
      "learning_rate": 7.444152086925847e-06,
      "loss": 0.1346,
      "step": 12248
    },
    {
      "epoch": 0.3573429021529844,
      "grad_norm": 0.7427987981336198,
      "learning_rate": 7.443739934459397e-06,
      "loss": 0.1399,
      "step": 12249
    },
    {
      "epoch": 0.357372075383628,
      "grad_norm": 0.933542900671776,
      "learning_rate": 7.443327760175817e-06,
      "loss": 0.1365,
      "step": 12250
    },
    {
      "epoch": 0.3574012486142715,
      "grad_norm": 0.8583530360675189,
      "learning_rate": 7.442915564078793e-06,
      "loss": 0.1341,
      "step": 12251
    },
    {
      "epoch": 0.3574304218449151,
      "grad_norm": 0.7579398069238198,
      "learning_rate": 7.442503346172001e-06,
      "loss": 0.1376,
      "step": 12252
    },
    {
      "epoch": 0.3574595950755587,
      "grad_norm": 0.9293926173443993,
      "learning_rate": 7.4420911064591215e-06,
      "loss": 0.1316,
      "step": 12253
    },
    {
      "epoch": 0.35748876830620224,
      "grad_norm": 0.6904763213668176,
      "learning_rate": 7.441678844943836e-06,
      "loss": 0.1417,
      "step": 12254
    },
    {
      "epoch": 0.3575179415368458,
      "grad_norm": 0.8402918439439921,
      "learning_rate": 7.441266561629825e-06,
      "loss": 0.1549,
      "step": 12255
    },
    {
      "epoch": 0.35754711476748935,
      "grad_norm": 0.7303999063960172,
      "learning_rate": 7.440854256520769e-06,
      "loss": 0.1352,
      "step": 12256
    },
    {
      "epoch": 0.3575762879981329,
      "grad_norm": 0.9822577952961637,
      "learning_rate": 7.440441929620348e-06,
      "loss": 0.1388,
      "step": 12257
    },
    {
      "epoch": 0.35760546122877646,
      "grad_norm": 0.9098563102938438,
      "learning_rate": 7.4400295809322445e-06,
      "loss": 0.1602,
      "step": 12258
    },
    {
      "epoch": 0.35763463445942,
      "grad_norm": 0.9816882972557667,
      "learning_rate": 7.439617210460139e-06,
      "loss": 0.1449,
      "step": 12259
    },
    {
      "epoch": 0.3576638076900636,
      "grad_norm": 0.9220034516470308,
      "learning_rate": 7.439204818207715e-06,
      "loss": 0.1276,
      "step": 12260
    },
    {
      "epoch": 0.35769298092070717,
      "grad_norm": 0.79823479876437,
      "learning_rate": 7.438792404178652e-06,
      "loss": 0.1563,
      "step": 12261
    },
    {
      "epoch": 0.3577221541513507,
      "grad_norm": 0.841057332417277,
      "learning_rate": 7.4383799683766315e-06,
      "loss": 0.1494,
      "step": 12262
    },
    {
      "epoch": 0.3577513273819943,
      "grad_norm": 0.8624899017320787,
      "learning_rate": 7.437967510805336e-06,
      "loss": 0.1473,
      "step": 12263
    },
    {
      "epoch": 0.35778050061263783,
      "grad_norm": 0.8338898053412516,
      "learning_rate": 7.4375550314684505e-06,
      "loss": 0.1142,
      "step": 12264
    },
    {
      "epoch": 0.3578096738432814,
      "grad_norm": 0.7141194529104241,
      "learning_rate": 7.437142530369654e-06,
      "loss": 0.1393,
      "step": 12265
    },
    {
      "epoch": 0.357838847073925,
      "grad_norm": 0.9102631886380496,
      "learning_rate": 7.436730007512633e-06,
      "loss": 0.1293,
      "step": 12266
    },
    {
      "epoch": 0.35786802030456855,
      "grad_norm": 0.8405538358882885,
      "learning_rate": 7.436317462901068e-06,
      "loss": 0.1278,
      "step": 12267
    },
    {
      "epoch": 0.3578971935352121,
      "grad_norm": 0.9766458337122229,
      "learning_rate": 7.43590489653864e-06,
      "loss": 0.1511,
      "step": 12268
    },
    {
      "epoch": 0.35792636676585565,
      "grad_norm": 0.8098832418681778,
      "learning_rate": 7.4354923084290364e-06,
      "loss": 0.1767,
      "step": 12269
    },
    {
      "epoch": 0.3579555399964992,
      "grad_norm": 0.8814530567633131,
      "learning_rate": 7.435079698575939e-06,
      "loss": 0.1393,
      "step": 12270
    },
    {
      "epoch": 0.35798471322714276,
      "grad_norm": 0.864062979438122,
      "learning_rate": 7.43466706698303e-06,
      "loss": 0.1381,
      "step": 12271
    },
    {
      "epoch": 0.3580138864577863,
      "grad_norm": 0.801081478137863,
      "learning_rate": 7.434254413653995e-06,
      "loss": 0.1365,
      "step": 12272
    },
    {
      "epoch": 0.3580430596884299,
      "grad_norm": 0.6406910898646578,
      "learning_rate": 7.433841738592518e-06,
      "loss": 0.144,
      "step": 12273
    },
    {
      "epoch": 0.3580722329190735,
      "grad_norm": 0.7635709007643025,
      "learning_rate": 7.433429041802282e-06,
      "loss": 0.1604,
      "step": 12274
    },
    {
      "epoch": 0.358101406149717,
      "grad_norm": 0.8966671507682924,
      "learning_rate": 7.433016323286975e-06,
      "loss": 0.1479,
      "step": 12275
    },
    {
      "epoch": 0.3581305793803606,
      "grad_norm": 0.7248138408881539,
      "learning_rate": 7.432603583050277e-06,
      "loss": 0.1483,
      "step": 12276
    },
    {
      "epoch": 0.35815975261100413,
      "grad_norm": 0.8051256495884842,
      "learning_rate": 7.432190821095875e-06,
      "loss": 0.1753,
      "step": 12277
    },
    {
      "epoch": 0.3581889258416477,
      "grad_norm": 0.7255186023301744,
      "learning_rate": 7.431778037427455e-06,
      "loss": 0.1109,
      "step": 12278
    },
    {
      "epoch": 0.35821809907229124,
      "grad_norm": 0.7527019200543248,
      "learning_rate": 7.431365232048701e-06,
      "loss": 0.1254,
      "step": 12279
    },
    {
      "epoch": 0.35824727230293485,
      "grad_norm": 0.7222836276234812,
      "learning_rate": 7.430952404963298e-06,
      "loss": 0.1373,
      "step": 12280
    },
    {
      "epoch": 0.3582764455335784,
      "grad_norm": 0.8216852059432174,
      "learning_rate": 7.430539556174933e-06,
      "loss": 0.1524,
      "step": 12281
    },
    {
      "epoch": 0.35830561876422196,
      "grad_norm": 0.9893838644930824,
      "learning_rate": 7.43012668568729e-06,
      "loss": 0.156,
      "step": 12282
    },
    {
      "epoch": 0.3583347919948655,
      "grad_norm": 0.969389833464752,
      "learning_rate": 7.429713793504056e-06,
      "loss": 0.1526,
      "step": 12283
    },
    {
      "epoch": 0.35836396522550906,
      "grad_norm": 0.8041972180659,
      "learning_rate": 7.429300879628918e-06,
      "loss": 0.147,
      "step": 12284
    },
    {
      "epoch": 0.3583931384561526,
      "grad_norm": 1.1458077909766509,
      "learning_rate": 7.428887944065562e-06,
      "loss": 0.1642,
      "step": 12285
    },
    {
      "epoch": 0.35842231168679617,
      "grad_norm": 1.0715062884145345,
      "learning_rate": 7.428474986817673e-06,
      "loss": 0.1826,
      "step": 12286
    },
    {
      "epoch": 0.3584514849174398,
      "grad_norm": 0.7704009576124625,
      "learning_rate": 7.42806200788894e-06,
      "loss": 0.156,
      "step": 12287
    },
    {
      "epoch": 0.35848065814808333,
      "grad_norm": 1.4323846304424777,
      "learning_rate": 7.427649007283049e-06,
      "loss": 0.1522,
      "step": 12288
    },
    {
      "epoch": 0.3585098313787269,
      "grad_norm": 0.8563152652701729,
      "learning_rate": 7.4272359850036865e-06,
      "loss": 0.1407,
      "step": 12289
    },
    {
      "epoch": 0.35853900460937044,
      "grad_norm": 0.7655571933224661,
      "learning_rate": 7.426822941054541e-06,
      "loss": 0.1297,
      "step": 12290
    },
    {
      "epoch": 0.358568177840014,
      "grad_norm": 1.1216758641583837,
      "learning_rate": 7.4264098754393e-06,
      "loss": 0.1415,
      "step": 12291
    },
    {
      "epoch": 0.35859735107065754,
      "grad_norm": 0.9519263359126535,
      "learning_rate": 7.42599678816165e-06,
      "loss": 0.1496,
      "step": 12292
    },
    {
      "epoch": 0.35862652430130115,
      "grad_norm": 0.8922317014353787,
      "learning_rate": 7.42558367922528e-06,
      "loss": 0.1518,
      "step": 12293
    },
    {
      "epoch": 0.3586556975319447,
      "grad_norm": 0.8629977456094764,
      "learning_rate": 7.42517054863388e-06,
      "loss": 0.1518,
      "step": 12294
    },
    {
      "epoch": 0.35868487076258826,
      "grad_norm": 0.6720919543875122,
      "learning_rate": 7.424757396391133e-06,
      "loss": 0.1398,
      "step": 12295
    },
    {
      "epoch": 0.3587140439932318,
      "grad_norm": 0.8753834844981437,
      "learning_rate": 7.424344222500734e-06,
      "loss": 0.1722,
      "step": 12296
    },
    {
      "epoch": 0.35874321722387537,
      "grad_norm": 0.8353090656853341,
      "learning_rate": 7.423931026966365e-06,
      "loss": 0.1486,
      "step": 12297
    },
    {
      "epoch": 0.3587723904545189,
      "grad_norm": 0.769762163570692,
      "learning_rate": 7.4235178097917216e-06,
      "loss": 0.172,
      "step": 12298
    },
    {
      "epoch": 0.3588015636851625,
      "grad_norm": 0.7878399871303298,
      "learning_rate": 7.4231045709804885e-06,
      "loss": 0.1523,
      "step": 12299
    },
    {
      "epoch": 0.3588307369158061,
      "grad_norm": 0.8206114113125107,
      "learning_rate": 7.422691310536355e-06,
      "loss": 0.1317,
      "step": 12300
    },
    {
      "epoch": 0.35885991014644963,
      "grad_norm": 0.7403662408992112,
      "learning_rate": 7.422278028463013e-06,
      "loss": 0.1348,
      "step": 12301
    },
    {
      "epoch": 0.3588890833770932,
      "grad_norm": 0.8872450680061393,
      "learning_rate": 7.421864724764152e-06,
      "loss": 0.1471,
      "step": 12302
    },
    {
      "epoch": 0.35891825660773674,
      "grad_norm": 0.7791741098502287,
      "learning_rate": 7.421451399443459e-06,
      "loss": 0.1527,
      "step": 12303
    },
    {
      "epoch": 0.3589474298383803,
      "grad_norm": 0.685358201498907,
      "learning_rate": 7.421038052504627e-06,
      "loss": 0.1365,
      "step": 12304
    },
    {
      "epoch": 0.35897660306902385,
      "grad_norm": 0.8881283338308029,
      "learning_rate": 7.4206246839513455e-06,
      "loss": 0.1728,
      "step": 12305
    },
    {
      "epoch": 0.3590057762996674,
      "grad_norm": 1.0459925805075425,
      "learning_rate": 7.420211293787305e-06,
      "loss": 0.152,
      "step": 12306
    },
    {
      "epoch": 0.359034949530311,
      "grad_norm": 0.8036929620852763,
      "learning_rate": 7.419797882016193e-06,
      "loss": 0.1317,
      "step": 12307
    },
    {
      "epoch": 0.35906412276095456,
      "grad_norm": 0.8080802933071737,
      "learning_rate": 7.419384448641706e-06,
      "loss": 0.1336,
      "step": 12308
    },
    {
      "epoch": 0.3590932959915981,
      "grad_norm": 0.8066517583138296,
      "learning_rate": 7.418970993667531e-06,
      "loss": 0.1296,
      "step": 12309
    },
    {
      "epoch": 0.35912246922224167,
      "grad_norm": 1.0865065477623663,
      "learning_rate": 7.41855751709736e-06,
      "loss": 0.1548,
      "step": 12310
    },
    {
      "epoch": 0.3591516424528852,
      "grad_norm": 0.8762441744657048,
      "learning_rate": 7.418144018934888e-06,
      "loss": 0.1738,
      "step": 12311
    },
    {
      "epoch": 0.3591808156835288,
      "grad_norm": 1.0911604048878483,
      "learning_rate": 7.417730499183801e-06,
      "loss": 0.1347,
      "step": 12312
    },
    {
      "epoch": 0.35920998891417233,
      "grad_norm": 0.8934094349034861,
      "learning_rate": 7.417316957847793e-06,
      "loss": 0.1437,
      "step": 12313
    },
    {
      "epoch": 0.35923916214481594,
      "grad_norm": 0.9355503288008398,
      "learning_rate": 7.416903394930556e-06,
      "loss": 0.1341,
      "step": 12314
    },
    {
      "epoch": 0.3592683353754595,
      "grad_norm": 0.9136214836862947,
      "learning_rate": 7.416489810435783e-06,
      "loss": 0.1835,
      "step": 12315
    },
    {
      "epoch": 0.35929750860610304,
      "grad_norm": 0.9682915154151832,
      "learning_rate": 7.4160762043671664e-06,
      "loss": 0.1408,
      "step": 12316
    },
    {
      "epoch": 0.3593266818367466,
      "grad_norm": 0.9504391712661863,
      "learning_rate": 7.415662576728397e-06,
      "loss": 0.1561,
      "step": 12317
    },
    {
      "epoch": 0.35935585506739015,
      "grad_norm": 0.8876264488166852,
      "learning_rate": 7.41524892752317e-06,
      "loss": 0.122,
      "step": 12318
    },
    {
      "epoch": 0.3593850282980337,
      "grad_norm": 0.9609503698594272,
      "learning_rate": 7.414835256755176e-06,
      "loss": 0.1666,
      "step": 12319
    },
    {
      "epoch": 0.3594142015286773,
      "grad_norm": 0.7358350385194634,
      "learning_rate": 7.41442156442811e-06,
      "loss": 0.1255,
      "step": 12320
    },
    {
      "epoch": 0.35944337475932087,
      "grad_norm": 0.8460819344876249,
      "learning_rate": 7.414007850545666e-06,
      "loss": 0.1373,
      "step": 12321
    },
    {
      "epoch": 0.3594725479899644,
      "grad_norm": 0.8859639325453393,
      "learning_rate": 7.4135941151115335e-06,
      "loss": 0.1689,
      "step": 12322
    },
    {
      "epoch": 0.359501721220608,
      "grad_norm": 0.9864525481893218,
      "learning_rate": 7.41318035812941e-06,
      "loss": 0.1613,
      "step": 12323
    },
    {
      "epoch": 0.3595308944512515,
      "grad_norm": 0.8165972592542372,
      "learning_rate": 7.4127665796029905e-06,
      "loss": 0.1358,
      "step": 12324
    },
    {
      "epoch": 0.3595600676818951,
      "grad_norm": 0.8562266100046408,
      "learning_rate": 7.412352779535963e-06,
      "loss": 0.1519,
      "step": 12325
    },
    {
      "epoch": 0.35958924091253863,
      "grad_norm": 0.912280309723881,
      "learning_rate": 7.411938957932029e-06,
      "loss": 0.1581,
      "step": 12326
    },
    {
      "epoch": 0.35961841414318224,
      "grad_norm": 0.7717685849872788,
      "learning_rate": 7.411525114794877e-06,
      "loss": 0.1407,
      "step": 12327
    },
    {
      "epoch": 0.3596475873738258,
      "grad_norm": 0.8565656156353816,
      "learning_rate": 7.411111250128207e-06,
      "loss": 0.1538,
      "step": 12328
    },
    {
      "epoch": 0.35967676060446935,
      "grad_norm": 0.8023797838850455,
      "learning_rate": 7.4106973639357104e-06,
      "loss": 0.1503,
      "step": 12329
    },
    {
      "epoch": 0.3597059338351129,
      "grad_norm": 0.8821948443676356,
      "learning_rate": 7.4102834562210825e-06,
      "loss": 0.1441,
      "step": 12330
    },
    {
      "epoch": 0.35973510706575645,
      "grad_norm": 0.7279747327238699,
      "learning_rate": 7.4098695269880205e-06,
      "loss": 0.1563,
      "step": 12331
    },
    {
      "epoch": 0.3597642802964,
      "grad_norm": 0.9236086564391722,
      "learning_rate": 7.4094555762402174e-06,
      "loss": 0.1355,
      "step": 12332
    },
    {
      "epoch": 0.35979345352704356,
      "grad_norm": 0.8103902030532772,
      "learning_rate": 7.409041603981371e-06,
      "loss": 0.1609,
      "step": 12333
    },
    {
      "epoch": 0.35982262675768717,
      "grad_norm": 0.8249193407165809,
      "learning_rate": 7.408627610215176e-06,
      "loss": 0.1522,
      "step": 12334
    },
    {
      "epoch": 0.3598517999883307,
      "grad_norm": 0.6871405848971597,
      "learning_rate": 7.408213594945328e-06,
      "loss": 0.1183,
      "step": 12335
    },
    {
      "epoch": 0.3598809732189743,
      "grad_norm": 0.7755076899987711,
      "learning_rate": 7.4077995581755255e-06,
      "loss": 0.1368,
      "step": 12336
    },
    {
      "epoch": 0.35991014644961783,
      "grad_norm": 0.9100559507869117,
      "learning_rate": 7.407385499909462e-06,
      "loss": 0.1535,
      "step": 12337
    },
    {
      "epoch": 0.3599393196802614,
      "grad_norm": 0.7937661174648724,
      "learning_rate": 7.406971420150837e-06,
      "loss": 0.1419,
      "step": 12338
    },
    {
      "epoch": 0.35996849291090494,
      "grad_norm": 0.8959987231263276,
      "learning_rate": 7.406557318903344e-06,
      "loss": 0.1518,
      "step": 12339
    },
    {
      "epoch": 0.3599976661415485,
      "grad_norm": 0.8906269978011393,
      "learning_rate": 7.406143196170681e-06,
      "loss": 0.1796,
      "step": 12340
    },
    {
      "epoch": 0.3600268393721921,
      "grad_norm": 0.8072700045665477,
      "learning_rate": 7.405729051956548e-06,
      "loss": 0.1536,
      "step": 12341
    },
    {
      "epoch": 0.36005601260283565,
      "grad_norm": 1.0264394048547325,
      "learning_rate": 7.405314886264639e-06,
      "loss": 0.1429,
      "step": 12342
    },
    {
      "epoch": 0.3600851858334792,
      "grad_norm": 0.807448160087179,
      "learning_rate": 7.404900699098654e-06,
      "loss": 0.15,
      "step": 12343
    },
    {
      "epoch": 0.36011435906412276,
      "grad_norm": 0.6762839773443348,
      "learning_rate": 7.404486490462289e-06,
      "loss": 0.1373,
      "step": 12344
    },
    {
      "epoch": 0.3601435322947663,
      "grad_norm": 1.0202249359976419,
      "learning_rate": 7.404072260359243e-06,
      "loss": 0.1419,
      "step": 12345
    },
    {
      "epoch": 0.36017270552540986,
      "grad_norm": 0.8772031024908503,
      "learning_rate": 7.403658008793213e-06,
      "loss": 0.1341,
      "step": 12346
    },
    {
      "epoch": 0.3602018787560535,
      "grad_norm": 0.8549827882854905,
      "learning_rate": 7.4032437357678985e-06,
      "loss": 0.1659,
      "step": 12347
    },
    {
      "epoch": 0.360231051986697,
      "grad_norm": 1.0450883888744094,
      "learning_rate": 7.4028294412869985e-06,
      "loss": 0.143,
      "step": 12348
    },
    {
      "epoch": 0.3602602252173406,
      "grad_norm": 0.7713575426833317,
      "learning_rate": 7.40241512535421e-06,
      "loss": 0.1542,
      "step": 12349
    },
    {
      "epoch": 0.36028939844798413,
      "grad_norm": 0.8330528390504065,
      "learning_rate": 7.402000787973232e-06,
      "loss": 0.1447,
      "step": 12350
    },
    {
      "epoch": 0.3603185716786277,
      "grad_norm": 1.094625320039597,
      "learning_rate": 7.401586429147767e-06,
      "loss": 0.1709,
      "step": 12351
    },
    {
      "epoch": 0.36034774490927124,
      "grad_norm": 0.7813934111383724,
      "learning_rate": 7.401172048881509e-06,
      "loss": 0.1559,
      "step": 12352
    },
    {
      "epoch": 0.3603769181399148,
      "grad_norm": 0.7804873840379437,
      "learning_rate": 7.400757647178162e-06,
      "loss": 0.128,
      "step": 12353
    },
    {
      "epoch": 0.3604060913705584,
      "grad_norm": 0.8758749781555031,
      "learning_rate": 7.400343224041422e-06,
      "loss": 0.1431,
      "step": 12354
    },
    {
      "epoch": 0.36043526460120195,
      "grad_norm": 0.8377924261046195,
      "learning_rate": 7.399928779474991e-06,
      "loss": 0.1481,
      "step": 12355
    },
    {
      "epoch": 0.3604644378318455,
      "grad_norm": 0.7853079153083155,
      "learning_rate": 7.39951431348257e-06,
      "loss": 0.1439,
      "step": 12356
    },
    {
      "epoch": 0.36049361106248906,
      "grad_norm": 0.9347527446114469,
      "learning_rate": 7.399099826067857e-06,
      "loss": 0.1273,
      "step": 12357
    },
    {
      "epoch": 0.3605227842931326,
      "grad_norm": 0.9243839040542723,
      "learning_rate": 7.398685317234554e-06,
      "loss": 0.1287,
      "step": 12358
    },
    {
      "epoch": 0.36055195752377617,
      "grad_norm": 0.836372029203595,
      "learning_rate": 7.398270786986361e-06,
      "loss": 0.1491,
      "step": 12359
    },
    {
      "epoch": 0.3605811307544197,
      "grad_norm": 1.074532348994187,
      "learning_rate": 7.397856235326979e-06,
      "loss": 0.1451,
      "step": 12360
    },
    {
      "epoch": 0.36061030398506333,
      "grad_norm": 0.8130342485484173,
      "learning_rate": 7.397441662260109e-06,
      "loss": 0.1262,
      "step": 12361
    },
    {
      "epoch": 0.3606394772157069,
      "grad_norm": 0.9157609290069918,
      "learning_rate": 7.3970270677894505e-06,
      "loss": 0.144,
      "step": 12362
    },
    {
      "epoch": 0.36066865044635044,
      "grad_norm": 0.9774480662919182,
      "learning_rate": 7.396612451918709e-06,
      "loss": 0.1699,
      "step": 12363
    },
    {
      "epoch": 0.360697823676994,
      "grad_norm": 0.7751576073896935,
      "learning_rate": 7.396197814651582e-06,
      "loss": 0.171,
      "step": 12364
    },
    {
      "epoch": 0.36072699690763754,
      "grad_norm": 1.026047430528191,
      "learning_rate": 7.3957831559917735e-06,
      "loss": 0.1491,
      "step": 12365
    },
    {
      "epoch": 0.3607561701382811,
      "grad_norm": 1.1578198490582807,
      "learning_rate": 7.395368475942985e-06,
      "loss": 0.1352,
      "step": 12366
    },
    {
      "epoch": 0.36078534336892465,
      "grad_norm": 0.7358419225212605,
      "learning_rate": 7.394953774508918e-06,
      "loss": 0.1442,
      "step": 12367
    },
    {
      "epoch": 0.36081451659956826,
      "grad_norm": 0.7855099920570201,
      "learning_rate": 7.3945390516932765e-06,
      "loss": 0.1521,
      "step": 12368
    },
    {
      "epoch": 0.3608436898302118,
      "grad_norm": 0.882718394909379,
      "learning_rate": 7.394124307499762e-06,
      "loss": 0.1627,
      "step": 12369
    },
    {
      "epoch": 0.36087286306085536,
      "grad_norm": 1.1041984778019813,
      "learning_rate": 7.393709541932076e-06,
      "loss": 0.1425,
      "step": 12370
    },
    {
      "epoch": 0.3609020362914989,
      "grad_norm": 0.8117247656766673,
      "learning_rate": 7.393294754993924e-06,
      "loss": 0.1487,
      "step": 12371
    },
    {
      "epoch": 0.36093120952214247,
      "grad_norm": 0.9860067015206712,
      "learning_rate": 7.392879946689007e-06,
      "loss": 0.1738,
      "step": 12372
    },
    {
      "epoch": 0.360960382752786,
      "grad_norm": 0.9697711162001428,
      "learning_rate": 7.39246511702103e-06,
      "loss": 0.1522,
      "step": 12373
    },
    {
      "epoch": 0.3609895559834296,
      "grad_norm": 0.790772645726191,
      "learning_rate": 7.3920502659936936e-06,
      "loss": 0.1507,
      "step": 12374
    },
    {
      "epoch": 0.3610187292140732,
      "grad_norm": 0.708130741606113,
      "learning_rate": 7.3916353936107045e-06,
      "loss": 0.1264,
      "step": 12375
    },
    {
      "epoch": 0.36104790244471674,
      "grad_norm": 1.089621074722289,
      "learning_rate": 7.3912204998757656e-06,
      "loss": 0.1417,
      "step": 12376
    },
    {
      "epoch": 0.3610770756753603,
      "grad_norm": 0.9019908227128198,
      "learning_rate": 7.390805584792581e-06,
      "loss": 0.1423,
      "step": 12377
    },
    {
      "epoch": 0.36110624890600385,
      "grad_norm": 0.8293264150623696,
      "learning_rate": 7.390390648364855e-06,
      "loss": 0.147,
      "step": 12378
    },
    {
      "epoch": 0.3611354221366474,
      "grad_norm": 0.9178389304117276,
      "learning_rate": 7.389975690596292e-06,
      "loss": 0.1489,
      "step": 12379
    },
    {
      "epoch": 0.36116459536729095,
      "grad_norm": 1.0360618918651492,
      "learning_rate": 7.389560711490595e-06,
      "loss": 0.1414,
      "step": 12380
    },
    {
      "epoch": 0.36119376859793456,
      "grad_norm": 0.6703535165588835,
      "learning_rate": 7.389145711051473e-06,
      "loss": 0.1349,
      "step": 12381
    },
    {
      "epoch": 0.3612229418285781,
      "grad_norm": 0.6804167249294869,
      "learning_rate": 7.388730689282626e-06,
      "loss": 0.1432,
      "step": 12382
    },
    {
      "epoch": 0.36125211505922167,
      "grad_norm": 0.7354037221891911,
      "learning_rate": 7.388315646187763e-06,
      "loss": 0.1632,
      "step": 12383
    },
    {
      "epoch": 0.3612812882898652,
      "grad_norm": 0.805908468689585,
      "learning_rate": 7.3879005817705886e-06,
      "loss": 0.1374,
      "step": 12384
    },
    {
      "epoch": 0.3613104615205088,
      "grad_norm": 1.2220610790779731,
      "learning_rate": 7.387485496034805e-06,
      "loss": 0.1663,
      "step": 12385
    },
    {
      "epoch": 0.3613396347511523,
      "grad_norm": 0.6820647865653887,
      "learning_rate": 7.387070388984123e-06,
      "loss": 0.1361,
      "step": 12386
    },
    {
      "epoch": 0.3613688079817959,
      "grad_norm": 0.9643924075132558,
      "learning_rate": 7.386655260622247e-06,
      "loss": 0.1438,
      "step": 12387
    },
    {
      "epoch": 0.3613979812124395,
      "grad_norm": 0.8253438292834724,
      "learning_rate": 7.386240110952881e-06,
      "loss": 0.1533,
      "step": 12388
    },
    {
      "epoch": 0.36142715444308304,
      "grad_norm": 0.931771747767439,
      "learning_rate": 7.385824939979735e-06,
      "loss": 0.1348,
      "step": 12389
    },
    {
      "epoch": 0.3614563276737266,
      "grad_norm": 0.915816847488074,
      "learning_rate": 7.385409747706511e-06,
      "loss": 0.164,
      "step": 12390
    },
    {
      "epoch": 0.36148550090437015,
      "grad_norm": 0.8121290138072851,
      "learning_rate": 7.38499453413692e-06,
      "loss": 0.1439,
      "step": 12391
    },
    {
      "epoch": 0.3615146741350137,
      "grad_norm": 0.7327947356452752,
      "learning_rate": 7.3845792992746665e-06,
      "loss": 0.1581,
      "step": 12392
    },
    {
      "epoch": 0.36154384736565726,
      "grad_norm": 0.8533791669557264,
      "learning_rate": 7.384164043123458e-06,
      "loss": 0.1267,
      "step": 12393
    },
    {
      "epoch": 0.3615730205963008,
      "grad_norm": 0.8947048415332206,
      "learning_rate": 7.383748765687002e-06,
      "loss": 0.1297,
      "step": 12394
    },
    {
      "epoch": 0.3616021938269444,
      "grad_norm": 0.9297131469072029,
      "learning_rate": 7.383333466969007e-06,
      "loss": 0.1426,
      "step": 12395
    },
    {
      "epoch": 0.36163136705758797,
      "grad_norm": 0.9030085673039439,
      "learning_rate": 7.38291814697318e-06,
      "loss": 0.1448,
      "step": 12396
    },
    {
      "epoch": 0.3616605402882315,
      "grad_norm": 0.7871505040381325,
      "learning_rate": 7.382502805703227e-06,
      "loss": 0.1374,
      "step": 12397
    },
    {
      "epoch": 0.3616897135188751,
      "grad_norm": 0.7660459682425128,
      "learning_rate": 7.382087443162859e-06,
      "loss": 0.1723,
      "step": 12398
    },
    {
      "epoch": 0.36171888674951863,
      "grad_norm": 0.7032781138080652,
      "learning_rate": 7.381672059355782e-06,
      "loss": 0.1542,
      "step": 12399
    },
    {
      "epoch": 0.3617480599801622,
      "grad_norm": 1.1822475436946003,
      "learning_rate": 7.3812566542857055e-06,
      "loss": 0.1524,
      "step": 12400
    },
    {
      "epoch": 0.36177723321080574,
      "grad_norm": 0.8368114519356725,
      "learning_rate": 7.3808412279563394e-06,
      "loss": 0.1587,
      "step": 12401
    },
    {
      "epoch": 0.36180640644144935,
      "grad_norm": 0.8324356038995239,
      "learning_rate": 7.38042578037139e-06,
      "loss": 0.157,
      "step": 12402
    },
    {
      "epoch": 0.3618355796720929,
      "grad_norm": 0.6767675483795794,
      "learning_rate": 7.380010311534568e-06,
      "loss": 0.1458,
      "step": 12403
    },
    {
      "epoch": 0.36186475290273645,
      "grad_norm": 0.6801981630620667,
      "learning_rate": 7.3795948214495816e-06,
      "loss": 0.1446,
      "step": 12404
    },
    {
      "epoch": 0.36189392613338,
      "grad_norm": 0.7067335317127671,
      "learning_rate": 7.379179310120139e-06,
      "loss": 0.1338,
      "step": 12405
    },
    {
      "epoch": 0.36192309936402356,
      "grad_norm": 0.9515411579017032,
      "learning_rate": 7.378763777549955e-06,
      "loss": 0.1409,
      "step": 12406
    },
    {
      "epoch": 0.3619522725946671,
      "grad_norm": 0.8145976168747101,
      "learning_rate": 7.378348223742734e-06,
      "loss": 0.1328,
      "step": 12407
    },
    {
      "epoch": 0.3619814458253107,
      "grad_norm": 0.7151310623421635,
      "learning_rate": 7.377932648702189e-06,
      "loss": 0.1318,
      "step": 12408
    },
    {
      "epoch": 0.3620106190559543,
      "grad_norm": 0.8839560193938235,
      "learning_rate": 7.377517052432027e-06,
      "loss": 0.1712,
      "step": 12409
    },
    {
      "epoch": 0.36203979228659783,
      "grad_norm": 0.8707843326698027,
      "learning_rate": 7.377101434935961e-06,
      "loss": 0.1531,
      "step": 12410
    },
    {
      "epoch": 0.3620689655172414,
      "grad_norm": 0.8353989689392917,
      "learning_rate": 7.376685796217702e-06,
      "loss": 0.1403,
      "step": 12411
    },
    {
      "epoch": 0.36209813874788493,
      "grad_norm": 0.7556286038576497,
      "learning_rate": 7.376270136280958e-06,
      "loss": 0.1662,
      "step": 12412
    },
    {
      "epoch": 0.3621273119785285,
      "grad_norm": 1.0941445251976925,
      "learning_rate": 7.375854455129443e-06,
      "loss": 0.1254,
      "step": 12413
    },
    {
      "epoch": 0.36215648520917204,
      "grad_norm": 0.8740104712910368,
      "learning_rate": 7.375438752766864e-06,
      "loss": 0.1476,
      "step": 12414
    },
    {
      "epoch": 0.36218565843981565,
      "grad_norm": 0.7790841822211408,
      "learning_rate": 7.375023029196937e-06,
      "loss": 0.1488,
      "step": 12415
    },
    {
      "epoch": 0.3622148316704592,
      "grad_norm": 0.8175314120822311,
      "learning_rate": 7.374607284423373e-06,
      "loss": 0.149,
      "step": 12416
    },
    {
      "epoch": 0.36224400490110276,
      "grad_norm": 1.1122828174219652,
      "learning_rate": 7.374191518449878e-06,
      "loss": 0.1689,
      "step": 12417
    },
    {
      "epoch": 0.3622731781317463,
      "grad_norm": 0.9378285633316117,
      "learning_rate": 7.373775731280172e-06,
      "loss": 0.1331,
      "step": 12418
    },
    {
      "epoch": 0.36230235136238986,
      "grad_norm": 1.301318761980158,
      "learning_rate": 7.37335992291796e-06,
      "loss": 0.1541,
      "step": 12419
    },
    {
      "epoch": 0.3623315245930334,
      "grad_norm": 0.8942643632562767,
      "learning_rate": 7.3729440933669575e-06,
      "loss": 0.1633,
      "step": 12420
    },
    {
      "epoch": 0.36236069782367697,
      "grad_norm": 0.8358110494033888,
      "learning_rate": 7.372528242630878e-06,
      "loss": 0.1183,
      "step": 12421
    },
    {
      "epoch": 0.3623898710543206,
      "grad_norm": 1.0317164305132445,
      "learning_rate": 7.372112370713431e-06,
      "loss": 0.1443,
      "step": 12422
    },
    {
      "epoch": 0.36241904428496413,
      "grad_norm": 0.8983204452817084,
      "learning_rate": 7.371696477618333e-06,
      "loss": 0.1569,
      "step": 12423
    },
    {
      "epoch": 0.3624482175156077,
      "grad_norm": 0.8926004064186805,
      "learning_rate": 7.3712805633492935e-06,
      "loss": 0.1423,
      "step": 12424
    },
    {
      "epoch": 0.36247739074625124,
      "grad_norm": 0.8670794270314263,
      "learning_rate": 7.370864627910027e-06,
      "loss": 0.1456,
      "step": 12425
    },
    {
      "epoch": 0.3625065639768948,
      "grad_norm": 0.8985125181143718,
      "learning_rate": 7.370448671304248e-06,
      "loss": 0.1407,
      "step": 12426
    },
    {
      "epoch": 0.36253573720753834,
      "grad_norm": 0.7063648041598328,
      "learning_rate": 7.370032693535669e-06,
      "loss": 0.1499,
      "step": 12427
    },
    {
      "epoch": 0.3625649104381819,
      "grad_norm": 0.8830294779583203,
      "learning_rate": 7.369616694608004e-06,
      "loss": 0.148,
      "step": 12428
    },
    {
      "epoch": 0.3625940836688255,
      "grad_norm": 0.8577546795229164,
      "learning_rate": 7.369200674524966e-06,
      "loss": 0.1704,
      "step": 12429
    },
    {
      "epoch": 0.36262325689946906,
      "grad_norm": 0.7825779464405499,
      "learning_rate": 7.36878463329027e-06,
      "loss": 0.1719,
      "step": 12430
    },
    {
      "epoch": 0.3626524301301126,
      "grad_norm": 0.738730076741277,
      "learning_rate": 7.368368570907633e-06,
      "loss": 0.136,
      "step": 12431
    },
    {
      "epoch": 0.36268160336075617,
      "grad_norm": 0.8400014819160159,
      "learning_rate": 7.367952487380763e-06,
      "loss": 0.1406,
      "step": 12432
    },
    {
      "epoch": 0.3627107765913997,
      "grad_norm": 0.7990516261637564,
      "learning_rate": 7.367536382713381e-06,
      "loss": 0.1586,
      "step": 12433
    },
    {
      "epoch": 0.3627399498220433,
      "grad_norm": 0.8288024477863183,
      "learning_rate": 7.367120256909198e-06,
      "loss": 0.1413,
      "step": 12434
    },
    {
      "epoch": 0.3627691230526869,
      "grad_norm": 1.0154243653961876,
      "learning_rate": 7.366704109971929e-06,
      "loss": 0.1455,
      "step": 12435
    },
    {
      "epoch": 0.36279829628333043,
      "grad_norm": 0.8138546523351705,
      "learning_rate": 7.366287941905295e-06,
      "loss": 0.1483,
      "step": 12436
    },
    {
      "epoch": 0.362827469513974,
      "grad_norm": 0.7551388897055035,
      "learning_rate": 7.365871752713003e-06,
      "loss": 0.1437,
      "step": 12437
    },
    {
      "epoch": 0.36285664274461754,
      "grad_norm": 0.8987613971071026,
      "learning_rate": 7.365455542398775e-06,
      "loss": 0.1355,
      "step": 12438
    },
    {
      "epoch": 0.3628858159752611,
      "grad_norm": 0.8256795075390594,
      "learning_rate": 7.365039310966324e-06,
      "loss": 0.1478,
      "step": 12439
    },
    {
      "epoch": 0.36291498920590465,
      "grad_norm": 0.8169888938021671,
      "learning_rate": 7.364623058419367e-06,
      "loss": 0.1734,
      "step": 12440
    },
    {
      "epoch": 0.3629441624365482,
      "grad_norm": 0.925222291897439,
      "learning_rate": 7.364206784761618e-06,
      "loss": 0.1717,
      "step": 12441
    },
    {
      "epoch": 0.3629733356671918,
      "grad_norm": 0.9039461568838147,
      "learning_rate": 7.363790489996797e-06,
      "loss": 0.1408,
      "step": 12442
    },
    {
      "epoch": 0.36300250889783536,
      "grad_norm": 0.8004050674599028,
      "learning_rate": 7.363374174128619e-06,
      "loss": 0.1447,
      "step": 12443
    },
    {
      "epoch": 0.3630316821284789,
      "grad_norm": 0.8243223765501508,
      "learning_rate": 7.362957837160799e-06,
      "loss": 0.1367,
      "step": 12444
    },
    {
      "epoch": 0.36306085535912247,
      "grad_norm": 0.7991490427144883,
      "learning_rate": 7.362541479097056e-06,
      "loss": 0.1543,
      "step": 12445
    },
    {
      "epoch": 0.363090028589766,
      "grad_norm": 1.0016952136525967,
      "learning_rate": 7.3621250999411085e-06,
      "loss": 0.1516,
      "step": 12446
    },
    {
      "epoch": 0.3631192018204096,
      "grad_norm": 1.0991795309553871,
      "learning_rate": 7.36170869969667e-06,
      "loss": 0.1734,
      "step": 12447
    },
    {
      "epoch": 0.36314837505105313,
      "grad_norm": 0.8366821099669521,
      "learning_rate": 7.361292278367461e-06,
      "loss": 0.1499,
      "step": 12448
    },
    {
      "epoch": 0.36317754828169674,
      "grad_norm": 0.7070275659050894,
      "learning_rate": 7.360875835957198e-06,
      "loss": 0.1349,
      "step": 12449
    },
    {
      "epoch": 0.3632067215123403,
      "grad_norm": 0.7723239819916525,
      "learning_rate": 7.360459372469598e-06,
      "loss": 0.1388,
      "step": 12450
    },
    {
      "epoch": 0.36323589474298384,
      "grad_norm": 0.6756298929571691,
      "learning_rate": 7.360042887908382e-06,
      "loss": 0.155,
      "step": 12451
    },
    {
      "epoch": 0.3632650679736274,
      "grad_norm": 0.8827125519419131,
      "learning_rate": 7.359626382277265e-06,
      "loss": 0.1327,
      "step": 12452
    },
    {
      "epoch": 0.36329424120427095,
      "grad_norm": 0.8041012971349712,
      "learning_rate": 7.359209855579968e-06,
      "loss": 0.1076,
      "step": 12453
    },
    {
      "epoch": 0.3633234144349145,
      "grad_norm": 0.697372531692577,
      "learning_rate": 7.358793307820209e-06,
      "loss": 0.1375,
      "step": 12454
    },
    {
      "epoch": 0.36335258766555806,
      "grad_norm": 1.3237811105848196,
      "learning_rate": 7.358376739001704e-06,
      "loss": 0.1726,
      "step": 12455
    },
    {
      "epoch": 0.36338176089620167,
      "grad_norm": 0.9758560351141198,
      "learning_rate": 7.357960149128177e-06,
      "loss": 0.1337,
      "step": 12456
    },
    {
      "epoch": 0.3634109341268452,
      "grad_norm": 0.7721145331937025,
      "learning_rate": 7.357543538203344e-06,
      "loss": 0.1452,
      "step": 12457
    },
    {
      "epoch": 0.3634401073574888,
      "grad_norm": 0.8972062164794726,
      "learning_rate": 7.357126906230926e-06,
      "loss": 0.1342,
      "step": 12458
    },
    {
      "epoch": 0.3634692805881323,
      "grad_norm": 0.9051163989848191,
      "learning_rate": 7.35671025321464e-06,
      "loss": 0.1753,
      "step": 12459
    },
    {
      "epoch": 0.3634984538187759,
      "grad_norm": 1.1198352697714742,
      "learning_rate": 7.356293579158207e-06,
      "loss": 0.1451,
      "step": 12460
    },
    {
      "epoch": 0.36352762704941943,
      "grad_norm": 1.0176748330465637,
      "learning_rate": 7.355876884065349e-06,
      "loss": 0.1333,
      "step": 12461
    },
    {
      "epoch": 0.36355680028006304,
      "grad_norm": 0.9384466077659679,
      "learning_rate": 7.355460167939783e-06,
      "loss": 0.1503,
      "step": 12462
    },
    {
      "epoch": 0.3635859735107066,
      "grad_norm": 0.8246058816548979,
      "learning_rate": 7.3550434307852335e-06,
      "loss": 0.1331,
      "step": 12463
    },
    {
      "epoch": 0.36361514674135015,
      "grad_norm": 1.0233503954893863,
      "learning_rate": 7.354626672605416e-06,
      "loss": 0.1419,
      "step": 12464
    },
    {
      "epoch": 0.3636443199719937,
      "grad_norm": 0.8133834003284872,
      "learning_rate": 7.354209893404054e-06,
      "loss": 0.1296,
      "step": 12465
    },
    {
      "epoch": 0.36367349320263725,
      "grad_norm": 1.2913316892089426,
      "learning_rate": 7.353793093184869e-06,
      "loss": 0.1569,
      "step": 12466
    },
    {
      "epoch": 0.3637026664332808,
      "grad_norm": 0.8334656644246954,
      "learning_rate": 7.353376271951581e-06,
      "loss": 0.1505,
      "step": 12467
    },
    {
      "epoch": 0.36373183966392436,
      "grad_norm": 0.8517273287822625,
      "learning_rate": 7.352959429707911e-06,
      "loss": 0.1605,
      "step": 12468
    },
    {
      "epoch": 0.36376101289456797,
      "grad_norm": 0.8995970232561108,
      "learning_rate": 7.3525425664575815e-06,
      "loss": 0.1591,
      "step": 12469
    },
    {
      "epoch": 0.3637901861252115,
      "grad_norm": 0.7371374702805029,
      "learning_rate": 7.352125682204313e-06,
      "loss": 0.1557,
      "step": 12470
    },
    {
      "epoch": 0.3638193593558551,
      "grad_norm": 0.9365223917183233,
      "learning_rate": 7.351708776951828e-06,
      "loss": 0.1453,
      "step": 12471
    },
    {
      "epoch": 0.36384853258649863,
      "grad_norm": 0.8667120873011884,
      "learning_rate": 7.351291850703848e-06,
      "loss": 0.1408,
      "step": 12472
    },
    {
      "epoch": 0.3638777058171422,
      "grad_norm": 0.7788669940383607,
      "learning_rate": 7.350874903464097e-06,
      "loss": 0.1465,
      "step": 12473
    },
    {
      "epoch": 0.36390687904778574,
      "grad_norm": 0.9206504844211706,
      "learning_rate": 7.350457935236295e-06,
      "loss": 0.1449,
      "step": 12474
    },
    {
      "epoch": 0.3639360522784293,
      "grad_norm": 1.1593282146380695,
      "learning_rate": 7.350040946024165e-06,
      "loss": 0.156,
      "step": 12475
    },
    {
      "epoch": 0.3639652255090729,
      "grad_norm": 0.9126505802412705,
      "learning_rate": 7.349623935831432e-06,
      "loss": 0.136,
      "step": 12476
    },
    {
      "epoch": 0.36399439873971645,
      "grad_norm": 0.7666088844096738,
      "learning_rate": 7.349206904661816e-06,
      "loss": 0.1421,
      "step": 12477
    },
    {
      "epoch": 0.36402357197036,
      "grad_norm": 0.7484162238205191,
      "learning_rate": 7.348789852519043e-06,
      "loss": 0.1389,
      "step": 12478
    },
    {
      "epoch": 0.36405274520100356,
      "grad_norm": 0.8097442914824786,
      "learning_rate": 7.348372779406834e-06,
      "loss": 0.1389,
      "step": 12479
    },
    {
      "epoch": 0.3640819184316471,
      "grad_norm": 0.7987503546657555,
      "learning_rate": 7.347955685328912e-06,
      "loss": 0.1251,
      "step": 12480
    },
    {
      "epoch": 0.36411109166229066,
      "grad_norm": 0.9288490047922632,
      "learning_rate": 7.347538570289005e-06,
      "loss": 0.1302,
      "step": 12481
    },
    {
      "epoch": 0.3641402648929342,
      "grad_norm": 0.6750726528592491,
      "learning_rate": 7.347121434290834e-06,
      "loss": 0.1229,
      "step": 12482
    },
    {
      "epoch": 0.3641694381235778,
      "grad_norm": 0.7746199922297062,
      "learning_rate": 7.346704277338122e-06,
      "loss": 0.1453,
      "step": 12483
    },
    {
      "epoch": 0.3641986113542214,
      "grad_norm": 0.873953568339805,
      "learning_rate": 7.346287099434593e-06,
      "loss": 0.1409,
      "step": 12484
    },
    {
      "epoch": 0.36422778458486493,
      "grad_norm": 0.7120896229151646,
      "learning_rate": 7.345869900583975e-06,
      "loss": 0.1519,
      "step": 12485
    },
    {
      "epoch": 0.3642569578155085,
      "grad_norm": 0.8220019306038706,
      "learning_rate": 7.345452680789989e-06,
      "loss": 0.1612,
      "step": 12486
    },
    {
      "epoch": 0.36428613104615204,
      "grad_norm": 0.8239607206109042,
      "learning_rate": 7.345035440056363e-06,
      "loss": 0.1742,
      "step": 12487
    },
    {
      "epoch": 0.3643153042767956,
      "grad_norm": 0.9195671088239025,
      "learning_rate": 7.34461817838682e-06,
      "loss": 0.1548,
      "step": 12488
    },
    {
      "epoch": 0.36434447750743915,
      "grad_norm": 0.820900638310747,
      "learning_rate": 7.344200895785083e-06,
      "loss": 0.1483,
      "step": 12489
    },
    {
      "epoch": 0.36437365073808275,
      "grad_norm": 0.9859117442782211,
      "learning_rate": 7.343783592254883e-06,
      "loss": 0.1403,
      "step": 12490
    },
    {
      "epoch": 0.3644028239687263,
      "grad_norm": 0.9502163373819177,
      "learning_rate": 7.3433662677999426e-06,
      "loss": 0.1631,
      "step": 12491
    },
    {
      "epoch": 0.36443199719936986,
      "grad_norm": 0.86215746008853,
      "learning_rate": 7.342948922423985e-06,
      "loss": 0.1289,
      "step": 12492
    },
    {
      "epoch": 0.3644611704300134,
      "grad_norm": 1.028385262362884,
      "learning_rate": 7.342531556130742e-06,
      "loss": 0.1184,
      "step": 12493
    },
    {
      "epoch": 0.36449034366065697,
      "grad_norm": 0.8530250849243711,
      "learning_rate": 7.342114168923935e-06,
      "loss": 0.1511,
      "step": 12494
    },
    {
      "epoch": 0.3645195168913005,
      "grad_norm": 0.9723990594033373,
      "learning_rate": 7.341696760807291e-06,
      "loss": 0.1756,
      "step": 12495
    },
    {
      "epoch": 0.36454869012194413,
      "grad_norm": 0.9925847724373023,
      "learning_rate": 7.341279331784539e-06,
      "loss": 0.1366,
      "step": 12496
    },
    {
      "epoch": 0.3645778633525877,
      "grad_norm": 1.0303219629002769,
      "learning_rate": 7.340861881859403e-06,
      "loss": 0.1519,
      "step": 12497
    },
    {
      "epoch": 0.36460703658323124,
      "grad_norm": 0.9082810844900719,
      "learning_rate": 7.34044441103561e-06,
      "loss": 0.1422,
      "step": 12498
    },
    {
      "epoch": 0.3646362098138748,
      "grad_norm": 0.927028182869787,
      "learning_rate": 7.340026919316889e-06,
      "loss": 0.1354,
      "step": 12499
    },
    {
      "epoch": 0.36466538304451834,
      "grad_norm": 0.7169585676932424,
      "learning_rate": 7.339609406706966e-06,
      "loss": 0.1121,
      "step": 12500
    },
    {
      "epoch": 0.3646945562751619,
      "grad_norm": 0.7099762171401988,
      "learning_rate": 7.339191873209569e-06,
      "loss": 0.1406,
      "step": 12501
    },
    {
      "epoch": 0.36472372950580545,
      "grad_norm": 0.8390462407599057,
      "learning_rate": 7.3387743188284255e-06,
      "loss": 0.165,
      "step": 12502
    },
    {
      "epoch": 0.36475290273644906,
      "grad_norm": 0.7523429675407453,
      "learning_rate": 7.338356743567264e-06,
      "loss": 0.1242,
      "step": 12503
    },
    {
      "epoch": 0.3647820759670926,
      "grad_norm": 0.9066002377405343,
      "learning_rate": 7.3379391474298085e-06,
      "loss": 0.1673,
      "step": 12504
    },
    {
      "epoch": 0.36481124919773616,
      "grad_norm": 1.3086197754302376,
      "learning_rate": 7.337521530419793e-06,
      "loss": 0.1544,
      "step": 12505
    },
    {
      "epoch": 0.3648404224283797,
      "grad_norm": 1.0820332839361442,
      "learning_rate": 7.337103892540945e-06,
      "loss": 0.1546,
      "step": 12506
    },
    {
      "epoch": 0.36486959565902327,
      "grad_norm": 0.7992437926444342,
      "learning_rate": 7.336686233796988e-06,
      "loss": 0.1554,
      "step": 12507
    },
    {
      "epoch": 0.3648987688896668,
      "grad_norm": 0.9363009022825539,
      "learning_rate": 7.336268554191657e-06,
      "loss": 0.118,
      "step": 12508
    },
    {
      "epoch": 0.3649279421203104,
      "grad_norm": 1.084394193415658,
      "learning_rate": 7.335850853728675e-06,
      "loss": 0.1483,
      "step": 12509
    },
    {
      "epoch": 0.364957115350954,
      "grad_norm": 0.9791888893527011,
      "learning_rate": 7.335433132411775e-06,
      "loss": 0.1494,
      "step": 12510
    },
    {
      "epoch": 0.36498628858159754,
      "grad_norm": 0.7291919732518494,
      "learning_rate": 7.335015390244688e-06,
      "loss": 0.1357,
      "step": 12511
    },
    {
      "epoch": 0.3650154618122411,
      "grad_norm": 0.8973636101942085,
      "learning_rate": 7.334597627231138e-06,
      "loss": 0.1765,
      "step": 12512
    },
    {
      "epoch": 0.36504463504288465,
      "grad_norm": 0.8235104568007249,
      "learning_rate": 7.334179843374859e-06,
      "loss": 0.1397,
      "step": 12513
    },
    {
      "epoch": 0.3650738082735282,
      "grad_norm": 0.8596961124766973,
      "learning_rate": 7.333762038679579e-06,
      "loss": 0.1459,
      "step": 12514
    },
    {
      "epoch": 0.36510298150417175,
      "grad_norm": 0.8554809964876028,
      "learning_rate": 7.3333442131490294e-06,
      "loss": 0.1492,
      "step": 12515
    },
    {
      "epoch": 0.3651321547348153,
      "grad_norm": 0.8191381647651743,
      "learning_rate": 7.332926366786939e-06,
      "loss": 0.1575,
      "step": 12516
    },
    {
      "epoch": 0.3651613279654589,
      "grad_norm": 0.9718655156205231,
      "learning_rate": 7.33250849959704e-06,
      "loss": 0.1343,
      "step": 12517
    },
    {
      "epoch": 0.36519050119610247,
      "grad_norm": 0.8546476213944995,
      "learning_rate": 7.3320906115830615e-06,
      "loss": 0.1414,
      "step": 12518
    },
    {
      "epoch": 0.365219674426746,
      "grad_norm": 1.244414597345418,
      "learning_rate": 7.331672702748733e-06,
      "loss": 0.1709,
      "step": 12519
    },
    {
      "epoch": 0.3652488476573896,
      "grad_norm": 0.6982264029886699,
      "learning_rate": 7.331254773097789e-06,
      "loss": 0.1346,
      "step": 12520
    },
    {
      "epoch": 0.36527802088803313,
      "grad_norm": 0.9942275033912196,
      "learning_rate": 7.33083682263396e-06,
      "loss": 0.1182,
      "step": 12521
    },
    {
      "epoch": 0.3653071941186767,
      "grad_norm": 0.8936472742380306,
      "learning_rate": 7.330418851360974e-06,
      "loss": 0.1383,
      "step": 12522
    },
    {
      "epoch": 0.3653363673493203,
      "grad_norm": 0.7166159321473673,
      "learning_rate": 7.330000859282567e-06,
      "loss": 0.1462,
      "step": 12523
    },
    {
      "epoch": 0.36536554057996384,
      "grad_norm": 0.9633409343417314,
      "learning_rate": 7.329582846402467e-06,
      "loss": 0.1607,
      "step": 12524
    },
    {
      "epoch": 0.3653947138106074,
      "grad_norm": 1.1610880501000653,
      "learning_rate": 7.329164812724405e-06,
      "loss": 0.1804,
      "step": 12525
    },
    {
      "epoch": 0.36542388704125095,
      "grad_norm": 0.5569456887478516,
      "learning_rate": 7.32874675825212e-06,
      "loss": 0.1408,
      "step": 12526
    },
    {
      "epoch": 0.3654530602718945,
      "grad_norm": 1.125308199847849,
      "learning_rate": 7.328328682989338e-06,
      "loss": 0.1547,
      "step": 12527
    },
    {
      "epoch": 0.36548223350253806,
      "grad_norm": 1.0911837240978715,
      "learning_rate": 7.327910586939794e-06,
      "loss": 0.1535,
      "step": 12528
    },
    {
      "epoch": 0.3655114067331816,
      "grad_norm": 0.6655995335581377,
      "learning_rate": 7.327492470107218e-06,
      "loss": 0.1287,
      "step": 12529
    },
    {
      "epoch": 0.3655405799638252,
      "grad_norm": 0.8633033130423697,
      "learning_rate": 7.327074332495348e-06,
      "loss": 0.138,
      "step": 12530
    },
    {
      "epoch": 0.36556975319446877,
      "grad_norm": 1.029517089838608,
      "learning_rate": 7.326656174107911e-06,
      "loss": 0.1588,
      "step": 12531
    },
    {
      "epoch": 0.3655989264251123,
      "grad_norm": 0.8222599758116758,
      "learning_rate": 7.326237994948644e-06,
      "loss": 0.1316,
      "step": 12532
    },
    {
      "epoch": 0.3656280996557559,
      "grad_norm": 0.815122704375594,
      "learning_rate": 7.325819795021281e-06,
      "loss": 0.1559,
      "step": 12533
    },
    {
      "epoch": 0.36565727288639943,
      "grad_norm": 0.9597640113146232,
      "learning_rate": 7.325401574329551e-06,
      "loss": 0.1494,
      "step": 12534
    },
    {
      "epoch": 0.365686446117043,
      "grad_norm": 0.8239063456490182,
      "learning_rate": 7.3249833328771935e-06,
      "loss": 0.151,
      "step": 12535
    },
    {
      "epoch": 0.36571561934768654,
      "grad_norm": 0.9384049183735227,
      "learning_rate": 7.3245650706679395e-06,
      "loss": 0.1573,
      "step": 12536
    },
    {
      "epoch": 0.36574479257833015,
      "grad_norm": 0.836803788745504,
      "learning_rate": 7.324146787705522e-06,
      "loss": 0.1367,
      "step": 12537
    },
    {
      "epoch": 0.3657739658089737,
      "grad_norm": 0.9357429364670283,
      "learning_rate": 7.323728483993678e-06,
      "loss": 0.1714,
      "step": 12538
    },
    {
      "epoch": 0.36580313903961725,
      "grad_norm": 0.7367031054890589,
      "learning_rate": 7.323310159536141e-06,
      "loss": 0.1551,
      "step": 12539
    },
    {
      "epoch": 0.3658323122702608,
      "grad_norm": 0.8075364455756667,
      "learning_rate": 7.322891814336645e-06,
      "loss": 0.1333,
      "step": 12540
    },
    {
      "epoch": 0.36586148550090436,
      "grad_norm": 0.833073740749177,
      "learning_rate": 7.3224734483989254e-06,
      "loss": 0.1683,
      "step": 12541
    },
    {
      "epoch": 0.3658906587315479,
      "grad_norm": 0.871719146026141,
      "learning_rate": 7.322055061726717e-06,
      "loss": 0.1647,
      "step": 12542
    },
    {
      "epoch": 0.36591983196219147,
      "grad_norm": 0.9603750499039191,
      "learning_rate": 7.321636654323756e-06,
      "loss": 0.1398,
      "step": 12543
    },
    {
      "epoch": 0.3659490051928351,
      "grad_norm": 0.8743735121129615,
      "learning_rate": 7.321218226193777e-06,
      "loss": 0.1707,
      "step": 12544
    },
    {
      "epoch": 0.36597817842347863,
      "grad_norm": 0.9361800895713882,
      "learning_rate": 7.320799777340516e-06,
      "loss": 0.1939,
      "step": 12545
    },
    {
      "epoch": 0.3660073516541222,
      "grad_norm": 1.4397172012087283,
      "learning_rate": 7.320381307767708e-06,
      "loss": 0.1726,
      "step": 12546
    },
    {
      "epoch": 0.36603652488476573,
      "grad_norm": 0.7590116649866849,
      "learning_rate": 7.319962817479089e-06,
      "loss": 0.134,
      "step": 12547
    },
    {
      "epoch": 0.3660656981154093,
      "grad_norm": 0.9971170486586559,
      "learning_rate": 7.319544306478398e-06,
      "loss": 0.139,
      "step": 12548
    },
    {
      "epoch": 0.36609487134605284,
      "grad_norm": 1.0121016816886785,
      "learning_rate": 7.3191257747693664e-06,
      "loss": 0.1427,
      "step": 12549
    },
    {
      "epoch": 0.36612404457669645,
      "grad_norm": 1.0037128635150039,
      "learning_rate": 7.318707222355735e-06,
      "loss": 0.1373,
      "step": 12550
    },
    {
      "epoch": 0.36615321780734,
      "grad_norm": 1.1971441068620639,
      "learning_rate": 7.318288649241241e-06,
      "loss": 0.1292,
      "step": 12551
    },
    {
      "epoch": 0.36618239103798356,
      "grad_norm": 0.8520980034299164,
      "learning_rate": 7.317870055429615e-06,
      "loss": 0.1645,
      "step": 12552
    },
    {
      "epoch": 0.3662115642686271,
      "grad_norm": 0.8578629480944141,
      "learning_rate": 7.317451440924602e-06,
      "loss": 0.1567,
      "step": 12553
    },
    {
      "epoch": 0.36624073749927066,
      "grad_norm": 0.8119415694644823,
      "learning_rate": 7.317032805729935e-06,
      "loss": 0.1311,
      "step": 12554
    },
    {
      "epoch": 0.3662699107299142,
      "grad_norm": 1.067426569165156,
      "learning_rate": 7.31661414984935e-06,
      "loss": 0.1482,
      "step": 12555
    },
    {
      "epoch": 0.36629908396055777,
      "grad_norm": 0.7879180641463571,
      "learning_rate": 7.3161954732865906e-06,
      "loss": 0.1483,
      "step": 12556
    },
    {
      "epoch": 0.3663282571912014,
      "grad_norm": 0.8850431311253087,
      "learning_rate": 7.315776776045388e-06,
      "loss": 0.1277,
      "step": 12557
    },
    {
      "epoch": 0.36635743042184493,
      "grad_norm": 0.7589116034713146,
      "learning_rate": 7.315358058129485e-06,
      "loss": 0.1464,
      "step": 12558
    },
    {
      "epoch": 0.3663866036524885,
      "grad_norm": 0.8775888047374377,
      "learning_rate": 7.314939319542617e-06,
      "loss": 0.1578,
      "step": 12559
    },
    {
      "epoch": 0.36641577688313204,
      "grad_norm": 0.9138593029972256,
      "learning_rate": 7.314520560288522e-06,
      "loss": 0.1452,
      "step": 12560
    },
    {
      "epoch": 0.3664449501137756,
      "grad_norm": 0.9309638777077405,
      "learning_rate": 7.314101780370942e-06,
      "loss": 0.1611,
      "step": 12561
    },
    {
      "epoch": 0.36647412334441914,
      "grad_norm": 0.8200125293243199,
      "learning_rate": 7.313682979793614e-06,
      "loss": 0.1687,
      "step": 12562
    },
    {
      "epoch": 0.3665032965750627,
      "grad_norm": 0.7819398768318264,
      "learning_rate": 7.313264158560276e-06,
      "loss": 0.1602,
      "step": 12563
    },
    {
      "epoch": 0.3665324698057063,
      "grad_norm": 0.960687783706917,
      "learning_rate": 7.312845316674667e-06,
      "loss": 0.1535,
      "step": 12564
    },
    {
      "epoch": 0.36656164303634986,
      "grad_norm": 0.806436860397053,
      "learning_rate": 7.312426454140528e-06,
      "loss": 0.1455,
      "step": 12565
    },
    {
      "epoch": 0.3665908162669934,
      "grad_norm": 0.8037049708673916,
      "learning_rate": 7.312007570961598e-06,
      "loss": 0.1467,
      "step": 12566
    },
    {
      "epoch": 0.36661998949763697,
      "grad_norm": 0.8475448014865734,
      "learning_rate": 7.311588667141615e-06,
      "loss": 0.1775,
      "step": 12567
    },
    {
      "epoch": 0.3666491627282805,
      "grad_norm": 0.8330063194912772,
      "learning_rate": 7.311169742684321e-06,
      "loss": 0.1507,
      "step": 12568
    },
    {
      "epoch": 0.3666783359589241,
      "grad_norm": 0.9139978268192165,
      "learning_rate": 7.3107507975934555e-06,
      "loss": 0.1654,
      "step": 12569
    },
    {
      "epoch": 0.3667075091895676,
      "grad_norm": 0.7029375873164558,
      "learning_rate": 7.3103318318727566e-06,
      "loss": 0.123,
      "step": 12570
    },
    {
      "epoch": 0.36673668242021124,
      "grad_norm": 0.7456527961582567,
      "learning_rate": 7.30991284552597e-06,
      "loss": 0.1627,
      "step": 12571
    },
    {
      "epoch": 0.3667658556508548,
      "grad_norm": 0.8667322478230863,
      "learning_rate": 7.309493838556832e-06,
      "loss": 0.1414,
      "step": 12572
    },
    {
      "epoch": 0.36679502888149834,
      "grad_norm": 0.9750652424929281,
      "learning_rate": 7.309074810969083e-06,
      "loss": 0.1484,
      "step": 12573
    },
    {
      "epoch": 0.3668242021121419,
      "grad_norm": 0.751521085709621,
      "learning_rate": 7.308655762766466e-06,
      "loss": 0.1632,
      "step": 12574
    },
    {
      "epoch": 0.36685337534278545,
      "grad_norm": 0.8418233374470161,
      "learning_rate": 7.30823669395272e-06,
      "loss": 0.114,
      "step": 12575
    },
    {
      "epoch": 0.366882548573429,
      "grad_norm": 0.8811982961030276,
      "learning_rate": 7.30781760453159e-06,
      "loss": 0.1416,
      "step": 12576
    },
    {
      "epoch": 0.3669117218040726,
      "grad_norm": 1.04891068844708,
      "learning_rate": 7.307398494506814e-06,
      "loss": 0.1682,
      "step": 12577
    },
    {
      "epoch": 0.36694089503471616,
      "grad_norm": 1.0826757853124749,
      "learning_rate": 7.306979363882136e-06,
      "loss": 0.1396,
      "step": 12578
    },
    {
      "epoch": 0.3669700682653597,
      "grad_norm": 0.7940050144481171,
      "learning_rate": 7.306560212661295e-06,
      "loss": 0.1379,
      "step": 12579
    },
    {
      "epoch": 0.36699924149600327,
      "grad_norm": 0.6952751578376332,
      "learning_rate": 7.306141040848037e-06,
      "loss": 0.1508,
      "step": 12580
    },
    {
      "epoch": 0.3670284147266468,
      "grad_norm": 0.9643883866133751,
      "learning_rate": 7.305721848446103e-06,
      "loss": 0.1702,
      "step": 12581
    },
    {
      "epoch": 0.3670575879572904,
      "grad_norm": 0.7867314299628712,
      "learning_rate": 7.305302635459233e-06,
      "loss": 0.1437,
      "step": 12582
    },
    {
      "epoch": 0.36708676118793393,
      "grad_norm": 1.0084647801481508,
      "learning_rate": 7.304883401891173e-06,
      "loss": 0.1812,
      "step": 12583
    },
    {
      "epoch": 0.36711593441857754,
      "grad_norm": 0.8342827322264076,
      "learning_rate": 7.304464147745662e-06,
      "loss": 0.1561,
      "step": 12584
    },
    {
      "epoch": 0.3671451076492211,
      "grad_norm": 0.8581378104788102,
      "learning_rate": 7.3040448730264455e-06,
      "loss": 0.1453,
      "step": 12585
    },
    {
      "epoch": 0.36717428087986465,
      "grad_norm": 0.9456648386883989,
      "learning_rate": 7.303625577737269e-06,
      "loss": 0.1307,
      "step": 12586
    },
    {
      "epoch": 0.3672034541105082,
      "grad_norm": 0.84759138025627,
      "learning_rate": 7.303206261881871e-06,
      "loss": 0.1276,
      "step": 12587
    },
    {
      "epoch": 0.36723262734115175,
      "grad_norm": 0.6764654554263654,
      "learning_rate": 7.302786925463998e-06,
      "loss": 0.1237,
      "step": 12588
    },
    {
      "epoch": 0.3672618005717953,
      "grad_norm": 0.8086057539763226,
      "learning_rate": 7.302367568487393e-06,
      "loss": 0.1582,
      "step": 12589
    },
    {
      "epoch": 0.36729097380243886,
      "grad_norm": 1.0471987049130558,
      "learning_rate": 7.3019481909558e-06,
      "loss": 0.1749,
      "step": 12590
    },
    {
      "epoch": 0.36732014703308247,
      "grad_norm": 0.7825322907667858,
      "learning_rate": 7.301528792872963e-06,
      "loss": 0.1754,
      "step": 12591
    },
    {
      "epoch": 0.367349320263726,
      "grad_norm": 0.808594072427891,
      "learning_rate": 7.301109374242626e-06,
      "loss": 0.1592,
      "step": 12592
    },
    {
      "epoch": 0.3673784934943696,
      "grad_norm": 0.7378181528008292,
      "learning_rate": 7.300689935068534e-06,
      "loss": 0.1381,
      "step": 12593
    },
    {
      "epoch": 0.3674076667250131,
      "grad_norm": 1.1631232788413999,
      "learning_rate": 7.3002704753544316e-06,
      "loss": 0.1713,
      "step": 12594
    },
    {
      "epoch": 0.3674368399556567,
      "grad_norm": 0.7272131576528931,
      "learning_rate": 7.299850995104063e-06,
      "loss": 0.1346,
      "step": 12595
    },
    {
      "epoch": 0.36746601318630023,
      "grad_norm": 0.7224715044228271,
      "learning_rate": 7.2994314943211755e-06,
      "loss": 0.1414,
      "step": 12596
    },
    {
      "epoch": 0.3674951864169438,
      "grad_norm": 0.84799081098376,
      "learning_rate": 7.299011973009511e-06,
      "loss": 0.145,
      "step": 12597
    },
    {
      "epoch": 0.3675243596475874,
      "grad_norm": 1.0170357671998496,
      "learning_rate": 7.298592431172818e-06,
      "loss": 0.1574,
      "step": 12598
    },
    {
      "epoch": 0.36755353287823095,
      "grad_norm": 0.8404918700923496,
      "learning_rate": 7.2981728688148365e-06,
      "loss": 0.1352,
      "step": 12599
    },
    {
      "epoch": 0.3675827061088745,
      "grad_norm": 0.8144727261788373,
      "learning_rate": 7.297753285939319e-06,
      "loss": 0.1271,
      "step": 12600
    },
    {
      "epoch": 0.36761187933951806,
      "grad_norm": 1.0073862448209447,
      "learning_rate": 7.297333682550009e-06,
      "loss": 0.1326,
      "step": 12601
    },
    {
      "epoch": 0.3676410525701616,
      "grad_norm": 1.1080962498161437,
      "learning_rate": 7.296914058650653e-06,
      "loss": 0.1415,
      "step": 12602
    },
    {
      "epoch": 0.36767022580080516,
      "grad_norm": 0.7716627843074678,
      "learning_rate": 7.296494414244996e-06,
      "loss": 0.1668,
      "step": 12603
    },
    {
      "epoch": 0.36769939903144877,
      "grad_norm": 0.9608901366792665,
      "learning_rate": 7.296074749336785e-06,
      "loss": 0.1642,
      "step": 12604
    },
    {
      "epoch": 0.3677285722620923,
      "grad_norm": 0.7199984408702625,
      "learning_rate": 7.295655063929765e-06,
      "loss": 0.1473,
      "step": 12605
    },
    {
      "epoch": 0.3677577454927359,
      "grad_norm": 0.8288821214606242,
      "learning_rate": 7.295235358027686e-06,
      "loss": 0.1569,
      "step": 12606
    },
    {
      "epoch": 0.36778691872337943,
      "grad_norm": 0.8112232057577572,
      "learning_rate": 7.294815631634294e-06,
      "loss": 0.1401,
      "step": 12607
    },
    {
      "epoch": 0.367816091954023,
      "grad_norm": 0.654042417879684,
      "learning_rate": 7.294395884753336e-06,
      "loss": 0.1356,
      "step": 12608
    },
    {
      "epoch": 0.36784526518466654,
      "grad_norm": 0.6115312187425573,
      "learning_rate": 7.293976117388558e-06,
      "loss": 0.1169,
      "step": 12609
    },
    {
      "epoch": 0.3678744384153101,
      "grad_norm": 0.8874601262272724,
      "learning_rate": 7.29355632954371e-06,
      "loss": 0.1531,
      "step": 12610
    },
    {
      "epoch": 0.3679036116459537,
      "grad_norm": 0.7446788851902356,
      "learning_rate": 7.293136521222538e-06,
      "loss": 0.1137,
      "step": 12611
    },
    {
      "epoch": 0.36793278487659725,
      "grad_norm": 0.7548866430434173,
      "learning_rate": 7.292716692428791e-06,
      "loss": 0.1368,
      "step": 12612
    },
    {
      "epoch": 0.3679619581072408,
      "grad_norm": 0.7365736692314704,
      "learning_rate": 7.292296843166217e-06,
      "loss": 0.1469,
      "step": 12613
    },
    {
      "epoch": 0.36799113133788436,
      "grad_norm": 0.8264816518176756,
      "learning_rate": 7.291876973438562e-06,
      "loss": 0.1535,
      "step": 12614
    },
    {
      "epoch": 0.3680203045685279,
      "grad_norm": 0.7813558982894001,
      "learning_rate": 7.291457083249578e-06,
      "loss": 0.1689,
      "step": 12615
    },
    {
      "epoch": 0.36804947779917146,
      "grad_norm": 0.8491973925937982,
      "learning_rate": 7.291037172603013e-06,
      "loss": 0.1541,
      "step": 12616
    },
    {
      "epoch": 0.368078651029815,
      "grad_norm": 0.7364897979429192,
      "learning_rate": 7.2906172415026136e-06,
      "loss": 0.1657,
      "step": 12617
    },
    {
      "epoch": 0.3681078242604586,
      "grad_norm": 0.7998676568471853,
      "learning_rate": 7.290197289952131e-06,
      "loss": 0.1297,
      "step": 12618
    },
    {
      "epoch": 0.3681369974911022,
      "grad_norm": 0.828428368684619,
      "learning_rate": 7.289777317955313e-06,
      "loss": 0.1625,
      "step": 12619
    },
    {
      "epoch": 0.36816617072174573,
      "grad_norm": 0.855784368364791,
      "learning_rate": 7.289357325515911e-06,
      "loss": 0.1485,
      "step": 12620
    },
    {
      "epoch": 0.3681953439523893,
      "grad_norm": 0.8547418600021226,
      "learning_rate": 7.288937312637673e-06,
      "loss": 0.1478,
      "step": 12621
    },
    {
      "epoch": 0.36822451718303284,
      "grad_norm": 0.964073973781457,
      "learning_rate": 7.288517279324349e-06,
      "loss": 0.1301,
      "step": 12622
    },
    {
      "epoch": 0.3682536904136764,
      "grad_norm": 0.8274583125272957,
      "learning_rate": 7.2880972255796875e-06,
      "loss": 0.1701,
      "step": 12623
    },
    {
      "epoch": 0.36828286364431995,
      "grad_norm": 0.982394475670459,
      "learning_rate": 7.287677151407442e-06,
      "loss": 0.1423,
      "step": 12624
    },
    {
      "epoch": 0.36831203687496356,
      "grad_norm": 0.787345466973106,
      "learning_rate": 7.28725705681136e-06,
      "loss": 0.1225,
      "step": 12625
    },
    {
      "epoch": 0.3683412101056071,
      "grad_norm": 0.9727213667441302,
      "learning_rate": 7.286836941795193e-06,
      "loss": 0.1431,
      "step": 12626
    },
    {
      "epoch": 0.36837038333625066,
      "grad_norm": 0.9185978711724994,
      "learning_rate": 7.286416806362693e-06,
      "loss": 0.1422,
      "step": 12627
    },
    {
      "epoch": 0.3683995565668942,
      "grad_norm": 0.8118258793856865,
      "learning_rate": 7.285996650517608e-06,
      "loss": 0.1366,
      "step": 12628
    },
    {
      "epoch": 0.36842872979753777,
      "grad_norm": 1.1994577940098725,
      "learning_rate": 7.285576474263692e-06,
      "loss": 0.1542,
      "step": 12629
    },
    {
      "epoch": 0.3684579030281813,
      "grad_norm": 1.0416878269668777,
      "learning_rate": 7.285156277604693e-06,
      "loss": 0.1546,
      "step": 12630
    },
    {
      "epoch": 0.3684870762588249,
      "grad_norm": 1.0730921371275317,
      "learning_rate": 7.284736060544366e-06,
      "loss": 0.1426,
      "step": 12631
    },
    {
      "epoch": 0.3685162494894685,
      "grad_norm": 0.8199071874414507,
      "learning_rate": 7.284315823086459e-06,
      "loss": 0.1414,
      "step": 12632
    },
    {
      "epoch": 0.36854542272011204,
      "grad_norm": 0.9402127848174042,
      "learning_rate": 7.283895565234729e-06,
      "loss": 0.133,
      "step": 12633
    },
    {
      "epoch": 0.3685745959507556,
      "grad_norm": 1.0920864126210375,
      "learning_rate": 7.283475286992923e-06,
      "loss": 0.1707,
      "step": 12634
    },
    {
      "epoch": 0.36860376918139914,
      "grad_norm": 0.8404151566503486,
      "learning_rate": 7.283054988364793e-06,
      "loss": 0.1367,
      "step": 12635
    },
    {
      "epoch": 0.3686329424120427,
      "grad_norm": 0.7585589900651192,
      "learning_rate": 7.282634669354094e-06,
      "loss": 0.134,
      "step": 12636
    },
    {
      "epoch": 0.36866211564268625,
      "grad_norm": 0.9278302596057294,
      "learning_rate": 7.282214329964578e-06,
      "loss": 0.1346,
      "step": 12637
    },
    {
      "epoch": 0.36869128887332986,
      "grad_norm": 0.7470972843530774,
      "learning_rate": 7.2817939701999974e-06,
      "loss": 0.1634,
      "step": 12638
    },
    {
      "epoch": 0.3687204621039734,
      "grad_norm": 0.8306930217254754,
      "learning_rate": 7.281373590064105e-06,
      "loss": 0.1637,
      "step": 12639
    },
    {
      "epoch": 0.36874963533461697,
      "grad_norm": 0.9285373974943016,
      "learning_rate": 7.280953189560653e-06,
      "loss": 0.1651,
      "step": 12640
    },
    {
      "epoch": 0.3687788085652605,
      "grad_norm": 0.9310102750005027,
      "learning_rate": 7.280532768693396e-06,
      "loss": 0.1332,
      "step": 12641
    },
    {
      "epoch": 0.36880798179590407,
      "grad_norm": 0.786755285060495,
      "learning_rate": 7.280112327466087e-06,
      "loss": 0.1651,
      "step": 12642
    },
    {
      "epoch": 0.3688371550265476,
      "grad_norm": 0.9027386530738184,
      "learning_rate": 7.27969186588248e-06,
      "loss": 0.1301,
      "step": 12643
    },
    {
      "epoch": 0.3688663282571912,
      "grad_norm": 0.8144305451701089,
      "learning_rate": 7.2792713839463255e-06,
      "loss": 0.1395,
      "step": 12644
    },
    {
      "epoch": 0.3688955014878348,
      "grad_norm": 1.0296444669740603,
      "learning_rate": 7.2788508816613836e-06,
      "loss": 0.1436,
      "step": 12645
    },
    {
      "epoch": 0.36892467471847834,
      "grad_norm": 0.9730056098013248,
      "learning_rate": 7.278430359031403e-06,
      "loss": 0.1472,
      "step": 12646
    },
    {
      "epoch": 0.3689538479491219,
      "grad_norm": 0.7994043618478935,
      "learning_rate": 7.278009816060141e-06,
      "loss": 0.138,
      "step": 12647
    },
    {
      "epoch": 0.36898302117976545,
      "grad_norm": 0.9740903092945568,
      "learning_rate": 7.277589252751351e-06,
      "loss": 0.1426,
      "step": 12648
    },
    {
      "epoch": 0.369012194410409,
      "grad_norm": 0.9514339440379055,
      "learning_rate": 7.277168669108787e-06,
      "loss": 0.1581,
      "step": 12649
    },
    {
      "epoch": 0.36904136764105255,
      "grad_norm": 0.8376829055759069,
      "learning_rate": 7.276748065136206e-06,
      "loss": 0.1339,
      "step": 12650
    },
    {
      "epoch": 0.3690705408716961,
      "grad_norm": 0.6932765483000005,
      "learning_rate": 7.27632744083736e-06,
      "loss": 0.1326,
      "step": 12651
    },
    {
      "epoch": 0.3690997141023397,
      "grad_norm": 1.0511895405972749,
      "learning_rate": 7.2759067962160075e-06,
      "loss": 0.1264,
      "step": 12652
    },
    {
      "epoch": 0.36912888733298327,
      "grad_norm": 1.0100299524399472,
      "learning_rate": 7.275486131275903e-06,
      "loss": 0.1458,
      "step": 12653
    },
    {
      "epoch": 0.3691580605636268,
      "grad_norm": 0.9109570724650197,
      "learning_rate": 7.2750654460208e-06,
      "loss": 0.1526,
      "step": 12654
    },
    {
      "epoch": 0.3691872337942704,
      "grad_norm": 0.8927953356704228,
      "learning_rate": 7.274644740454458e-06,
      "loss": 0.1385,
      "step": 12655
    },
    {
      "epoch": 0.36921640702491393,
      "grad_norm": 0.8107226697996354,
      "learning_rate": 7.274224014580627e-06,
      "loss": 0.15,
      "step": 12656
    },
    {
      "epoch": 0.3692455802555575,
      "grad_norm": 0.7177052389467927,
      "learning_rate": 7.27380326840307e-06,
      "loss": 0.1292,
      "step": 12657
    },
    {
      "epoch": 0.36927475348620104,
      "grad_norm": 0.8994355904918047,
      "learning_rate": 7.27338250192554e-06,
      "loss": 0.1596,
      "step": 12658
    },
    {
      "epoch": 0.36930392671684464,
      "grad_norm": 0.7671412830821046,
      "learning_rate": 7.2729617151517915e-06,
      "loss": 0.1586,
      "step": 12659
    },
    {
      "epoch": 0.3693330999474882,
      "grad_norm": 0.7200823778342188,
      "learning_rate": 7.272540908085586e-06,
      "loss": 0.1628,
      "step": 12660
    },
    {
      "epoch": 0.36936227317813175,
      "grad_norm": 0.932197094073567,
      "learning_rate": 7.272120080730677e-06,
      "loss": 0.1355,
      "step": 12661
    },
    {
      "epoch": 0.3693914464087753,
      "grad_norm": 0.7493755613652879,
      "learning_rate": 7.271699233090821e-06,
      "loss": 0.1338,
      "step": 12662
    },
    {
      "epoch": 0.36942061963941886,
      "grad_norm": 0.8691183903945077,
      "learning_rate": 7.271278365169778e-06,
      "loss": 0.1663,
      "step": 12663
    },
    {
      "epoch": 0.3694497928700624,
      "grad_norm": 0.7487734779311649,
      "learning_rate": 7.270857476971303e-06,
      "loss": 0.1309,
      "step": 12664
    },
    {
      "epoch": 0.369478966100706,
      "grad_norm": 0.6663862528649757,
      "learning_rate": 7.270436568499156e-06,
      "loss": 0.1271,
      "step": 12665
    },
    {
      "epoch": 0.36950813933134957,
      "grad_norm": 0.8112441244218074,
      "learning_rate": 7.270015639757092e-06,
      "loss": 0.1346,
      "step": 12666
    },
    {
      "epoch": 0.3695373125619931,
      "grad_norm": 3.906884028660531,
      "learning_rate": 7.269594690748871e-06,
      "loss": 0.1395,
      "step": 12667
    },
    {
      "epoch": 0.3695664857926367,
      "grad_norm": 0.9087683544262717,
      "learning_rate": 7.26917372147825e-06,
      "loss": 0.1533,
      "step": 12668
    },
    {
      "epoch": 0.36959565902328023,
      "grad_norm": 0.7699315262554577,
      "learning_rate": 7.268752731948987e-06,
      "loss": 0.1395,
      "step": 12669
    },
    {
      "epoch": 0.3696248322539238,
      "grad_norm": 0.8308879806137981,
      "learning_rate": 7.268331722164843e-06,
      "loss": 0.1527,
      "step": 12670
    },
    {
      "epoch": 0.36965400548456734,
      "grad_norm": 0.7691284472237025,
      "learning_rate": 7.267910692129574e-06,
      "loss": 0.1404,
      "step": 12671
    },
    {
      "epoch": 0.36968317871521095,
      "grad_norm": 0.7064038777247387,
      "learning_rate": 7.267489641846938e-06,
      "loss": 0.1552,
      "step": 12672
    },
    {
      "epoch": 0.3697123519458545,
      "grad_norm": 0.8410894472372811,
      "learning_rate": 7.267068571320699e-06,
      "loss": 0.1534,
      "step": 12673
    },
    {
      "epoch": 0.36974152517649805,
      "grad_norm": 0.954164419387724,
      "learning_rate": 7.26664748055461e-06,
      "loss": 0.1376,
      "step": 12674
    },
    {
      "epoch": 0.3697706984071416,
      "grad_norm": 1.1417621661994168,
      "learning_rate": 7.266226369552436e-06,
      "loss": 0.154,
      "step": 12675
    },
    {
      "epoch": 0.36979987163778516,
      "grad_norm": 0.6217955345989012,
      "learning_rate": 7.265805238317933e-06,
      "loss": 0.1463,
      "step": 12676
    },
    {
      "epoch": 0.3698290448684287,
      "grad_norm": 0.8034245069093413,
      "learning_rate": 7.2653840868548595e-06,
      "loss": 0.1723,
      "step": 12677
    },
    {
      "epoch": 0.36985821809907227,
      "grad_norm": 0.8025368610914108,
      "learning_rate": 7.264962915166981e-06,
      "loss": 0.1395,
      "step": 12678
    },
    {
      "epoch": 0.3698873913297159,
      "grad_norm": 0.8857000147703661,
      "learning_rate": 7.264541723258053e-06,
      "loss": 0.1656,
      "step": 12679
    },
    {
      "epoch": 0.36991656456035943,
      "grad_norm": 0.7973792407931395,
      "learning_rate": 7.264120511131837e-06,
      "loss": 0.1508,
      "step": 12680
    },
    {
      "epoch": 0.369945737791003,
      "grad_norm": 0.8282811444616076,
      "learning_rate": 7.263699278792093e-06,
      "loss": 0.148,
      "step": 12681
    },
    {
      "epoch": 0.36997491102164654,
      "grad_norm": 0.9754627730566209,
      "learning_rate": 7.263278026242583e-06,
      "loss": 0.1383,
      "step": 12682
    },
    {
      "epoch": 0.3700040842522901,
      "grad_norm": 0.8821581951445242,
      "learning_rate": 7.2628567534870665e-06,
      "loss": 0.1785,
      "step": 12683
    },
    {
      "epoch": 0.37003325748293364,
      "grad_norm": 0.8014611317129382,
      "learning_rate": 7.2624354605293045e-06,
      "loss": 0.1419,
      "step": 12684
    },
    {
      "epoch": 0.3700624307135772,
      "grad_norm": 0.928112475567267,
      "learning_rate": 7.26201414737306e-06,
      "loss": 0.174,
      "step": 12685
    },
    {
      "epoch": 0.3700916039442208,
      "grad_norm": 0.9527295221348488,
      "learning_rate": 7.261592814022094e-06,
      "loss": 0.1672,
      "step": 12686
    },
    {
      "epoch": 0.37012077717486436,
      "grad_norm": 0.8100111062669616,
      "learning_rate": 7.2611714604801655e-06,
      "loss": 0.1641,
      "step": 12687
    },
    {
      "epoch": 0.3701499504055079,
      "grad_norm": 1.088862419658833,
      "learning_rate": 7.260750086751039e-06,
      "loss": 0.1624,
      "step": 12688
    },
    {
      "epoch": 0.37017912363615146,
      "grad_norm": 0.8741832877252597,
      "learning_rate": 7.260328692838475e-06,
      "loss": 0.1674,
      "step": 12689
    },
    {
      "epoch": 0.370208296866795,
      "grad_norm": 0.8113410708118879,
      "learning_rate": 7.259907278746237e-06,
      "loss": 0.163,
      "step": 12690
    },
    {
      "epoch": 0.37023747009743857,
      "grad_norm": 0.6559126878198583,
      "learning_rate": 7.2594858444780845e-06,
      "loss": 0.1351,
      "step": 12691
    },
    {
      "epoch": 0.3702666433280822,
      "grad_norm": 1.057230150043581,
      "learning_rate": 7.259064390037781e-06,
      "loss": 0.1363,
      "step": 12692
    },
    {
      "epoch": 0.37029581655872573,
      "grad_norm": 0.8382173020004834,
      "learning_rate": 7.258642915429093e-06,
      "loss": 0.1516,
      "step": 12693
    },
    {
      "epoch": 0.3703249897893693,
      "grad_norm": 0.8541340696797689,
      "learning_rate": 7.258221420655778e-06,
      "loss": 0.1599,
      "step": 12694
    },
    {
      "epoch": 0.37035416302001284,
      "grad_norm": 0.9839161636709558,
      "learning_rate": 7.257799905721602e-06,
      "loss": 0.1588,
      "step": 12695
    },
    {
      "epoch": 0.3703833362506564,
      "grad_norm": 0.8469433734573926,
      "learning_rate": 7.257378370630328e-06,
      "loss": 0.143,
      "step": 12696
    },
    {
      "epoch": 0.37041250948129995,
      "grad_norm": 0.813809899925821,
      "learning_rate": 7.256956815385718e-06,
      "loss": 0.155,
      "step": 12697
    },
    {
      "epoch": 0.3704416827119435,
      "grad_norm": 0.9687633725390876,
      "learning_rate": 7.2565352399915354e-06,
      "loss": 0.1422,
      "step": 12698
    },
    {
      "epoch": 0.3704708559425871,
      "grad_norm": 0.8074508913582549,
      "learning_rate": 7.256113644451547e-06,
      "loss": 0.1287,
      "step": 12699
    },
    {
      "epoch": 0.37050002917323066,
      "grad_norm": 0.9016226971122039,
      "learning_rate": 7.2556920287695135e-06,
      "loss": 0.1462,
      "step": 12700
    },
    {
      "epoch": 0.3705292024038742,
      "grad_norm": 0.9994119263648709,
      "learning_rate": 7.2552703929491995e-06,
      "loss": 0.1451,
      "step": 12701
    },
    {
      "epoch": 0.37055837563451777,
      "grad_norm": 0.8768566263661491,
      "learning_rate": 7.254848736994371e-06,
      "loss": 0.1363,
      "step": 12702
    },
    {
      "epoch": 0.3705875488651613,
      "grad_norm": 0.7873476512517228,
      "learning_rate": 7.254427060908791e-06,
      "loss": 0.1594,
      "step": 12703
    },
    {
      "epoch": 0.3706167220958049,
      "grad_norm": 1.0958104625118537,
      "learning_rate": 7.254005364696223e-06,
      "loss": 0.1426,
      "step": 12704
    },
    {
      "epoch": 0.3706458953264484,
      "grad_norm": 0.6734979153631149,
      "learning_rate": 7.253583648360435e-06,
      "loss": 0.1423,
      "step": 12705
    },
    {
      "epoch": 0.37067506855709204,
      "grad_norm": 0.8778050796298381,
      "learning_rate": 7.253161911905188e-06,
      "loss": 0.1423,
      "step": 12706
    },
    {
      "epoch": 0.3707042417877356,
      "grad_norm": 1.2332592993197071,
      "learning_rate": 7.25274015533425e-06,
      "loss": 0.1225,
      "step": 12707
    },
    {
      "epoch": 0.37073341501837914,
      "grad_norm": 0.7214949137311216,
      "learning_rate": 7.252318378651388e-06,
      "loss": 0.1251,
      "step": 12708
    },
    {
      "epoch": 0.3707625882490227,
      "grad_norm": 0.8838682258775027,
      "learning_rate": 7.251896581860364e-06,
      "loss": 0.1438,
      "step": 12709
    },
    {
      "epoch": 0.37079176147966625,
      "grad_norm": 0.8865027838061383,
      "learning_rate": 7.2514747649649445e-06,
      "loss": 0.1368,
      "step": 12710
    },
    {
      "epoch": 0.3708209347103098,
      "grad_norm": 0.6501735305599404,
      "learning_rate": 7.2510529279688955e-06,
      "loss": 0.1391,
      "step": 12711
    },
    {
      "epoch": 0.37085010794095336,
      "grad_norm": 0.7626504494552498,
      "learning_rate": 7.250631070875983e-06,
      "loss": 0.1523,
      "step": 12712
    },
    {
      "epoch": 0.37087928117159696,
      "grad_norm": 0.8341725753743278,
      "learning_rate": 7.250209193689975e-06,
      "loss": 0.1926,
      "step": 12713
    },
    {
      "epoch": 0.3709084544022405,
      "grad_norm": 0.8088679688713538,
      "learning_rate": 7.249787296414635e-06,
      "loss": 0.136,
      "step": 12714
    },
    {
      "epoch": 0.37093762763288407,
      "grad_norm": 0.6777045927186891,
      "learning_rate": 7.249365379053731e-06,
      "loss": 0.1432,
      "step": 12715
    },
    {
      "epoch": 0.3709668008635276,
      "grad_norm": 0.736509969660584,
      "learning_rate": 7.248943441611031e-06,
      "loss": 0.1321,
      "step": 12716
    },
    {
      "epoch": 0.3709959740941712,
      "grad_norm": 0.9220189050883129,
      "learning_rate": 7.248521484090299e-06,
      "loss": 0.1631,
      "step": 12717
    },
    {
      "epoch": 0.37102514732481473,
      "grad_norm": 0.6296469905412683,
      "learning_rate": 7.248099506495307e-06,
      "loss": 0.1554,
      "step": 12718
    },
    {
      "epoch": 0.37105432055545834,
      "grad_norm": 0.8702416146894282,
      "learning_rate": 7.247677508829816e-06,
      "loss": 0.1568,
      "step": 12719
    },
    {
      "epoch": 0.3710834937861019,
      "grad_norm": 0.7698777512516692,
      "learning_rate": 7.2472554910976e-06,
      "loss": 0.143,
      "step": 12720
    },
    {
      "epoch": 0.37111266701674545,
      "grad_norm": 0.6924246629798406,
      "learning_rate": 7.246833453302422e-06,
      "loss": 0.1263,
      "step": 12721
    },
    {
      "epoch": 0.371141840247389,
      "grad_norm": 0.7094621667323893,
      "learning_rate": 7.24641139544805e-06,
      "loss": 0.1511,
      "step": 12722
    },
    {
      "epoch": 0.37117101347803255,
      "grad_norm": 0.7122842266152872,
      "learning_rate": 7.2459893175382546e-06,
      "loss": 0.1508,
      "step": 12723
    },
    {
      "epoch": 0.3712001867086761,
      "grad_norm": 0.9425692565536175,
      "learning_rate": 7.245567219576803e-06,
      "loss": 0.1574,
      "step": 12724
    },
    {
      "epoch": 0.37122935993931966,
      "grad_norm": 0.8105275883169979,
      "learning_rate": 7.2451451015674624e-06,
      "loss": 0.1379,
      "step": 12725
    },
    {
      "epoch": 0.37125853316996327,
      "grad_norm": 0.7417886750217544,
      "learning_rate": 7.244722963514002e-06,
      "loss": 0.1395,
      "step": 12726
    },
    {
      "epoch": 0.3712877064006068,
      "grad_norm": 0.9187036576494949,
      "learning_rate": 7.244300805420192e-06,
      "loss": 0.1376,
      "step": 12727
    },
    {
      "epoch": 0.3713168796312504,
      "grad_norm": 0.672931933546487,
      "learning_rate": 7.2438786272897995e-06,
      "loss": 0.1265,
      "step": 12728
    },
    {
      "epoch": 0.3713460528618939,
      "grad_norm": 0.8197851589142929,
      "learning_rate": 7.243456429126594e-06,
      "loss": 0.1437,
      "step": 12729
    },
    {
      "epoch": 0.3713752260925375,
      "grad_norm": 0.694977998895702,
      "learning_rate": 7.243034210934345e-06,
      "loss": 0.1314,
      "step": 12730
    },
    {
      "epoch": 0.37140439932318103,
      "grad_norm": 0.7052585724435444,
      "learning_rate": 7.242611972716823e-06,
      "loss": 0.1516,
      "step": 12731
    },
    {
      "epoch": 0.3714335725538246,
      "grad_norm": 1.0067913676235247,
      "learning_rate": 7.2421897144777965e-06,
      "loss": 0.1368,
      "step": 12732
    },
    {
      "epoch": 0.3714627457844682,
      "grad_norm": 0.9442444614065237,
      "learning_rate": 7.2417674362210365e-06,
      "loss": 0.1538,
      "step": 12733
    },
    {
      "epoch": 0.37149191901511175,
      "grad_norm": 0.957447244826272,
      "learning_rate": 7.241345137950309e-06,
      "loss": 0.1667,
      "step": 12734
    },
    {
      "epoch": 0.3715210922457553,
      "grad_norm": 0.8537917080419062,
      "learning_rate": 7.24092281966939e-06,
      "loss": 0.1609,
      "step": 12735
    },
    {
      "epoch": 0.37155026547639886,
      "grad_norm": 0.7917483530332348,
      "learning_rate": 7.2405004813820465e-06,
      "loss": 0.1281,
      "step": 12736
    },
    {
      "epoch": 0.3715794387070424,
      "grad_norm": 1.5691676737995557,
      "learning_rate": 7.240078123092047e-06,
      "loss": 0.1313,
      "step": 12737
    },
    {
      "epoch": 0.37160861193768596,
      "grad_norm": 0.868321333198384,
      "learning_rate": 7.2396557448031675e-06,
      "loss": 0.1512,
      "step": 12738
    },
    {
      "epoch": 0.3716377851683295,
      "grad_norm": 0.7526311165689189,
      "learning_rate": 7.239233346519176e-06,
      "loss": 0.1534,
      "step": 12739
    },
    {
      "epoch": 0.3716669583989731,
      "grad_norm": 0.8363204816108493,
      "learning_rate": 7.238810928243842e-06,
      "loss": 0.1577,
      "step": 12740
    },
    {
      "epoch": 0.3716961316296167,
      "grad_norm": 0.9236118860998855,
      "learning_rate": 7.238388489980941e-06,
      "loss": 0.1489,
      "step": 12741
    },
    {
      "epoch": 0.37172530486026023,
      "grad_norm": 0.7284906094986009,
      "learning_rate": 7.2379660317342405e-06,
      "loss": 0.139,
      "step": 12742
    },
    {
      "epoch": 0.3717544780909038,
      "grad_norm": 1.139559360147518,
      "learning_rate": 7.237543553507513e-06,
      "loss": 0.1337,
      "step": 12743
    },
    {
      "epoch": 0.37178365132154734,
      "grad_norm": 0.9258717371918922,
      "learning_rate": 7.237121055304532e-06,
      "loss": 0.1567,
      "step": 12744
    },
    {
      "epoch": 0.3718128245521909,
      "grad_norm": 0.79707393282504,
      "learning_rate": 7.236698537129066e-06,
      "loss": 0.161,
      "step": 12745
    },
    {
      "epoch": 0.37184199778283444,
      "grad_norm": 0.9828875833309629,
      "learning_rate": 7.236275998984892e-06,
      "loss": 0.1242,
      "step": 12746
    },
    {
      "epoch": 0.37187117101347805,
      "grad_norm": 0.7557707280313231,
      "learning_rate": 7.235853440875777e-06,
      "loss": 0.1496,
      "step": 12747
    },
    {
      "epoch": 0.3719003442441216,
      "grad_norm": 0.9305273171442837,
      "learning_rate": 7.235430862805499e-06,
      "loss": 0.1467,
      "step": 12748
    },
    {
      "epoch": 0.37192951747476516,
      "grad_norm": 0.7835039130081966,
      "learning_rate": 7.235008264777827e-06,
      "loss": 0.1452,
      "step": 12749
    },
    {
      "epoch": 0.3719586907054087,
      "grad_norm": 0.7762569275972072,
      "learning_rate": 7.2345856467965345e-06,
      "loss": 0.1373,
      "step": 12750
    },
    {
      "epoch": 0.37198786393605227,
      "grad_norm": 0.9320611300067672,
      "learning_rate": 7.2341630088653955e-06,
      "loss": 0.1359,
      "step": 12751
    },
    {
      "epoch": 0.3720170371666958,
      "grad_norm": 0.9019528982672234,
      "learning_rate": 7.233740350988181e-06,
      "loss": 0.1687,
      "step": 12752
    },
    {
      "epoch": 0.3720462103973394,
      "grad_norm": 0.7524268631385961,
      "learning_rate": 7.233317673168667e-06,
      "loss": 0.1497,
      "step": 12753
    },
    {
      "epoch": 0.372075383627983,
      "grad_norm": 0.8015622169646974,
      "learning_rate": 7.232894975410626e-06,
      "loss": 0.1358,
      "step": 12754
    },
    {
      "epoch": 0.37210455685862653,
      "grad_norm": 0.7264333250152629,
      "learning_rate": 7.232472257717831e-06,
      "loss": 0.1414,
      "step": 12755
    },
    {
      "epoch": 0.3721337300892701,
      "grad_norm": 0.8230271531325329,
      "learning_rate": 7.232049520094057e-06,
      "loss": 0.1623,
      "step": 12756
    },
    {
      "epoch": 0.37216290331991364,
      "grad_norm": 0.8013650352287885,
      "learning_rate": 7.231626762543078e-06,
      "loss": 0.1227,
      "step": 12757
    },
    {
      "epoch": 0.3721920765505572,
      "grad_norm": 0.8606246876335979,
      "learning_rate": 7.231203985068666e-06,
      "loss": 0.1617,
      "step": 12758
    },
    {
      "epoch": 0.37222124978120075,
      "grad_norm": 0.7493734677857707,
      "learning_rate": 7.230781187674601e-06,
      "loss": 0.1349,
      "step": 12759
    },
    {
      "epoch": 0.37225042301184436,
      "grad_norm": 0.797890250530901,
      "learning_rate": 7.230358370364652e-06,
      "loss": 0.1333,
      "step": 12760
    },
    {
      "epoch": 0.3722795962424879,
      "grad_norm": 0.741412221042629,
      "learning_rate": 7.2299355331425955e-06,
      "loss": 0.1275,
      "step": 12761
    },
    {
      "epoch": 0.37230876947313146,
      "grad_norm": 0.7848481159593104,
      "learning_rate": 7.229512676012207e-06,
      "loss": 0.143,
      "step": 12762
    },
    {
      "epoch": 0.372337942703775,
      "grad_norm": 0.8226034318008352,
      "learning_rate": 7.229089798977264e-06,
      "loss": 0.128,
      "step": 12763
    },
    {
      "epoch": 0.37236711593441857,
      "grad_norm": 0.7372312788201161,
      "learning_rate": 7.2286669020415355e-06,
      "loss": 0.1571,
      "step": 12764
    },
    {
      "epoch": 0.3723962891650621,
      "grad_norm": 0.7415974436887275,
      "learning_rate": 7.228243985208804e-06,
      "loss": 0.1278,
      "step": 12765
    },
    {
      "epoch": 0.3724254623957057,
      "grad_norm": 0.8904885817454987,
      "learning_rate": 7.227821048482842e-06,
      "loss": 0.1382,
      "step": 12766
    },
    {
      "epoch": 0.3724546356263493,
      "grad_norm": 0.8968460568461896,
      "learning_rate": 7.227398091867422e-06,
      "loss": 0.1371,
      "step": 12767
    },
    {
      "epoch": 0.37248380885699284,
      "grad_norm": 0.8371768229407455,
      "learning_rate": 7.226975115366328e-06,
      "loss": 0.171,
      "step": 12768
    },
    {
      "epoch": 0.3725129820876364,
      "grad_norm": 0.9250473502488088,
      "learning_rate": 7.22655211898333e-06,
      "loss": 0.1462,
      "step": 12769
    },
    {
      "epoch": 0.37254215531827994,
      "grad_norm": 1.0230115628904144,
      "learning_rate": 7.226129102722206e-06,
      "loss": 0.1575,
      "step": 12770
    },
    {
      "epoch": 0.3725713285489235,
      "grad_norm": 0.8323563189907527,
      "learning_rate": 7.225706066586733e-06,
      "loss": 0.1377,
      "step": 12771
    },
    {
      "epoch": 0.37260050177956705,
      "grad_norm": 0.7385433873269618,
      "learning_rate": 7.225283010580686e-06,
      "loss": 0.16,
      "step": 12772
    },
    {
      "epoch": 0.3726296750102106,
      "grad_norm": 0.8241995662093278,
      "learning_rate": 7.224859934707845e-06,
      "loss": 0.1419,
      "step": 12773
    },
    {
      "epoch": 0.3726588482408542,
      "grad_norm": 0.8881407002886734,
      "learning_rate": 7.224436838971986e-06,
      "loss": 0.1512,
      "step": 12774
    },
    {
      "epoch": 0.37268802147149777,
      "grad_norm": 0.9006077207869064,
      "learning_rate": 7.224013723376886e-06,
      "loss": 0.1388,
      "step": 12775
    },
    {
      "epoch": 0.3727171947021413,
      "grad_norm": 0.840964875196712,
      "learning_rate": 7.223590587926322e-06,
      "loss": 0.1629,
      "step": 12776
    },
    {
      "epoch": 0.37274636793278487,
      "grad_norm": 1.0528834947135033,
      "learning_rate": 7.223167432624071e-06,
      "loss": 0.1651,
      "step": 12777
    },
    {
      "epoch": 0.3727755411634284,
      "grad_norm": 0.765849402535143,
      "learning_rate": 7.2227442574739135e-06,
      "loss": 0.145,
      "step": 12778
    },
    {
      "epoch": 0.372804714394072,
      "grad_norm": 0.914936798327709,
      "learning_rate": 7.222321062479625e-06,
      "loss": 0.1496,
      "step": 12779
    },
    {
      "epoch": 0.3728338876247156,
      "grad_norm": 1.0107411168454763,
      "learning_rate": 7.221897847644985e-06,
      "loss": 0.1625,
      "step": 12780
    },
    {
      "epoch": 0.37286306085535914,
      "grad_norm": 0.8774221008451132,
      "learning_rate": 7.221474612973771e-06,
      "loss": 0.1465,
      "step": 12781
    },
    {
      "epoch": 0.3728922340860027,
      "grad_norm": 0.818555050504513,
      "learning_rate": 7.22105135846976e-06,
      "loss": 0.1539,
      "step": 12782
    },
    {
      "epoch": 0.37292140731664625,
      "grad_norm": 0.957235837635529,
      "learning_rate": 7.220628084136736e-06,
      "loss": 0.1549,
      "step": 12783
    },
    {
      "epoch": 0.3729505805472898,
      "grad_norm": 0.7835090422931695,
      "learning_rate": 7.220204789978473e-06,
      "loss": 0.1404,
      "step": 12784
    },
    {
      "epoch": 0.37297975377793335,
      "grad_norm": 0.7670782905317178,
      "learning_rate": 7.219781475998753e-06,
      "loss": 0.1768,
      "step": 12785
    },
    {
      "epoch": 0.3730089270085769,
      "grad_norm": 1.1532221791237023,
      "learning_rate": 7.219358142201352e-06,
      "loss": 0.1443,
      "step": 12786
    },
    {
      "epoch": 0.3730381002392205,
      "grad_norm": 0.8264581414476969,
      "learning_rate": 7.218934788590053e-06,
      "loss": 0.1532,
      "step": 12787
    },
    {
      "epoch": 0.37306727346986407,
      "grad_norm": 0.9916087496784959,
      "learning_rate": 7.218511415168633e-06,
      "loss": 0.1493,
      "step": 12788
    },
    {
      "epoch": 0.3730964467005076,
      "grad_norm": 1.1611645508019375,
      "learning_rate": 7.218088021940872e-06,
      "loss": 0.1409,
      "step": 12789
    },
    {
      "epoch": 0.3731256199311512,
      "grad_norm": 0.9272351621046583,
      "learning_rate": 7.217664608910552e-06,
      "loss": 0.1555,
      "step": 12790
    },
    {
      "epoch": 0.37315479316179473,
      "grad_norm": 0.9172760740376449,
      "learning_rate": 7.217241176081451e-06,
      "loss": 0.1396,
      "step": 12791
    },
    {
      "epoch": 0.3731839663924383,
      "grad_norm": 1.1666594732936182,
      "learning_rate": 7.21681772345735e-06,
      "loss": 0.1465,
      "step": 12792
    },
    {
      "epoch": 0.37321313962308184,
      "grad_norm": 0.8439716641350714,
      "learning_rate": 7.21639425104203e-06,
      "loss": 0.1538,
      "step": 12793
    },
    {
      "epoch": 0.37324231285372544,
      "grad_norm": 1.0805611965721982,
      "learning_rate": 7.215970758839272e-06,
      "loss": 0.1595,
      "step": 12794
    },
    {
      "epoch": 0.373271486084369,
      "grad_norm": 1.3128693885816374,
      "learning_rate": 7.215547246852856e-06,
      "loss": 0.1618,
      "step": 12795
    },
    {
      "epoch": 0.37330065931501255,
      "grad_norm": 1.1045141047839067,
      "learning_rate": 7.21512371508656e-06,
      "loss": 0.1512,
      "step": 12796
    },
    {
      "epoch": 0.3733298325456561,
      "grad_norm": 0.592389833864529,
      "learning_rate": 7.214700163544171e-06,
      "loss": 0.1291,
      "step": 12797
    },
    {
      "epoch": 0.37335900577629966,
      "grad_norm": 1.5000220461646439,
      "learning_rate": 7.2142765922294675e-06,
      "loss": 0.1526,
      "step": 12798
    },
    {
      "epoch": 0.3733881790069432,
      "grad_norm": 1.2455101272119433,
      "learning_rate": 7.213853001146229e-06,
      "loss": 0.1382,
      "step": 12799
    },
    {
      "epoch": 0.37341735223758676,
      "grad_norm": 0.6794075304930273,
      "learning_rate": 7.213429390298243e-06,
      "loss": 0.1493,
      "step": 12800
    },
    {
      "epoch": 0.3734465254682304,
      "grad_norm": 0.8178729732341539,
      "learning_rate": 7.213005759689286e-06,
      "loss": 0.1403,
      "step": 12801
    },
    {
      "epoch": 0.3734756986988739,
      "grad_norm": 1.2423852804172166,
      "learning_rate": 7.212582109323141e-06,
      "loss": 0.1624,
      "step": 12802
    },
    {
      "epoch": 0.3735048719295175,
      "grad_norm": 0.7730886036964798,
      "learning_rate": 7.212158439203593e-06,
      "loss": 0.1575,
      "step": 12803
    },
    {
      "epoch": 0.37353404516016103,
      "grad_norm": 1.0462063519020495,
      "learning_rate": 7.21173474933442e-06,
      "loss": 0.1495,
      "step": 12804
    },
    {
      "epoch": 0.3735632183908046,
      "grad_norm": 0.9880394441696694,
      "learning_rate": 7.2113110397194094e-06,
      "loss": 0.1446,
      "step": 12805
    },
    {
      "epoch": 0.37359239162144814,
      "grad_norm": 0.8399042504911042,
      "learning_rate": 7.210887310362341e-06,
      "loss": 0.139,
      "step": 12806
    },
    {
      "epoch": 0.37362156485209175,
      "grad_norm": 0.7712592400776408,
      "learning_rate": 7.2104635612669984e-06,
      "loss": 0.13,
      "step": 12807
    },
    {
      "epoch": 0.3736507380827353,
      "grad_norm": 0.9868046955871935,
      "learning_rate": 7.210039792437165e-06,
      "loss": 0.132,
      "step": 12808
    },
    {
      "epoch": 0.37367991131337885,
      "grad_norm": 0.6052468986562363,
      "learning_rate": 7.2096160038766225e-06,
      "loss": 0.1276,
      "step": 12809
    },
    {
      "epoch": 0.3737090845440224,
      "grad_norm": 1.004714779689122,
      "learning_rate": 7.209192195589159e-06,
      "loss": 0.1359,
      "step": 12810
    },
    {
      "epoch": 0.37373825777466596,
      "grad_norm": 0.8037176755728264,
      "learning_rate": 7.208768367578551e-06,
      "loss": 0.1413,
      "step": 12811
    },
    {
      "epoch": 0.3737674310053095,
      "grad_norm": 0.8238673228541763,
      "learning_rate": 7.208344519848589e-06,
      "loss": 0.1505,
      "step": 12812
    },
    {
      "epoch": 0.37379660423595307,
      "grad_norm": 0.7690389986364415,
      "learning_rate": 7.207920652403054e-06,
      "loss": 0.1531,
      "step": 12813
    },
    {
      "epoch": 0.3738257774665967,
      "grad_norm": 0.8254209981613991,
      "learning_rate": 7.207496765245729e-06,
      "loss": 0.1368,
      "step": 12814
    },
    {
      "epoch": 0.37385495069724023,
      "grad_norm": 0.6900741912062895,
      "learning_rate": 7.207072858380402e-06,
      "loss": 0.1407,
      "step": 12815
    },
    {
      "epoch": 0.3738841239278838,
      "grad_norm": 1.0600254361303463,
      "learning_rate": 7.206648931810855e-06,
      "loss": 0.135,
      "step": 12816
    },
    {
      "epoch": 0.37391329715852734,
      "grad_norm": 0.7431866458866175,
      "learning_rate": 7.20622498554087e-06,
      "loss": 0.1552,
      "step": 12817
    },
    {
      "epoch": 0.3739424703891709,
      "grad_norm": 0.7518029155696899,
      "learning_rate": 7.205801019574239e-06,
      "loss": 0.1536,
      "step": 12818
    },
    {
      "epoch": 0.37397164361981444,
      "grad_norm": 0.7642362741752822,
      "learning_rate": 7.205377033914742e-06,
      "loss": 0.1578,
      "step": 12819
    },
    {
      "epoch": 0.374000816850458,
      "grad_norm": 1.0284505970533113,
      "learning_rate": 7.204953028566164e-06,
      "loss": 0.1736,
      "step": 12820
    },
    {
      "epoch": 0.3740299900811016,
      "grad_norm": 0.8088192319957287,
      "learning_rate": 7.204529003532292e-06,
      "loss": 0.1518,
      "step": 12821
    },
    {
      "epoch": 0.37405916331174516,
      "grad_norm": 0.8304772257381413,
      "learning_rate": 7.204104958816913e-06,
      "loss": 0.1312,
      "step": 12822
    },
    {
      "epoch": 0.3740883365423887,
      "grad_norm": 0.7755346740607266,
      "learning_rate": 7.203680894423809e-06,
      "loss": 0.1232,
      "step": 12823
    },
    {
      "epoch": 0.37411750977303226,
      "grad_norm": 0.7693245134642306,
      "learning_rate": 7.203256810356769e-06,
      "loss": 0.1512,
      "step": 12824
    },
    {
      "epoch": 0.3741466830036758,
      "grad_norm": 0.9523896806829728,
      "learning_rate": 7.202832706619579e-06,
      "loss": 0.1314,
      "step": 12825
    },
    {
      "epoch": 0.37417585623431937,
      "grad_norm": 0.7951488704850891,
      "learning_rate": 7.202408583216023e-06,
      "loss": 0.127,
      "step": 12826
    },
    {
      "epoch": 0.3742050294649629,
      "grad_norm": 0.8067102693565923,
      "learning_rate": 7.201984440149889e-06,
      "loss": 0.1682,
      "step": 12827
    },
    {
      "epoch": 0.37423420269560653,
      "grad_norm": 1.0685204648025133,
      "learning_rate": 7.2015602774249645e-06,
      "loss": 0.1596,
      "step": 12828
    },
    {
      "epoch": 0.3742633759262501,
      "grad_norm": 0.9378162171489898,
      "learning_rate": 7.201136095045035e-06,
      "loss": 0.1532,
      "step": 12829
    },
    {
      "epoch": 0.37429254915689364,
      "grad_norm": 0.6825283671666629,
      "learning_rate": 7.200711893013889e-06,
      "loss": 0.1652,
      "step": 12830
    },
    {
      "epoch": 0.3743217223875372,
      "grad_norm": 0.7794609561132333,
      "learning_rate": 7.200287671335311e-06,
      "loss": 0.1462,
      "step": 12831
    },
    {
      "epoch": 0.37435089561818075,
      "grad_norm": 0.6949545272455027,
      "learning_rate": 7.199863430013088e-06,
      "loss": 0.1355,
      "step": 12832
    },
    {
      "epoch": 0.3743800688488243,
      "grad_norm": 0.7559217194938304,
      "learning_rate": 7.1994391690510136e-06,
      "loss": 0.1292,
      "step": 12833
    },
    {
      "epoch": 0.3744092420794679,
      "grad_norm": 0.7526174945214693,
      "learning_rate": 7.19901488845287e-06,
      "loss": 0.1494,
      "step": 12834
    },
    {
      "epoch": 0.37443841531011146,
      "grad_norm": 0.7730864831196734,
      "learning_rate": 7.1985905882224446e-06,
      "loss": 0.1545,
      "step": 12835
    },
    {
      "epoch": 0.374467588540755,
      "grad_norm": 0.8467018024327737,
      "learning_rate": 7.198166268363529e-06,
      "loss": 0.1504,
      "step": 12836
    },
    {
      "epoch": 0.37449676177139857,
      "grad_norm": 0.9572708813983742,
      "learning_rate": 7.19774192887991e-06,
      "loss": 0.145,
      "step": 12837
    },
    {
      "epoch": 0.3745259350020421,
      "grad_norm": 0.7930922126743046,
      "learning_rate": 7.197317569775375e-06,
      "loss": 0.1393,
      "step": 12838
    },
    {
      "epoch": 0.3745551082326857,
      "grad_norm": 0.8380399611026271,
      "learning_rate": 7.196893191053713e-06,
      "loss": 0.1508,
      "step": 12839
    },
    {
      "epoch": 0.3745842814633292,
      "grad_norm": 0.9770374218791077,
      "learning_rate": 7.196468792718714e-06,
      "loss": 0.1626,
      "step": 12840
    },
    {
      "epoch": 0.37461345469397284,
      "grad_norm": 1.0658859121375772,
      "learning_rate": 7.196044374774165e-06,
      "loss": 0.1444,
      "step": 12841
    },
    {
      "epoch": 0.3746426279246164,
      "grad_norm": 0.8047978745261797,
      "learning_rate": 7.1956199372238555e-06,
      "loss": 0.1416,
      "step": 12842
    },
    {
      "epoch": 0.37467180115525994,
      "grad_norm": 1.0186389228583306,
      "learning_rate": 7.1951954800715775e-06,
      "loss": 0.1496,
      "step": 12843
    },
    {
      "epoch": 0.3747009743859035,
      "grad_norm": 0.9815262561047814,
      "learning_rate": 7.194771003321116e-06,
      "loss": 0.1628,
      "step": 12844
    },
    {
      "epoch": 0.37473014761654705,
      "grad_norm": 0.8453425149772271,
      "learning_rate": 7.194346506976264e-06,
      "loss": 0.178,
      "step": 12845
    },
    {
      "epoch": 0.3747593208471906,
      "grad_norm": 0.7025099970289065,
      "learning_rate": 7.193921991040811e-06,
      "loss": 0.1168,
      "step": 12846
    },
    {
      "epoch": 0.37478849407783416,
      "grad_norm": 0.8768540199574046,
      "learning_rate": 7.193497455518545e-06,
      "loss": 0.1491,
      "step": 12847
    },
    {
      "epoch": 0.37481766730847776,
      "grad_norm": 0.9173644426660783,
      "learning_rate": 7.193072900413258e-06,
      "loss": 0.1634,
      "step": 12848
    },
    {
      "epoch": 0.3748468405391213,
      "grad_norm": 0.76045222399714,
      "learning_rate": 7.192648325728739e-06,
      "loss": 0.1586,
      "step": 12849
    },
    {
      "epoch": 0.37487601376976487,
      "grad_norm": 0.7832145092698721,
      "learning_rate": 7.1922237314687795e-06,
      "loss": 0.14,
      "step": 12850
    },
    {
      "epoch": 0.3749051870004084,
      "grad_norm": 0.8572299816846876,
      "learning_rate": 7.191799117637169e-06,
      "loss": 0.1504,
      "step": 12851
    },
    {
      "epoch": 0.374934360231052,
      "grad_norm": 0.8418590093246815,
      "learning_rate": 7.191374484237701e-06,
      "loss": 0.1604,
      "step": 12852
    },
    {
      "epoch": 0.37496353346169553,
      "grad_norm": 0.83495815477393,
      "learning_rate": 7.1909498312741635e-06,
      "loss": 0.1268,
      "step": 12853
    },
    {
      "epoch": 0.3749927066923391,
      "grad_norm": 0.6949037949282069,
      "learning_rate": 7.190525158750349e-06,
      "loss": 0.1617,
      "step": 12854
    },
    {
      "epoch": 0.3750218799229827,
      "grad_norm": 0.7707164045187853,
      "learning_rate": 7.19010046667005e-06,
      "loss": 0.1202,
      "step": 12855
    },
    {
      "epoch": 0.37505105315362625,
      "grad_norm": 0.934571627521969,
      "learning_rate": 7.189675755037055e-06,
      "loss": 0.1565,
      "step": 12856
    },
    {
      "epoch": 0.3750802263842698,
      "grad_norm": 0.7682593052696219,
      "learning_rate": 7.189251023855158e-06,
      "loss": 0.114,
      "step": 12857
    },
    {
      "epoch": 0.37510939961491335,
      "grad_norm": 0.7097243328524351,
      "learning_rate": 7.188826273128152e-06,
      "loss": 0.1389,
      "step": 12858
    },
    {
      "epoch": 0.3751385728455569,
      "grad_norm": 1.101588816045312,
      "learning_rate": 7.188401502859825e-06,
      "loss": 0.17,
      "step": 12859
    },
    {
      "epoch": 0.37516774607620046,
      "grad_norm": 0.809860658631005,
      "learning_rate": 7.187976713053975e-06,
      "loss": 0.1359,
      "step": 12860
    },
    {
      "epoch": 0.375196919306844,
      "grad_norm": 0.776896204454263,
      "learning_rate": 7.187551903714389e-06,
      "loss": 0.1436,
      "step": 12861
    },
    {
      "epoch": 0.3752260925374876,
      "grad_norm": 0.6367088342375902,
      "learning_rate": 7.187127074844862e-06,
      "loss": 0.1186,
      "step": 12862
    },
    {
      "epoch": 0.3752552657681312,
      "grad_norm": 0.7681994472771605,
      "learning_rate": 7.186702226449187e-06,
      "loss": 0.1259,
      "step": 12863
    },
    {
      "epoch": 0.3752844389987747,
      "grad_norm": 0.9294077857237404,
      "learning_rate": 7.186277358531158e-06,
      "loss": 0.1253,
      "step": 12864
    },
    {
      "epoch": 0.3753136122294183,
      "grad_norm": 0.805502573751167,
      "learning_rate": 7.185852471094563e-06,
      "loss": 0.141,
      "step": 12865
    },
    {
      "epoch": 0.37534278546006183,
      "grad_norm": 0.6881424002631252,
      "learning_rate": 7.185427564143201e-06,
      "loss": 0.1303,
      "step": 12866
    },
    {
      "epoch": 0.3753719586907054,
      "grad_norm": 1.030590331604276,
      "learning_rate": 7.1850026376808645e-06,
      "loss": 0.1442,
      "step": 12867
    },
    {
      "epoch": 0.375401131921349,
      "grad_norm": 0.7952529500074451,
      "learning_rate": 7.1845776917113445e-06,
      "loss": 0.1242,
      "step": 12868
    },
    {
      "epoch": 0.37543030515199255,
      "grad_norm": 0.8270142445276427,
      "learning_rate": 7.184152726238437e-06,
      "loss": 0.1419,
      "step": 12869
    },
    {
      "epoch": 0.3754594783826361,
      "grad_norm": 0.8080599199572762,
      "learning_rate": 7.183727741265935e-06,
      "loss": 0.1572,
      "step": 12870
    },
    {
      "epoch": 0.37548865161327966,
      "grad_norm": 0.7388164255739978,
      "learning_rate": 7.183302736797632e-06,
      "loss": 0.1318,
      "step": 12871
    },
    {
      "epoch": 0.3755178248439232,
      "grad_norm": 0.8551495141645852,
      "learning_rate": 7.182877712837326e-06,
      "loss": 0.1535,
      "step": 12872
    },
    {
      "epoch": 0.37554699807456676,
      "grad_norm": 0.7926117580264666,
      "learning_rate": 7.182452669388809e-06,
      "loss": 0.1422,
      "step": 12873
    },
    {
      "epoch": 0.3755761713052103,
      "grad_norm": 0.9528508626438138,
      "learning_rate": 7.182027606455873e-06,
      "loss": 0.1427,
      "step": 12874
    },
    {
      "epoch": 0.3756053445358539,
      "grad_norm": 0.7720977339248862,
      "learning_rate": 7.181602524042317e-06,
      "loss": 0.1211,
      "step": 12875
    },
    {
      "epoch": 0.3756345177664975,
      "grad_norm": 0.7329168311726614,
      "learning_rate": 7.1811774221519336e-06,
      "loss": 0.136,
      "step": 12876
    },
    {
      "epoch": 0.37566369099714103,
      "grad_norm": 0.7780701546134563,
      "learning_rate": 7.180752300788518e-06,
      "loss": 0.1687,
      "step": 12877
    },
    {
      "epoch": 0.3756928642277846,
      "grad_norm": 0.885036411558389,
      "learning_rate": 7.180327159955869e-06,
      "loss": 0.1305,
      "step": 12878
    },
    {
      "epoch": 0.37572203745842814,
      "grad_norm": 0.8002865762299235,
      "learning_rate": 7.179901999657778e-06,
      "loss": 0.1281,
      "step": 12879
    },
    {
      "epoch": 0.3757512106890717,
      "grad_norm": 0.6139885216483737,
      "learning_rate": 7.179476819898042e-06,
      "loss": 0.1277,
      "step": 12880
    },
    {
      "epoch": 0.37578038391971524,
      "grad_norm": 0.8295300050875486,
      "learning_rate": 7.179051620680457e-06,
      "loss": 0.1667,
      "step": 12881
    },
    {
      "epoch": 0.37580955715035885,
      "grad_norm": 0.7792411625735932,
      "learning_rate": 7.178626402008821e-06,
      "loss": 0.1501,
      "step": 12882
    },
    {
      "epoch": 0.3758387303810024,
      "grad_norm": 0.6584314271683108,
      "learning_rate": 7.178201163886928e-06,
      "loss": 0.1292,
      "step": 12883
    },
    {
      "epoch": 0.37586790361164596,
      "grad_norm": 0.8692277701651672,
      "learning_rate": 7.177775906318574e-06,
      "loss": 0.1196,
      "step": 12884
    },
    {
      "epoch": 0.3758970768422895,
      "grad_norm": 0.8391969923653462,
      "learning_rate": 7.177350629307558e-06,
      "loss": 0.1338,
      "step": 12885
    },
    {
      "epoch": 0.37592625007293307,
      "grad_norm": 0.9623909496126544,
      "learning_rate": 7.176925332857674e-06,
      "loss": 0.1491,
      "step": 12886
    },
    {
      "epoch": 0.3759554233035766,
      "grad_norm": 0.9077478299947973,
      "learning_rate": 7.176500016972721e-06,
      "loss": 0.1353,
      "step": 12887
    },
    {
      "epoch": 0.3759845965342202,
      "grad_norm": 0.7651834974419126,
      "learning_rate": 7.176074681656495e-06,
      "loss": 0.1477,
      "step": 12888
    },
    {
      "epoch": 0.3760137697648638,
      "grad_norm": 1.081742506430794,
      "learning_rate": 7.175649326912794e-06,
      "loss": 0.1251,
      "step": 12889
    },
    {
      "epoch": 0.37604294299550733,
      "grad_norm": 0.7679017013848687,
      "learning_rate": 7.175223952745416e-06,
      "loss": 0.1296,
      "step": 12890
    },
    {
      "epoch": 0.3760721162261509,
      "grad_norm": 0.9669289364213036,
      "learning_rate": 7.174798559158157e-06,
      "loss": 0.1408,
      "step": 12891
    },
    {
      "epoch": 0.37610128945679444,
      "grad_norm": 0.9629585317633905,
      "learning_rate": 7.174373146154814e-06,
      "loss": 0.1561,
      "step": 12892
    },
    {
      "epoch": 0.376130462687438,
      "grad_norm": 0.850242793603612,
      "learning_rate": 7.17394771373919e-06,
      "loss": 0.154,
      "step": 12893
    },
    {
      "epoch": 0.37615963591808155,
      "grad_norm": 0.9562489098328032,
      "learning_rate": 7.173522261915078e-06,
      "loss": 0.1596,
      "step": 12894
    },
    {
      "epoch": 0.37618880914872516,
      "grad_norm": 0.8542218411673976,
      "learning_rate": 7.173096790686278e-06,
      "loss": 0.1416,
      "step": 12895
    },
    {
      "epoch": 0.3762179823793687,
      "grad_norm": 0.8199126257197301,
      "learning_rate": 7.172671300056588e-06,
      "loss": 0.1225,
      "step": 12896
    },
    {
      "epoch": 0.37624715561001226,
      "grad_norm": 0.8759460699810758,
      "learning_rate": 7.172245790029808e-06,
      "loss": 0.1641,
      "step": 12897
    },
    {
      "epoch": 0.3762763288406558,
      "grad_norm": 1.0116621949880957,
      "learning_rate": 7.1718202606097366e-06,
      "loss": 0.1689,
      "step": 12898
    },
    {
      "epoch": 0.37630550207129937,
      "grad_norm": 0.6673690742963837,
      "learning_rate": 7.171394711800172e-06,
      "loss": 0.1214,
      "step": 12899
    },
    {
      "epoch": 0.3763346753019429,
      "grad_norm": 1.007074597015964,
      "learning_rate": 7.1709691436049145e-06,
      "loss": 0.1502,
      "step": 12900
    },
    {
      "epoch": 0.3763638485325865,
      "grad_norm": 0.8534217065999815,
      "learning_rate": 7.170543556027762e-06,
      "loss": 0.1431,
      "step": 12901
    },
    {
      "epoch": 0.3763930217632301,
      "grad_norm": 0.8005454422205138,
      "learning_rate": 7.170117949072514e-06,
      "loss": 0.1458,
      "step": 12902
    },
    {
      "epoch": 0.37642219499387364,
      "grad_norm": 0.7868223042884781,
      "learning_rate": 7.1696923227429724e-06,
      "loss": 0.1325,
      "step": 12903
    },
    {
      "epoch": 0.3764513682245172,
      "grad_norm": 0.8432534370096046,
      "learning_rate": 7.169266677042934e-06,
      "loss": 0.1387,
      "step": 12904
    },
    {
      "epoch": 0.37648054145516074,
      "grad_norm": 0.707858434326484,
      "learning_rate": 7.168841011976202e-06,
      "loss": 0.1401,
      "step": 12905
    },
    {
      "epoch": 0.3765097146858043,
      "grad_norm": 0.7993405197909869,
      "learning_rate": 7.168415327546575e-06,
      "loss": 0.1545,
      "step": 12906
    },
    {
      "epoch": 0.37653888791644785,
      "grad_norm": 0.7133277890137376,
      "learning_rate": 7.167989623757853e-06,
      "loss": 0.1249,
      "step": 12907
    },
    {
      "epoch": 0.3765680611470914,
      "grad_norm": 0.6799619895613896,
      "learning_rate": 7.1675639006138385e-06,
      "loss": 0.1432,
      "step": 12908
    },
    {
      "epoch": 0.376597234377735,
      "grad_norm": 0.8732558172520097,
      "learning_rate": 7.16713815811833e-06,
      "loss": 0.129,
      "step": 12909
    },
    {
      "epoch": 0.37662640760837857,
      "grad_norm": 0.8703644826462984,
      "learning_rate": 7.166712396275128e-06,
      "loss": 0.1416,
      "step": 12910
    },
    {
      "epoch": 0.3766555808390221,
      "grad_norm": 0.8734050869939769,
      "learning_rate": 7.166286615088037e-06,
      "loss": 0.1283,
      "step": 12911
    },
    {
      "epoch": 0.3766847540696657,
      "grad_norm": 0.8412952623074301,
      "learning_rate": 7.165860814560855e-06,
      "loss": 0.1562,
      "step": 12912
    },
    {
      "epoch": 0.3767139273003092,
      "grad_norm": 0.9821024713743136,
      "learning_rate": 7.165434994697386e-06,
      "loss": 0.1526,
      "step": 12913
    },
    {
      "epoch": 0.3767431005309528,
      "grad_norm": 0.7847728972948989,
      "learning_rate": 7.16500915550143e-06,
      "loss": 0.1633,
      "step": 12914
    },
    {
      "epoch": 0.37677227376159633,
      "grad_norm": 0.872635241897278,
      "learning_rate": 7.1645832969767894e-06,
      "loss": 0.1648,
      "step": 12915
    },
    {
      "epoch": 0.37680144699223994,
      "grad_norm": 0.847901694637845,
      "learning_rate": 7.164157419127263e-06,
      "loss": 0.1575,
      "step": 12916
    },
    {
      "epoch": 0.3768306202228835,
      "grad_norm": 0.7788239673278788,
      "learning_rate": 7.1637315219566585e-06,
      "loss": 0.1414,
      "step": 12917
    },
    {
      "epoch": 0.37685979345352705,
      "grad_norm": 0.9305316814725858,
      "learning_rate": 7.1633056054687756e-06,
      "loss": 0.1474,
      "step": 12918
    },
    {
      "epoch": 0.3768889666841706,
      "grad_norm": 0.768186906984129,
      "learning_rate": 7.162879669667415e-06,
      "loss": 0.1465,
      "step": 12919
    },
    {
      "epoch": 0.37691813991481415,
      "grad_norm": 0.7541898117802722,
      "learning_rate": 7.162453714556383e-06,
      "loss": 0.1497,
      "step": 12920
    },
    {
      "epoch": 0.3769473131454577,
      "grad_norm": 0.7062473964100255,
      "learning_rate": 7.162027740139479e-06,
      "loss": 0.1327,
      "step": 12921
    },
    {
      "epoch": 0.3769764863761013,
      "grad_norm": 0.8258927353984997,
      "learning_rate": 7.1616017464205065e-06,
      "loss": 0.1422,
      "step": 12922
    },
    {
      "epoch": 0.37700565960674487,
      "grad_norm": 0.6491862551330999,
      "learning_rate": 7.1611757334032725e-06,
      "loss": 0.1427,
      "step": 12923
    },
    {
      "epoch": 0.3770348328373884,
      "grad_norm": 1.10500134042551,
      "learning_rate": 7.160749701091576e-06,
      "loss": 0.1605,
      "step": 12924
    },
    {
      "epoch": 0.377064006068032,
      "grad_norm": 0.8126912899723046,
      "learning_rate": 7.160323649489221e-06,
      "loss": 0.1173,
      "step": 12925
    },
    {
      "epoch": 0.37709317929867553,
      "grad_norm": 0.8426200014264674,
      "learning_rate": 7.159897578600014e-06,
      "loss": 0.1391,
      "step": 12926
    },
    {
      "epoch": 0.3771223525293191,
      "grad_norm": 0.8834136266003784,
      "learning_rate": 7.1594714884277564e-06,
      "loss": 0.1502,
      "step": 12927
    },
    {
      "epoch": 0.37715152575996264,
      "grad_norm": 1.0371223791268027,
      "learning_rate": 7.1590453789762525e-06,
      "loss": 0.1424,
      "step": 12928
    },
    {
      "epoch": 0.37718069899060624,
      "grad_norm": 0.9614534965231251,
      "learning_rate": 7.158619250249307e-06,
      "loss": 0.1144,
      "step": 12929
    },
    {
      "epoch": 0.3772098722212498,
      "grad_norm": 1.0766179123513977,
      "learning_rate": 7.158193102250724e-06,
      "loss": 0.1418,
      "step": 12930
    },
    {
      "epoch": 0.37723904545189335,
      "grad_norm": 0.7851736856001515,
      "learning_rate": 7.157766934984308e-06,
      "loss": 0.1536,
      "step": 12931
    },
    {
      "epoch": 0.3772682186825369,
      "grad_norm": 1.1128148525327382,
      "learning_rate": 7.157340748453864e-06,
      "loss": 0.1436,
      "step": 12932
    },
    {
      "epoch": 0.37729739191318046,
      "grad_norm": 0.97632633608461,
      "learning_rate": 7.1569145426631985e-06,
      "loss": 0.1474,
      "step": 12933
    },
    {
      "epoch": 0.377326565143824,
      "grad_norm": 0.8353249735371481,
      "learning_rate": 7.156488317616111e-06,
      "loss": 0.1593,
      "step": 12934
    },
    {
      "epoch": 0.37735573837446756,
      "grad_norm": 0.6876320909100034,
      "learning_rate": 7.156062073316414e-06,
      "loss": 0.126,
      "step": 12935
    },
    {
      "epoch": 0.3773849116051112,
      "grad_norm": 0.9650734550691265,
      "learning_rate": 7.155635809767909e-06,
      "loss": 0.1413,
      "step": 12936
    },
    {
      "epoch": 0.3774140848357547,
      "grad_norm": 1.3062486775554714,
      "learning_rate": 7.1552095269744e-06,
      "loss": 0.1696,
      "step": 12937
    },
    {
      "epoch": 0.3774432580663983,
      "grad_norm": 0.7289393306992145,
      "learning_rate": 7.154783224939697e-06,
      "loss": 0.1399,
      "step": 12938
    },
    {
      "epoch": 0.37747243129704183,
      "grad_norm": 1.007827707825707,
      "learning_rate": 7.154356903667604e-06,
      "loss": 0.1212,
      "step": 12939
    },
    {
      "epoch": 0.3775016045276854,
      "grad_norm": 0.953456318475476,
      "learning_rate": 7.153930563161926e-06,
      "loss": 0.1374,
      "step": 12940
    },
    {
      "epoch": 0.37753077775832894,
      "grad_norm": 0.7861556916629966,
      "learning_rate": 7.15350420342647e-06,
      "loss": 0.1574,
      "step": 12941
    },
    {
      "epoch": 0.3775599509889725,
      "grad_norm": 1.035179138482058,
      "learning_rate": 7.1530778244650425e-06,
      "loss": 0.1313,
      "step": 12942
    },
    {
      "epoch": 0.3775891242196161,
      "grad_norm": 0.7924070480833347,
      "learning_rate": 7.1526514262814495e-06,
      "loss": 0.1394,
      "step": 12943
    },
    {
      "epoch": 0.37761829745025965,
      "grad_norm": 1.0319999178575172,
      "learning_rate": 7.1522250088795e-06,
      "loss": 0.1638,
      "step": 12944
    },
    {
      "epoch": 0.3776474706809032,
      "grad_norm": 0.8222215758395307,
      "learning_rate": 7.1517985722630005e-06,
      "loss": 0.1532,
      "step": 12945
    },
    {
      "epoch": 0.37767664391154676,
      "grad_norm": 1.104517354869253,
      "learning_rate": 7.151372116435753e-06,
      "loss": 0.1387,
      "step": 12946
    },
    {
      "epoch": 0.3777058171421903,
      "grad_norm": 0.9845709166685269,
      "learning_rate": 7.150945641401571e-06,
      "loss": 0.148,
      "step": 12947
    },
    {
      "epoch": 0.37773499037283387,
      "grad_norm": 0.8761587873510057,
      "learning_rate": 7.150519147164261e-06,
      "loss": 0.1431,
      "step": 12948
    },
    {
      "epoch": 0.3777641636034775,
      "grad_norm": 1.0675377919863407,
      "learning_rate": 7.150092633727627e-06,
      "loss": 0.1531,
      "step": 12949
    },
    {
      "epoch": 0.37779333683412103,
      "grad_norm": 0.8704741403693905,
      "learning_rate": 7.149666101095482e-06,
      "loss": 0.1831,
      "step": 12950
    },
    {
      "epoch": 0.3778225100647646,
      "grad_norm": 0.71824553247317,
      "learning_rate": 7.149239549271629e-06,
      "loss": 0.1243,
      "step": 12951
    },
    {
      "epoch": 0.37785168329540814,
      "grad_norm": 0.9730730253029378,
      "learning_rate": 7.148812978259878e-06,
      "loss": 0.2166,
      "step": 12952
    },
    {
      "epoch": 0.3778808565260517,
      "grad_norm": 0.8428157727675645,
      "learning_rate": 7.148386388064039e-06,
      "loss": 0.1237,
      "step": 12953
    },
    {
      "epoch": 0.37791002975669524,
      "grad_norm": 0.8006041165399888,
      "learning_rate": 7.14795977868792e-06,
      "loss": 0.1371,
      "step": 12954
    },
    {
      "epoch": 0.3779392029873388,
      "grad_norm": 0.9305105700919174,
      "learning_rate": 7.147533150135327e-06,
      "loss": 0.1249,
      "step": 12955
    },
    {
      "epoch": 0.3779683762179824,
      "grad_norm": 1.0914651019831614,
      "learning_rate": 7.147106502410071e-06,
      "loss": 0.1459,
      "step": 12956
    },
    {
      "epoch": 0.37799754944862596,
      "grad_norm": 0.885704244639512,
      "learning_rate": 7.146679835515962e-06,
      "loss": 0.1446,
      "step": 12957
    },
    {
      "epoch": 0.3780267226792695,
      "grad_norm": 0.958352530010404,
      "learning_rate": 7.146253149456806e-06,
      "loss": 0.1447,
      "step": 12958
    },
    {
      "epoch": 0.37805589590991306,
      "grad_norm": 0.9641981998357005,
      "learning_rate": 7.145826444236415e-06,
      "loss": 0.1344,
      "step": 12959
    },
    {
      "epoch": 0.3780850691405566,
      "grad_norm": 0.8479666204307287,
      "learning_rate": 7.1453997198586e-06,
      "loss": 0.1658,
      "step": 12960
    },
    {
      "epoch": 0.37811424237120017,
      "grad_norm": 0.7779690532002743,
      "learning_rate": 7.144972976327164e-06,
      "loss": 0.1219,
      "step": 12961
    },
    {
      "epoch": 0.3781434156018437,
      "grad_norm": 1.2539229754779762,
      "learning_rate": 7.144546213645924e-06,
      "loss": 0.1535,
      "step": 12962
    },
    {
      "epoch": 0.37817258883248733,
      "grad_norm": 0.7621838741709405,
      "learning_rate": 7.144119431818689e-06,
      "loss": 0.1582,
      "step": 12963
    },
    {
      "epoch": 0.3782017620631309,
      "grad_norm": 0.735321625004359,
      "learning_rate": 7.1436926308492645e-06,
      "loss": 0.1305,
      "step": 12964
    },
    {
      "epoch": 0.37823093529377444,
      "grad_norm": 0.959420232020522,
      "learning_rate": 7.1432658107414665e-06,
      "loss": 0.1298,
      "step": 12965
    },
    {
      "epoch": 0.378260108524418,
      "grad_norm": 0.7998303281463885,
      "learning_rate": 7.142838971499101e-06,
      "loss": 0.1726,
      "step": 12966
    },
    {
      "epoch": 0.37828928175506155,
      "grad_norm": 0.9765520966833411,
      "learning_rate": 7.142412113125981e-06,
      "loss": 0.1813,
      "step": 12967
    },
    {
      "epoch": 0.3783184549857051,
      "grad_norm": 0.7140966566107453,
      "learning_rate": 7.141985235625918e-06,
      "loss": 0.1365,
      "step": 12968
    },
    {
      "epoch": 0.37834762821634865,
      "grad_norm": 1.017120231765769,
      "learning_rate": 7.141558339002721e-06,
      "loss": 0.1651,
      "step": 12969
    },
    {
      "epoch": 0.37837680144699226,
      "grad_norm": 0.9148143008092731,
      "learning_rate": 7.141131423260204e-06,
      "loss": 0.1353,
      "step": 12970
    },
    {
      "epoch": 0.3784059746776358,
      "grad_norm": 0.7676960095955689,
      "learning_rate": 7.140704488402175e-06,
      "loss": 0.1281,
      "step": 12971
    },
    {
      "epoch": 0.37843514790827937,
      "grad_norm": 0.7908730580557988,
      "learning_rate": 7.1402775344324485e-06,
      "loss": 0.1282,
      "step": 12972
    },
    {
      "epoch": 0.3784643211389229,
      "grad_norm": 1.0511472798732373,
      "learning_rate": 7.1398505613548345e-06,
      "loss": 0.1518,
      "step": 12973
    },
    {
      "epoch": 0.3784934943695665,
      "grad_norm": 0.771793661445887,
      "learning_rate": 7.1394235691731454e-06,
      "loss": 0.1521,
      "step": 12974
    },
    {
      "epoch": 0.37852266760021,
      "grad_norm": 0.6078147772896247,
      "learning_rate": 7.1389965578911946e-06,
      "loss": 0.1351,
      "step": 12975
    },
    {
      "epoch": 0.37855184083085364,
      "grad_norm": 1.0553111873365606,
      "learning_rate": 7.138569527512791e-06,
      "loss": 0.1486,
      "step": 12976
    },
    {
      "epoch": 0.3785810140614972,
      "grad_norm": 0.8961806256233492,
      "learning_rate": 7.13814247804175e-06,
      "loss": 0.1513,
      "step": 12977
    },
    {
      "epoch": 0.37861018729214074,
      "grad_norm": 0.8875527816798312,
      "learning_rate": 7.137715409481884e-06,
      "loss": 0.131,
      "step": 12978
    },
    {
      "epoch": 0.3786393605227843,
      "grad_norm": 0.7366436156084128,
      "learning_rate": 7.137288321837005e-06,
      "loss": 0.1421,
      "step": 12979
    },
    {
      "epoch": 0.37866853375342785,
      "grad_norm": 0.6727231513794392,
      "learning_rate": 7.136861215110926e-06,
      "loss": 0.1209,
      "step": 12980
    },
    {
      "epoch": 0.3786977069840714,
      "grad_norm": 0.8167695900482286,
      "learning_rate": 7.1364340893074605e-06,
      "loss": 0.1485,
      "step": 12981
    },
    {
      "epoch": 0.37872688021471496,
      "grad_norm": 0.7501467648092552,
      "learning_rate": 7.13600694443042e-06,
      "loss": 0.1541,
      "step": 12982
    },
    {
      "epoch": 0.37875605344535856,
      "grad_norm": 0.6557522570342736,
      "learning_rate": 7.135579780483621e-06,
      "loss": 0.1574,
      "step": 12983
    },
    {
      "epoch": 0.3787852266760021,
      "grad_norm": 0.6872754374578743,
      "learning_rate": 7.1351525974708756e-06,
      "loss": 0.1316,
      "step": 12984
    },
    {
      "epoch": 0.37881439990664567,
      "grad_norm": 0.8949837223811141,
      "learning_rate": 7.134725395395997e-06,
      "loss": 0.1539,
      "step": 12985
    },
    {
      "epoch": 0.3788435731372892,
      "grad_norm": 0.7704934776306336,
      "learning_rate": 7.1342981742627996e-06,
      "loss": 0.1363,
      "step": 12986
    },
    {
      "epoch": 0.3788727463679328,
      "grad_norm": 0.6718863266055916,
      "learning_rate": 7.133870934075098e-06,
      "loss": 0.1555,
      "step": 12987
    },
    {
      "epoch": 0.37890191959857633,
      "grad_norm": 0.8177509554857887,
      "learning_rate": 7.133443674836705e-06,
      "loss": 0.1389,
      "step": 12988
    },
    {
      "epoch": 0.3789310928292199,
      "grad_norm": 0.7196452370218708,
      "learning_rate": 7.133016396551438e-06,
      "loss": 0.1379,
      "step": 12989
    },
    {
      "epoch": 0.3789602660598635,
      "grad_norm": 0.9257648968287675,
      "learning_rate": 7.132589099223108e-06,
      "loss": 0.1585,
      "step": 12990
    },
    {
      "epoch": 0.37898943929050705,
      "grad_norm": 0.9373042126222249,
      "learning_rate": 7.132161782855533e-06,
      "loss": 0.144,
      "step": 12991
    },
    {
      "epoch": 0.3790186125211506,
      "grad_norm": 0.9099163048196881,
      "learning_rate": 7.131734447452525e-06,
      "loss": 0.1406,
      "step": 12992
    },
    {
      "epoch": 0.37904778575179415,
      "grad_norm": 0.9153891170724695,
      "learning_rate": 7.131307093017902e-06,
      "loss": 0.1458,
      "step": 12993
    },
    {
      "epoch": 0.3790769589824377,
      "grad_norm": 0.8824702923566206,
      "learning_rate": 7.130879719555477e-06,
      "loss": 0.1382,
      "step": 12994
    },
    {
      "epoch": 0.37910613221308126,
      "grad_norm": 0.7086844386030828,
      "learning_rate": 7.130452327069068e-06,
      "loss": 0.1244,
      "step": 12995
    },
    {
      "epoch": 0.3791353054437248,
      "grad_norm": 0.9866606689997597,
      "learning_rate": 7.130024915562488e-06,
      "loss": 0.1161,
      "step": 12996
    },
    {
      "epoch": 0.3791644786743684,
      "grad_norm": 0.7773186359509144,
      "learning_rate": 7.129597485039554e-06,
      "loss": 0.1569,
      "step": 12997
    },
    {
      "epoch": 0.379193651905012,
      "grad_norm": 0.7173249266886189,
      "learning_rate": 7.129170035504084e-06,
      "loss": 0.134,
      "step": 12998
    },
    {
      "epoch": 0.37922282513565553,
      "grad_norm": 0.7722682844543123,
      "learning_rate": 7.1287425669598896e-06,
      "loss": 0.1384,
      "step": 12999
    },
    {
      "epoch": 0.3792519983662991,
      "grad_norm": 0.7188234623188318,
      "learning_rate": 7.128315079410792e-06,
      "loss": 0.123,
      "step": 13000
    },
    {
      "epoch": 0.37928117159694263,
      "grad_norm": 0.7432564586110186,
      "learning_rate": 7.1278875728606035e-06,
      "loss": 0.1349,
      "step": 13001
    },
    {
      "epoch": 0.3793103448275862,
      "grad_norm": 0.7902680911141535,
      "learning_rate": 7.127460047313144e-06,
      "loss": 0.1161,
      "step": 13002
    },
    {
      "epoch": 0.37933951805822974,
      "grad_norm": 2.027281163123131,
      "learning_rate": 7.127032502772229e-06,
      "loss": 0.1601,
      "step": 13003
    },
    {
      "epoch": 0.37936869128887335,
      "grad_norm": 0.7195166679385525,
      "learning_rate": 7.1266049392416745e-06,
      "loss": 0.1426,
      "step": 13004
    },
    {
      "epoch": 0.3793978645195169,
      "grad_norm": 0.8912337214678676,
      "learning_rate": 7.126177356725299e-06,
      "loss": 0.149,
      "step": 13005
    },
    {
      "epoch": 0.37942703775016046,
      "grad_norm": 0.8818951914893414,
      "learning_rate": 7.1257497552269205e-06,
      "loss": 0.138,
      "step": 13006
    },
    {
      "epoch": 0.379456210980804,
      "grad_norm": 0.8222407696262598,
      "learning_rate": 7.1253221347503545e-06,
      "loss": 0.144,
      "step": 13007
    },
    {
      "epoch": 0.37948538421144756,
      "grad_norm": 0.6831361651877006,
      "learning_rate": 7.1248944952994204e-06,
      "loss": 0.1586,
      "step": 13008
    },
    {
      "epoch": 0.3795145574420911,
      "grad_norm": 0.9650011422116275,
      "learning_rate": 7.124466836877936e-06,
      "loss": 0.1533,
      "step": 13009
    },
    {
      "epoch": 0.3795437306727347,
      "grad_norm": 0.8199863927822962,
      "learning_rate": 7.12403915948972e-06,
      "loss": 0.1564,
      "step": 13010
    },
    {
      "epoch": 0.3795729039033783,
      "grad_norm": 0.7205981841075796,
      "learning_rate": 7.123611463138585e-06,
      "loss": 0.1476,
      "step": 13011
    },
    {
      "epoch": 0.37960207713402183,
      "grad_norm": 0.8891586204009748,
      "learning_rate": 7.123183747828357e-06,
      "loss": 0.149,
      "step": 13012
    },
    {
      "epoch": 0.3796312503646654,
      "grad_norm": 0.7511475967066265,
      "learning_rate": 7.122756013562853e-06,
      "loss": 0.1216,
      "step": 13013
    },
    {
      "epoch": 0.37966042359530894,
      "grad_norm": 0.7172693632880247,
      "learning_rate": 7.122328260345887e-06,
      "loss": 0.1317,
      "step": 13014
    },
    {
      "epoch": 0.3796895968259525,
      "grad_norm": 0.753777054067735,
      "learning_rate": 7.1219004881812824e-06,
      "loss": 0.1508,
      "step": 13015
    },
    {
      "epoch": 0.37971877005659604,
      "grad_norm": 0.7407290835739581,
      "learning_rate": 7.1214726970728566e-06,
      "loss": 0.1519,
      "step": 13016
    },
    {
      "epoch": 0.37974794328723965,
      "grad_norm": 0.8642151428868987,
      "learning_rate": 7.121044887024428e-06,
      "loss": 0.1323,
      "step": 13017
    },
    {
      "epoch": 0.3797771165178832,
      "grad_norm": 0.8638983260524838,
      "learning_rate": 7.120617058039818e-06,
      "loss": 0.1499,
      "step": 13018
    },
    {
      "epoch": 0.37980628974852676,
      "grad_norm": 0.6458164310364573,
      "learning_rate": 7.120189210122846e-06,
      "loss": 0.1285,
      "step": 13019
    },
    {
      "epoch": 0.3798354629791703,
      "grad_norm": 1.307966356872709,
      "learning_rate": 7.11976134327733e-06,
      "loss": 0.1395,
      "step": 13020
    },
    {
      "epoch": 0.37986463620981387,
      "grad_norm": 0.8898874170684433,
      "learning_rate": 7.119333457507089e-06,
      "loss": 0.1521,
      "step": 13021
    },
    {
      "epoch": 0.3798938094404574,
      "grad_norm": 1.0613668946563664,
      "learning_rate": 7.118905552815946e-06,
      "loss": 0.1528,
      "step": 13022
    },
    {
      "epoch": 0.379922982671101,
      "grad_norm": 0.8766091331398129,
      "learning_rate": 7.118477629207721e-06,
      "loss": 0.1492,
      "step": 13023
    },
    {
      "epoch": 0.3799521559017446,
      "grad_norm": 0.8012986552951176,
      "learning_rate": 7.1180496866862325e-06,
      "loss": 0.1617,
      "step": 13024
    },
    {
      "epoch": 0.37998132913238813,
      "grad_norm": 1.0196559398984344,
      "learning_rate": 7.1176217252553035e-06,
      "loss": 0.1431,
      "step": 13025
    },
    {
      "epoch": 0.3800105023630317,
      "grad_norm": 0.7495552245143045,
      "learning_rate": 7.117193744918751e-06,
      "loss": 0.1444,
      "step": 13026
    },
    {
      "epoch": 0.38003967559367524,
      "grad_norm": 0.7002159722388028,
      "learning_rate": 7.116765745680399e-06,
      "loss": 0.1675,
      "step": 13027
    },
    {
      "epoch": 0.3800688488243188,
      "grad_norm": 0.9144575591113243,
      "learning_rate": 7.116337727544069e-06,
      "loss": 0.157,
      "step": 13028
    },
    {
      "epoch": 0.38009802205496235,
      "grad_norm": 0.8715475008276659,
      "learning_rate": 7.115909690513578e-06,
      "loss": 0.1237,
      "step": 13029
    },
    {
      "epoch": 0.3801271952856059,
      "grad_norm": 0.898523550872658,
      "learning_rate": 7.1154816345927545e-06,
      "loss": 0.1552,
      "step": 13030
    },
    {
      "epoch": 0.3801563685162495,
      "grad_norm": 1.0962867135529077,
      "learning_rate": 7.1150535597854135e-06,
      "loss": 0.1262,
      "step": 13031
    },
    {
      "epoch": 0.38018554174689306,
      "grad_norm": 0.821932644955272,
      "learning_rate": 7.11462546609538e-06,
      "loss": 0.1273,
      "step": 13032
    },
    {
      "epoch": 0.3802147149775366,
      "grad_norm": 0.9093602231829235,
      "learning_rate": 7.114197353526474e-06,
      "loss": 0.152,
      "step": 13033
    },
    {
      "epoch": 0.38024388820818017,
      "grad_norm": 0.7960080562623917,
      "learning_rate": 7.1137692220825196e-06,
      "loss": 0.1424,
      "step": 13034
    },
    {
      "epoch": 0.3802730614388237,
      "grad_norm": 0.8044031144666941,
      "learning_rate": 7.113341071767338e-06,
      "loss": 0.1409,
      "step": 13035
    },
    {
      "epoch": 0.3803022346694673,
      "grad_norm": 0.6914698331714102,
      "learning_rate": 7.112912902584752e-06,
      "loss": 0.1299,
      "step": 13036
    },
    {
      "epoch": 0.3803314079001109,
      "grad_norm": 0.7620713834767586,
      "learning_rate": 7.112484714538584e-06,
      "loss": 0.1319,
      "step": 13037
    },
    {
      "epoch": 0.38036058113075444,
      "grad_norm": 0.9237652496892056,
      "learning_rate": 7.1120565076326565e-06,
      "loss": 0.1477,
      "step": 13038
    },
    {
      "epoch": 0.380389754361398,
      "grad_norm": 0.9166525200244503,
      "learning_rate": 7.1116282818707924e-06,
      "loss": 0.1348,
      "step": 13039
    },
    {
      "epoch": 0.38041892759204154,
      "grad_norm": 0.6828208987571173,
      "learning_rate": 7.111200037256816e-06,
      "loss": 0.1348,
      "step": 13040
    },
    {
      "epoch": 0.3804481008226851,
      "grad_norm": 1.1488067358228407,
      "learning_rate": 7.110771773794548e-06,
      "loss": 0.1427,
      "step": 13041
    },
    {
      "epoch": 0.38047727405332865,
      "grad_norm": 0.882183824494278,
      "learning_rate": 7.110343491487815e-06,
      "loss": 0.1495,
      "step": 13042
    },
    {
      "epoch": 0.3805064472839722,
      "grad_norm": 0.8755335334471303,
      "learning_rate": 7.109915190340439e-06,
      "loss": 0.1413,
      "step": 13043
    },
    {
      "epoch": 0.3805356205146158,
      "grad_norm": 0.7890303215104586,
      "learning_rate": 7.109486870356243e-06,
      "loss": 0.1446,
      "step": 13044
    },
    {
      "epoch": 0.38056479374525937,
      "grad_norm": 0.8572201015853493,
      "learning_rate": 7.1090585315390525e-06,
      "loss": 0.1505,
      "step": 13045
    },
    {
      "epoch": 0.3805939669759029,
      "grad_norm": 0.9668789016106413,
      "learning_rate": 7.108630173892691e-06,
      "loss": 0.1593,
      "step": 13046
    },
    {
      "epoch": 0.3806231402065465,
      "grad_norm": 0.7463194331800527,
      "learning_rate": 7.108201797420983e-06,
      "loss": 0.1483,
      "step": 13047
    },
    {
      "epoch": 0.38065231343719,
      "grad_norm": 0.8313397874101502,
      "learning_rate": 7.107773402127751e-06,
      "loss": 0.1519,
      "step": 13048
    },
    {
      "epoch": 0.3806814866678336,
      "grad_norm": 1.0873651459527833,
      "learning_rate": 7.107344988016822e-06,
      "loss": 0.1429,
      "step": 13049
    },
    {
      "epoch": 0.38071065989847713,
      "grad_norm": 0.764576326893396,
      "learning_rate": 7.1069165550920205e-06,
      "loss": 0.1449,
      "step": 13050
    },
    {
      "epoch": 0.38073983312912074,
      "grad_norm": 1.3986039501018053,
      "learning_rate": 7.106488103357171e-06,
      "loss": 0.1521,
      "step": 13051
    },
    {
      "epoch": 0.3807690063597643,
      "grad_norm": 1.060778432603687,
      "learning_rate": 7.106059632816098e-06,
      "loss": 0.1485,
      "step": 13052
    },
    {
      "epoch": 0.38079817959040785,
      "grad_norm": 0.9756185368049597,
      "learning_rate": 7.105631143472628e-06,
      "loss": 0.1331,
      "step": 13053
    },
    {
      "epoch": 0.3808273528210514,
      "grad_norm": 1.0338252019646097,
      "learning_rate": 7.105202635330586e-06,
      "loss": 0.1438,
      "step": 13054
    },
    {
      "epoch": 0.38085652605169495,
      "grad_norm": 0.8616962415237442,
      "learning_rate": 7.104774108393797e-06,
      "loss": 0.1547,
      "step": 13055
    },
    {
      "epoch": 0.3808856992823385,
      "grad_norm": 0.7960991086389061,
      "learning_rate": 7.104345562666086e-06,
      "loss": 0.1371,
      "step": 13056
    },
    {
      "epoch": 0.38091487251298206,
      "grad_norm": 1.0248147656572244,
      "learning_rate": 7.1039169981512825e-06,
      "loss": 0.1285,
      "step": 13057
    },
    {
      "epoch": 0.38094404574362567,
      "grad_norm": 0.9138140422863517,
      "learning_rate": 7.103488414853209e-06,
      "loss": 0.1336,
      "step": 13058
    },
    {
      "epoch": 0.3809732189742692,
      "grad_norm": 0.8605081926102031,
      "learning_rate": 7.103059812775693e-06,
      "loss": 0.1356,
      "step": 13059
    },
    {
      "epoch": 0.3810023922049128,
      "grad_norm": 0.8019811823044132,
      "learning_rate": 7.102631191922561e-06,
      "loss": 0.165,
      "step": 13060
    },
    {
      "epoch": 0.38103156543555633,
      "grad_norm": 0.9293022660798531,
      "learning_rate": 7.10220255229764e-06,
      "loss": 0.1649,
      "step": 13061
    },
    {
      "epoch": 0.3810607386661999,
      "grad_norm": 1.134806044489274,
      "learning_rate": 7.101773893904756e-06,
      "loss": 0.1581,
      "step": 13062
    },
    {
      "epoch": 0.38108991189684344,
      "grad_norm": 0.855958036612699,
      "learning_rate": 7.101345216747737e-06,
      "loss": 0.1561,
      "step": 13063
    },
    {
      "epoch": 0.38111908512748705,
      "grad_norm": 1.1020334433221302,
      "learning_rate": 7.100916520830409e-06,
      "loss": 0.1393,
      "step": 13064
    },
    {
      "epoch": 0.3811482583581306,
      "grad_norm": 0.903005414426244,
      "learning_rate": 7.1004878061565995e-06,
      "loss": 0.157,
      "step": 13065
    },
    {
      "epoch": 0.38117743158877415,
      "grad_norm": 0.7590175862443029,
      "learning_rate": 7.100059072730136e-06,
      "loss": 0.1682,
      "step": 13066
    },
    {
      "epoch": 0.3812066048194177,
      "grad_norm": 0.7066565815456609,
      "learning_rate": 7.0996303205548486e-06,
      "loss": 0.1373,
      "step": 13067
    },
    {
      "epoch": 0.38123577805006126,
      "grad_norm": 0.7771638731156558,
      "learning_rate": 7.099201549634561e-06,
      "loss": 0.1505,
      "step": 13068
    },
    {
      "epoch": 0.3812649512807048,
      "grad_norm": 0.7399993588177523,
      "learning_rate": 7.098772759973104e-06,
      "loss": 0.1556,
      "step": 13069
    },
    {
      "epoch": 0.38129412451134836,
      "grad_norm": 0.693851392144675,
      "learning_rate": 7.098343951574305e-06,
      "loss": 0.1546,
      "step": 13070
    },
    {
      "epoch": 0.381323297741992,
      "grad_norm": 0.697594934361742,
      "learning_rate": 7.097915124441991e-06,
      "loss": 0.1422,
      "step": 13071
    },
    {
      "epoch": 0.3813524709726355,
      "grad_norm": 0.6922999767936167,
      "learning_rate": 7.097486278579993e-06,
      "loss": 0.1237,
      "step": 13072
    },
    {
      "epoch": 0.3813816442032791,
      "grad_norm": 0.9004585109504485,
      "learning_rate": 7.097057413992136e-06,
      "loss": 0.1779,
      "step": 13073
    },
    {
      "epoch": 0.38141081743392263,
      "grad_norm": 0.820602806872622,
      "learning_rate": 7.096628530682253e-06,
      "loss": 0.1257,
      "step": 13074
    },
    {
      "epoch": 0.3814399906645662,
      "grad_norm": 0.7324287376510917,
      "learning_rate": 7.096199628654171e-06,
      "loss": 0.1286,
      "step": 13075
    },
    {
      "epoch": 0.38146916389520974,
      "grad_norm": 0.7892389193274874,
      "learning_rate": 7.095770707911718e-06,
      "loss": 0.128,
      "step": 13076
    },
    {
      "epoch": 0.3814983371258533,
      "grad_norm": 0.9323040225014304,
      "learning_rate": 7.0953417684587255e-06,
      "loss": 0.1447,
      "step": 13077
    },
    {
      "epoch": 0.3815275103564969,
      "grad_norm": 0.7199735658137739,
      "learning_rate": 7.094912810299021e-06,
      "loss": 0.1501,
      "step": 13078
    },
    {
      "epoch": 0.38155668358714045,
      "grad_norm": 1.115182105137788,
      "learning_rate": 7.094483833436435e-06,
      "loss": 0.1658,
      "step": 13079
    },
    {
      "epoch": 0.381585856817784,
      "grad_norm": 0.8367324438425654,
      "learning_rate": 7.094054837874798e-06,
      "loss": 0.1457,
      "step": 13080
    },
    {
      "epoch": 0.38161503004842756,
      "grad_norm": 0.8292608413906478,
      "learning_rate": 7.093625823617939e-06,
      "loss": 0.1598,
      "step": 13081
    },
    {
      "epoch": 0.3816442032790711,
      "grad_norm": 0.9212773809358213,
      "learning_rate": 7.0931967906696885e-06,
      "loss": 0.1188,
      "step": 13082
    },
    {
      "epoch": 0.38167337650971467,
      "grad_norm": 0.6563577012682009,
      "learning_rate": 7.092767739033877e-06,
      "loss": 0.1668,
      "step": 13083
    },
    {
      "epoch": 0.3817025497403582,
      "grad_norm": 0.9401194032639766,
      "learning_rate": 7.092338668714333e-06,
      "loss": 0.1635,
      "step": 13084
    },
    {
      "epoch": 0.38173172297100183,
      "grad_norm": 0.9556742187372048,
      "learning_rate": 7.0919095797148915e-06,
      "loss": 0.1319,
      "step": 13085
    },
    {
      "epoch": 0.3817608962016454,
      "grad_norm": 0.7710741032344843,
      "learning_rate": 7.091480472039378e-06,
      "loss": 0.1431,
      "step": 13086
    },
    {
      "epoch": 0.38179006943228894,
      "grad_norm": 0.7858500377077914,
      "learning_rate": 7.091051345691628e-06,
      "loss": 0.1552,
      "step": 13087
    },
    {
      "epoch": 0.3818192426629325,
      "grad_norm": 1.3675361224142528,
      "learning_rate": 7.090622200675471e-06,
      "loss": 0.1448,
      "step": 13088
    },
    {
      "epoch": 0.38184841589357604,
      "grad_norm": 0.8874926855421642,
      "learning_rate": 7.090193036994737e-06,
      "loss": 0.1455,
      "step": 13089
    },
    {
      "epoch": 0.3818775891242196,
      "grad_norm": 0.9731620384555049,
      "learning_rate": 7.089763854653259e-06,
      "loss": 0.1553,
      "step": 13090
    },
    {
      "epoch": 0.3819067623548632,
      "grad_norm": 1.0635409840006158,
      "learning_rate": 7.089334653654868e-06,
      "loss": 0.1485,
      "step": 13091
    },
    {
      "epoch": 0.38193593558550676,
      "grad_norm": 0.8482488111094789,
      "learning_rate": 7.088905434003396e-06,
      "loss": 0.1582,
      "step": 13092
    },
    {
      "epoch": 0.3819651088161503,
      "grad_norm": 1.218399775578868,
      "learning_rate": 7.088476195702675e-06,
      "loss": 0.1472,
      "step": 13093
    },
    {
      "epoch": 0.38199428204679386,
      "grad_norm": 0.7200220502156465,
      "learning_rate": 7.088046938756536e-06,
      "loss": 0.1344,
      "step": 13094
    },
    {
      "epoch": 0.3820234552774374,
      "grad_norm": 0.8267643247609752,
      "learning_rate": 7.0876176631688144e-06,
      "loss": 0.1283,
      "step": 13095
    },
    {
      "epoch": 0.38205262850808097,
      "grad_norm": 0.7760840832630073,
      "learning_rate": 7.0871883689433396e-06,
      "loss": 0.1716,
      "step": 13096
    },
    {
      "epoch": 0.3820818017387245,
      "grad_norm": 0.5994343031843402,
      "learning_rate": 7.086759056083945e-06,
      "loss": 0.141,
      "step": 13097
    },
    {
      "epoch": 0.38211097496936813,
      "grad_norm": 0.7332000190947274,
      "learning_rate": 7.086329724594464e-06,
      "loss": 0.1295,
      "step": 13098
    },
    {
      "epoch": 0.3821401482000117,
      "grad_norm": 0.7322465399146401,
      "learning_rate": 7.0859003744787296e-06,
      "loss": 0.1495,
      "step": 13099
    },
    {
      "epoch": 0.38216932143065524,
      "grad_norm": 0.7286991271251654,
      "learning_rate": 7.085471005740575e-06,
      "loss": 0.1384,
      "step": 13100
    },
    {
      "epoch": 0.3821984946612988,
      "grad_norm": 0.6617685520670704,
      "learning_rate": 7.085041618383831e-06,
      "loss": 0.1475,
      "step": 13101
    },
    {
      "epoch": 0.38222766789194235,
      "grad_norm": 0.8567712621782794,
      "learning_rate": 7.084612212412336e-06,
      "loss": 0.1403,
      "step": 13102
    },
    {
      "epoch": 0.3822568411225859,
      "grad_norm": 0.8779255613544709,
      "learning_rate": 7.08418278782992e-06,
      "loss": 0.1615,
      "step": 13103
    },
    {
      "epoch": 0.38228601435322945,
      "grad_norm": 0.740055980900757,
      "learning_rate": 7.083753344640415e-06,
      "loss": 0.144,
      "step": 13104
    },
    {
      "epoch": 0.38231518758387306,
      "grad_norm": 0.8713575859818685,
      "learning_rate": 7.083323882847661e-06,
      "loss": 0.1327,
      "step": 13105
    },
    {
      "epoch": 0.3823443608145166,
      "grad_norm": 0.8144970970507229,
      "learning_rate": 7.082894402455487e-06,
      "loss": 0.138,
      "step": 13106
    },
    {
      "epoch": 0.38237353404516017,
      "grad_norm": 0.7131895739917116,
      "learning_rate": 7.08246490346773e-06,
      "loss": 0.1428,
      "step": 13107
    },
    {
      "epoch": 0.3824027072758037,
      "grad_norm": 0.8303449996677388,
      "learning_rate": 7.082035385888222e-06,
      "loss": 0.1426,
      "step": 13108
    },
    {
      "epoch": 0.3824318805064473,
      "grad_norm": 0.8555770600779227,
      "learning_rate": 7.081605849720799e-06,
      "loss": 0.1676,
      "step": 13109
    },
    {
      "epoch": 0.38246105373709083,
      "grad_norm": 0.7928736545267687,
      "learning_rate": 7.081176294969298e-06,
      "loss": 0.1326,
      "step": 13110
    },
    {
      "epoch": 0.3824902269677344,
      "grad_norm": 0.9716808502627015,
      "learning_rate": 7.08074672163755e-06,
      "loss": 0.1356,
      "step": 13111
    },
    {
      "epoch": 0.382519400198378,
      "grad_norm": 0.800261002745498,
      "learning_rate": 7.080317129729392e-06,
      "loss": 0.145,
      "step": 13112
    },
    {
      "epoch": 0.38254857342902154,
      "grad_norm": 0.8046887385196647,
      "learning_rate": 7.079887519248661e-06,
      "loss": 0.1428,
      "step": 13113
    },
    {
      "epoch": 0.3825777466596651,
      "grad_norm": 0.7150547588897157,
      "learning_rate": 7.079457890199188e-06,
      "loss": 0.1237,
      "step": 13114
    },
    {
      "epoch": 0.38260691989030865,
      "grad_norm": 0.6849095150734505,
      "learning_rate": 7.0790282425848145e-06,
      "loss": 0.1412,
      "step": 13115
    },
    {
      "epoch": 0.3826360931209522,
      "grad_norm": 0.9194422123303154,
      "learning_rate": 7.07859857640937e-06,
      "loss": 0.1403,
      "step": 13116
    },
    {
      "epoch": 0.38266526635159576,
      "grad_norm": 0.8675519789641049,
      "learning_rate": 7.0781688916766965e-06,
      "loss": 0.1443,
      "step": 13117
    },
    {
      "epoch": 0.3826944395822393,
      "grad_norm": 0.7194996006589511,
      "learning_rate": 7.0777391883906265e-06,
      "loss": 0.1138,
      "step": 13118
    },
    {
      "epoch": 0.3827236128128829,
      "grad_norm": 0.9397140559046668,
      "learning_rate": 7.077309466554996e-06,
      "loss": 0.1591,
      "step": 13119
    },
    {
      "epoch": 0.38275278604352647,
      "grad_norm": 0.7076092649402206,
      "learning_rate": 7.076879726173643e-06,
      "loss": 0.1613,
      "step": 13120
    },
    {
      "epoch": 0.38278195927417,
      "grad_norm": 0.7465014711959881,
      "learning_rate": 7.0764499672504035e-06,
      "loss": 0.1294,
      "step": 13121
    },
    {
      "epoch": 0.3828111325048136,
      "grad_norm": 0.8638462707328708,
      "learning_rate": 7.0760201897891145e-06,
      "loss": 0.1346,
      "step": 13122
    },
    {
      "epoch": 0.38284030573545713,
      "grad_norm": 0.8553734920619475,
      "learning_rate": 7.075590393793612e-06,
      "loss": 0.1529,
      "step": 13123
    },
    {
      "epoch": 0.3828694789661007,
      "grad_norm": 0.7383771376980681,
      "learning_rate": 7.075160579267734e-06,
      "loss": 0.1461,
      "step": 13124
    },
    {
      "epoch": 0.3828986521967443,
      "grad_norm": 0.6253729740034147,
      "learning_rate": 7.074730746215319e-06,
      "loss": 0.1432,
      "step": 13125
    },
    {
      "epoch": 0.38292782542738785,
      "grad_norm": 0.8786975519092382,
      "learning_rate": 7.074300894640202e-06,
      "loss": 0.1548,
      "step": 13126
    },
    {
      "epoch": 0.3829569986580314,
      "grad_norm": 0.8880575694395363,
      "learning_rate": 7.073871024546224e-06,
      "loss": 0.139,
      "step": 13127
    },
    {
      "epoch": 0.38298617188867495,
      "grad_norm": 0.775338023325945,
      "learning_rate": 7.073441135937218e-06,
      "loss": 0.1441,
      "step": 13128
    },
    {
      "epoch": 0.3830153451193185,
      "grad_norm": 0.7930982428849666,
      "learning_rate": 7.073011228817026e-06,
      "loss": 0.1584,
      "step": 13129
    },
    {
      "epoch": 0.38304451834996206,
      "grad_norm": 0.911810955626411,
      "learning_rate": 7.072581303189485e-06,
      "loss": 0.1488,
      "step": 13130
    },
    {
      "epoch": 0.3830736915806056,
      "grad_norm": 0.9098857458468019,
      "learning_rate": 7.072151359058431e-06,
      "loss": 0.1604,
      "step": 13131
    },
    {
      "epoch": 0.3831028648112492,
      "grad_norm": 0.7879204619318778,
      "learning_rate": 7.071721396427706e-06,
      "loss": 0.1378,
      "step": 13132
    },
    {
      "epoch": 0.3831320380418928,
      "grad_norm": 0.8966769901371093,
      "learning_rate": 7.071291415301147e-06,
      "loss": 0.1387,
      "step": 13133
    },
    {
      "epoch": 0.38316121127253633,
      "grad_norm": 1.0341074388373561,
      "learning_rate": 7.070861415682591e-06,
      "loss": 0.1482,
      "step": 13134
    },
    {
      "epoch": 0.3831903845031799,
      "grad_norm": 1.2304936071236645,
      "learning_rate": 7.0704313975758795e-06,
      "loss": 0.1419,
      "step": 13135
    },
    {
      "epoch": 0.38321955773382343,
      "grad_norm": 1.0014628454773198,
      "learning_rate": 7.07000136098485e-06,
      "loss": 0.1381,
      "step": 13136
    },
    {
      "epoch": 0.383248730964467,
      "grad_norm": 1.0036603629484904,
      "learning_rate": 7.069571305913344e-06,
      "loss": 0.1453,
      "step": 13137
    },
    {
      "epoch": 0.38327790419511054,
      "grad_norm": 1.1866706294109137,
      "learning_rate": 7.0691412323651985e-06,
      "loss": 0.1406,
      "step": 13138
    },
    {
      "epoch": 0.38330707742575415,
      "grad_norm": 0.8985576384858317,
      "learning_rate": 7.0687111403442545e-06,
      "loss": 0.1393,
      "step": 13139
    },
    {
      "epoch": 0.3833362506563977,
      "grad_norm": 0.9242137471340901,
      "learning_rate": 7.068281029854352e-06,
      "loss": 0.1672,
      "step": 13140
    },
    {
      "epoch": 0.38336542388704126,
      "grad_norm": 0.8816153269856454,
      "learning_rate": 7.067850900899328e-06,
      "loss": 0.169,
      "step": 13141
    },
    {
      "epoch": 0.3833945971176848,
      "grad_norm": 0.7916098566601042,
      "learning_rate": 7.067420753483026e-06,
      "loss": 0.1299,
      "step": 13142
    },
    {
      "epoch": 0.38342377034832836,
      "grad_norm": 0.7133509619734466,
      "learning_rate": 7.066990587609286e-06,
      "loss": 0.1358,
      "step": 13143
    },
    {
      "epoch": 0.3834529435789719,
      "grad_norm": 0.8723191442988169,
      "learning_rate": 7.066560403281946e-06,
      "loss": 0.1394,
      "step": 13144
    },
    {
      "epoch": 0.38348211680961547,
      "grad_norm": 0.7490395641106506,
      "learning_rate": 7.06613020050485e-06,
      "loss": 0.137,
      "step": 13145
    },
    {
      "epoch": 0.3835112900402591,
      "grad_norm": 0.8581450018827587,
      "learning_rate": 7.065699979281834e-06,
      "loss": 0.1422,
      "step": 13146
    },
    {
      "epoch": 0.38354046327090263,
      "grad_norm": 0.778933914965665,
      "learning_rate": 7.065269739616744e-06,
      "loss": 0.1629,
      "step": 13147
    },
    {
      "epoch": 0.3835696365015462,
      "grad_norm": 0.8985223450460263,
      "learning_rate": 7.064839481513417e-06,
      "loss": 0.1286,
      "step": 13148
    },
    {
      "epoch": 0.38359880973218974,
      "grad_norm": 0.758957710394968,
      "learning_rate": 7.064409204975696e-06,
      "loss": 0.1746,
      "step": 13149
    },
    {
      "epoch": 0.3836279829628333,
      "grad_norm": 0.7899024126793673,
      "learning_rate": 7.0639789100074255e-06,
      "loss": 0.1471,
      "step": 13150
    },
    {
      "epoch": 0.38365715619347684,
      "grad_norm": 0.6982124085047007,
      "learning_rate": 7.06354859661244e-06,
      "loss": 0.1378,
      "step": 13151
    },
    {
      "epoch": 0.38368632942412045,
      "grad_norm": 0.7941912606468112,
      "learning_rate": 7.0631182647945884e-06,
      "loss": 0.1433,
      "step": 13152
    },
    {
      "epoch": 0.383715502654764,
      "grad_norm": 0.8363233650555554,
      "learning_rate": 7.062687914557708e-06,
      "loss": 0.1489,
      "step": 13153
    },
    {
      "epoch": 0.38374467588540756,
      "grad_norm": 0.829309201774025,
      "learning_rate": 7.062257545905642e-06,
      "loss": 0.1674,
      "step": 13154
    },
    {
      "epoch": 0.3837738491160511,
      "grad_norm": 0.7630820142471877,
      "learning_rate": 7.061827158842234e-06,
      "loss": 0.1302,
      "step": 13155
    },
    {
      "epoch": 0.38380302234669467,
      "grad_norm": 0.7884424514086901,
      "learning_rate": 7.061396753371323e-06,
      "loss": 0.1648,
      "step": 13156
    },
    {
      "epoch": 0.3838321955773382,
      "grad_norm": 0.8154456882331151,
      "learning_rate": 7.060966329496757e-06,
      "loss": 0.1626,
      "step": 13157
    },
    {
      "epoch": 0.3838613688079818,
      "grad_norm": 1.0104256131077718,
      "learning_rate": 7.060535887222373e-06,
      "loss": 0.1508,
      "step": 13158
    },
    {
      "epoch": 0.3838905420386254,
      "grad_norm": 0.7977958642526866,
      "learning_rate": 7.060105426552018e-06,
      "loss": 0.1506,
      "step": 13159
    },
    {
      "epoch": 0.38391971526926894,
      "grad_norm": 1.062430256860251,
      "learning_rate": 7.0596749474895344e-06,
      "loss": 0.1234,
      "step": 13160
    },
    {
      "epoch": 0.3839488884999125,
      "grad_norm": 0.8411417714500453,
      "learning_rate": 7.059244450038762e-06,
      "loss": 0.1573,
      "step": 13161
    },
    {
      "epoch": 0.38397806173055604,
      "grad_norm": 0.8339153474587397,
      "learning_rate": 7.058813934203549e-06,
      "loss": 0.1486,
      "step": 13162
    },
    {
      "epoch": 0.3840072349611996,
      "grad_norm": 0.9894703493526382,
      "learning_rate": 7.058383399987736e-06,
      "loss": 0.1361,
      "step": 13163
    },
    {
      "epoch": 0.38403640819184315,
      "grad_norm": 1.129688469946021,
      "learning_rate": 7.057952847395166e-06,
      "loss": 0.1368,
      "step": 13164
    },
    {
      "epoch": 0.3840655814224867,
      "grad_norm": 0.7600692887581971,
      "learning_rate": 7.057522276429686e-06,
      "loss": 0.1551,
      "step": 13165
    },
    {
      "epoch": 0.3840947546531303,
      "grad_norm": 1.2455831289902193,
      "learning_rate": 7.057091687095138e-06,
      "loss": 0.1595,
      "step": 13166
    },
    {
      "epoch": 0.38412392788377386,
      "grad_norm": 1.0068548520666945,
      "learning_rate": 7.056661079395366e-06,
      "loss": 0.1375,
      "step": 13167
    },
    {
      "epoch": 0.3841531011144174,
      "grad_norm": 0.7662769792406645,
      "learning_rate": 7.056230453334214e-06,
      "loss": 0.1468,
      "step": 13168
    },
    {
      "epoch": 0.38418227434506097,
      "grad_norm": 0.9628656124681166,
      "learning_rate": 7.055799808915529e-06,
      "loss": 0.1484,
      "step": 13169
    },
    {
      "epoch": 0.3842114475757045,
      "grad_norm": 0.8008510300215405,
      "learning_rate": 7.0553691461431536e-06,
      "loss": 0.1185,
      "step": 13170
    },
    {
      "epoch": 0.3842406208063481,
      "grad_norm": 0.7761874409154677,
      "learning_rate": 7.054938465020933e-06,
      "loss": 0.1371,
      "step": 13171
    },
    {
      "epoch": 0.38426979403699163,
      "grad_norm": 2.5130980677053087,
      "learning_rate": 7.054507765552712e-06,
      "loss": 0.1527,
      "step": 13172
    },
    {
      "epoch": 0.38429896726763524,
      "grad_norm": 1.1640321970693506,
      "learning_rate": 7.054077047742336e-06,
      "loss": 0.1329,
      "step": 13173
    },
    {
      "epoch": 0.3843281404982788,
      "grad_norm": 1.0871499764143346,
      "learning_rate": 7.053646311593651e-06,
      "loss": 0.1514,
      "step": 13174
    },
    {
      "epoch": 0.38435731372892235,
      "grad_norm": 0.7590588990257113,
      "learning_rate": 7.053215557110503e-06,
      "loss": 0.1531,
      "step": 13175
    },
    {
      "epoch": 0.3843864869595659,
      "grad_norm": 0.929740989737801,
      "learning_rate": 7.052784784296735e-06,
      "loss": 0.1473,
      "step": 13176
    },
    {
      "epoch": 0.38441566019020945,
      "grad_norm": 1.277307219791055,
      "learning_rate": 7.052353993156196e-06,
      "loss": 0.1456,
      "step": 13177
    },
    {
      "epoch": 0.384444833420853,
      "grad_norm": 0.9094694212534672,
      "learning_rate": 7.051923183692728e-06,
      "loss": 0.1575,
      "step": 13178
    },
    {
      "epoch": 0.3844740066514966,
      "grad_norm": 0.8590938719441122,
      "learning_rate": 7.0514923559101814e-06,
      "loss": 0.1427,
      "step": 13179
    },
    {
      "epoch": 0.38450317988214017,
      "grad_norm": 0.8323348273163871,
      "learning_rate": 7.0510615098124005e-06,
      "loss": 0.1211,
      "step": 13180
    },
    {
      "epoch": 0.3845323531127837,
      "grad_norm": 0.7958251264863333,
      "learning_rate": 7.0506306454032326e-06,
      "loss": 0.1386,
      "step": 13181
    },
    {
      "epoch": 0.3845615263434273,
      "grad_norm": 0.9837312411046082,
      "learning_rate": 7.050199762686522e-06,
      "loss": 0.1261,
      "step": 13182
    },
    {
      "epoch": 0.3845906995740708,
      "grad_norm": 0.7655586884409743,
      "learning_rate": 7.04976886166612e-06,
      "loss": 0.1474,
      "step": 13183
    },
    {
      "epoch": 0.3846198728047144,
      "grad_norm": 0.7334294555695986,
      "learning_rate": 7.049337942345868e-06,
      "loss": 0.1354,
      "step": 13184
    },
    {
      "epoch": 0.38464904603535793,
      "grad_norm": 0.8762714129354264,
      "learning_rate": 7.048907004729619e-06,
      "loss": 0.1414,
      "step": 13185
    },
    {
      "epoch": 0.38467821926600154,
      "grad_norm": 0.7870794442118512,
      "learning_rate": 7.048476048821215e-06,
      "loss": 0.146,
      "step": 13186
    },
    {
      "epoch": 0.3847073924966451,
      "grad_norm": 1.01210760370242,
      "learning_rate": 7.048045074624508e-06,
      "loss": 0.1389,
      "step": 13187
    },
    {
      "epoch": 0.38473656572728865,
      "grad_norm": 0.7542164408527335,
      "learning_rate": 7.047614082143342e-06,
      "loss": 0.1475,
      "step": 13188
    },
    {
      "epoch": 0.3847657389579322,
      "grad_norm": 0.8052815624525067,
      "learning_rate": 7.047183071381566e-06,
      "loss": 0.134,
      "step": 13189
    },
    {
      "epoch": 0.38479491218857576,
      "grad_norm": 0.962643863131615,
      "learning_rate": 7.046752042343029e-06,
      "loss": 0.1905,
      "step": 13190
    },
    {
      "epoch": 0.3848240854192193,
      "grad_norm": 0.9266481751109104,
      "learning_rate": 7.046320995031578e-06,
      "loss": 0.1385,
      "step": 13191
    },
    {
      "epoch": 0.38485325864986286,
      "grad_norm": 0.6338790428294409,
      "learning_rate": 7.045889929451063e-06,
      "loss": 0.1332,
      "step": 13192
    },
    {
      "epoch": 0.38488243188050647,
      "grad_norm": 0.7303123833273253,
      "learning_rate": 7.045458845605329e-06,
      "loss": 0.1191,
      "step": 13193
    },
    {
      "epoch": 0.38491160511115,
      "grad_norm": 0.8025559681918394,
      "learning_rate": 7.045027743498227e-06,
      "loss": 0.1405,
      "step": 13194
    },
    {
      "epoch": 0.3849407783417936,
      "grad_norm": 0.8078721313870865,
      "learning_rate": 7.044596623133607e-06,
      "loss": 0.1535,
      "step": 13195
    },
    {
      "epoch": 0.38496995157243713,
      "grad_norm": 0.8735492269731023,
      "learning_rate": 7.044165484515315e-06,
      "loss": 0.1308,
      "step": 13196
    },
    {
      "epoch": 0.3849991248030807,
      "grad_norm": 0.8228735997223994,
      "learning_rate": 7.043734327647202e-06,
      "loss": 0.1489,
      "step": 13197
    },
    {
      "epoch": 0.38502829803372424,
      "grad_norm": 0.6732863492182765,
      "learning_rate": 7.043303152533119e-06,
      "loss": 0.1392,
      "step": 13198
    },
    {
      "epoch": 0.3850574712643678,
      "grad_norm": 0.8134206062713474,
      "learning_rate": 7.042871959176909e-06,
      "loss": 0.2068,
      "step": 13199
    },
    {
      "epoch": 0.3850866444950114,
      "grad_norm": 0.838629080481506,
      "learning_rate": 7.0424407475824285e-06,
      "loss": 0.1358,
      "step": 13200
    },
    {
      "epoch": 0.38511581772565495,
      "grad_norm": 0.8938669022470852,
      "learning_rate": 7.042009517753525e-06,
      "loss": 0.1642,
      "step": 13201
    },
    {
      "epoch": 0.3851449909562985,
      "grad_norm": 0.8037364863431298,
      "learning_rate": 7.041578269694047e-06,
      "loss": 0.13,
      "step": 13202
    },
    {
      "epoch": 0.38517416418694206,
      "grad_norm": 1.0073783371034495,
      "learning_rate": 7.041147003407845e-06,
      "loss": 0.1806,
      "step": 13203
    },
    {
      "epoch": 0.3852033374175856,
      "grad_norm": 0.8013140474535293,
      "learning_rate": 7.04071571889877e-06,
      "loss": 0.1547,
      "step": 13204
    },
    {
      "epoch": 0.38523251064822917,
      "grad_norm": 0.8313544827264453,
      "learning_rate": 7.040284416170673e-06,
      "loss": 0.1378,
      "step": 13205
    },
    {
      "epoch": 0.3852616838788728,
      "grad_norm": 1.025391914818843,
      "learning_rate": 7.039853095227404e-06,
      "loss": 0.1379,
      "step": 13206
    },
    {
      "epoch": 0.3852908571095163,
      "grad_norm": 0.7223974582346451,
      "learning_rate": 7.039421756072814e-06,
      "loss": 0.1182,
      "step": 13207
    },
    {
      "epoch": 0.3853200303401599,
      "grad_norm": 0.7760834738644458,
      "learning_rate": 7.038990398710751e-06,
      "loss": 0.1189,
      "step": 13208
    },
    {
      "epoch": 0.38534920357080343,
      "grad_norm": 1.148551727361324,
      "learning_rate": 7.03855902314507e-06,
      "loss": 0.1506,
      "step": 13209
    },
    {
      "epoch": 0.385378376801447,
      "grad_norm": 0.7109625283204699,
      "learning_rate": 7.0381276293796204e-06,
      "loss": 0.1336,
      "step": 13210
    },
    {
      "epoch": 0.38540755003209054,
      "grad_norm": 1.0315526502324017,
      "learning_rate": 7.0376962174182536e-06,
      "loss": 0.1532,
      "step": 13211
    },
    {
      "epoch": 0.3854367232627341,
      "grad_norm": 0.7497842649954283,
      "learning_rate": 7.037264787264823e-06,
      "loss": 0.1611,
      "step": 13212
    },
    {
      "epoch": 0.3854658964933777,
      "grad_norm": 1.0430596271386305,
      "learning_rate": 7.036833338923177e-06,
      "loss": 0.1495,
      "step": 13213
    },
    {
      "epoch": 0.38549506972402126,
      "grad_norm": 0.7477285953945649,
      "learning_rate": 7.03640187239717e-06,
      "loss": 0.1399,
      "step": 13214
    },
    {
      "epoch": 0.3855242429546648,
      "grad_norm": 1.1289120089937266,
      "learning_rate": 7.035970387690652e-06,
      "loss": 0.1332,
      "step": 13215
    },
    {
      "epoch": 0.38555341618530836,
      "grad_norm": 0.93720400927447,
      "learning_rate": 7.035538884807478e-06,
      "loss": 0.1217,
      "step": 13216
    },
    {
      "epoch": 0.3855825894159519,
      "grad_norm": 0.7350282797287049,
      "learning_rate": 7.035107363751499e-06,
      "loss": 0.1256,
      "step": 13217
    },
    {
      "epoch": 0.38561176264659547,
      "grad_norm": 1.0483884820992884,
      "learning_rate": 7.034675824526566e-06,
      "loss": 0.1359,
      "step": 13218
    },
    {
      "epoch": 0.385640935877239,
      "grad_norm": 0.9309292583366686,
      "learning_rate": 7.034244267136533e-06,
      "loss": 0.121,
      "step": 13219
    },
    {
      "epoch": 0.38567010910788263,
      "grad_norm": 1.0145449977165903,
      "learning_rate": 7.033812691585253e-06,
      "loss": 0.1367,
      "step": 13220
    },
    {
      "epoch": 0.3856992823385262,
      "grad_norm": 0.8552576548131363,
      "learning_rate": 7.033381097876578e-06,
      "loss": 0.1324,
      "step": 13221
    },
    {
      "epoch": 0.38572845556916974,
      "grad_norm": 1.1621448997547186,
      "learning_rate": 7.032949486014364e-06,
      "loss": 0.1327,
      "step": 13222
    },
    {
      "epoch": 0.3857576287998133,
      "grad_norm": 0.960819941259101,
      "learning_rate": 7.032517856002461e-06,
      "loss": 0.1475,
      "step": 13223
    },
    {
      "epoch": 0.38578680203045684,
      "grad_norm": 0.9398773742574359,
      "learning_rate": 7.0320862078447235e-06,
      "loss": 0.1235,
      "step": 13224
    },
    {
      "epoch": 0.3858159752611004,
      "grad_norm": 0.8729047289873096,
      "learning_rate": 7.0316545415450065e-06,
      "loss": 0.1163,
      "step": 13225
    },
    {
      "epoch": 0.38584514849174395,
      "grad_norm": 1.0931647627080159,
      "learning_rate": 7.0312228571071614e-06,
      "loss": 0.1569,
      "step": 13226
    },
    {
      "epoch": 0.38587432172238756,
      "grad_norm": 1.0550094070111407,
      "learning_rate": 7.030791154535045e-06,
      "loss": 0.1408,
      "step": 13227
    },
    {
      "epoch": 0.3859034949530311,
      "grad_norm": 0.7257333429410203,
      "learning_rate": 7.03035943383251e-06,
      "loss": 0.1593,
      "step": 13228
    },
    {
      "epoch": 0.38593266818367467,
      "grad_norm": 0.959882441307829,
      "learning_rate": 7.029927695003408e-06,
      "loss": 0.1429,
      "step": 13229
    },
    {
      "epoch": 0.3859618414143182,
      "grad_norm": 0.7871133713404713,
      "learning_rate": 7.029495938051599e-06,
      "loss": 0.141,
      "step": 13230
    },
    {
      "epoch": 0.38599101464496177,
      "grad_norm": 0.7285164992562347,
      "learning_rate": 7.029064162980934e-06,
      "loss": 0.1525,
      "step": 13231
    },
    {
      "epoch": 0.3860201878756053,
      "grad_norm": 0.708274471085333,
      "learning_rate": 7.028632369795267e-06,
      "loss": 0.1383,
      "step": 13232
    },
    {
      "epoch": 0.3860493611062489,
      "grad_norm": 0.9021603207471135,
      "learning_rate": 7.028200558498457e-06,
      "loss": 0.1407,
      "step": 13233
    },
    {
      "epoch": 0.3860785343368925,
      "grad_norm": 0.7355522210034191,
      "learning_rate": 7.0277687290943555e-06,
      "loss": 0.1314,
      "step": 13234
    },
    {
      "epoch": 0.38610770756753604,
      "grad_norm": 0.9074463689364065,
      "learning_rate": 7.027336881586818e-06,
      "loss": 0.1462,
      "step": 13235
    },
    {
      "epoch": 0.3861368807981796,
      "grad_norm": 0.704192428681642,
      "learning_rate": 7.026905015979702e-06,
      "loss": 0.1215,
      "step": 13236
    },
    {
      "epoch": 0.38616605402882315,
      "grad_norm": 0.6116731736115757,
      "learning_rate": 7.026473132276862e-06,
      "loss": 0.1287,
      "step": 13237
    },
    {
      "epoch": 0.3861952272594667,
      "grad_norm": 0.9547230536130067,
      "learning_rate": 7.026041230482152e-06,
      "loss": 0.145,
      "step": 13238
    },
    {
      "epoch": 0.38622440049011025,
      "grad_norm": 0.8319729792584203,
      "learning_rate": 7.02560931059943e-06,
      "loss": 0.1458,
      "step": 13239
    },
    {
      "epoch": 0.38625357372075386,
      "grad_norm": 0.7360962858435293,
      "learning_rate": 7.025177372632554e-06,
      "loss": 0.1336,
      "step": 13240
    },
    {
      "epoch": 0.3862827469513974,
      "grad_norm": 0.7169362741111335,
      "learning_rate": 7.0247454165853746e-06,
      "loss": 0.1566,
      "step": 13241
    },
    {
      "epoch": 0.38631192018204097,
      "grad_norm": 0.842506903764658,
      "learning_rate": 7.024313442461753e-06,
      "loss": 0.1573,
      "step": 13242
    },
    {
      "epoch": 0.3863410934126845,
      "grad_norm": 0.7194110795473797,
      "learning_rate": 7.023881450265544e-06,
      "loss": 0.144,
      "step": 13243
    },
    {
      "epoch": 0.3863702666433281,
      "grad_norm": 0.7806393290389207,
      "learning_rate": 7.023449440000605e-06,
      "loss": 0.128,
      "step": 13244
    },
    {
      "epoch": 0.38639943987397163,
      "grad_norm": 1.0850628310588,
      "learning_rate": 7.023017411670792e-06,
      "loss": 0.1547,
      "step": 13245
    },
    {
      "epoch": 0.3864286131046152,
      "grad_norm": 0.7937628761505633,
      "learning_rate": 7.022585365279963e-06,
      "loss": 0.1176,
      "step": 13246
    },
    {
      "epoch": 0.3864577863352588,
      "grad_norm": 1.016898103575818,
      "learning_rate": 7.022153300831974e-06,
      "loss": 0.1545,
      "step": 13247
    },
    {
      "epoch": 0.38648695956590234,
      "grad_norm": 0.8049622544118625,
      "learning_rate": 7.021721218330684e-06,
      "loss": 0.1433,
      "step": 13248
    },
    {
      "epoch": 0.3865161327965459,
      "grad_norm": 0.9263397793203935,
      "learning_rate": 7.021289117779948e-06,
      "loss": 0.1645,
      "step": 13249
    },
    {
      "epoch": 0.38654530602718945,
      "grad_norm": 1.1031347446761475,
      "learning_rate": 7.020856999183626e-06,
      "loss": 0.1427,
      "step": 13250
    },
    {
      "epoch": 0.386574479257833,
      "grad_norm": 1.4868112119703023,
      "learning_rate": 7.020424862545576e-06,
      "loss": 0.1485,
      "step": 13251
    },
    {
      "epoch": 0.38660365248847656,
      "grad_norm": 1.1050750802395453,
      "learning_rate": 7.019992707869655e-06,
      "loss": 0.1403,
      "step": 13252
    },
    {
      "epoch": 0.3866328257191201,
      "grad_norm": 0.9099726322467829,
      "learning_rate": 7.019560535159719e-06,
      "loss": 0.1497,
      "step": 13253
    },
    {
      "epoch": 0.3866619989497637,
      "grad_norm": 1.1939293331015322,
      "learning_rate": 7.019128344419631e-06,
      "loss": 0.1517,
      "step": 13254
    },
    {
      "epoch": 0.38669117218040727,
      "grad_norm": 1.2814198179595297,
      "learning_rate": 7.018696135653248e-06,
      "loss": 0.146,
      "step": 13255
    },
    {
      "epoch": 0.3867203454110508,
      "grad_norm": 0.7766726570343186,
      "learning_rate": 7.018263908864424e-06,
      "loss": 0.1398,
      "step": 13256
    },
    {
      "epoch": 0.3867495186416944,
      "grad_norm": 0.7753074895094298,
      "learning_rate": 7.017831664057026e-06,
      "loss": 0.1347,
      "step": 13257
    },
    {
      "epoch": 0.38677869187233793,
      "grad_norm": 0.8792880022194638,
      "learning_rate": 7.0173994012349066e-06,
      "loss": 0.127,
      "step": 13258
    },
    {
      "epoch": 0.3868078651029815,
      "grad_norm": 0.9984398289008541,
      "learning_rate": 7.016967120401925e-06,
      "loss": 0.1777,
      "step": 13259
    },
    {
      "epoch": 0.38683703833362504,
      "grad_norm": 0.8389228509562606,
      "learning_rate": 7.016534821561947e-06,
      "loss": 0.1692,
      "step": 13260
    },
    {
      "epoch": 0.38686621156426865,
      "grad_norm": 0.8388674127325592,
      "learning_rate": 7.016102504718824e-06,
      "loss": 0.1249,
      "step": 13261
    },
    {
      "epoch": 0.3868953847949122,
      "grad_norm": 0.7382572353971572,
      "learning_rate": 7.015670169876419e-06,
      "loss": 0.1502,
      "step": 13262
    },
    {
      "epoch": 0.38692455802555575,
      "grad_norm": 0.8735915659877994,
      "learning_rate": 7.015237817038594e-06,
      "loss": 0.1485,
      "step": 13263
    },
    {
      "epoch": 0.3869537312561993,
      "grad_norm": 0.9981665930892641,
      "learning_rate": 7.014805446209205e-06,
      "loss": 0.1262,
      "step": 13264
    },
    {
      "epoch": 0.38698290448684286,
      "grad_norm": 0.6620816291523073,
      "learning_rate": 7.014373057392115e-06,
      "loss": 0.1389,
      "step": 13265
    },
    {
      "epoch": 0.3870120777174864,
      "grad_norm": 0.7700687214631495,
      "learning_rate": 7.013940650591182e-06,
      "loss": 0.1435,
      "step": 13266
    },
    {
      "epoch": 0.38704125094813,
      "grad_norm": 1.1417505579337457,
      "learning_rate": 7.01350822581027e-06,
      "loss": 0.1269,
      "step": 13267
    },
    {
      "epoch": 0.3870704241787736,
      "grad_norm": 0.9066213369944394,
      "learning_rate": 7.013075783053235e-06,
      "loss": 0.1709,
      "step": 13268
    },
    {
      "epoch": 0.38709959740941713,
      "grad_norm": 0.6524092610583785,
      "learning_rate": 7.012643322323941e-06,
      "loss": 0.132,
      "step": 13269
    },
    {
      "epoch": 0.3871287706400607,
      "grad_norm": 0.8563818341021124,
      "learning_rate": 7.012210843626248e-06,
      "loss": 0.128,
      "step": 13270
    },
    {
      "epoch": 0.38715794387070424,
      "grad_norm": 0.9548941536063597,
      "learning_rate": 7.011778346964015e-06,
      "loss": 0.137,
      "step": 13271
    },
    {
      "epoch": 0.3871871171013478,
      "grad_norm": 0.9557896221863059,
      "learning_rate": 7.011345832341109e-06,
      "loss": 0.1452,
      "step": 13272
    },
    {
      "epoch": 0.38721629033199134,
      "grad_norm": 0.9963487168990761,
      "learning_rate": 7.0109132997613845e-06,
      "loss": 0.1533,
      "step": 13273
    },
    {
      "epoch": 0.38724546356263495,
      "grad_norm": 0.998368277925134,
      "learning_rate": 7.010480749228706e-06,
      "loss": 0.1653,
      "step": 13274
    },
    {
      "epoch": 0.3872746367932785,
      "grad_norm": 0.8492515173009113,
      "learning_rate": 7.010048180746938e-06,
      "loss": 0.1196,
      "step": 13275
    },
    {
      "epoch": 0.38730381002392206,
      "grad_norm": 0.967085633972716,
      "learning_rate": 7.009615594319937e-06,
      "loss": 0.1659,
      "step": 13276
    },
    {
      "epoch": 0.3873329832545656,
      "grad_norm": 0.8020883773551066,
      "learning_rate": 7.0091829899515684e-06,
      "loss": 0.1476,
      "step": 13277
    },
    {
      "epoch": 0.38736215648520916,
      "grad_norm": 0.9411406020672687,
      "learning_rate": 7.008750367645694e-06,
      "loss": 0.1428,
      "step": 13278
    },
    {
      "epoch": 0.3873913297158527,
      "grad_norm": 0.864130362302704,
      "learning_rate": 7.008317727406175e-06,
      "loss": 0.1338,
      "step": 13279
    },
    {
      "epoch": 0.38742050294649627,
      "grad_norm": 0.7606777040854541,
      "learning_rate": 7.007885069236876e-06,
      "loss": 0.1323,
      "step": 13280
    },
    {
      "epoch": 0.3874496761771399,
      "grad_norm": 0.8536732065673777,
      "learning_rate": 7.0074523931416585e-06,
      "loss": 0.157,
      "step": 13281
    },
    {
      "epoch": 0.38747884940778343,
      "grad_norm": 1.2402798134330753,
      "learning_rate": 7.007019699124385e-06,
      "loss": 0.1519,
      "step": 13282
    },
    {
      "epoch": 0.387508022638427,
      "grad_norm": 0.9339407090209275,
      "learning_rate": 7.006586987188917e-06,
      "loss": 0.1507,
      "step": 13283
    },
    {
      "epoch": 0.38753719586907054,
      "grad_norm": 1.0502150476663425,
      "learning_rate": 7.006154257339121e-06,
      "loss": 0.1617,
      "step": 13284
    },
    {
      "epoch": 0.3875663690997141,
      "grad_norm": 1.1563302839339988,
      "learning_rate": 7.00572150957886e-06,
      "loss": 0.1357,
      "step": 13285
    },
    {
      "epoch": 0.38759554233035765,
      "grad_norm": 0.8922102358620652,
      "learning_rate": 7.005288743911994e-06,
      "loss": 0.1296,
      "step": 13286
    },
    {
      "epoch": 0.3876247155610012,
      "grad_norm": 0.8754802960953985,
      "learning_rate": 7.004855960342389e-06,
      "loss": 0.1306,
      "step": 13287
    },
    {
      "epoch": 0.3876538887916448,
      "grad_norm": 0.9678799900693552,
      "learning_rate": 7.00442315887391e-06,
      "loss": 0.1411,
      "step": 13288
    },
    {
      "epoch": 0.38768306202228836,
      "grad_norm": 1.3212304721783794,
      "learning_rate": 7.003990339510417e-06,
      "loss": 0.1213,
      "step": 13289
    },
    {
      "epoch": 0.3877122352529319,
      "grad_norm": 0.8836057308626918,
      "learning_rate": 7.003557502255779e-06,
      "loss": 0.1385,
      "step": 13290
    },
    {
      "epoch": 0.38774140848357547,
      "grad_norm": 0.9399206699618007,
      "learning_rate": 7.003124647113857e-06,
      "loss": 0.1624,
      "step": 13291
    },
    {
      "epoch": 0.387770581714219,
      "grad_norm": 0.8276290543556397,
      "learning_rate": 7.002691774088517e-06,
      "loss": 0.1562,
      "step": 13292
    },
    {
      "epoch": 0.3877997549448626,
      "grad_norm": 0.9554146122010095,
      "learning_rate": 7.002258883183621e-06,
      "loss": 0.1451,
      "step": 13293
    },
    {
      "epoch": 0.3878289281755062,
      "grad_norm": 0.8170182403350386,
      "learning_rate": 7.001825974403038e-06,
      "loss": 0.1402,
      "step": 13294
    },
    {
      "epoch": 0.38785810140614974,
      "grad_norm": 0.7558694011352974,
      "learning_rate": 7.001393047750629e-06,
      "loss": 0.139,
      "step": 13295
    },
    {
      "epoch": 0.3878872746367933,
      "grad_norm": 0.698040528931186,
      "learning_rate": 7.000960103230261e-06,
      "loss": 0.1372,
      "step": 13296
    },
    {
      "epoch": 0.38791644786743684,
      "grad_norm": 0.931884498583288,
      "learning_rate": 7.000527140845801e-06,
      "loss": 0.1233,
      "step": 13297
    },
    {
      "epoch": 0.3879456210980804,
      "grad_norm": 0.9023811833610386,
      "learning_rate": 7.000094160601109e-06,
      "loss": 0.1408,
      "step": 13298
    },
    {
      "epoch": 0.38797479432872395,
      "grad_norm": 0.8712635751305197,
      "learning_rate": 6.999661162500056e-06,
      "loss": 0.1636,
      "step": 13299
    },
    {
      "epoch": 0.3880039675593675,
      "grad_norm": 0.9558331606284869,
      "learning_rate": 6.999228146546504e-06,
      "loss": 0.1234,
      "step": 13300
    },
    {
      "epoch": 0.3880331407900111,
      "grad_norm": 0.9861588803013244,
      "learning_rate": 6.998795112744321e-06,
      "loss": 0.153,
      "step": 13301
    },
    {
      "epoch": 0.38806231402065466,
      "grad_norm": 1.109512376081367,
      "learning_rate": 6.9983620610973725e-06,
      "loss": 0.1598,
      "step": 13302
    },
    {
      "epoch": 0.3880914872512982,
      "grad_norm": 0.8147009888384774,
      "learning_rate": 6.997928991609525e-06,
      "loss": 0.1363,
      "step": 13303
    },
    {
      "epoch": 0.38812066048194177,
      "grad_norm": 0.9339738820826606,
      "learning_rate": 6.997495904284643e-06,
      "loss": 0.1644,
      "step": 13304
    },
    {
      "epoch": 0.3881498337125853,
      "grad_norm": 0.7825712474625479,
      "learning_rate": 6.9970627991265964e-06,
      "loss": 0.1512,
      "step": 13305
    },
    {
      "epoch": 0.3881790069432289,
      "grad_norm": 0.9293887480382017,
      "learning_rate": 6.9966296761392485e-06,
      "loss": 0.1671,
      "step": 13306
    },
    {
      "epoch": 0.38820818017387243,
      "grad_norm": 0.8516994438558391,
      "learning_rate": 6.9961965353264675e-06,
      "loss": 0.1322,
      "step": 13307
    },
    {
      "epoch": 0.38823735340451604,
      "grad_norm": 0.9549232915876097,
      "learning_rate": 6.995763376692121e-06,
      "loss": 0.1544,
      "step": 13308
    },
    {
      "epoch": 0.3882665266351596,
      "grad_norm": 1.013295450340856,
      "learning_rate": 6.995330200240075e-06,
      "loss": 0.1342,
      "step": 13309
    },
    {
      "epoch": 0.38829569986580315,
      "grad_norm": 0.6954548142395225,
      "learning_rate": 6.994897005974197e-06,
      "loss": 0.1104,
      "step": 13310
    },
    {
      "epoch": 0.3883248730964467,
      "grad_norm": 0.8169893974116368,
      "learning_rate": 6.9944637938983555e-06,
      "loss": 0.1254,
      "step": 13311
    },
    {
      "epoch": 0.38835404632709025,
      "grad_norm": 1.1914272777175088,
      "learning_rate": 6.994030564016418e-06,
      "loss": 0.1555,
      "step": 13312
    },
    {
      "epoch": 0.3883832195577338,
      "grad_norm": 0.8296728806910973,
      "learning_rate": 6.993597316332249e-06,
      "loss": 0.154,
      "step": 13313
    },
    {
      "epoch": 0.38841239278837736,
      "grad_norm": 0.8159415283837939,
      "learning_rate": 6.9931640508497215e-06,
      "loss": 0.1387,
      "step": 13314
    },
    {
      "epoch": 0.38844156601902097,
      "grad_norm": 1.022868637756655,
      "learning_rate": 6.9927307675727005e-06,
      "loss": 0.1463,
      "step": 13315
    },
    {
      "epoch": 0.3884707392496645,
      "grad_norm": 0.9184870012599347,
      "learning_rate": 6.9922974665050534e-06,
      "loss": 0.1829,
      "step": 13316
    },
    {
      "epoch": 0.3884999124803081,
      "grad_norm": 1.0162453871884387,
      "learning_rate": 6.991864147650653e-06,
      "loss": 0.1591,
      "step": 13317
    },
    {
      "epoch": 0.3885290857109516,
      "grad_norm": 0.6958059252750741,
      "learning_rate": 6.991430811013363e-06,
      "loss": 0.1392,
      "step": 13318
    },
    {
      "epoch": 0.3885582589415952,
      "grad_norm": 1.5786897357763856,
      "learning_rate": 6.990997456597054e-06,
      "loss": 0.1395,
      "step": 13319
    },
    {
      "epoch": 0.38858743217223873,
      "grad_norm": 0.7972810695226562,
      "learning_rate": 6.990564084405595e-06,
      "loss": 0.149,
      "step": 13320
    },
    {
      "epoch": 0.38861660540288234,
      "grad_norm": 0.8777016036161785,
      "learning_rate": 6.990130694442857e-06,
      "loss": 0.1416,
      "step": 13321
    },
    {
      "epoch": 0.3886457786335259,
      "grad_norm": 0.8780807240183021,
      "learning_rate": 6.989697286712705e-06,
      "loss": 0.1377,
      "step": 13322
    },
    {
      "epoch": 0.38867495186416945,
      "grad_norm": 0.9176289988609246,
      "learning_rate": 6.9892638612190125e-06,
      "loss": 0.1512,
      "step": 13323
    },
    {
      "epoch": 0.388704125094813,
      "grad_norm": 0.803467416298351,
      "learning_rate": 6.988830417965645e-06,
      "loss": 0.1515,
      "step": 13324
    },
    {
      "epoch": 0.38873329832545656,
      "grad_norm": 0.811414526116803,
      "learning_rate": 6.988396956956476e-06,
      "loss": 0.1527,
      "step": 13325
    },
    {
      "epoch": 0.3887624715561001,
      "grad_norm": 0.725293480837676,
      "learning_rate": 6.987963478195373e-06,
      "loss": 0.1594,
      "step": 13326
    },
    {
      "epoch": 0.38879164478674366,
      "grad_norm": 0.8542884036141146,
      "learning_rate": 6.9875299816862075e-06,
      "loss": 0.141,
      "step": 13327
    },
    {
      "epoch": 0.38882081801738727,
      "grad_norm": 0.9948677236482435,
      "learning_rate": 6.987096467432847e-06,
      "loss": 0.1649,
      "step": 13328
    },
    {
      "epoch": 0.3888499912480308,
      "grad_norm": 0.8379567960194645,
      "learning_rate": 6.986662935439165e-06,
      "loss": 0.1173,
      "step": 13329
    },
    {
      "epoch": 0.3888791644786744,
      "grad_norm": 0.9104299231378886,
      "learning_rate": 6.98622938570903e-06,
      "loss": 0.1329,
      "step": 13330
    },
    {
      "epoch": 0.38890833770931793,
      "grad_norm": 1.2147220613229637,
      "learning_rate": 6.985795818246313e-06,
      "loss": 0.134,
      "step": 13331
    },
    {
      "epoch": 0.3889375109399615,
      "grad_norm": 1.1428833879959506,
      "learning_rate": 6.985362233054887e-06,
      "loss": 0.1697,
      "step": 13332
    },
    {
      "epoch": 0.38896668417060504,
      "grad_norm": 0.7664877068593712,
      "learning_rate": 6.984928630138619e-06,
      "loss": 0.1414,
      "step": 13333
    },
    {
      "epoch": 0.3889958574012486,
      "grad_norm": 0.7097562592324536,
      "learning_rate": 6.984495009501381e-06,
      "loss": 0.1476,
      "step": 13334
    },
    {
      "epoch": 0.3890250306318922,
      "grad_norm": 0.9832394396281896,
      "learning_rate": 6.984061371147047e-06,
      "loss": 0.1442,
      "step": 13335
    },
    {
      "epoch": 0.38905420386253575,
      "grad_norm": 0.8274496434558746,
      "learning_rate": 6.983627715079487e-06,
      "loss": 0.1457,
      "step": 13336
    },
    {
      "epoch": 0.3890833770931793,
      "grad_norm": 0.7244927244327911,
      "learning_rate": 6.98319404130257e-06,
      "loss": 0.1492,
      "step": 13337
    },
    {
      "epoch": 0.38911255032382286,
      "grad_norm": 1.043038759570755,
      "learning_rate": 6.982760349820172e-06,
      "loss": 0.1507,
      "step": 13338
    },
    {
      "epoch": 0.3891417235544664,
      "grad_norm": 0.9942063133788643,
      "learning_rate": 6.9823266406361625e-06,
      "loss": 0.1362,
      "step": 13339
    },
    {
      "epoch": 0.38917089678510997,
      "grad_norm": 0.8422806446955047,
      "learning_rate": 6.981892913754414e-06,
      "loss": 0.1769,
      "step": 13340
    },
    {
      "epoch": 0.3892000700157535,
      "grad_norm": 0.8708336803825205,
      "learning_rate": 6.981459169178799e-06,
      "loss": 0.1208,
      "step": 13341
    },
    {
      "epoch": 0.3892292432463971,
      "grad_norm": 0.82161203254612,
      "learning_rate": 6.98102540691319e-06,
      "loss": 0.156,
      "step": 13342
    },
    {
      "epoch": 0.3892584164770407,
      "grad_norm": 0.8041841095006205,
      "learning_rate": 6.980591626961457e-06,
      "loss": 0.1447,
      "step": 13343
    },
    {
      "epoch": 0.38928758970768423,
      "grad_norm": 0.9053273291678036,
      "learning_rate": 6.980157829327476e-06,
      "loss": 0.1366,
      "step": 13344
    },
    {
      "epoch": 0.3893167629383278,
      "grad_norm": 0.8936512087957569,
      "learning_rate": 6.979724014015119e-06,
      "loss": 0.1458,
      "step": 13345
    },
    {
      "epoch": 0.38934593616897134,
      "grad_norm": 0.9754935926371973,
      "learning_rate": 6.979290181028258e-06,
      "loss": 0.144,
      "step": 13346
    },
    {
      "epoch": 0.3893751093996149,
      "grad_norm": 0.6794009994474512,
      "learning_rate": 6.978856330370768e-06,
      "loss": 0.1295,
      "step": 13347
    },
    {
      "epoch": 0.3894042826302585,
      "grad_norm": 0.7867241800532104,
      "learning_rate": 6.97842246204652e-06,
      "loss": 0.1337,
      "step": 13348
    },
    {
      "epoch": 0.38943345586090206,
      "grad_norm": 0.714191117427281,
      "learning_rate": 6.977988576059387e-06,
      "loss": 0.1414,
      "step": 13349
    },
    {
      "epoch": 0.3894626290915456,
      "grad_norm": 0.8831457005697209,
      "learning_rate": 6.977554672413247e-06,
      "loss": 0.1369,
      "step": 13350
    },
    {
      "epoch": 0.38949180232218916,
      "grad_norm": 0.662207327080798,
      "learning_rate": 6.97712075111197e-06,
      "loss": 0.153,
      "step": 13351
    },
    {
      "epoch": 0.3895209755528327,
      "grad_norm": 0.9824106029973495,
      "learning_rate": 6.97668681215943e-06,
      "loss": 0.1417,
      "step": 13352
    },
    {
      "epoch": 0.38955014878347627,
      "grad_norm": 0.7797362553881462,
      "learning_rate": 6.976252855559504e-06,
      "loss": 0.1465,
      "step": 13353
    },
    {
      "epoch": 0.3895793220141198,
      "grad_norm": 0.7245514585214006,
      "learning_rate": 6.975818881316062e-06,
      "loss": 0.161,
      "step": 13354
    },
    {
      "epoch": 0.38960849524476343,
      "grad_norm": 0.6144009335532287,
      "learning_rate": 6.975384889432981e-06,
      "loss": 0.1372,
      "step": 13355
    },
    {
      "epoch": 0.389637668475407,
      "grad_norm": 0.8314983607142742,
      "learning_rate": 6.974950879914136e-06,
      "loss": 0.14,
      "step": 13356
    },
    {
      "epoch": 0.38966684170605054,
      "grad_norm": 0.7598820440301378,
      "learning_rate": 6.9745168527634024e-06,
      "loss": 0.109,
      "step": 13357
    },
    {
      "epoch": 0.3896960149366941,
      "grad_norm": 0.667888523275534,
      "learning_rate": 6.974082807984651e-06,
      "loss": 0.1733,
      "step": 13358
    },
    {
      "epoch": 0.38972518816733764,
      "grad_norm": 0.7147882986652229,
      "learning_rate": 6.973648745581761e-06,
      "loss": 0.1278,
      "step": 13359
    },
    {
      "epoch": 0.3897543613979812,
      "grad_norm": 0.9553106230970906,
      "learning_rate": 6.973214665558606e-06,
      "loss": 0.1422,
      "step": 13360
    },
    {
      "epoch": 0.38978353462862475,
      "grad_norm": 0.7756088344563122,
      "learning_rate": 6.972780567919061e-06,
      "loss": 0.1396,
      "step": 13361
    },
    {
      "epoch": 0.38981270785926836,
      "grad_norm": 0.847104542280691,
      "learning_rate": 6.972346452667003e-06,
      "loss": 0.1432,
      "step": 13362
    },
    {
      "epoch": 0.3898418810899119,
      "grad_norm": 0.8712932944498577,
      "learning_rate": 6.971912319806306e-06,
      "loss": 0.1497,
      "step": 13363
    },
    {
      "epoch": 0.38987105432055547,
      "grad_norm": 0.6655785423789258,
      "learning_rate": 6.971478169340846e-06,
      "loss": 0.1272,
      "step": 13364
    },
    {
      "epoch": 0.389900227551199,
      "grad_norm": 0.7828140569257632,
      "learning_rate": 6.971044001274502e-06,
      "loss": 0.1402,
      "step": 13365
    },
    {
      "epoch": 0.38992940078184257,
      "grad_norm": 0.888757757668912,
      "learning_rate": 6.970609815611146e-06,
      "loss": 0.1665,
      "step": 13366
    },
    {
      "epoch": 0.3899585740124861,
      "grad_norm": 1.1091736288901854,
      "learning_rate": 6.970175612354655e-06,
      "loss": 0.1469,
      "step": 13367
    },
    {
      "epoch": 0.3899877472431297,
      "grad_norm": 0.873249582296831,
      "learning_rate": 6.969741391508907e-06,
      "loss": 0.1707,
      "step": 13368
    },
    {
      "epoch": 0.3900169204737733,
      "grad_norm": 0.7237473938954652,
      "learning_rate": 6.969307153077779e-06,
      "loss": 0.1505,
      "step": 13369
    },
    {
      "epoch": 0.39004609370441684,
      "grad_norm": 0.8722380633299138,
      "learning_rate": 6.968872897065147e-06,
      "loss": 0.1459,
      "step": 13370
    },
    {
      "epoch": 0.3900752669350604,
      "grad_norm": 0.8690157356299223,
      "learning_rate": 6.9684386234748866e-06,
      "loss": 0.141,
      "step": 13371
    },
    {
      "epoch": 0.39010444016570395,
      "grad_norm": 0.7648898813933638,
      "learning_rate": 6.968004332310877e-06,
      "loss": 0.121,
      "step": 13372
    },
    {
      "epoch": 0.3901336133963475,
      "grad_norm": 0.6912289339659948,
      "learning_rate": 6.967570023576993e-06,
      "loss": 0.1603,
      "step": 13373
    },
    {
      "epoch": 0.39016278662699105,
      "grad_norm": 0.7626884920461074,
      "learning_rate": 6.967135697277114e-06,
      "loss": 0.1539,
      "step": 13374
    },
    {
      "epoch": 0.3901919598576346,
      "grad_norm": 0.9089730229210611,
      "learning_rate": 6.96670135341512e-06,
      "loss": 0.125,
      "step": 13375
    },
    {
      "epoch": 0.3902211330882782,
      "grad_norm": 0.7886555626671125,
      "learning_rate": 6.966266991994881e-06,
      "loss": 0.1466,
      "step": 13376
    },
    {
      "epoch": 0.39025030631892177,
      "grad_norm": 0.980473515537376,
      "learning_rate": 6.965832613020284e-06,
      "loss": 0.1729,
      "step": 13377
    },
    {
      "epoch": 0.3902794795495653,
      "grad_norm": 0.8537764799859051,
      "learning_rate": 6.9653982164952e-06,
      "loss": 0.1566,
      "step": 13378
    },
    {
      "epoch": 0.3903086527802089,
      "grad_norm": 1.0210186184483199,
      "learning_rate": 6.96496380242351e-06,
      "loss": 0.1522,
      "step": 13379
    },
    {
      "epoch": 0.39033782601085243,
      "grad_norm": 0.8263969947570391,
      "learning_rate": 6.964529370809095e-06,
      "loss": 0.1384,
      "step": 13380
    },
    {
      "epoch": 0.390366999241496,
      "grad_norm": 0.9348121446069635,
      "learning_rate": 6.964094921655828e-06,
      "loss": 0.1613,
      "step": 13381
    },
    {
      "epoch": 0.3903961724721396,
      "grad_norm": 0.9743860353075883,
      "learning_rate": 6.963660454967591e-06,
      "loss": 0.1304,
      "step": 13382
    },
    {
      "epoch": 0.39042534570278314,
      "grad_norm": 0.712965059417837,
      "learning_rate": 6.963225970748262e-06,
      "loss": 0.1601,
      "step": 13383
    },
    {
      "epoch": 0.3904545189334267,
      "grad_norm": 0.8278049793772592,
      "learning_rate": 6.96279146900172e-06,
      "loss": 0.1621,
      "step": 13384
    },
    {
      "epoch": 0.39048369216407025,
      "grad_norm": 0.8431031487562697,
      "learning_rate": 6.962356949731846e-06,
      "loss": 0.1639,
      "step": 13385
    },
    {
      "epoch": 0.3905128653947138,
      "grad_norm": 0.8061050375402079,
      "learning_rate": 6.961922412942517e-06,
      "loss": 0.1272,
      "step": 13386
    },
    {
      "epoch": 0.39054203862535736,
      "grad_norm": 0.6606946199278728,
      "learning_rate": 6.9614878586376125e-06,
      "loss": 0.1391,
      "step": 13387
    },
    {
      "epoch": 0.3905712118560009,
      "grad_norm": 0.9617599754709144,
      "learning_rate": 6.961053286821012e-06,
      "loss": 0.1427,
      "step": 13388
    },
    {
      "epoch": 0.3906003850866445,
      "grad_norm": 0.9996814338324447,
      "learning_rate": 6.960618697496597e-06,
      "loss": 0.1594,
      "step": 13389
    },
    {
      "epoch": 0.3906295583172881,
      "grad_norm": 0.7699719287102674,
      "learning_rate": 6.960184090668245e-06,
      "loss": 0.1336,
      "step": 13390
    },
    {
      "epoch": 0.3906587315479316,
      "grad_norm": 0.8229926159042056,
      "learning_rate": 6.959749466339839e-06,
      "loss": 0.1605,
      "step": 13391
    },
    {
      "epoch": 0.3906879047785752,
      "grad_norm": 0.8933858508674338,
      "learning_rate": 6.959314824515258e-06,
      "loss": 0.1386,
      "step": 13392
    },
    {
      "epoch": 0.39071707800921873,
      "grad_norm": 0.7196626303364317,
      "learning_rate": 6.95888016519838e-06,
      "loss": 0.137,
      "step": 13393
    },
    {
      "epoch": 0.3907462512398623,
      "grad_norm": 0.8334043516529545,
      "learning_rate": 6.958445488393088e-06,
      "loss": 0.1375,
      "step": 13394
    },
    {
      "epoch": 0.39077542447050584,
      "grad_norm": 0.7419972030408932,
      "learning_rate": 6.958010794103263e-06,
      "loss": 0.1373,
      "step": 13395
    },
    {
      "epoch": 0.39080459770114945,
      "grad_norm": 0.8216999238492296,
      "learning_rate": 6.957576082332784e-06,
      "loss": 0.1451,
      "step": 13396
    },
    {
      "epoch": 0.390833770931793,
      "grad_norm": 0.7050367296930518,
      "learning_rate": 6.9571413530855345e-06,
      "loss": 0.1328,
      "step": 13397
    },
    {
      "epoch": 0.39086294416243655,
      "grad_norm": 0.8815943297971746,
      "learning_rate": 6.956706606365393e-06,
      "loss": 0.1341,
      "step": 13398
    },
    {
      "epoch": 0.3908921173930801,
      "grad_norm": 0.9252955919633908,
      "learning_rate": 6.956271842176242e-06,
      "loss": 0.1364,
      "step": 13399
    },
    {
      "epoch": 0.39092129062372366,
      "grad_norm": 0.8930728103288988,
      "learning_rate": 6.9558370605219634e-06,
      "loss": 0.1286,
      "step": 13400
    },
    {
      "epoch": 0.3909504638543672,
      "grad_norm": 0.9163816687984387,
      "learning_rate": 6.955402261406439e-06,
      "loss": 0.1381,
      "step": 13401
    },
    {
      "epoch": 0.39097963708501077,
      "grad_norm": 0.7680791483130143,
      "learning_rate": 6.954967444833549e-06,
      "loss": 0.1574,
      "step": 13402
    },
    {
      "epoch": 0.3910088103156544,
      "grad_norm": 1.0069978034553984,
      "learning_rate": 6.954532610807176e-06,
      "loss": 0.1443,
      "step": 13403
    },
    {
      "epoch": 0.39103798354629793,
      "grad_norm": 0.8988432582345715,
      "learning_rate": 6.954097759331204e-06,
      "loss": 0.1377,
      "step": 13404
    },
    {
      "epoch": 0.3910671567769415,
      "grad_norm": 0.7066239225388304,
      "learning_rate": 6.953662890409512e-06,
      "loss": 0.155,
      "step": 13405
    },
    {
      "epoch": 0.39109633000758504,
      "grad_norm": 1.1597307283036862,
      "learning_rate": 6.9532280040459855e-06,
      "loss": 0.1424,
      "step": 13406
    },
    {
      "epoch": 0.3911255032382286,
      "grad_norm": 0.8405187499121104,
      "learning_rate": 6.952793100244506e-06,
      "loss": 0.1663,
      "step": 13407
    },
    {
      "epoch": 0.39115467646887214,
      "grad_norm": 0.7460433118762413,
      "learning_rate": 6.952358179008954e-06,
      "loss": 0.1335,
      "step": 13408
    },
    {
      "epoch": 0.39118384969951575,
      "grad_norm": 1.059038470736294,
      "learning_rate": 6.951923240343217e-06,
      "loss": 0.151,
      "step": 13409
    },
    {
      "epoch": 0.3912130229301593,
      "grad_norm": 0.7679939106601765,
      "learning_rate": 6.951488284251173e-06,
      "loss": 0.1437,
      "step": 13410
    },
    {
      "epoch": 0.39124219616080286,
      "grad_norm": 0.8507353232572379,
      "learning_rate": 6.9510533107367066e-06,
      "loss": 0.144,
      "step": 13411
    },
    {
      "epoch": 0.3912713693914464,
      "grad_norm": 1.205972558740319,
      "learning_rate": 6.950618319803704e-06,
      "loss": 0.1485,
      "step": 13412
    },
    {
      "epoch": 0.39130054262208996,
      "grad_norm": 1.0747486516695628,
      "learning_rate": 6.950183311456046e-06,
      "loss": 0.1662,
      "step": 13413
    },
    {
      "epoch": 0.3913297158527335,
      "grad_norm": 0.8034709340146459,
      "learning_rate": 6.9497482856976175e-06,
      "loss": 0.1401,
      "step": 13414
    },
    {
      "epoch": 0.39135888908337707,
      "grad_norm": 1.2788093437326604,
      "learning_rate": 6.949313242532301e-06,
      "loss": 0.1371,
      "step": 13415
    },
    {
      "epoch": 0.3913880623140207,
      "grad_norm": 0.8976758945386955,
      "learning_rate": 6.94887818196398e-06,
      "loss": 0.1456,
      "step": 13416
    },
    {
      "epoch": 0.39141723554466423,
      "grad_norm": 0.6312697081513999,
      "learning_rate": 6.948443103996543e-06,
      "loss": 0.1163,
      "step": 13417
    },
    {
      "epoch": 0.3914464087753078,
      "grad_norm": 0.775990863607962,
      "learning_rate": 6.948008008633868e-06,
      "loss": 0.1408,
      "step": 13418
    },
    {
      "epoch": 0.39147558200595134,
      "grad_norm": 0.9341190483712725,
      "learning_rate": 6.947572895879844e-06,
      "loss": 0.1793,
      "step": 13419
    },
    {
      "epoch": 0.3915047552365949,
      "grad_norm": 0.6071239175759338,
      "learning_rate": 6.947137765738354e-06,
      "loss": 0.1297,
      "step": 13420
    },
    {
      "epoch": 0.39153392846723845,
      "grad_norm": 0.6793513193876647,
      "learning_rate": 6.946702618213284e-06,
      "loss": 0.1071,
      "step": 13421
    },
    {
      "epoch": 0.391563101697882,
      "grad_norm": 0.7551884994473822,
      "learning_rate": 6.946267453308518e-06,
      "loss": 0.1516,
      "step": 13422
    },
    {
      "epoch": 0.3915922749285256,
      "grad_norm": 0.780841912088871,
      "learning_rate": 6.945832271027937e-06,
      "loss": 0.1603,
      "step": 13423
    },
    {
      "epoch": 0.39162144815916916,
      "grad_norm": 0.7038816360485793,
      "learning_rate": 6.945397071375433e-06,
      "loss": 0.1259,
      "step": 13424
    },
    {
      "epoch": 0.3916506213898127,
      "grad_norm": 0.7328245116045685,
      "learning_rate": 6.944961854354888e-06,
      "loss": 0.1238,
      "step": 13425
    },
    {
      "epoch": 0.39167979462045627,
      "grad_norm": 1.2517861853783918,
      "learning_rate": 6.944526619970187e-06,
      "loss": 0.1537,
      "step": 13426
    },
    {
      "epoch": 0.3917089678510998,
      "grad_norm": 0.743241132460899,
      "learning_rate": 6.944091368225218e-06,
      "loss": 0.155,
      "step": 13427
    },
    {
      "epoch": 0.3917381410817434,
      "grad_norm": 0.6935536329792923,
      "learning_rate": 6.9436560991238635e-06,
      "loss": 0.1292,
      "step": 13428
    },
    {
      "epoch": 0.3917673143123869,
      "grad_norm": 0.7756549210802175,
      "learning_rate": 6.943220812670013e-06,
      "loss": 0.1563,
      "step": 13429
    },
    {
      "epoch": 0.39179648754303054,
      "grad_norm": 0.7414237234309615,
      "learning_rate": 6.94278550886755e-06,
      "loss": 0.1534,
      "step": 13430
    },
    {
      "epoch": 0.3918256607736741,
      "grad_norm": 0.8426514748873748,
      "learning_rate": 6.942350187720361e-06,
      "loss": 0.1469,
      "step": 13431
    },
    {
      "epoch": 0.39185483400431764,
      "grad_norm": 0.7615308497476291,
      "learning_rate": 6.941914849232336e-06,
      "loss": 0.1314,
      "step": 13432
    },
    {
      "epoch": 0.3918840072349612,
      "grad_norm": 0.7505485574867814,
      "learning_rate": 6.941479493407356e-06,
      "loss": 0.1398,
      "step": 13433
    },
    {
      "epoch": 0.39191318046560475,
      "grad_norm": 0.9822685707588608,
      "learning_rate": 6.9410441202493115e-06,
      "loss": 0.1497,
      "step": 13434
    },
    {
      "epoch": 0.3919423536962483,
      "grad_norm": 0.8013111594305649,
      "learning_rate": 6.940608729762088e-06,
      "loss": 0.1599,
      "step": 13435
    },
    {
      "epoch": 0.3919715269268919,
      "grad_norm": 0.7633362671154748,
      "learning_rate": 6.940173321949574e-06,
      "loss": 0.1313,
      "step": 13436
    },
    {
      "epoch": 0.39200070015753546,
      "grad_norm": 0.7355768933169572,
      "learning_rate": 6.9397378968156555e-06,
      "loss": 0.1291,
      "step": 13437
    },
    {
      "epoch": 0.392029873388179,
      "grad_norm": 0.8647746265161046,
      "learning_rate": 6.9393024543642195e-06,
      "loss": 0.1692,
      "step": 13438
    },
    {
      "epoch": 0.39205904661882257,
      "grad_norm": 0.8840514013776822,
      "learning_rate": 6.938866994599156e-06,
      "loss": 0.1305,
      "step": 13439
    },
    {
      "epoch": 0.3920882198494661,
      "grad_norm": 0.8111839257480463,
      "learning_rate": 6.938431517524349e-06,
      "loss": 0.1401,
      "step": 13440
    },
    {
      "epoch": 0.3921173930801097,
      "grad_norm": 1.1505431095945406,
      "learning_rate": 6.937996023143687e-06,
      "loss": 0.1444,
      "step": 13441
    },
    {
      "epoch": 0.39214656631075323,
      "grad_norm": 1.006634496209286,
      "learning_rate": 6.937560511461062e-06,
      "loss": 0.1443,
      "step": 13442
    },
    {
      "epoch": 0.39217573954139684,
      "grad_norm": 0.7314659622389533,
      "learning_rate": 6.937124982480358e-06,
      "loss": 0.1428,
      "step": 13443
    },
    {
      "epoch": 0.3922049127720404,
      "grad_norm": 0.839361978663376,
      "learning_rate": 6.936689436205464e-06,
      "loss": 0.1324,
      "step": 13444
    },
    {
      "epoch": 0.39223408600268395,
      "grad_norm": 0.7894591607237862,
      "learning_rate": 6.936253872640269e-06,
      "loss": 0.1252,
      "step": 13445
    },
    {
      "epoch": 0.3922632592333275,
      "grad_norm": 0.8514628703439183,
      "learning_rate": 6.935818291788663e-06,
      "loss": 0.1134,
      "step": 13446
    },
    {
      "epoch": 0.39229243246397105,
      "grad_norm": 0.7229729683223703,
      "learning_rate": 6.935382693654532e-06,
      "loss": 0.1392,
      "step": 13447
    },
    {
      "epoch": 0.3923216056946146,
      "grad_norm": 0.866114985431533,
      "learning_rate": 6.934947078241767e-06,
      "loss": 0.1354,
      "step": 13448
    },
    {
      "epoch": 0.39235077892525816,
      "grad_norm": 1.0060248452344418,
      "learning_rate": 6.934511445554257e-06,
      "loss": 0.1407,
      "step": 13449
    },
    {
      "epoch": 0.39237995215590177,
      "grad_norm": 0.920134761062441,
      "learning_rate": 6.934075795595889e-06,
      "loss": 0.1301,
      "step": 13450
    },
    {
      "epoch": 0.3924091253865453,
      "grad_norm": 1.0085656115031563,
      "learning_rate": 6.933640128370556e-06,
      "loss": 0.1618,
      "step": 13451
    },
    {
      "epoch": 0.3924382986171889,
      "grad_norm": 0.9004872747952627,
      "learning_rate": 6.933204443882144e-06,
      "loss": 0.1413,
      "step": 13452
    },
    {
      "epoch": 0.3924674718478324,
      "grad_norm": 0.8948913061908531,
      "learning_rate": 6.932768742134545e-06,
      "loss": 0.1173,
      "step": 13453
    },
    {
      "epoch": 0.392496645078476,
      "grad_norm": 1.1737819694857237,
      "learning_rate": 6.932333023131647e-06,
      "loss": 0.1371,
      "step": 13454
    },
    {
      "epoch": 0.39252581830911953,
      "grad_norm": 0.8046936114660276,
      "learning_rate": 6.9318972868773425e-06,
      "loss": 0.1329,
      "step": 13455
    },
    {
      "epoch": 0.3925549915397631,
      "grad_norm": 0.7641640235500119,
      "learning_rate": 6.931461533375518e-06,
      "loss": 0.1575,
      "step": 13456
    },
    {
      "epoch": 0.3925841647704067,
      "grad_norm": 0.8387902511728224,
      "learning_rate": 6.931025762630069e-06,
      "loss": 0.1237,
      "step": 13457
    },
    {
      "epoch": 0.39261333800105025,
      "grad_norm": 0.8401778315870334,
      "learning_rate": 6.930589974644881e-06,
      "loss": 0.1409,
      "step": 13458
    },
    {
      "epoch": 0.3926425112316938,
      "grad_norm": 1.0466468885461653,
      "learning_rate": 6.930154169423849e-06,
      "loss": 0.1375,
      "step": 13459
    },
    {
      "epoch": 0.39267168446233736,
      "grad_norm": 1.0032165642616175,
      "learning_rate": 6.929718346970858e-06,
      "loss": 0.1496,
      "step": 13460
    },
    {
      "epoch": 0.3927008576929809,
      "grad_norm": 1.0900803003505035,
      "learning_rate": 6.929282507289804e-06,
      "loss": 0.1302,
      "step": 13461
    },
    {
      "epoch": 0.39273003092362446,
      "grad_norm": 0.8295190180096297,
      "learning_rate": 6.928846650384575e-06,
      "loss": 0.1875,
      "step": 13462
    },
    {
      "epoch": 0.39275920415426807,
      "grad_norm": 1.060580655896145,
      "learning_rate": 6.928410776259065e-06,
      "loss": 0.1562,
      "step": 13463
    },
    {
      "epoch": 0.3927883773849116,
      "grad_norm": 0.9378510304402614,
      "learning_rate": 6.927974884917163e-06,
      "loss": 0.1248,
      "step": 13464
    },
    {
      "epoch": 0.3928175506155552,
      "grad_norm": 0.7853299827127223,
      "learning_rate": 6.927538976362762e-06,
      "loss": 0.1579,
      "step": 13465
    },
    {
      "epoch": 0.39284672384619873,
      "grad_norm": 1.0478984293700828,
      "learning_rate": 6.9271030505997535e-06,
      "loss": 0.1588,
      "step": 13466
    },
    {
      "epoch": 0.3928758970768423,
      "grad_norm": 1.0617111371064418,
      "learning_rate": 6.92666710763203e-06,
      "loss": 0.1735,
      "step": 13467
    },
    {
      "epoch": 0.39290507030748584,
      "grad_norm": 0.8479044160090914,
      "learning_rate": 6.926231147463481e-06,
      "loss": 0.1472,
      "step": 13468
    },
    {
      "epoch": 0.3929342435381294,
      "grad_norm": 1.0243936652671943,
      "learning_rate": 6.925795170098e-06,
      "loss": 0.1569,
      "step": 13469
    },
    {
      "epoch": 0.392963416768773,
      "grad_norm": 1.0772117900981126,
      "learning_rate": 6.92535917553948e-06,
      "loss": 0.1321,
      "step": 13470
    },
    {
      "epoch": 0.39299258999941655,
      "grad_norm": 0.8288398664135171,
      "learning_rate": 6.924923163791811e-06,
      "loss": 0.1268,
      "step": 13471
    },
    {
      "epoch": 0.3930217632300601,
      "grad_norm": 1.0995106201194746,
      "learning_rate": 6.92448713485889e-06,
      "loss": 0.1864,
      "step": 13472
    },
    {
      "epoch": 0.39305093646070366,
      "grad_norm": 1.0389480358599599,
      "learning_rate": 6.924051088744606e-06,
      "loss": 0.1525,
      "step": 13473
    },
    {
      "epoch": 0.3930801096913472,
      "grad_norm": 0.8380733324749676,
      "learning_rate": 6.923615025452854e-06,
      "loss": 0.1595,
      "step": 13474
    },
    {
      "epoch": 0.39310928292199077,
      "grad_norm": 0.9623144556101351,
      "learning_rate": 6.923178944987525e-06,
      "loss": 0.1691,
      "step": 13475
    },
    {
      "epoch": 0.3931384561526343,
      "grad_norm": 0.9322633572796867,
      "learning_rate": 6.922742847352515e-06,
      "loss": 0.1632,
      "step": 13476
    },
    {
      "epoch": 0.39316762938327793,
      "grad_norm": 0.8082087471222191,
      "learning_rate": 6.922306732551716e-06,
      "loss": 0.1625,
      "step": 13477
    },
    {
      "epoch": 0.3931968026139215,
      "grad_norm": 0.8813435088048738,
      "learning_rate": 6.92187060058902e-06,
      "loss": 0.1624,
      "step": 13478
    },
    {
      "epoch": 0.39322597584456503,
      "grad_norm": 0.7664965159860045,
      "learning_rate": 6.921434451468323e-06,
      "loss": 0.1414,
      "step": 13479
    },
    {
      "epoch": 0.3932551490752086,
      "grad_norm": 0.7388090242828428,
      "learning_rate": 6.9209982851935165e-06,
      "loss": 0.1285,
      "step": 13480
    },
    {
      "epoch": 0.39328432230585214,
      "grad_norm": 0.7635029803592907,
      "learning_rate": 6.920562101768498e-06,
      "loss": 0.124,
      "step": 13481
    },
    {
      "epoch": 0.3933134955364957,
      "grad_norm": 0.6998395776978557,
      "learning_rate": 6.920125901197159e-06,
      "loss": 0.1263,
      "step": 13482
    },
    {
      "epoch": 0.39334266876713925,
      "grad_norm": 0.6459048921764976,
      "learning_rate": 6.919689683483392e-06,
      "loss": 0.1464,
      "step": 13483
    },
    {
      "epoch": 0.39337184199778286,
      "grad_norm": 0.8050426883242157,
      "learning_rate": 6.919253448631097e-06,
      "loss": 0.1255,
      "step": 13484
    },
    {
      "epoch": 0.3934010152284264,
      "grad_norm": 0.7900914807108079,
      "learning_rate": 6.918817196644163e-06,
      "loss": 0.1559,
      "step": 13485
    },
    {
      "epoch": 0.39343018845906996,
      "grad_norm": 0.7080388530461845,
      "learning_rate": 6.918380927526488e-06,
      "loss": 0.1344,
      "step": 13486
    },
    {
      "epoch": 0.3934593616897135,
      "grad_norm": 0.7611724563438523,
      "learning_rate": 6.917944641281966e-06,
      "loss": 0.14,
      "step": 13487
    },
    {
      "epoch": 0.39348853492035707,
      "grad_norm": 0.8121858794534704,
      "learning_rate": 6.917508337914493e-06,
      "loss": 0.1348,
      "step": 13488
    },
    {
      "epoch": 0.3935177081510006,
      "grad_norm": 0.873557338320025,
      "learning_rate": 6.9170720174279615e-06,
      "loss": 0.1295,
      "step": 13489
    },
    {
      "epoch": 0.3935468813816442,
      "grad_norm": 0.6910521841080438,
      "learning_rate": 6.91663567982627e-06,
      "loss": 0.154,
      "step": 13490
    },
    {
      "epoch": 0.3935760546122878,
      "grad_norm": 0.9495453741411941,
      "learning_rate": 6.9161993251133135e-06,
      "loss": 0.1504,
      "step": 13491
    },
    {
      "epoch": 0.39360522784293134,
      "grad_norm": 0.99695846990306,
      "learning_rate": 6.915762953292985e-06,
      "loss": 0.1413,
      "step": 13492
    },
    {
      "epoch": 0.3936344010735749,
      "grad_norm": 0.9585221962636161,
      "learning_rate": 6.915326564369183e-06,
      "loss": 0.1543,
      "step": 13493
    },
    {
      "epoch": 0.39366357430421844,
      "grad_norm": 0.872098656123676,
      "learning_rate": 6.914890158345802e-06,
      "loss": 0.143,
      "step": 13494
    },
    {
      "epoch": 0.393692747534862,
      "grad_norm": 0.9465268504551351,
      "learning_rate": 6.91445373522674e-06,
      "loss": 0.139,
      "step": 13495
    },
    {
      "epoch": 0.39372192076550555,
      "grad_norm": 0.8381989221421502,
      "learning_rate": 6.91401729501589e-06,
      "loss": 0.1464,
      "step": 13496
    },
    {
      "epoch": 0.39375109399614916,
      "grad_norm": 1.0099884439074915,
      "learning_rate": 6.913580837717153e-06,
      "loss": 0.1449,
      "step": 13497
    },
    {
      "epoch": 0.3937802672267927,
      "grad_norm": 0.8391478691306493,
      "learning_rate": 6.9131443633344205e-06,
      "loss": 0.1398,
      "step": 13498
    },
    {
      "epoch": 0.39380944045743627,
      "grad_norm": 0.8009808303418667,
      "learning_rate": 6.912707871871595e-06,
      "loss": 0.145,
      "step": 13499
    },
    {
      "epoch": 0.3938386136880798,
      "grad_norm": 0.8945467445430153,
      "learning_rate": 6.9122713633325674e-06,
      "loss": 0.1391,
      "step": 13500
    },
    {
      "epoch": 0.3938677869187234,
      "grad_norm": 1.0227978992708744,
      "learning_rate": 6.911834837721239e-06,
      "loss": 0.1238,
      "step": 13501
    },
    {
      "epoch": 0.3938969601493669,
      "grad_norm": 0.8293197513229508,
      "learning_rate": 6.911398295041506e-06,
      "loss": 0.147,
      "step": 13502
    },
    {
      "epoch": 0.3939261333800105,
      "grad_norm": 1.8600840272680774,
      "learning_rate": 6.910961735297265e-06,
      "loss": 0.1383,
      "step": 13503
    },
    {
      "epoch": 0.3939553066106541,
      "grad_norm": 1.1716484242902854,
      "learning_rate": 6.910525158492413e-06,
      "loss": 0.134,
      "step": 13504
    },
    {
      "epoch": 0.39398447984129764,
      "grad_norm": 0.8756867062019763,
      "learning_rate": 6.910088564630848e-06,
      "loss": 0.1427,
      "step": 13505
    },
    {
      "epoch": 0.3940136530719412,
      "grad_norm": 1.069655355328582,
      "learning_rate": 6.909651953716469e-06,
      "loss": 0.1647,
      "step": 13506
    },
    {
      "epoch": 0.39404282630258475,
      "grad_norm": 0.9480922403444754,
      "learning_rate": 6.9092153257531735e-06,
      "loss": 0.1522,
      "step": 13507
    },
    {
      "epoch": 0.3940719995332283,
      "grad_norm": 0.8298992396590289,
      "learning_rate": 6.90877868074486e-06,
      "loss": 0.1612,
      "step": 13508
    },
    {
      "epoch": 0.39410117276387185,
      "grad_norm": 0.8143344578505003,
      "learning_rate": 6.908342018695424e-06,
      "loss": 0.1712,
      "step": 13509
    },
    {
      "epoch": 0.3941303459945154,
      "grad_norm": 0.9898841472787201,
      "learning_rate": 6.907905339608768e-06,
      "loss": 0.1536,
      "step": 13510
    },
    {
      "epoch": 0.394159519225159,
      "grad_norm": 0.9673297942709088,
      "learning_rate": 6.907468643488788e-06,
      "loss": 0.1527,
      "step": 13511
    },
    {
      "epoch": 0.39418869245580257,
      "grad_norm": 0.7036049003008338,
      "learning_rate": 6.907031930339384e-06,
      "loss": 0.1442,
      "step": 13512
    },
    {
      "epoch": 0.3942178656864461,
      "grad_norm": 0.9303468860530448,
      "learning_rate": 6.906595200164452e-06,
      "loss": 0.1375,
      "step": 13513
    },
    {
      "epoch": 0.3942470389170897,
      "grad_norm": 0.8843318750722188,
      "learning_rate": 6.906158452967895e-06,
      "loss": 0.1303,
      "step": 13514
    },
    {
      "epoch": 0.39427621214773323,
      "grad_norm": 0.8193348564637531,
      "learning_rate": 6.905721688753611e-06,
      "loss": 0.1471,
      "step": 13515
    },
    {
      "epoch": 0.3943053853783768,
      "grad_norm": 0.8446213699654381,
      "learning_rate": 6.905284907525496e-06,
      "loss": 0.1309,
      "step": 13516
    },
    {
      "epoch": 0.39433455860902034,
      "grad_norm": 0.8891430358286819,
      "learning_rate": 6.9048481092874545e-06,
      "loss": 0.1643,
      "step": 13517
    },
    {
      "epoch": 0.39436373183966394,
      "grad_norm": 0.7375669001339739,
      "learning_rate": 6.9044112940433825e-06,
      "loss": 0.1563,
      "step": 13518
    },
    {
      "epoch": 0.3943929050703075,
      "grad_norm": 0.7680674624667125,
      "learning_rate": 6.903974461797182e-06,
      "loss": 0.1512,
      "step": 13519
    },
    {
      "epoch": 0.39442207830095105,
      "grad_norm": 0.8104362183563902,
      "learning_rate": 6.903537612552752e-06,
      "loss": 0.1507,
      "step": 13520
    },
    {
      "epoch": 0.3944512515315946,
      "grad_norm": 0.7865974776961054,
      "learning_rate": 6.903100746313992e-06,
      "loss": 0.1476,
      "step": 13521
    },
    {
      "epoch": 0.39448042476223816,
      "grad_norm": 0.9050056481214198,
      "learning_rate": 6.902663863084803e-06,
      "loss": 0.1507,
      "step": 13522
    },
    {
      "epoch": 0.3945095979928817,
      "grad_norm": 0.9189139522724291,
      "learning_rate": 6.902226962869085e-06,
      "loss": 0.1403,
      "step": 13523
    },
    {
      "epoch": 0.3945387712235253,
      "grad_norm": 1.0383020721973253,
      "learning_rate": 6.90179004567074e-06,
      "loss": 0.1543,
      "step": 13524
    },
    {
      "epoch": 0.3945679444541689,
      "grad_norm": 0.8674877597307317,
      "learning_rate": 6.9013531114936664e-06,
      "loss": 0.1621,
      "step": 13525
    },
    {
      "epoch": 0.3945971176848124,
      "grad_norm": 0.9702152424831368,
      "learning_rate": 6.900916160341766e-06,
      "loss": 0.1304,
      "step": 13526
    },
    {
      "epoch": 0.394626290915456,
      "grad_norm": 0.8763961594611642,
      "learning_rate": 6.90047919221894e-06,
      "loss": 0.1557,
      "step": 13527
    },
    {
      "epoch": 0.39465546414609953,
      "grad_norm": 0.8700884550279984,
      "learning_rate": 6.90004220712909e-06,
      "loss": 0.1639,
      "step": 13528
    },
    {
      "epoch": 0.3946846373767431,
      "grad_norm": 0.7770719129623878,
      "learning_rate": 6.899605205076118e-06,
      "loss": 0.1496,
      "step": 13529
    },
    {
      "epoch": 0.39471381060738664,
      "grad_norm": 0.9202876285706476,
      "learning_rate": 6.899168186063922e-06,
      "loss": 0.1551,
      "step": 13530
    },
    {
      "epoch": 0.39474298383803025,
      "grad_norm": 0.760391271866602,
      "learning_rate": 6.898731150096405e-06,
      "loss": 0.1592,
      "step": 13531
    },
    {
      "epoch": 0.3947721570686738,
      "grad_norm": 0.6977262503265874,
      "learning_rate": 6.898294097177472e-06,
      "loss": 0.1259,
      "step": 13532
    },
    {
      "epoch": 0.39480133029931735,
      "grad_norm": 0.8164748693757632,
      "learning_rate": 6.897857027311021e-06,
      "loss": 0.1295,
      "step": 13533
    },
    {
      "epoch": 0.3948305035299609,
      "grad_norm": 0.7817836696599647,
      "learning_rate": 6.897419940500957e-06,
      "loss": 0.1544,
      "step": 13534
    },
    {
      "epoch": 0.39485967676060446,
      "grad_norm": 0.8495968672593023,
      "learning_rate": 6.8969828367511795e-06,
      "loss": 0.1492,
      "step": 13535
    },
    {
      "epoch": 0.394888849991248,
      "grad_norm": 0.8039639482440903,
      "learning_rate": 6.896545716065591e-06,
      "loss": 0.1255,
      "step": 13536
    },
    {
      "epoch": 0.39491802322189157,
      "grad_norm": 0.9095502768520879,
      "learning_rate": 6.896108578448098e-06,
      "loss": 0.1574,
      "step": 13537
    },
    {
      "epoch": 0.3949471964525352,
      "grad_norm": 0.8898926453378159,
      "learning_rate": 6.8956714239025976e-06,
      "loss": 0.1813,
      "step": 13538
    },
    {
      "epoch": 0.39497636968317873,
      "grad_norm": 0.9486733008022501,
      "learning_rate": 6.895234252432996e-06,
      "loss": 0.146,
      "step": 13539
    },
    {
      "epoch": 0.3950055429138223,
      "grad_norm": 1.0384175536627858,
      "learning_rate": 6.894797064043196e-06,
      "loss": 0.1532,
      "step": 13540
    },
    {
      "epoch": 0.39503471614446584,
      "grad_norm": 0.7370307247044519,
      "learning_rate": 6.894359858737099e-06,
      "loss": 0.1528,
      "step": 13541
    },
    {
      "epoch": 0.3950638893751094,
      "grad_norm": 0.8463856413184923,
      "learning_rate": 6.893922636518612e-06,
      "loss": 0.1347,
      "step": 13542
    },
    {
      "epoch": 0.39509306260575294,
      "grad_norm": 0.9238022439981455,
      "learning_rate": 6.893485397391633e-06,
      "loss": 0.1662,
      "step": 13543
    },
    {
      "epoch": 0.3951222358363965,
      "grad_norm": 0.7820990008330788,
      "learning_rate": 6.89304814136007e-06,
      "loss": 0.1378,
      "step": 13544
    },
    {
      "epoch": 0.3951514090670401,
      "grad_norm": 1.0888335436583485,
      "learning_rate": 6.892610868427824e-06,
      "loss": 0.1963,
      "step": 13545
    },
    {
      "epoch": 0.39518058229768366,
      "grad_norm": 0.9156504059078454,
      "learning_rate": 6.8921735785988e-06,
      "loss": 0.1188,
      "step": 13546
    },
    {
      "epoch": 0.3952097555283272,
      "grad_norm": 0.8877364499479483,
      "learning_rate": 6.891736271876903e-06,
      "loss": 0.1421,
      "step": 13547
    },
    {
      "epoch": 0.39523892875897076,
      "grad_norm": 0.758630855278139,
      "learning_rate": 6.8912989482660365e-06,
      "loss": 0.1265,
      "step": 13548
    },
    {
      "epoch": 0.3952681019896143,
      "grad_norm": 0.8116769721432958,
      "learning_rate": 6.890861607770103e-06,
      "loss": 0.1463,
      "step": 13549
    },
    {
      "epoch": 0.39529727522025787,
      "grad_norm": 0.7628585711814966,
      "learning_rate": 6.890424250393009e-06,
      "loss": 0.1325,
      "step": 13550
    },
    {
      "epoch": 0.3953264484509015,
      "grad_norm": 0.7730110063315181,
      "learning_rate": 6.889986876138659e-06,
      "loss": 0.1433,
      "step": 13551
    },
    {
      "epoch": 0.39535562168154503,
      "grad_norm": 0.7051345359852466,
      "learning_rate": 6.889549485010957e-06,
      "loss": 0.1457,
      "step": 13552
    },
    {
      "epoch": 0.3953847949121886,
      "grad_norm": 1.1425198602171238,
      "learning_rate": 6.889112077013808e-06,
      "loss": 0.1305,
      "step": 13553
    },
    {
      "epoch": 0.39541396814283214,
      "grad_norm": 0.6616249465596958,
      "learning_rate": 6.888674652151117e-06,
      "loss": 0.1214,
      "step": 13554
    },
    {
      "epoch": 0.3954431413734757,
      "grad_norm": 0.7753742877667316,
      "learning_rate": 6.88823721042679e-06,
      "loss": 0.1596,
      "step": 13555
    },
    {
      "epoch": 0.39547231460411925,
      "grad_norm": 0.9304339527547232,
      "learning_rate": 6.887799751844732e-06,
      "loss": 0.1646,
      "step": 13556
    },
    {
      "epoch": 0.3955014878347628,
      "grad_norm": 0.7840134291357677,
      "learning_rate": 6.8873622764088495e-06,
      "loss": 0.1346,
      "step": 13557
    },
    {
      "epoch": 0.3955306610654064,
      "grad_norm": 0.7153077068367624,
      "learning_rate": 6.886924784123046e-06,
      "loss": 0.1479,
      "step": 13558
    },
    {
      "epoch": 0.39555983429604996,
      "grad_norm": 0.7315921902262976,
      "learning_rate": 6.8864872749912296e-06,
      "loss": 0.128,
      "step": 13559
    },
    {
      "epoch": 0.3955890075266935,
      "grad_norm": 0.8785516750944411,
      "learning_rate": 6.886049749017304e-06,
      "loss": 0.1436,
      "step": 13560
    },
    {
      "epoch": 0.39561818075733707,
      "grad_norm": 0.8587352973909724,
      "learning_rate": 6.885612206205175e-06,
      "loss": 0.1454,
      "step": 13561
    },
    {
      "epoch": 0.3956473539879806,
      "grad_norm": 0.7551958300928429,
      "learning_rate": 6.885174646558754e-06,
      "loss": 0.1262,
      "step": 13562
    },
    {
      "epoch": 0.3956765272186242,
      "grad_norm": 0.6814931549444815,
      "learning_rate": 6.8847370700819415e-06,
      "loss": 0.1461,
      "step": 13563
    },
    {
      "epoch": 0.3957057004492677,
      "grad_norm": 0.7909599958417884,
      "learning_rate": 6.8842994767786466e-06,
      "loss": 0.1431,
      "step": 13564
    },
    {
      "epoch": 0.39573487367991134,
      "grad_norm": 0.7321566528171322,
      "learning_rate": 6.883861866652776e-06,
      "loss": 0.1372,
      "step": 13565
    },
    {
      "epoch": 0.3957640469105549,
      "grad_norm": 0.8094397867003983,
      "learning_rate": 6.883424239708236e-06,
      "loss": 0.1453,
      "step": 13566
    },
    {
      "epoch": 0.39579322014119844,
      "grad_norm": 0.8227919385291982,
      "learning_rate": 6.882986595948935e-06,
      "loss": 0.14,
      "step": 13567
    },
    {
      "epoch": 0.395822393371842,
      "grad_norm": 0.7330943694807599,
      "learning_rate": 6.882548935378778e-06,
      "loss": 0.1081,
      "step": 13568
    },
    {
      "epoch": 0.39585156660248555,
      "grad_norm": 0.9788886462500496,
      "learning_rate": 6.8821112580016734e-06,
      "loss": 0.1501,
      "step": 13569
    },
    {
      "epoch": 0.3958807398331291,
      "grad_norm": 0.85249551709532,
      "learning_rate": 6.881673563821529e-06,
      "loss": 0.1696,
      "step": 13570
    },
    {
      "epoch": 0.39590991306377266,
      "grad_norm": 0.8054367868561896,
      "learning_rate": 6.881235852842253e-06,
      "loss": 0.1185,
      "step": 13571
    },
    {
      "epoch": 0.39593908629441626,
      "grad_norm": 0.9278896903449173,
      "learning_rate": 6.880798125067752e-06,
      "loss": 0.1567,
      "step": 13572
    },
    {
      "epoch": 0.3959682595250598,
      "grad_norm": 0.8227234897426949,
      "learning_rate": 6.880360380501934e-06,
      "loss": 0.1617,
      "step": 13573
    },
    {
      "epoch": 0.39599743275570337,
      "grad_norm": 1.1067236153092375,
      "learning_rate": 6.879922619148709e-06,
      "loss": 0.1452,
      "step": 13574
    },
    {
      "epoch": 0.3960266059863469,
      "grad_norm": 0.8642894651343764,
      "learning_rate": 6.879484841011981e-06,
      "loss": 0.1243,
      "step": 13575
    },
    {
      "epoch": 0.3960557792169905,
      "grad_norm": 0.9586605647173144,
      "learning_rate": 6.8790470460956625e-06,
      "loss": 0.1472,
      "step": 13576
    },
    {
      "epoch": 0.39608495244763403,
      "grad_norm": 0.9086036286416684,
      "learning_rate": 6.878609234403661e-06,
      "loss": 0.1367,
      "step": 13577
    },
    {
      "epoch": 0.39611412567827764,
      "grad_norm": 0.8747271647171905,
      "learning_rate": 6.878171405939883e-06,
      "loss": 0.1354,
      "step": 13578
    },
    {
      "epoch": 0.3961432989089212,
      "grad_norm": 0.8553925512113649,
      "learning_rate": 6.8777335607082415e-06,
      "loss": 0.1206,
      "step": 13579
    },
    {
      "epoch": 0.39617247213956475,
      "grad_norm": 0.7722446172228927,
      "learning_rate": 6.8772956987126415e-06,
      "loss": 0.1282,
      "step": 13580
    },
    {
      "epoch": 0.3962016453702083,
      "grad_norm": 0.9474777884206562,
      "learning_rate": 6.876857819956993e-06,
      "loss": 0.1769,
      "step": 13581
    },
    {
      "epoch": 0.39623081860085185,
      "grad_norm": 0.7971638941395826,
      "learning_rate": 6.876419924445208e-06,
      "loss": 0.1277,
      "step": 13582
    },
    {
      "epoch": 0.3962599918314954,
      "grad_norm": 0.7513084934398179,
      "learning_rate": 6.875982012181192e-06,
      "loss": 0.151,
      "step": 13583
    },
    {
      "epoch": 0.39628916506213896,
      "grad_norm": 0.8124671974017118,
      "learning_rate": 6.875544083168857e-06,
      "loss": 0.1452,
      "step": 13584
    },
    {
      "epoch": 0.39631833829278257,
      "grad_norm": 0.9957779640245273,
      "learning_rate": 6.875106137412112e-06,
      "loss": 0.1395,
      "step": 13585
    },
    {
      "epoch": 0.3963475115234261,
      "grad_norm": 0.8307337501671527,
      "learning_rate": 6.874668174914867e-06,
      "loss": 0.1231,
      "step": 13586
    },
    {
      "epoch": 0.3963766847540697,
      "grad_norm": 0.907476529519294,
      "learning_rate": 6.874230195681032e-06,
      "loss": 0.1488,
      "step": 13587
    },
    {
      "epoch": 0.39640585798471323,
      "grad_norm": 1.0410896304727788,
      "learning_rate": 6.8737921997145175e-06,
      "loss": 0.1439,
      "step": 13588
    },
    {
      "epoch": 0.3964350312153568,
      "grad_norm": 0.9864157463871159,
      "learning_rate": 6.8733541870192345e-06,
      "loss": 0.1469,
      "step": 13589
    },
    {
      "epoch": 0.39646420444600033,
      "grad_norm": 1.1234569276469955,
      "learning_rate": 6.87291615759909e-06,
      "loss": 0.1379,
      "step": 13590
    },
    {
      "epoch": 0.3964933776766439,
      "grad_norm": 1.0268759686645481,
      "learning_rate": 6.872478111457999e-06,
      "loss": 0.1488,
      "step": 13591
    },
    {
      "epoch": 0.3965225509072875,
      "grad_norm": 0.8466032986845579,
      "learning_rate": 6.8720400485998705e-06,
      "loss": 0.1311,
      "step": 13592
    },
    {
      "epoch": 0.39655172413793105,
      "grad_norm": 0.8212839396277573,
      "learning_rate": 6.871601969028614e-06,
      "loss": 0.1406,
      "step": 13593
    },
    {
      "epoch": 0.3965808973685746,
      "grad_norm": 0.80401134328649,
      "learning_rate": 6.871163872748144e-06,
      "loss": 0.1567,
      "step": 13594
    },
    {
      "epoch": 0.39661007059921816,
      "grad_norm": 0.9520256305586496,
      "learning_rate": 6.870725759762369e-06,
      "loss": 0.1368,
      "step": 13595
    },
    {
      "epoch": 0.3966392438298617,
      "grad_norm": 0.7466076877669924,
      "learning_rate": 6.870287630075198e-06,
      "loss": 0.1294,
      "step": 13596
    },
    {
      "epoch": 0.39666841706050526,
      "grad_norm": 0.8538214083824109,
      "learning_rate": 6.8698494836905494e-06,
      "loss": 0.1671,
      "step": 13597
    },
    {
      "epoch": 0.3966975902911488,
      "grad_norm": 0.9462874808801192,
      "learning_rate": 6.8694113206123305e-06,
      "loss": 0.1356,
      "step": 13598
    },
    {
      "epoch": 0.3967267635217924,
      "grad_norm": 0.7981356657539314,
      "learning_rate": 6.868973140844453e-06,
      "loss": 0.1427,
      "step": 13599
    },
    {
      "epoch": 0.396755936752436,
      "grad_norm": 0.8499232209187003,
      "learning_rate": 6.868534944390828e-06,
      "loss": 0.1233,
      "step": 13600
    },
    {
      "epoch": 0.39678510998307953,
      "grad_norm": 0.6653544955477789,
      "learning_rate": 6.868096731255371e-06,
      "loss": 0.1829,
      "step": 13601
    },
    {
      "epoch": 0.3968142832137231,
      "grad_norm": 1.0097988328038208,
      "learning_rate": 6.867658501441991e-06,
      "loss": 0.1641,
      "step": 13602
    },
    {
      "epoch": 0.39684345644436664,
      "grad_norm": 0.8557864152496123,
      "learning_rate": 6.867220254954602e-06,
      "loss": 0.1121,
      "step": 13603
    },
    {
      "epoch": 0.3968726296750102,
      "grad_norm": 0.9184535543711677,
      "learning_rate": 6.866781991797118e-06,
      "loss": 0.1562,
      "step": 13604
    },
    {
      "epoch": 0.39690180290565374,
      "grad_norm": 0.7206300689972719,
      "learning_rate": 6.866343711973446e-06,
      "loss": 0.1425,
      "step": 13605
    },
    {
      "epoch": 0.39693097613629735,
      "grad_norm": 0.9796407534578669,
      "learning_rate": 6.865905415487506e-06,
      "loss": 0.1546,
      "step": 13606
    },
    {
      "epoch": 0.3969601493669409,
      "grad_norm": 1.1280364906262048,
      "learning_rate": 6.8654671023432085e-06,
      "loss": 0.1535,
      "step": 13607
    },
    {
      "epoch": 0.39698932259758446,
      "grad_norm": 0.8940280196917338,
      "learning_rate": 6.865028772544464e-06,
      "loss": 0.1372,
      "step": 13608
    },
    {
      "epoch": 0.397018495828228,
      "grad_norm": 1.0597706161073654,
      "learning_rate": 6.8645904260951905e-06,
      "loss": 0.1337,
      "step": 13609
    },
    {
      "epoch": 0.39704766905887157,
      "grad_norm": 0.7998501927757913,
      "learning_rate": 6.864152062999297e-06,
      "loss": 0.1434,
      "step": 13610
    },
    {
      "epoch": 0.3970768422895151,
      "grad_norm": 0.9850858201656993,
      "learning_rate": 6.863713683260696e-06,
      "loss": 0.1447,
      "step": 13611
    },
    {
      "epoch": 0.39710601552015873,
      "grad_norm": 0.7793562916951422,
      "learning_rate": 6.863275286883308e-06,
      "loss": 0.1464,
      "step": 13612
    },
    {
      "epoch": 0.3971351887508023,
      "grad_norm": 0.7311343531268348,
      "learning_rate": 6.862836873871043e-06,
      "loss": 0.1352,
      "step": 13613
    },
    {
      "epoch": 0.39716436198144583,
      "grad_norm": 0.8864138963850705,
      "learning_rate": 6.862398444227813e-06,
      "loss": 0.1684,
      "step": 13614
    },
    {
      "epoch": 0.3971935352120894,
      "grad_norm": 0.8371271507064795,
      "learning_rate": 6.861959997957537e-06,
      "loss": 0.1689,
      "step": 13615
    },
    {
      "epoch": 0.39722270844273294,
      "grad_norm": 0.759781032712083,
      "learning_rate": 6.861521535064124e-06,
      "loss": 0.154,
      "step": 13616
    },
    {
      "epoch": 0.3972518816733765,
      "grad_norm": 0.814907302855488,
      "learning_rate": 6.861083055551492e-06,
      "loss": 0.1781,
      "step": 13617
    },
    {
      "epoch": 0.39728105490402005,
      "grad_norm": 0.9859344627740455,
      "learning_rate": 6.860644559423555e-06,
      "loss": 0.1365,
      "step": 13618
    },
    {
      "epoch": 0.39731022813466366,
      "grad_norm": 0.5475142826293896,
      "learning_rate": 6.860206046684229e-06,
      "loss": 0.1287,
      "step": 13619
    },
    {
      "epoch": 0.3973394013653072,
      "grad_norm": 1.0550977484094017,
      "learning_rate": 6.859767517337425e-06,
      "loss": 0.1729,
      "step": 13620
    },
    {
      "epoch": 0.39736857459595076,
      "grad_norm": 0.7835031097060281,
      "learning_rate": 6.859328971387062e-06,
      "loss": 0.145,
      "step": 13621
    },
    {
      "epoch": 0.3973977478265943,
      "grad_norm": 0.7424203829276208,
      "learning_rate": 6.858890408837054e-06,
      "loss": 0.135,
      "step": 13622
    },
    {
      "epoch": 0.39742692105723787,
      "grad_norm": 0.7568285103001061,
      "learning_rate": 6.858451829691314e-06,
      "loss": 0.1366,
      "step": 13623
    },
    {
      "epoch": 0.3974560942878814,
      "grad_norm": 0.9075307612597083,
      "learning_rate": 6.858013233953762e-06,
      "loss": 0.1382,
      "step": 13624
    },
    {
      "epoch": 0.397485267518525,
      "grad_norm": 0.8407214729416896,
      "learning_rate": 6.85757462162831e-06,
      "loss": 0.1301,
      "step": 13625
    },
    {
      "epoch": 0.3975144407491686,
      "grad_norm": 0.7890127917039195,
      "learning_rate": 6.857135992718875e-06,
      "loss": 0.1441,
      "step": 13626
    },
    {
      "epoch": 0.39754361397981214,
      "grad_norm": 0.8125349100335384,
      "learning_rate": 6.856697347229375e-06,
      "loss": 0.1199,
      "step": 13627
    },
    {
      "epoch": 0.3975727872104557,
      "grad_norm": 0.9964706228097383,
      "learning_rate": 6.856258685163724e-06,
      "loss": 0.1468,
      "step": 13628
    },
    {
      "epoch": 0.39760196044109924,
      "grad_norm": 0.6975779232087724,
      "learning_rate": 6.855820006525838e-06,
      "loss": 0.1579,
      "step": 13629
    },
    {
      "epoch": 0.3976311336717428,
      "grad_norm": 0.9108399539603893,
      "learning_rate": 6.855381311319633e-06,
      "loss": 0.168,
      "step": 13630
    },
    {
      "epoch": 0.39766030690238635,
      "grad_norm": 1.0386356101843057,
      "learning_rate": 6.854942599549028e-06,
      "loss": 0.1458,
      "step": 13631
    },
    {
      "epoch": 0.3976894801330299,
      "grad_norm": 0.6874329075659427,
      "learning_rate": 6.854503871217937e-06,
      "loss": 0.1123,
      "step": 13632
    },
    {
      "epoch": 0.3977186533636735,
      "grad_norm": 0.8603785550408117,
      "learning_rate": 6.854065126330279e-06,
      "loss": 0.149,
      "step": 13633
    },
    {
      "epoch": 0.39774782659431707,
      "grad_norm": 0.985375576240278,
      "learning_rate": 6.853626364889972e-06,
      "loss": 0.1348,
      "step": 13634
    },
    {
      "epoch": 0.3977769998249606,
      "grad_norm": 0.885232077058918,
      "learning_rate": 6.853187586900927e-06,
      "loss": 0.1339,
      "step": 13635
    },
    {
      "epoch": 0.3978061730556042,
      "grad_norm": 0.8530249718279882,
      "learning_rate": 6.852748792367069e-06,
      "loss": 0.1265,
      "step": 13636
    },
    {
      "epoch": 0.3978353462862477,
      "grad_norm": 0.8670925090412506,
      "learning_rate": 6.852309981292311e-06,
      "loss": 0.1629,
      "step": 13637
    },
    {
      "epoch": 0.3978645195168913,
      "grad_norm": 1.0442530482132857,
      "learning_rate": 6.851871153680572e-06,
      "loss": 0.1393,
      "step": 13638
    },
    {
      "epoch": 0.3978936927475349,
      "grad_norm": 1.0169475190648418,
      "learning_rate": 6.851432309535769e-06,
      "loss": 0.1215,
      "step": 13639
    },
    {
      "epoch": 0.39792286597817844,
      "grad_norm": 0.9061511494073102,
      "learning_rate": 6.8509934488618205e-06,
      "loss": 0.1738,
      "step": 13640
    },
    {
      "epoch": 0.397952039208822,
      "grad_norm": 0.964644869692467,
      "learning_rate": 6.850554571662643e-06,
      "loss": 0.1317,
      "step": 13641
    },
    {
      "epoch": 0.39798121243946555,
      "grad_norm": 1.1840934691961553,
      "learning_rate": 6.850115677942159e-06,
      "loss": 0.1435,
      "step": 13642
    },
    {
      "epoch": 0.3980103856701091,
      "grad_norm": 0.6787396057643946,
      "learning_rate": 6.8496767677042816e-06,
      "loss": 0.1275,
      "step": 13643
    },
    {
      "epoch": 0.39803955890075265,
      "grad_norm": 0.8043946569604296,
      "learning_rate": 6.849237840952933e-06,
      "loss": 0.1441,
      "step": 13644
    },
    {
      "epoch": 0.3980687321313962,
      "grad_norm": 1.013178519533018,
      "learning_rate": 6.8487988976920286e-06,
      "loss": 0.1477,
      "step": 13645
    },
    {
      "epoch": 0.3980979053620398,
      "grad_norm": 0.686504512547161,
      "learning_rate": 6.84835993792549e-06,
      "loss": 0.1303,
      "step": 13646
    },
    {
      "epoch": 0.39812707859268337,
      "grad_norm": 1.4665453513914628,
      "learning_rate": 6.847920961657235e-06,
      "loss": 0.1974,
      "step": 13647
    },
    {
      "epoch": 0.3981562518233269,
      "grad_norm": 0.8053405976829205,
      "learning_rate": 6.847481968891183e-06,
      "loss": 0.1443,
      "step": 13648
    },
    {
      "epoch": 0.3981854250539705,
      "grad_norm": 0.7239652844664418,
      "learning_rate": 6.847042959631253e-06,
      "loss": 0.1338,
      "step": 13649
    },
    {
      "epoch": 0.39821459828461403,
      "grad_norm": 0.8310026643884398,
      "learning_rate": 6.846603933881364e-06,
      "loss": 0.1483,
      "step": 13650
    },
    {
      "epoch": 0.3982437715152576,
      "grad_norm": 0.7626654642274614,
      "learning_rate": 6.846164891645436e-06,
      "loss": 0.1319,
      "step": 13651
    },
    {
      "epoch": 0.39827294474590114,
      "grad_norm": 0.804123077163358,
      "learning_rate": 6.84572583292739e-06,
      "loss": 0.1735,
      "step": 13652
    },
    {
      "epoch": 0.39830211797654475,
      "grad_norm": 0.9864754575496317,
      "learning_rate": 6.845286757731142e-06,
      "loss": 0.1313,
      "step": 13653
    },
    {
      "epoch": 0.3983312912071883,
      "grad_norm": 0.82570891583435,
      "learning_rate": 6.844847666060617e-06,
      "loss": 0.1441,
      "step": 13654
    },
    {
      "epoch": 0.39836046443783185,
      "grad_norm": 0.9266173515462345,
      "learning_rate": 6.844408557919731e-06,
      "loss": 0.1235,
      "step": 13655
    },
    {
      "epoch": 0.3983896376684754,
      "grad_norm": 0.7049451437364153,
      "learning_rate": 6.843969433312404e-06,
      "loss": 0.1462,
      "step": 13656
    },
    {
      "epoch": 0.39841881089911896,
      "grad_norm": 0.761751592673055,
      "learning_rate": 6.8435302922425606e-06,
      "loss": 0.1295,
      "step": 13657
    },
    {
      "epoch": 0.3984479841297625,
      "grad_norm": 0.7271112240939605,
      "learning_rate": 6.843091134714117e-06,
      "loss": 0.1522,
      "step": 13658
    },
    {
      "epoch": 0.39847715736040606,
      "grad_norm": 0.9028992364270692,
      "learning_rate": 6.842651960730997e-06,
      "loss": 0.131,
      "step": 13659
    },
    {
      "epoch": 0.3985063305910497,
      "grad_norm": 0.685376002217138,
      "learning_rate": 6.842212770297121e-06,
      "loss": 0.1241,
      "step": 13660
    },
    {
      "epoch": 0.3985355038216932,
      "grad_norm": 0.7221454328676604,
      "learning_rate": 6.8417735634164075e-06,
      "loss": 0.1232,
      "step": 13661
    },
    {
      "epoch": 0.3985646770523368,
      "grad_norm": 1.018529010873739,
      "learning_rate": 6.841334340092779e-06,
      "loss": 0.1513,
      "step": 13662
    },
    {
      "epoch": 0.39859385028298033,
      "grad_norm": 0.9545248989083854,
      "learning_rate": 6.840895100330159e-06,
      "loss": 0.1615,
      "step": 13663
    },
    {
      "epoch": 0.3986230235136239,
      "grad_norm": 0.7540961625778743,
      "learning_rate": 6.840455844132465e-06,
      "loss": 0.1313,
      "step": 13664
    },
    {
      "epoch": 0.39865219674426744,
      "grad_norm": 0.769118084821783,
      "learning_rate": 6.840016571503622e-06,
      "loss": 0.1515,
      "step": 13665
    },
    {
      "epoch": 0.39868136997491105,
      "grad_norm": 0.8988004881390081,
      "learning_rate": 6.8395772824475494e-06,
      "loss": 0.1445,
      "step": 13666
    },
    {
      "epoch": 0.3987105432055546,
      "grad_norm": 0.9666765933995999,
      "learning_rate": 6.839137976968171e-06,
      "loss": 0.1395,
      "step": 13667
    },
    {
      "epoch": 0.39873971643619815,
      "grad_norm": 0.8060915256768564,
      "learning_rate": 6.838698655069406e-06,
      "loss": 0.1276,
      "step": 13668
    },
    {
      "epoch": 0.3987688896668417,
      "grad_norm": 0.7311542730813468,
      "learning_rate": 6.83825931675518e-06,
      "loss": 0.1384,
      "step": 13669
    },
    {
      "epoch": 0.39879806289748526,
      "grad_norm": 0.836450015113188,
      "learning_rate": 6.8378199620294126e-06,
      "loss": 0.1395,
      "step": 13670
    },
    {
      "epoch": 0.3988272361281288,
      "grad_norm": 0.9349376752476497,
      "learning_rate": 6.837380590896028e-06,
      "loss": 0.1684,
      "step": 13671
    },
    {
      "epoch": 0.39885640935877237,
      "grad_norm": 0.7928181811245086,
      "learning_rate": 6.836941203358947e-06,
      "loss": 0.1472,
      "step": 13672
    },
    {
      "epoch": 0.398885582589416,
      "grad_norm": 0.7648200196571773,
      "learning_rate": 6.836501799422095e-06,
      "loss": 0.1435,
      "step": 13673
    },
    {
      "epoch": 0.39891475582005953,
      "grad_norm": 0.9884217383835263,
      "learning_rate": 6.836062379089393e-06,
      "loss": 0.192,
      "step": 13674
    },
    {
      "epoch": 0.3989439290507031,
      "grad_norm": 0.8091423813958553,
      "learning_rate": 6.8356229423647636e-06,
      "loss": 0.1224,
      "step": 13675
    },
    {
      "epoch": 0.39897310228134664,
      "grad_norm": 0.9697528805961072,
      "learning_rate": 6.83518348925213e-06,
      "loss": 0.1444,
      "step": 13676
    },
    {
      "epoch": 0.3990022755119902,
      "grad_norm": 0.8652562414182313,
      "learning_rate": 6.834744019755419e-06,
      "loss": 0.1361,
      "step": 13677
    },
    {
      "epoch": 0.39903144874263374,
      "grad_norm": 0.899832730824932,
      "learning_rate": 6.8343045338785495e-06,
      "loss": 0.1439,
      "step": 13678
    },
    {
      "epoch": 0.3990606219732773,
      "grad_norm": 1.0012039375355022,
      "learning_rate": 6.833865031625448e-06,
      "loss": 0.1555,
      "step": 13679
    },
    {
      "epoch": 0.3990897952039209,
      "grad_norm": 1.0975808984182382,
      "learning_rate": 6.833425513000036e-06,
      "loss": 0.1648,
      "step": 13680
    },
    {
      "epoch": 0.39911896843456446,
      "grad_norm": 0.6872895853973296,
      "learning_rate": 6.8329859780062395e-06,
      "loss": 0.1328,
      "step": 13681
    },
    {
      "epoch": 0.399148141665208,
      "grad_norm": 0.7616114784834984,
      "learning_rate": 6.832546426647983e-06,
      "loss": 0.1425,
      "step": 13682
    },
    {
      "epoch": 0.39917731489585156,
      "grad_norm": 0.8331362403542113,
      "learning_rate": 6.832106858929186e-06,
      "loss": 0.1538,
      "step": 13683
    },
    {
      "epoch": 0.3992064881264951,
      "grad_norm": 0.6674530606614226,
      "learning_rate": 6.831667274853779e-06,
      "loss": 0.1409,
      "step": 13684
    },
    {
      "epoch": 0.39923566135713867,
      "grad_norm": 0.7260908785878939,
      "learning_rate": 6.831227674425684e-06,
      "loss": 0.1325,
      "step": 13685
    },
    {
      "epoch": 0.3992648345877822,
      "grad_norm": 0.974648249879995,
      "learning_rate": 6.830788057648824e-06,
      "loss": 0.1601,
      "step": 13686
    },
    {
      "epoch": 0.39929400781842583,
      "grad_norm": 0.8437984069947647,
      "learning_rate": 6.830348424527126e-06,
      "loss": 0.1522,
      "step": 13687
    },
    {
      "epoch": 0.3993231810490694,
      "grad_norm": 0.7086230213971858,
      "learning_rate": 6.829908775064514e-06,
      "loss": 0.1467,
      "step": 13688
    },
    {
      "epoch": 0.39935235427971294,
      "grad_norm": 0.9228994496897659,
      "learning_rate": 6.829469109264915e-06,
      "loss": 0.1451,
      "step": 13689
    },
    {
      "epoch": 0.3993815275103565,
      "grad_norm": 0.9013430047366084,
      "learning_rate": 6.82902942713225e-06,
      "loss": 0.1356,
      "step": 13690
    },
    {
      "epoch": 0.39941070074100005,
      "grad_norm": 0.7251464735188569,
      "learning_rate": 6.828589728670447e-06,
      "loss": 0.1452,
      "step": 13691
    },
    {
      "epoch": 0.3994398739716436,
      "grad_norm": 0.7455558487477596,
      "learning_rate": 6.828150013883433e-06,
      "loss": 0.1348,
      "step": 13692
    },
    {
      "epoch": 0.3994690472022872,
      "grad_norm": 0.9193250319521461,
      "learning_rate": 6.8277102827751305e-06,
      "loss": 0.1729,
      "step": 13693
    },
    {
      "epoch": 0.39949822043293076,
      "grad_norm": 0.9482756342979466,
      "learning_rate": 6.827270535349469e-06,
      "loss": 0.1405,
      "step": 13694
    },
    {
      "epoch": 0.3995273936635743,
      "grad_norm": 0.9751141984532288,
      "learning_rate": 6.826830771610371e-06,
      "loss": 0.143,
      "step": 13695
    },
    {
      "epoch": 0.39955656689421787,
      "grad_norm": 1.044181444455976,
      "learning_rate": 6.8263909915617646e-06,
      "loss": 0.141,
      "step": 13696
    },
    {
      "epoch": 0.3995857401248614,
      "grad_norm": 0.8687203231252904,
      "learning_rate": 6.825951195207575e-06,
      "loss": 0.1363,
      "step": 13697
    },
    {
      "epoch": 0.399614913355505,
      "grad_norm": 0.8530636291008991,
      "learning_rate": 6.825511382551729e-06,
      "loss": 0.1484,
      "step": 13698
    },
    {
      "epoch": 0.39964408658614853,
      "grad_norm": 0.9079172967032355,
      "learning_rate": 6.825071553598152e-06,
      "loss": 0.1423,
      "step": 13699
    },
    {
      "epoch": 0.39967325981679214,
      "grad_norm": 0.8637818657784768,
      "learning_rate": 6.824631708350774e-06,
      "loss": 0.1404,
      "step": 13700
    },
    {
      "epoch": 0.3997024330474357,
      "grad_norm": 0.9649551259914709,
      "learning_rate": 6.824191846813517e-06,
      "loss": 0.1404,
      "step": 13701
    },
    {
      "epoch": 0.39973160627807924,
      "grad_norm": 0.9593491688199949,
      "learning_rate": 6.8237519689903145e-06,
      "loss": 0.1235,
      "step": 13702
    },
    {
      "epoch": 0.3997607795087228,
      "grad_norm": 1.0040722232054902,
      "learning_rate": 6.823312074885087e-06,
      "loss": 0.1352,
      "step": 13703
    },
    {
      "epoch": 0.39978995273936635,
      "grad_norm": 0.8886624316938437,
      "learning_rate": 6.822872164501765e-06,
      "loss": 0.1529,
      "step": 13704
    },
    {
      "epoch": 0.3998191259700099,
      "grad_norm": 0.7400950797262666,
      "learning_rate": 6.822432237844275e-06,
      "loss": 0.1644,
      "step": 13705
    },
    {
      "epoch": 0.39984829920065346,
      "grad_norm": 0.8593681653423954,
      "learning_rate": 6.821992294916546e-06,
      "loss": 0.1345,
      "step": 13706
    },
    {
      "epoch": 0.39987747243129707,
      "grad_norm": 1.0256629022542472,
      "learning_rate": 6.821552335722504e-06,
      "loss": 0.1315,
      "step": 13707
    },
    {
      "epoch": 0.3999066456619406,
      "grad_norm": 0.7381785662894601,
      "learning_rate": 6.821112360266079e-06,
      "loss": 0.1226,
      "step": 13708
    },
    {
      "epoch": 0.39993581889258417,
      "grad_norm": 1.0433674533863682,
      "learning_rate": 6.820672368551198e-06,
      "loss": 0.1328,
      "step": 13709
    },
    {
      "epoch": 0.3999649921232277,
      "grad_norm": 0.7546346570667153,
      "learning_rate": 6.8202323605817854e-06,
      "loss": 0.1311,
      "step": 13710
    },
    {
      "epoch": 0.3999941653538713,
      "grad_norm": 1.12112052236558,
      "learning_rate": 6.819792336361775e-06,
      "loss": 0.1355,
      "step": 13711
    },
    {
      "epoch": 0.40002333858451483,
      "grad_norm": 0.697245421308078,
      "learning_rate": 6.819352295895093e-06,
      "loss": 0.1349,
      "step": 13712
    },
    {
      "epoch": 0.4000525118151584,
      "grad_norm": 0.8054019674828651,
      "learning_rate": 6.818912239185666e-06,
      "loss": 0.1358,
      "step": 13713
    },
    {
      "epoch": 0.400081685045802,
      "grad_norm": 0.6075957489490795,
      "learning_rate": 6.8184721662374285e-06,
      "loss": 0.1464,
      "step": 13714
    },
    {
      "epoch": 0.40011085827644555,
      "grad_norm": 0.7445938503867925,
      "learning_rate": 6.818032077054304e-06,
      "loss": 0.1593,
      "step": 13715
    },
    {
      "epoch": 0.4001400315070891,
      "grad_norm": 0.628204010319937,
      "learning_rate": 6.817591971640221e-06,
      "loss": 0.1461,
      "step": 13716
    },
    {
      "epoch": 0.40016920473773265,
      "grad_norm": 0.733320092149337,
      "learning_rate": 6.817151849999114e-06,
      "loss": 0.1495,
      "step": 13717
    },
    {
      "epoch": 0.4001983779683762,
      "grad_norm": 0.8620401004457822,
      "learning_rate": 6.8167117121349065e-06,
      "loss": 0.1189,
      "step": 13718
    },
    {
      "epoch": 0.40022755119901976,
      "grad_norm": 0.699680097500003,
      "learning_rate": 6.8162715580515324e-06,
      "loss": 0.1396,
      "step": 13719
    },
    {
      "epoch": 0.40025672442966337,
      "grad_norm": 0.650314855260613,
      "learning_rate": 6.815831387752918e-06,
      "loss": 0.1318,
      "step": 13720
    },
    {
      "epoch": 0.4002858976603069,
      "grad_norm": 0.7255972396355026,
      "learning_rate": 6.815391201242996e-06,
      "loss": 0.128,
      "step": 13721
    },
    {
      "epoch": 0.4003150708909505,
      "grad_norm": 0.9121216969140034,
      "learning_rate": 6.8149509985256935e-06,
      "loss": 0.1406,
      "step": 13722
    },
    {
      "epoch": 0.40034424412159403,
      "grad_norm": 0.7030736458426388,
      "learning_rate": 6.814510779604942e-06,
      "loss": 0.1463,
      "step": 13723
    },
    {
      "epoch": 0.4003734173522376,
      "grad_norm": 0.8533675344816903,
      "learning_rate": 6.814070544484672e-06,
      "loss": 0.1815,
      "step": 13724
    },
    {
      "epoch": 0.40040259058288113,
      "grad_norm": 0.8935322043974434,
      "learning_rate": 6.813630293168811e-06,
      "loss": 0.1594,
      "step": 13725
    },
    {
      "epoch": 0.4004317638135247,
      "grad_norm": 0.6425807864399719,
      "learning_rate": 6.813190025661294e-06,
      "loss": 0.1313,
      "step": 13726
    },
    {
      "epoch": 0.4004609370441683,
      "grad_norm": 1.0053122257531732,
      "learning_rate": 6.8127497419660495e-06,
      "loss": 0.1411,
      "step": 13727
    },
    {
      "epoch": 0.40049011027481185,
      "grad_norm": 0.7250950628887969,
      "learning_rate": 6.8123094420870065e-06,
      "loss": 0.1133,
      "step": 13728
    },
    {
      "epoch": 0.4005192835054554,
      "grad_norm": 1.0213365217185995,
      "learning_rate": 6.811869126028099e-06,
      "loss": 0.1864,
      "step": 13729
    },
    {
      "epoch": 0.40054845673609896,
      "grad_norm": 0.6960357927011381,
      "learning_rate": 6.811428793793255e-06,
      "loss": 0.1475,
      "step": 13730
    },
    {
      "epoch": 0.4005776299667425,
      "grad_norm": 0.7879005124766262,
      "learning_rate": 6.810988445386406e-06,
      "loss": 0.1352,
      "step": 13731
    },
    {
      "epoch": 0.40060680319738606,
      "grad_norm": 0.8364188457649364,
      "learning_rate": 6.810548080811487e-06,
      "loss": 0.1369,
      "step": 13732
    },
    {
      "epoch": 0.4006359764280296,
      "grad_norm": 0.8757253023910055,
      "learning_rate": 6.810107700072427e-06,
      "loss": 0.1345,
      "step": 13733
    },
    {
      "epoch": 0.4006651496586732,
      "grad_norm": 0.6263167723830406,
      "learning_rate": 6.809667303173156e-06,
      "loss": 0.1249,
      "step": 13734
    },
    {
      "epoch": 0.4006943228893168,
      "grad_norm": 0.7551049383541194,
      "learning_rate": 6.809226890117609e-06,
      "loss": 0.1325,
      "step": 13735
    },
    {
      "epoch": 0.40072349611996033,
      "grad_norm": 0.7788834650234634,
      "learning_rate": 6.8087864609097154e-06,
      "loss": 0.1575,
      "step": 13736
    },
    {
      "epoch": 0.4007526693506039,
      "grad_norm": 0.8587180472217778,
      "learning_rate": 6.8083460155534075e-06,
      "loss": 0.1224,
      "step": 13737
    },
    {
      "epoch": 0.40078184258124744,
      "grad_norm": 1.0791596637441496,
      "learning_rate": 6.807905554052619e-06,
      "loss": 0.1848,
      "step": 13738
    },
    {
      "epoch": 0.400811015811891,
      "grad_norm": 0.8389357021892137,
      "learning_rate": 6.8074650764112815e-06,
      "loss": 0.1581,
      "step": 13739
    },
    {
      "epoch": 0.40084018904253454,
      "grad_norm": 0.7995316925510891,
      "learning_rate": 6.807024582633325e-06,
      "loss": 0.1397,
      "step": 13740
    },
    {
      "epoch": 0.40086936227317815,
      "grad_norm": 0.7666141779090255,
      "learning_rate": 6.806584072722686e-06,
      "loss": 0.1596,
      "step": 13741
    },
    {
      "epoch": 0.4008985355038217,
      "grad_norm": 0.7958196756378516,
      "learning_rate": 6.806143546683297e-06,
      "loss": 0.1502,
      "step": 13742
    },
    {
      "epoch": 0.40092770873446526,
      "grad_norm": 0.8472601015465253,
      "learning_rate": 6.8057030045190866e-06,
      "loss": 0.1631,
      "step": 13743
    },
    {
      "epoch": 0.4009568819651088,
      "grad_norm": 0.7462214077864776,
      "learning_rate": 6.805262446233993e-06,
      "loss": 0.1466,
      "step": 13744
    },
    {
      "epoch": 0.40098605519575237,
      "grad_norm": 0.7412816926561623,
      "learning_rate": 6.804821871831947e-06,
      "loss": 0.1179,
      "step": 13745
    },
    {
      "epoch": 0.4010152284263959,
      "grad_norm": 0.7237460861176316,
      "learning_rate": 6.804381281316881e-06,
      "loss": 0.1392,
      "step": 13746
    },
    {
      "epoch": 0.4010444016570395,
      "grad_norm": 0.7882012158566257,
      "learning_rate": 6.803940674692732e-06,
      "loss": 0.1333,
      "step": 13747
    },
    {
      "epoch": 0.4010735748876831,
      "grad_norm": 0.7731565601826743,
      "learning_rate": 6.80350005196343e-06,
      "loss": 0.1558,
      "step": 13748
    },
    {
      "epoch": 0.40110274811832664,
      "grad_norm": 0.7430322833263876,
      "learning_rate": 6.803059413132909e-06,
      "loss": 0.1547,
      "step": 13749
    },
    {
      "epoch": 0.4011319213489702,
      "grad_norm": 0.8498513542633379,
      "learning_rate": 6.802618758205105e-06,
      "loss": 0.1714,
      "step": 13750
    },
    {
      "epoch": 0.40116109457961374,
      "grad_norm": 1.0071163344423861,
      "learning_rate": 6.802178087183951e-06,
      "loss": 0.1469,
      "step": 13751
    },
    {
      "epoch": 0.4011902678102573,
      "grad_norm": 0.9074375671638603,
      "learning_rate": 6.801737400073381e-06,
      "loss": 0.1431,
      "step": 13752
    },
    {
      "epoch": 0.40121944104090085,
      "grad_norm": 0.8852197129256496,
      "learning_rate": 6.80129669687733e-06,
      "loss": 0.1714,
      "step": 13753
    },
    {
      "epoch": 0.40124861427154446,
      "grad_norm": 0.6976559907887856,
      "learning_rate": 6.800855977599732e-06,
      "loss": 0.1385,
      "step": 13754
    },
    {
      "epoch": 0.401277787502188,
      "grad_norm": 0.879096578156981,
      "learning_rate": 6.80041524224452e-06,
      "loss": 0.1416,
      "step": 13755
    },
    {
      "epoch": 0.40130696073283156,
      "grad_norm": 0.7184119635969515,
      "learning_rate": 6.799974490815633e-06,
      "loss": 0.1414,
      "step": 13756
    },
    {
      "epoch": 0.4013361339634751,
      "grad_norm": 0.8268474876082909,
      "learning_rate": 6.799533723317003e-06,
      "loss": 0.1153,
      "step": 13757
    },
    {
      "epoch": 0.40136530719411867,
      "grad_norm": 0.9037195838981106,
      "learning_rate": 6.799092939752564e-06,
      "loss": 0.1534,
      "step": 13758
    },
    {
      "epoch": 0.4013944804247622,
      "grad_norm": 0.7823091877787984,
      "learning_rate": 6.798652140126255e-06,
      "loss": 0.1586,
      "step": 13759
    },
    {
      "epoch": 0.4014236536554058,
      "grad_norm": 0.9153302275706843,
      "learning_rate": 6.798211324442008e-06,
      "loss": 0.1513,
      "step": 13760
    },
    {
      "epoch": 0.4014528268860494,
      "grad_norm": 0.7831550550923494,
      "learning_rate": 6.7977704927037595e-06,
      "loss": 0.1422,
      "step": 13761
    },
    {
      "epoch": 0.40148200011669294,
      "grad_norm": 0.7939853484381711,
      "learning_rate": 6.797329644915445e-06,
      "loss": 0.1663,
      "step": 13762
    },
    {
      "epoch": 0.4015111733473365,
      "grad_norm": 0.5919398204852676,
      "learning_rate": 6.796888781081e-06,
      "loss": 0.1304,
      "step": 13763
    },
    {
      "epoch": 0.40154034657798005,
      "grad_norm": 0.8825133697731564,
      "learning_rate": 6.796447901204362e-06,
      "loss": 0.1343,
      "step": 13764
    },
    {
      "epoch": 0.4015695198086236,
      "grad_norm": 0.8454693897606342,
      "learning_rate": 6.796007005289465e-06,
      "loss": 0.1412,
      "step": 13765
    },
    {
      "epoch": 0.40159869303926715,
      "grad_norm": 0.9068838026048358,
      "learning_rate": 6.795566093340247e-06,
      "loss": 0.1245,
      "step": 13766
    },
    {
      "epoch": 0.4016278662699107,
      "grad_norm": 0.8999832981873501,
      "learning_rate": 6.795125165360643e-06,
      "loss": 0.1392,
      "step": 13767
    },
    {
      "epoch": 0.4016570395005543,
      "grad_norm": 0.8406459172297751,
      "learning_rate": 6.79468422135459e-06,
      "loss": 0.1506,
      "step": 13768
    },
    {
      "epoch": 0.40168621273119787,
      "grad_norm": 0.8693911267360379,
      "learning_rate": 6.794243261326025e-06,
      "loss": 0.1271,
      "step": 13769
    },
    {
      "epoch": 0.4017153859618414,
      "grad_norm": 0.8783531792446729,
      "learning_rate": 6.7938022852788845e-06,
      "loss": 0.1581,
      "step": 13770
    },
    {
      "epoch": 0.401744559192485,
      "grad_norm": 0.9904763397049472,
      "learning_rate": 6.793361293217105e-06,
      "loss": 0.1473,
      "step": 13771
    },
    {
      "epoch": 0.4017737324231285,
      "grad_norm": 0.8116511921077783,
      "learning_rate": 6.792920285144624e-06,
      "loss": 0.1223,
      "step": 13772
    },
    {
      "epoch": 0.4018029056537721,
      "grad_norm": 1.127403999058117,
      "learning_rate": 6.792479261065379e-06,
      "loss": 0.1488,
      "step": 13773
    },
    {
      "epoch": 0.40183207888441563,
      "grad_norm": 0.7887529480367309,
      "learning_rate": 6.792038220983308e-06,
      "loss": 0.1434,
      "step": 13774
    },
    {
      "epoch": 0.40186125211505924,
      "grad_norm": 0.8417388114250335,
      "learning_rate": 6.791597164902346e-06,
      "loss": 0.1776,
      "step": 13775
    },
    {
      "epoch": 0.4018904253457028,
      "grad_norm": 0.9964791878290662,
      "learning_rate": 6.791156092826434e-06,
      "loss": 0.1535,
      "step": 13776
    },
    {
      "epoch": 0.40191959857634635,
      "grad_norm": 0.8273039584371675,
      "learning_rate": 6.790715004759506e-06,
      "loss": 0.1346,
      "step": 13777
    },
    {
      "epoch": 0.4019487718069899,
      "grad_norm": 0.658956263251214,
      "learning_rate": 6.790273900705502e-06,
      "loss": 0.1111,
      "step": 13778
    },
    {
      "epoch": 0.40197794503763346,
      "grad_norm": 0.9148364028349939,
      "learning_rate": 6.789832780668362e-06,
      "loss": 0.1333,
      "step": 13779
    },
    {
      "epoch": 0.402007118268277,
      "grad_norm": 0.976282860395019,
      "learning_rate": 6.78939164465202e-06,
      "loss": 0.1378,
      "step": 13780
    },
    {
      "epoch": 0.4020362914989206,
      "grad_norm": 0.8420970097414102,
      "learning_rate": 6.788950492660417e-06,
      "loss": 0.1425,
      "step": 13781
    },
    {
      "epoch": 0.40206546472956417,
      "grad_norm": 0.8257070823340206,
      "learning_rate": 6.788509324697492e-06,
      "loss": 0.1568,
      "step": 13782
    },
    {
      "epoch": 0.4020946379602077,
      "grad_norm": 0.9999952522008241,
      "learning_rate": 6.7880681407671835e-06,
      "loss": 0.1807,
      "step": 13783
    },
    {
      "epoch": 0.4021238111908513,
      "grad_norm": 1.4350027163849897,
      "learning_rate": 6.787626940873427e-06,
      "loss": 0.1382,
      "step": 13784
    },
    {
      "epoch": 0.40215298442149483,
      "grad_norm": 0.944498623984756,
      "learning_rate": 6.787185725020166e-06,
      "loss": 0.1454,
      "step": 13785
    },
    {
      "epoch": 0.4021821576521384,
      "grad_norm": 0.8353992898859052,
      "learning_rate": 6.7867444932113365e-06,
      "loss": 0.1152,
      "step": 13786
    },
    {
      "epoch": 0.40221133088278194,
      "grad_norm": 0.9867069702543034,
      "learning_rate": 6.7863032454508786e-06,
      "loss": 0.1368,
      "step": 13787
    },
    {
      "epoch": 0.40224050411342555,
      "grad_norm": 1.0051752584566038,
      "learning_rate": 6.785861981742732e-06,
      "loss": 0.1765,
      "step": 13788
    },
    {
      "epoch": 0.4022696773440691,
      "grad_norm": 0.981644628910491,
      "learning_rate": 6.785420702090837e-06,
      "loss": 0.1785,
      "step": 13789
    },
    {
      "epoch": 0.40229885057471265,
      "grad_norm": 0.8079146604087067,
      "learning_rate": 6.7849794064991306e-06,
      "loss": 0.1426,
      "step": 13790
    },
    {
      "epoch": 0.4023280238053562,
      "grad_norm": 0.9380987146825817,
      "learning_rate": 6.784538094971555e-06,
      "loss": 0.181,
      "step": 13791
    },
    {
      "epoch": 0.40235719703599976,
      "grad_norm": 0.8754134722520672,
      "learning_rate": 6.784096767512048e-06,
      "loss": 0.1339,
      "step": 13792
    },
    {
      "epoch": 0.4023863702666433,
      "grad_norm": 0.851925529761588,
      "learning_rate": 6.783655424124551e-06,
      "loss": 0.1466,
      "step": 13793
    },
    {
      "epoch": 0.40241554349728687,
      "grad_norm": 0.7035318098262453,
      "learning_rate": 6.783214064813007e-06,
      "loss": 0.1496,
      "step": 13794
    },
    {
      "epoch": 0.4024447167279305,
      "grad_norm": 0.9123551210521693,
      "learning_rate": 6.782772689581352e-06,
      "loss": 0.1548,
      "step": 13795
    },
    {
      "epoch": 0.402473889958574,
      "grad_norm": 0.807820870976712,
      "learning_rate": 6.782331298433527e-06,
      "loss": 0.1428,
      "step": 13796
    },
    {
      "epoch": 0.4025030631892176,
      "grad_norm": 0.937659808531094,
      "learning_rate": 6.781889891373475e-06,
      "loss": 0.164,
      "step": 13797
    },
    {
      "epoch": 0.40253223641986113,
      "grad_norm": 0.8513003427067009,
      "learning_rate": 6.781448468405134e-06,
      "loss": 0.1585,
      "step": 13798
    },
    {
      "epoch": 0.4025614096505047,
      "grad_norm": 0.6954541883374559,
      "learning_rate": 6.781007029532447e-06,
      "loss": 0.1357,
      "step": 13799
    },
    {
      "epoch": 0.40259058288114824,
      "grad_norm": 1.1038917546829061,
      "learning_rate": 6.780565574759355e-06,
      "loss": 0.1552,
      "step": 13800
    },
    {
      "epoch": 0.4026197561117918,
      "grad_norm": 0.9587137507817821,
      "learning_rate": 6.780124104089797e-06,
      "loss": 0.1531,
      "step": 13801
    },
    {
      "epoch": 0.4026489293424354,
      "grad_norm": 0.9066835913109763,
      "learning_rate": 6.779682617527716e-06,
      "loss": 0.1333,
      "step": 13802
    },
    {
      "epoch": 0.40267810257307896,
      "grad_norm": 0.8230750795006987,
      "learning_rate": 6.779241115077055e-06,
      "loss": 0.1264,
      "step": 13803
    },
    {
      "epoch": 0.4027072758037225,
      "grad_norm": 0.7413107909208608,
      "learning_rate": 6.778799596741754e-06,
      "loss": 0.1504,
      "step": 13804
    },
    {
      "epoch": 0.40273644903436606,
      "grad_norm": 0.9141975771969796,
      "learning_rate": 6.778358062525754e-06,
      "loss": 0.1257,
      "step": 13805
    },
    {
      "epoch": 0.4027656222650096,
      "grad_norm": 0.7411382445326381,
      "learning_rate": 6.7779165124329996e-06,
      "loss": 0.1517,
      "step": 13806
    },
    {
      "epoch": 0.40279479549565317,
      "grad_norm": 0.8542320160237078,
      "learning_rate": 6.777474946467429e-06,
      "loss": 0.1217,
      "step": 13807
    },
    {
      "epoch": 0.4028239687262968,
      "grad_norm": 0.8449350094060898,
      "learning_rate": 6.777033364632985e-06,
      "loss": 0.1413,
      "step": 13808
    },
    {
      "epoch": 0.40285314195694033,
      "grad_norm": 0.6907324546454325,
      "learning_rate": 6.776591766933615e-06,
      "loss": 0.125,
      "step": 13809
    },
    {
      "epoch": 0.4028823151875839,
      "grad_norm": 0.8094075390524268,
      "learning_rate": 6.776150153373256e-06,
      "loss": 0.1598,
      "step": 13810
    },
    {
      "epoch": 0.40291148841822744,
      "grad_norm": 0.7738383526866942,
      "learning_rate": 6.775708523955853e-06,
      "loss": 0.1332,
      "step": 13811
    },
    {
      "epoch": 0.402940661648871,
      "grad_norm": 1.0297724380694226,
      "learning_rate": 6.775266878685347e-06,
      "loss": 0.1307,
      "step": 13812
    },
    {
      "epoch": 0.40296983487951454,
      "grad_norm": 0.6461842201212494,
      "learning_rate": 6.774825217565683e-06,
      "loss": 0.1213,
      "step": 13813
    },
    {
      "epoch": 0.4029990081101581,
      "grad_norm": 0.8215650821296505,
      "learning_rate": 6.774383540600802e-06,
      "loss": 0.1348,
      "step": 13814
    },
    {
      "epoch": 0.4030281813408017,
      "grad_norm": 0.7159503319524297,
      "learning_rate": 6.773941847794649e-06,
      "loss": 0.1274,
      "step": 13815
    },
    {
      "epoch": 0.40305735457144526,
      "grad_norm": 1.0293789422131108,
      "learning_rate": 6.773500139151168e-06,
      "loss": 0.1448,
      "step": 13816
    },
    {
      "epoch": 0.4030865278020888,
      "grad_norm": 0.784740719277153,
      "learning_rate": 6.7730584146743e-06,
      "loss": 0.1541,
      "step": 13817
    },
    {
      "epoch": 0.40311570103273237,
      "grad_norm": 0.7981040480949031,
      "learning_rate": 6.772616674367989e-06,
      "loss": 0.1421,
      "step": 13818
    },
    {
      "epoch": 0.4031448742633759,
      "grad_norm": 1.3657133072490035,
      "learning_rate": 6.772174918236181e-06,
      "loss": 0.1348,
      "step": 13819
    },
    {
      "epoch": 0.40317404749401947,
      "grad_norm": 1.1758712817903028,
      "learning_rate": 6.771733146282816e-06,
      "loss": 0.1657,
      "step": 13820
    },
    {
      "epoch": 0.403203220724663,
      "grad_norm": 0.7212966744394569,
      "learning_rate": 6.7712913585118434e-06,
      "loss": 0.1284,
      "step": 13821
    },
    {
      "epoch": 0.40323239395530663,
      "grad_norm": 0.8898275224673643,
      "learning_rate": 6.770849554927203e-06,
      "loss": 0.1612,
      "step": 13822
    },
    {
      "epoch": 0.4032615671859502,
      "grad_norm": 1.1733907420083407,
      "learning_rate": 6.77040773553284e-06,
      "loss": 0.1539,
      "step": 13823
    },
    {
      "epoch": 0.40329074041659374,
      "grad_norm": 0.8545844898165131,
      "learning_rate": 6.7699659003327e-06,
      "loss": 0.136,
      "step": 13824
    },
    {
      "epoch": 0.4033199136472373,
      "grad_norm": 0.6994677993077235,
      "learning_rate": 6.769524049330727e-06,
      "loss": 0.1172,
      "step": 13825
    },
    {
      "epoch": 0.40334908687788085,
      "grad_norm": 1.154375089571428,
      "learning_rate": 6.769082182530866e-06,
      "loss": 0.1431,
      "step": 13826
    },
    {
      "epoch": 0.4033782601085244,
      "grad_norm": 0.9552074751750567,
      "learning_rate": 6.76864029993706e-06,
      "loss": 0.1682,
      "step": 13827
    },
    {
      "epoch": 0.40340743333916795,
      "grad_norm": 0.9023359896616971,
      "learning_rate": 6.768198401553258e-06,
      "loss": 0.1644,
      "step": 13828
    },
    {
      "epoch": 0.40343660656981156,
      "grad_norm": 0.9605314386736236,
      "learning_rate": 6.767756487383401e-06,
      "loss": 0.135,
      "step": 13829
    },
    {
      "epoch": 0.4034657798004551,
      "grad_norm": 1.0007858002830456,
      "learning_rate": 6.767314557431437e-06,
      "loss": 0.1452,
      "step": 13830
    },
    {
      "epoch": 0.40349495303109867,
      "grad_norm": 0.7484454973488388,
      "learning_rate": 6.76687261170131e-06,
      "loss": 0.148,
      "step": 13831
    },
    {
      "epoch": 0.4035241262617422,
      "grad_norm": 0.7623243774816232,
      "learning_rate": 6.766430650196966e-06,
      "loss": 0.1243,
      "step": 13832
    },
    {
      "epoch": 0.4035532994923858,
      "grad_norm": 0.8400999008001412,
      "learning_rate": 6.76598867292235e-06,
      "loss": 0.1515,
      "step": 13833
    },
    {
      "epoch": 0.40358247272302933,
      "grad_norm": 0.7423313540985967,
      "learning_rate": 6.765546679881412e-06,
      "loss": 0.1226,
      "step": 13834
    },
    {
      "epoch": 0.40361164595367294,
      "grad_norm": 0.8349194568129509,
      "learning_rate": 6.765104671078091e-06,
      "loss": 0.1341,
      "step": 13835
    },
    {
      "epoch": 0.4036408191843165,
      "grad_norm": 0.9111840040080945,
      "learning_rate": 6.764662646516339e-06,
      "loss": 0.1444,
      "step": 13836
    },
    {
      "epoch": 0.40366999241496004,
      "grad_norm": 0.8620917387428572,
      "learning_rate": 6.7642206062001e-06,
      "loss": 0.1619,
      "step": 13837
    },
    {
      "epoch": 0.4036991656456036,
      "grad_norm": 1.0970345348337347,
      "learning_rate": 6.763778550133319e-06,
      "loss": 0.1235,
      "step": 13838
    },
    {
      "epoch": 0.40372833887624715,
      "grad_norm": 0.7091912525413521,
      "learning_rate": 6.763336478319946e-06,
      "loss": 0.1236,
      "step": 13839
    },
    {
      "epoch": 0.4037575121068907,
      "grad_norm": 0.9205896424246612,
      "learning_rate": 6.762894390763926e-06,
      "loss": 0.1356,
      "step": 13840
    },
    {
      "epoch": 0.40378668533753426,
      "grad_norm": 0.6969497000615098,
      "learning_rate": 6.762452287469203e-06,
      "loss": 0.1302,
      "step": 13841
    },
    {
      "epoch": 0.40381585856817787,
      "grad_norm": 0.9055544492457576,
      "learning_rate": 6.762010168439729e-06,
      "loss": 0.1581,
      "step": 13842
    },
    {
      "epoch": 0.4038450317988214,
      "grad_norm": 0.9098400181396813,
      "learning_rate": 6.7615680336794485e-06,
      "loss": 0.1268,
      "step": 13843
    },
    {
      "epoch": 0.40387420502946497,
      "grad_norm": 0.8242693953386512,
      "learning_rate": 6.761125883192309e-06,
      "loss": 0.1566,
      "step": 13844
    },
    {
      "epoch": 0.4039033782601085,
      "grad_norm": 0.729184034319107,
      "learning_rate": 6.7606837169822585e-06,
      "loss": 0.1531,
      "step": 13845
    },
    {
      "epoch": 0.4039325514907521,
      "grad_norm": 0.681249633724076,
      "learning_rate": 6.7602415350532425e-06,
      "loss": 0.132,
      "step": 13846
    },
    {
      "epoch": 0.40396172472139563,
      "grad_norm": 0.8098940345284736,
      "learning_rate": 6.759799337409212e-06,
      "loss": 0.1305,
      "step": 13847
    },
    {
      "epoch": 0.4039908979520392,
      "grad_norm": 0.9028666825782868,
      "learning_rate": 6.759357124054113e-06,
      "loss": 0.1411,
      "step": 13848
    },
    {
      "epoch": 0.4040200711826828,
      "grad_norm": 0.7851334979480659,
      "learning_rate": 6.758914894991892e-06,
      "loss": 0.1659,
      "step": 13849
    },
    {
      "epoch": 0.40404924441332635,
      "grad_norm": 0.8001877139594606,
      "learning_rate": 6.7584726502264994e-06,
      "loss": 0.1306,
      "step": 13850
    },
    {
      "epoch": 0.4040784176439699,
      "grad_norm": 0.8583107281229556,
      "learning_rate": 6.7580303897618845e-06,
      "loss": 0.1512,
      "step": 13851
    },
    {
      "epoch": 0.40410759087461345,
      "grad_norm": 0.9444092669818012,
      "learning_rate": 6.757588113601993e-06,
      "loss": 0.147,
      "step": 13852
    },
    {
      "epoch": 0.404136764105257,
      "grad_norm": 0.959684922988974,
      "learning_rate": 6.757145821750772e-06,
      "loss": 0.1303,
      "step": 13853
    },
    {
      "epoch": 0.40416593733590056,
      "grad_norm": 1.23689071723602,
      "learning_rate": 6.7567035142121765e-06,
      "loss": 0.156,
      "step": 13854
    },
    {
      "epoch": 0.4041951105665441,
      "grad_norm": 0.960208884381623,
      "learning_rate": 6.7562611909901485e-06,
      "loss": 0.1291,
      "step": 13855
    },
    {
      "epoch": 0.4042242837971877,
      "grad_norm": 0.8049659595421716,
      "learning_rate": 6.755818852088641e-06,
      "loss": 0.1299,
      "step": 13856
    },
    {
      "epoch": 0.4042534570278313,
      "grad_norm": 0.9656629770882997,
      "learning_rate": 6.755376497511602e-06,
      "loss": 0.1591,
      "step": 13857
    },
    {
      "epoch": 0.40428263025847483,
      "grad_norm": 0.7832230564762516,
      "learning_rate": 6.75493412726298e-06,
      "loss": 0.1533,
      "step": 13858
    },
    {
      "epoch": 0.4043118034891184,
      "grad_norm": 0.993521752311356,
      "learning_rate": 6.754491741346726e-06,
      "loss": 0.1564,
      "step": 13859
    },
    {
      "epoch": 0.40434097671976194,
      "grad_norm": 0.7183243951121271,
      "learning_rate": 6.754049339766787e-06,
      "loss": 0.121,
      "step": 13860
    },
    {
      "epoch": 0.4043701499504055,
      "grad_norm": 0.7915070967006594,
      "learning_rate": 6.753606922527116e-06,
      "loss": 0.1203,
      "step": 13861
    },
    {
      "epoch": 0.40439932318104904,
      "grad_norm": 0.9709987998774869,
      "learning_rate": 6.75316448963166e-06,
      "loss": 0.1401,
      "step": 13862
    },
    {
      "epoch": 0.40442849641169265,
      "grad_norm": 0.9789264146470273,
      "learning_rate": 6.75272204108437e-06,
      "loss": 0.1619,
      "step": 13863
    },
    {
      "epoch": 0.4044576696423362,
      "grad_norm": 0.8771147484309805,
      "learning_rate": 6.752279576889197e-06,
      "loss": 0.152,
      "step": 13864
    },
    {
      "epoch": 0.40448684287297976,
      "grad_norm": 1.167611679520056,
      "learning_rate": 6.751837097050089e-06,
      "loss": 0.1554,
      "step": 13865
    },
    {
      "epoch": 0.4045160161036233,
      "grad_norm": 0.9789670282267943,
      "learning_rate": 6.751394601570999e-06,
      "loss": 0.1502,
      "step": 13866
    },
    {
      "epoch": 0.40454518933426686,
      "grad_norm": 0.8399982462391157,
      "learning_rate": 6.750952090455875e-06,
      "loss": 0.1338,
      "step": 13867
    },
    {
      "epoch": 0.4045743625649104,
      "grad_norm": 0.8032501860187403,
      "learning_rate": 6.750509563708667e-06,
      "loss": 0.126,
      "step": 13868
    },
    {
      "epoch": 0.404603535795554,
      "grad_norm": 0.9235085166172942,
      "learning_rate": 6.750067021333331e-06,
      "loss": 0.1584,
      "step": 13869
    },
    {
      "epoch": 0.4046327090261976,
      "grad_norm": 1.0484610246377482,
      "learning_rate": 6.749624463333812e-06,
      "loss": 0.1671,
      "step": 13870
    },
    {
      "epoch": 0.40466188225684113,
      "grad_norm": 1.0220461955362339,
      "learning_rate": 6.749181889714065e-06,
      "loss": 0.1477,
      "step": 13871
    },
    {
      "epoch": 0.4046910554874847,
      "grad_norm": 0.8179953457084675,
      "learning_rate": 6.748739300478038e-06,
      "loss": 0.1577,
      "step": 13872
    },
    {
      "epoch": 0.40472022871812824,
      "grad_norm": 0.9453756368427266,
      "learning_rate": 6.748296695629686e-06,
      "loss": 0.1306,
      "step": 13873
    },
    {
      "epoch": 0.4047494019487718,
      "grad_norm": 0.9213055063477555,
      "learning_rate": 6.747854075172957e-06,
      "loss": 0.1742,
      "step": 13874
    },
    {
      "epoch": 0.40477857517941535,
      "grad_norm": 0.7635865055614118,
      "learning_rate": 6.747411439111804e-06,
      "loss": 0.1376,
      "step": 13875
    },
    {
      "epoch": 0.40480774841005895,
      "grad_norm": 0.6466773592673615,
      "learning_rate": 6.746968787450179e-06,
      "loss": 0.1379,
      "step": 13876
    },
    {
      "epoch": 0.4048369216407025,
      "grad_norm": 1.035500235797688,
      "learning_rate": 6.746526120192034e-06,
      "loss": 0.1361,
      "step": 13877
    },
    {
      "epoch": 0.40486609487134606,
      "grad_norm": 0.8626095453778869,
      "learning_rate": 6.74608343734132e-06,
      "loss": 0.1423,
      "step": 13878
    },
    {
      "epoch": 0.4048952681019896,
      "grad_norm": 0.678155769586051,
      "learning_rate": 6.7456407389019914e-06,
      "loss": 0.1245,
      "step": 13879
    },
    {
      "epoch": 0.40492444133263317,
      "grad_norm": 0.9401401481724677,
      "learning_rate": 6.745198024877997e-06,
      "loss": 0.1586,
      "step": 13880
    },
    {
      "epoch": 0.4049536145632767,
      "grad_norm": 1.68582785052616,
      "learning_rate": 6.744755295273293e-06,
      "loss": 0.121,
      "step": 13881
    },
    {
      "epoch": 0.4049827877939203,
      "grad_norm": 0.7929425097320372,
      "learning_rate": 6.74431255009183e-06,
      "loss": 0.1316,
      "step": 13882
    },
    {
      "epoch": 0.4050119610245639,
      "grad_norm": 0.8223080449054122,
      "learning_rate": 6.743869789337561e-06,
      "loss": 0.1426,
      "step": 13883
    },
    {
      "epoch": 0.40504113425520744,
      "grad_norm": 0.9757420853281957,
      "learning_rate": 6.743427013014439e-06,
      "loss": 0.158,
      "step": 13884
    },
    {
      "epoch": 0.405070307485851,
      "grad_norm": 0.7557406490237888,
      "learning_rate": 6.742984221126415e-06,
      "loss": 0.1408,
      "step": 13885
    },
    {
      "epoch": 0.40509948071649454,
      "grad_norm": 1.1250477990480532,
      "learning_rate": 6.7425414136774455e-06,
      "loss": 0.1428,
      "step": 13886
    },
    {
      "epoch": 0.4051286539471381,
      "grad_norm": 0.7119943340348683,
      "learning_rate": 6.742098590671482e-06,
      "loss": 0.1488,
      "step": 13887
    },
    {
      "epoch": 0.40515782717778165,
      "grad_norm": 0.8331837913748907,
      "learning_rate": 6.741655752112477e-06,
      "loss": 0.1548,
      "step": 13888
    },
    {
      "epoch": 0.4051870004084252,
      "grad_norm": 1.1710702037678071,
      "learning_rate": 6.741212898004387e-06,
      "loss": 0.1301,
      "step": 13889
    },
    {
      "epoch": 0.4052161736390688,
      "grad_norm": 0.8428828786739003,
      "learning_rate": 6.740770028351162e-06,
      "loss": 0.1437,
      "step": 13890
    },
    {
      "epoch": 0.40524534686971236,
      "grad_norm": 0.735871318414556,
      "learning_rate": 6.74032714315676e-06,
      "loss": 0.1369,
      "step": 13891
    },
    {
      "epoch": 0.4052745201003559,
      "grad_norm": 1.1021626705588285,
      "learning_rate": 6.739884242425131e-06,
      "loss": 0.1389,
      "step": 13892
    },
    {
      "epoch": 0.40530369333099947,
      "grad_norm": 0.841698844264047,
      "learning_rate": 6.739441326160232e-06,
      "loss": 0.1433,
      "step": 13893
    },
    {
      "epoch": 0.405332866561643,
      "grad_norm": 0.8515204871458688,
      "learning_rate": 6.7389983943660166e-06,
      "loss": 0.1331,
      "step": 13894
    },
    {
      "epoch": 0.4053620397922866,
      "grad_norm": 0.8197276598858572,
      "learning_rate": 6.738555447046435e-06,
      "loss": 0.1428,
      "step": 13895
    },
    {
      "epoch": 0.4053912130229302,
      "grad_norm": 0.6604443901026178,
      "learning_rate": 6.73811248420545e-06,
      "loss": 0.1458,
      "step": 13896
    },
    {
      "epoch": 0.40542038625357374,
      "grad_norm": 0.6456051881981346,
      "learning_rate": 6.73766950584701e-06,
      "loss": 0.1314,
      "step": 13897
    },
    {
      "epoch": 0.4054495594842173,
      "grad_norm": 0.9471223019182881,
      "learning_rate": 6.73722651197507e-06,
      "loss": 0.1425,
      "step": 13898
    },
    {
      "epoch": 0.40547873271486085,
      "grad_norm": 0.9073627970640993,
      "learning_rate": 6.736783502593588e-06,
      "loss": 0.1476,
      "step": 13899
    },
    {
      "epoch": 0.4055079059455044,
      "grad_norm": 0.7660624869700193,
      "learning_rate": 6.7363404777065165e-06,
      "loss": 0.1361,
      "step": 13900
    },
    {
      "epoch": 0.40553707917614795,
      "grad_norm": 0.6532465243325225,
      "learning_rate": 6.735897437317814e-06,
      "loss": 0.1342,
      "step": 13901
    },
    {
      "epoch": 0.4055662524067915,
      "grad_norm": 0.9244306440291867,
      "learning_rate": 6.73545438143143e-06,
      "loss": 0.1538,
      "step": 13902
    },
    {
      "epoch": 0.4055954256374351,
      "grad_norm": 1.262680194200995,
      "learning_rate": 6.735011310051326e-06,
      "loss": 0.1483,
      "step": 13903
    },
    {
      "epoch": 0.40562459886807867,
      "grad_norm": 0.7438170902124872,
      "learning_rate": 6.734568223181454e-06,
      "loss": 0.1292,
      "step": 13904
    },
    {
      "epoch": 0.4056537720987222,
      "grad_norm": 0.8378501698766265,
      "learning_rate": 6.734125120825772e-06,
      "loss": 0.1339,
      "step": 13905
    },
    {
      "epoch": 0.4056829453293658,
      "grad_norm": 0.7786892693350929,
      "learning_rate": 6.733682002988234e-06,
      "loss": 0.1544,
      "step": 13906
    },
    {
      "epoch": 0.4057121185600093,
      "grad_norm": 0.7707653887747307,
      "learning_rate": 6.733238869672798e-06,
      "loss": 0.1526,
      "step": 13907
    },
    {
      "epoch": 0.4057412917906529,
      "grad_norm": 0.824372124571805,
      "learning_rate": 6.732795720883418e-06,
      "loss": 0.1449,
      "step": 13908
    },
    {
      "epoch": 0.40577046502129643,
      "grad_norm": 0.8756119621790452,
      "learning_rate": 6.732352556624054e-06,
      "loss": 0.1457,
      "step": 13909
    },
    {
      "epoch": 0.40579963825194004,
      "grad_norm": 0.8489565094040106,
      "learning_rate": 6.731909376898655e-06,
      "loss": 0.1546,
      "step": 13910
    },
    {
      "epoch": 0.4058288114825836,
      "grad_norm": 0.8823960475572548,
      "learning_rate": 6.731466181711187e-06,
      "loss": 0.1331,
      "step": 13911
    },
    {
      "epoch": 0.40585798471322715,
      "grad_norm": 0.9194805386582351,
      "learning_rate": 6.7310229710656e-06,
      "loss": 0.1301,
      "step": 13912
    },
    {
      "epoch": 0.4058871579438707,
      "grad_norm": 0.8924955956329476,
      "learning_rate": 6.730579744965853e-06,
      "loss": 0.1783,
      "step": 13913
    },
    {
      "epoch": 0.40591633117451426,
      "grad_norm": 0.7918824527035137,
      "learning_rate": 6.730136503415905e-06,
      "loss": 0.1293,
      "step": 13914
    },
    {
      "epoch": 0.4059455044051578,
      "grad_norm": 1.1705718073496392,
      "learning_rate": 6.72969324641971e-06,
      "loss": 0.1413,
      "step": 13915
    },
    {
      "epoch": 0.40597467763580136,
      "grad_norm": 1.3340861208821262,
      "learning_rate": 6.7292499739812265e-06,
      "loss": 0.1422,
      "step": 13916
    },
    {
      "epoch": 0.40600385086644497,
      "grad_norm": 0.7533503981511502,
      "learning_rate": 6.7288066861044135e-06,
      "loss": 0.153,
      "step": 13917
    },
    {
      "epoch": 0.4060330240970885,
      "grad_norm": 0.7670577018488539,
      "learning_rate": 6.728363382793226e-06,
      "loss": 0.1093,
      "step": 13918
    },
    {
      "epoch": 0.4060621973277321,
      "grad_norm": 1.0263661022388535,
      "learning_rate": 6.727920064051623e-06,
      "loss": 0.1281,
      "step": 13919
    },
    {
      "epoch": 0.40609137055837563,
      "grad_norm": 1.0922248593278725,
      "learning_rate": 6.727476729883562e-06,
      "loss": 0.1392,
      "step": 13920
    },
    {
      "epoch": 0.4061205437890192,
      "grad_norm": 0.8025404990280155,
      "learning_rate": 6.727033380293e-06,
      "loss": 0.1514,
      "step": 13921
    },
    {
      "epoch": 0.40614971701966274,
      "grad_norm": 0.9046361301964011,
      "learning_rate": 6.726590015283898e-06,
      "loss": 0.1371,
      "step": 13922
    },
    {
      "epoch": 0.40617889025030635,
      "grad_norm": 0.9155983421745632,
      "learning_rate": 6.726146634860211e-06,
      "loss": 0.1564,
      "step": 13923
    },
    {
      "epoch": 0.4062080634809499,
      "grad_norm": 0.9383638023490466,
      "learning_rate": 6.725703239025902e-06,
      "loss": 0.1161,
      "step": 13924
    },
    {
      "epoch": 0.40623723671159345,
      "grad_norm": 0.7331267287852953,
      "learning_rate": 6.7252598277849224e-06,
      "loss": 0.1147,
      "step": 13925
    },
    {
      "epoch": 0.406266409942237,
      "grad_norm": 0.7813416010787895,
      "learning_rate": 6.724816401141238e-06,
      "loss": 0.14,
      "step": 13926
    },
    {
      "epoch": 0.40629558317288056,
      "grad_norm": 0.7237744179297166,
      "learning_rate": 6.724372959098804e-06,
      "loss": 0.1195,
      "step": 13927
    },
    {
      "epoch": 0.4063247564035241,
      "grad_norm": 0.7361917382200347,
      "learning_rate": 6.723929501661577e-06,
      "loss": 0.1245,
      "step": 13928
    },
    {
      "epoch": 0.40635392963416767,
      "grad_norm": 0.8156374258026021,
      "learning_rate": 6.7234860288335226e-06,
      "loss": 0.1464,
      "step": 13929
    },
    {
      "epoch": 0.4063831028648113,
      "grad_norm": 0.892837999734048,
      "learning_rate": 6.723042540618594e-06,
      "loss": 0.127,
      "step": 13930
    },
    {
      "epoch": 0.4064122760954548,
      "grad_norm": 0.7440565252222753,
      "learning_rate": 6.722599037020754e-06,
      "loss": 0.1363,
      "step": 13931
    },
    {
      "epoch": 0.4064414493260984,
      "grad_norm": 0.983731475863947,
      "learning_rate": 6.722155518043961e-06,
      "loss": 0.1534,
      "step": 13932
    },
    {
      "epoch": 0.40647062255674193,
      "grad_norm": 0.8136877659832364,
      "learning_rate": 6.721711983692174e-06,
      "loss": 0.1436,
      "step": 13933
    },
    {
      "epoch": 0.4064997957873855,
      "grad_norm": 0.7771830696086671,
      "learning_rate": 6.721268433969354e-06,
      "loss": 0.1349,
      "step": 13934
    },
    {
      "epoch": 0.40652896901802904,
      "grad_norm": 0.8047228295417147,
      "learning_rate": 6.720824868879461e-06,
      "loss": 0.1331,
      "step": 13935
    },
    {
      "epoch": 0.4065581422486726,
      "grad_norm": 1.1618088355115954,
      "learning_rate": 6.720381288426453e-06,
      "loss": 0.1345,
      "step": 13936
    },
    {
      "epoch": 0.4065873154793162,
      "grad_norm": 0.8696414737240781,
      "learning_rate": 6.719937692614291e-06,
      "loss": 0.13,
      "step": 13937
    },
    {
      "epoch": 0.40661648870995976,
      "grad_norm": 0.8758969354676212,
      "learning_rate": 6.719494081446938e-06,
      "loss": 0.1376,
      "step": 13938
    },
    {
      "epoch": 0.4066456619406033,
      "grad_norm": 0.9729964811323002,
      "learning_rate": 6.719050454928352e-06,
      "loss": 0.144,
      "step": 13939
    },
    {
      "epoch": 0.40667483517124686,
      "grad_norm": 0.7917882584987335,
      "learning_rate": 6.718606813062491e-06,
      "loss": 0.1451,
      "step": 13940
    },
    {
      "epoch": 0.4067040084018904,
      "grad_norm": 1.034237100355872,
      "learning_rate": 6.718163155853324e-06,
      "loss": 0.1478,
      "step": 13941
    },
    {
      "epoch": 0.40673318163253397,
      "grad_norm": 0.8205713409659606,
      "learning_rate": 6.717719483304802e-06,
      "loss": 0.131,
      "step": 13942
    },
    {
      "epoch": 0.4067623548631775,
      "grad_norm": 0.9173721626722815,
      "learning_rate": 6.717275795420891e-06,
      "loss": 0.1312,
      "step": 13943
    },
    {
      "epoch": 0.40679152809382113,
      "grad_norm": 0.8580739255366375,
      "learning_rate": 6.716832092205553e-06,
      "loss": 0.127,
      "step": 13944
    },
    {
      "epoch": 0.4068207013244647,
      "grad_norm": 0.7384111949999307,
      "learning_rate": 6.716388373662748e-06,
      "loss": 0.1455,
      "step": 13945
    },
    {
      "epoch": 0.40684987455510824,
      "grad_norm": 0.7810865620045411,
      "learning_rate": 6.7159446397964365e-06,
      "loss": 0.1337,
      "step": 13946
    },
    {
      "epoch": 0.4068790477857518,
      "grad_norm": 0.608604351621167,
      "learning_rate": 6.71550089061058e-06,
      "loss": 0.1504,
      "step": 13947
    },
    {
      "epoch": 0.40690822101639534,
      "grad_norm": 0.7273311664070611,
      "learning_rate": 6.715057126109144e-06,
      "loss": 0.1451,
      "step": 13948
    },
    {
      "epoch": 0.4069373942470389,
      "grad_norm": 0.8768176956995616,
      "learning_rate": 6.714613346296084e-06,
      "loss": 0.1259,
      "step": 13949
    },
    {
      "epoch": 0.4069665674776825,
      "grad_norm": 0.6964834610105543,
      "learning_rate": 6.7141695511753665e-06,
      "loss": 0.1361,
      "step": 13950
    },
    {
      "epoch": 0.40699574070832606,
      "grad_norm": 0.8443813847819621,
      "learning_rate": 6.7137257407509535e-06,
      "loss": 0.1344,
      "step": 13951
    },
    {
      "epoch": 0.4070249139389696,
      "grad_norm": 0.7819794995447436,
      "learning_rate": 6.7132819150268055e-06,
      "loss": 0.1478,
      "step": 13952
    },
    {
      "epoch": 0.40705408716961317,
      "grad_norm": 1.0522374776671786,
      "learning_rate": 6.712838074006886e-06,
      "loss": 0.1512,
      "step": 13953
    },
    {
      "epoch": 0.4070832604002567,
      "grad_norm": 0.7433847992382673,
      "learning_rate": 6.712394217695157e-06,
      "loss": 0.124,
      "step": 13954
    },
    {
      "epoch": 0.4071124336309003,
      "grad_norm": 0.8055643101783762,
      "learning_rate": 6.71195034609558e-06,
      "loss": 0.1505,
      "step": 13955
    },
    {
      "epoch": 0.4071416068615438,
      "grad_norm": 0.8761014103587407,
      "learning_rate": 6.711506459212121e-06,
      "loss": 0.1362,
      "step": 13956
    },
    {
      "epoch": 0.40717078009218743,
      "grad_norm": 0.8862245770951604,
      "learning_rate": 6.7110625570487396e-06,
      "loss": 0.1293,
      "step": 13957
    },
    {
      "epoch": 0.407199953322831,
      "grad_norm": 0.7771320225609402,
      "learning_rate": 6.7106186396094e-06,
      "loss": 0.1555,
      "step": 13958
    },
    {
      "epoch": 0.40722912655347454,
      "grad_norm": 0.8519494872743051,
      "learning_rate": 6.710174706898066e-06,
      "loss": 0.1249,
      "step": 13959
    },
    {
      "epoch": 0.4072582997841181,
      "grad_norm": 0.8705566898812466,
      "learning_rate": 6.7097307589187e-06,
      "loss": 0.141,
      "step": 13960
    },
    {
      "epoch": 0.40728747301476165,
      "grad_norm": 0.8819784237527349,
      "learning_rate": 6.709286795675267e-06,
      "loss": 0.1804,
      "step": 13961
    },
    {
      "epoch": 0.4073166462454052,
      "grad_norm": 0.8451911639438492,
      "learning_rate": 6.708842817171728e-06,
      "loss": 0.1475,
      "step": 13962
    },
    {
      "epoch": 0.40734581947604875,
      "grad_norm": 0.8277653220416917,
      "learning_rate": 6.708398823412048e-06,
      "loss": 0.1511,
      "step": 13963
    },
    {
      "epoch": 0.40737499270669236,
      "grad_norm": 0.7493702203092666,
      "learning_rate": 6.707954814400194e-06,
      "loss": 0.1412,
      "step": 13964
    },
    {
      "epoch": 0.4074041659373359,
      "grad_norm": 1.0245989458513236,
      "learning_rate": 6.707510790140125e-06,
      "loss": 0.1299,
      "step": 13965
    },
    {
      "epoch": 0.40743333916797947,
      "grad_norm": 0.9824661100612829,
      "learning_rate": 6.707066750635808e-06,
      "loss": 0.1336,
      "step": 13966
    },
    {
      "epoch": 0.407462512398623,
      "grad_norm": 0.6584938280855426,
      "learning_rate": 6.706622695891205e-06,
      "loss": 0.1147,
      "step": 13967
    },
    {
      "epoch": 0.4074916856292666,
      "grad_norm": 1.0934168843934498,
      "learning_rate": 6.7061786259102836e-06,
      "loss": 0.1276,
      "step": 13968
    },
    {
      "epoch": 0.40752085885991013,
      "grad_norm": 1.0310521905153107,
      "learning_rate": 6.705734540697007e-06,
      "loss": 0.1469,
      "step": 13969
    },
    {
      "epoch": 0.4075500320905537,
      "grad_norm": 0.8286522643462688,
      "learning_rate": 6.705290440255339e-06,
      "loss": 0.1515,
      "step": 13970
    },
    {
      "epoch": 0.4075792053211973,
      "grad_norm": 0.9236250938075553,
      "learning_rate": 6.704846324589245e-06,
      "loss": 0.1248,
      "step": 13971
    },
    {
      "epoch": 0.40760837855184084,
      "grad_norm": 1.0354918369542758,
      "learning_rate": 6.704402193702688e-06,
      "loss": 0.1536,
      "step": 13972
    },
    {
      "epoch": 0.4076375517824844,
      "grad_norm": 0.8376302250466713,
      "learning_rate": 6.703958047599638e-06,
      "loss": 0.1354,
      "step": 13973
    },
    {
      "epoch": 0.40766672501312795,
      "grad_norm": 0.8339055544052391,
      "learning_rate": 6.703513886284057e-06,
      "loss": 0.1143,
      "step": 13974
    },
    {
      "epoch": 0.4076958982437715,
      "grad_norm": 0.7567474029921512,
      "learning_rate": 6.703069709759908e-06,
      "loss": 0.1284,
      "step": 13975
    },
    {
      "epoch": 0.40772507147441506,
      "grad_norm": 0.9610262830773799,
      "learning_rate": 6.702625518031163e-06,
      "loss": 0.1504,
      "step": 13976
    },
    {
      "epoch": 0.4077542447050586,
      "grad_norm": 0.8865308435034972,
      "learning_rate": 6.702181311101782e-06,
      "loss": 0.1793,
      "step": 13977
    },
    {
      "epoch": 0.4077834179357022,
      "grad_norm": 0.6574608969593219,
      "learning_rate": 6.7017370889757316e-06,
      "loss": 0.1374,
      "step": 13978
    },
    {
      "epoch": 0.4078125911663458,
      "grad_norm": 1.0120752558987616,
      "learning_rate": 6.701292851656981e-06,
      "loss": 0.136,
      "step": 13979
    },
    {
      "epoch": 0.4078417643969893,
      "grad_norm": 1.9644773530936295,
      "learning_rate": 6.700848599149492e-06,
      "loss": 0.1435,
      "step": 13980
    },
    {
      "epoch": 0.4078709376276329,
      "grad_norm": 1.160227490228442,
      "learning_rate": 6.7004043314572334e-06,
      "loss": 0.1643,
      "step": 13981
    },
    {
      "epoch": 0.40790011085827643,
      "grad_norm": 0.7789937767422863,
      "learning_rate": 6.699960048584171e-06,
      "loss": 0.1537,
      "step": 13982
    },
    {
      "epoch": 0.40792928408892,
      "grad_norm": 0.8362555771905549,
      "learning_rate": 6.699515750534271e-06,
      "loss": 0.162,
      "step": 13983
    },
    {
      "epoch": 0.4079584573195636,
      "grad_norm": 1.1339844282933316,
      "learning_rate": 6.699071437311499e-06,
      "loss": 0.1443,
      "step": 13984
    },
    {
      "epoch": 0.40798763055020715,
      "grad_norm": 0.9865979361972813,
      "learning_rate": 6.6986271089198255e-06,
      "loss": 0.1389,
      "step": 13985
    },
    {
      "epoch": 0.4080168037808507,
      "grad_norm": 0.6972531061018157,
      "learning_rate": 6.698182765363213e-06,
      "loss": 0.1508,
      "step": 13986
    },
    {
      "epoch": 0.40804597701149425,
      "grad_norm": 0.6979498330401717,
      "learning_rate": 6.69773840664563e-06,
      "loss": 0.1378,
      "step": 13987
    },
    {
      "epoch": 0.4080751502421378,
      "grad_norm": 0.8524600649031473,
      "learning_rate": 6.697294032771044e-06,
      "loss": 0.1461,
      "step": 13988
    },
    {
      "epoch": 0.40810432347278136,
      "grad_norm": 0.7584244475405565,
      "learning_rate": 6.696849643743423e-06,
      "loss": 0.1661,
      "step": 13989
    },
    {
      "epoch": 0.4081334967034249,
      "grad_norm": 0.7086604018047018,
      "learning_rate": 6.69640523956673e-06,
      "loss": 0.1431,
      "step": 13990
    },
    {
      "epoch": 0.4081626699340685,
      "grad_norm": 0.829624793440343,
      "learning_rate": 6.69596082024494e-06,
      "loss": 0.1413,
      "step": 13991
    },
    {
      "epoch": 0.4081918431647121,
      "grad_norm": 0.891454242123997,
      "learning_rate": 6.695516385782015e-06,
      "loss": 0.156,
      "step": 13992
    },
    {
      "epoch": 0.40822101639535563,
      "grad_norm": 0.7184089098495967,
      "learning_rate": 6.6950719361819235e-06,
      "loss": 0.1315,
      "step": 13993
    },
    {
      "epoch": 0.4082501896259992,
      "grad_norm": 0.8348011185440816,
      "learning_rate": 6.694627471448637e-06,
      "loss": 0.14,
      "step": 13994
    },
    {
      "epoch": 0.40827936285664274,
      "grad_norm": 0.7968066873687387,
      "learning_rate": 6.694182991586119e-06,
      "loss": 0.1303,
      "step": 13995
    },
    {
      "epoch": 0.4083085360872863,
      "grad_norm": 0.824678742292664,
      "learning_rate": 6.69373849659834e-06,
      "loss": 0.1311,
      "step": 13996
    },
    {
      "epoch": 0.40833770931792984,
      "grad_norm": 1.0167257240538659,
      "learning_rate": 6.693293986489269e-06,
      "loss": 0.1352,
      "step": 13997
    },
    {
      "epoch": 0.40836688254857345,
      "grad_norm": 1.0375429848464421,
      "learning_rate": 6.692849461262871e-06,
      "loss": 0.1482,
      "step": 13998
    },
    {
      "epoch": 0.408396055779217,
      "grad_norm": 0.8255680474348037,
      "learning_rate": 6.692404920923119e-06,
      "loss": 0.1226,
      "step": 13999
    },
    {
      "epoch": 0.40842522900986056,
      "grad_norm": 1.3039704058943398,
      "learning_rate": 6.69196036547398e-06,
      "loss": 0.1601,
      "step": 14000
    },
    {
      "epoch": 0.4084544022405041,
      "grad_norm": 1.0278586590171908,
      "learning_rate": 6.6915157949194235e-06,
      "loss": 0.1416,
      "step": 14001
    },
    {
      "epoch": 0.40848357547114766,
      "grad_norm": 0.8303631477247092,
      "learning_rate": 6.691071209263416e-06,
      "loss": 0.1479,
      "step": 14002
    },
    {
      "epoch": 0.4085127487017912,
      "grad_norm": 1.3375067488748817,
      "learning_rate": 6.690626608509929e-06,
      "loss": 0.1316,
      "step": 14003
    },
    {
      "epoch": 0.40854192193243477,
      "grad_norm": 0.8226352275152343,
      "learning_rate": 6.690181992662932e-06,
      "loss": 0.1434,
      "step": 14004
    },
    {
      "epoch": 0.4085710951630784,
      "grad_norm": 0.8093347680830837,
      "learning_rate": 6.689737361726392e-06,
      "loss": 0.163,
      "step": 14005
    },
    {
      "epoch": 0.40860026839372193,
      "grad_norm": 1.2951534940925888,
      "learning_rate": 6.689292715704282e-06,
      "loss": 0.1386,
      "step": 14006
    },
    {
      "epoch": 0.4086294416243655,
      "grad_norm": 0.946998839121098,
      "learning_rate": 6.6888480546005695e-06,
      "loss": 0.1399,
      "step": 14007
    },
    {
      "epoch": 0.40865861485500904,
      "grad_norm": 0.8178770682230518,
      "learning_rate": 6.688403378419224e-06,
      "loss": 0.1551,
      "step": 14008
    },
    {
      "epoch": 0.4086877880856526,
      "grad_norm": 0.6966696986146201,
      "learning_rate": 6.687958687164217e-06,
      "loss": 0.1274,
      "step": 14009
    },
    {
      "epoch": 0.40871696131629615,
      "grad_norm": 0.7511313940706461,
      "learning_rate": 6.6875139808395175e-06,
      "loss": 0.1575,
      "step": 14010
    },
    {
      "epoch": 0.40874613454693975,
      "grad_norm": 0.9318682540544598,
      "learning_rate": 6.687069259449095e-06,
      "loss": 0.152,
      "step": 14011
    },
    {
      "epoch": 0.4087753077775833,
      "grad_norm": 0.8470311858234206,
      "learning_rate": 6.686624522996922e-06,
      "loss": 0.1517,
      "step": 14012
    },
    {
      "epoch": 0.40880448100822686,
      "grad_norm": 0.7094907056735673,
      "learning_rate": 6.686179771486967e-06,
      "loss": 0.1488,
      "step": 14013
    },
    {
      "epoch": 0.4088336542388704,
      "grad_norm": 0.9403389917136222,
      "learning_rate": 6.685735004923203e-06,
      "loss": 0.1317,
      "step": 14014
    },
    {
      "epoch": 0.40886282746951397,
      "grad_norm": 0.797529144309354,
      "learning_rate": 6.685290223309598e-06,
      "loss": 0.1355,
      "step": 14015
    },
    {
      "epoch": 0.4088920007001575,
      "grad_norm": 0.7167927773370429,
      "learning_rate": 6.684845426650126e-06,
      "loss": 0.1369,
      "step": 14016
    },
    {
      "epoch": 0.4089211739308011,
      "grad_norm": 0.6890553891716754,
      "learning_rate": 6.684400614948754e-06,
      "loss": 0.1257,
      "step": 14017
    },
    {
      "epoch": 0.4089503471614447,
      "grad_norm": 0.7739541831571997,
      "learning_rate": 6.683955788209455e-06,
      "loss": 0.1415,
      "step": 14018
    },
    {
      "epoch": 0.40897952039208824,
      "grad_norm": 1.0386203791338848,
      "learning_rate": 6.6835109464362035e-06,
      "loss": 0.1455,
      "step": 14019
    },
    {
      "epoch": 0.4090086936227318,
      "grad_norm": 0.8341902669434255,
      "learning_rate": 6.683066089632965e-06,
      "loss": 0.1272,
      "step": 14020
    },
    {
      "epoch": 0.40903786685337534,
      "grad_norm": 0.7716372682979363,
      "learning_rate": 6.682621217803718e-06,
      "loss": 0.123,
      "step": 14021
    },
    {
      "epoch": 0.4090670400840189,
      "grad_norm": 0.86523064711934,
      "learning_rate": 6.682176330952428e-06,
      "loss": 0.1428,
      "step": 14022
    },
    {
      "epoch": 0.40909621331466245,
      "grad_norm": 0.9150572348856957,
      "learning_rate": 6.681731429083068e-06,
      "loss": 0.1674,
      "step": 14023
    },
    {
      "epoch": 0.409125386545306,
      "grad_norm": 0.8572875063355055,
      "learning_rate": 6.681286512199614e-06,
      "loss": 0.1531,
      "step": 14024
    },
    {
      "epoch": 0.4091545597759496,
      "grad_norm": 0.8999123305672735,
      "learning_rate": 6.680841580306035e-06,
      "loss": 0.1212,
      "step": 14025
    },
    {
      "epoch": 0.40918373300659316,
      "grad_norm": 0.7772689415316842,
      "learning_rate": 6.6803966334063035e-06,
      "loss": 0.1492,
      "step": 14026
    },
    {
      "epoch": 0.4092129062372367,
      "grad_norm": 1.009921908207947,
      "learning_rate": 6.67995167150439e-06,
      "loss": 0.1471,
      "step": 14027
    },
    {
      "epoch": 0.40924207946788027,
      "grad_norm": 1.6112817363116887,
      "learning_rate": 6.679506694604271e-06,
      "loss": 0.1364,
      "step": 14028
    },
    {
      "epoch": 0.4092712526985238,
      "grad_norm": 1.0200856189028351,
      "learning_rate": 6.679061702709916e-06,
      "loss": 0.1575,
      "step": 14029
    },
    {
      "epoch": 0.4093004259291674,
      "grad_norm": 0.7929669231238768,
      "learning_rate": 6.6786166958253e-06,
      "loss": 0.1415,
      "step": 14030
    },
    {
      "epoch": 0.40932959915981093,
      "grad_norm": 0.7426867630516113,
      "learning_rate": 6.678171673954394e-06,
      "loss": 0.1445,
      "step": 14031
    },
    {
      "epoch": 0.40935877239045454,
      "grad_norm": 0.7855405902335418,
      "learning_rate": 6.677726637101172e-06,
      "loss": 0.1343,
      "step": 14032
    },
    {
      "epoch": 0.4093879456210981,
      "grad_norm": 0.9421841198000807,
      "learning_rate": 6.677281585269607e-06,
      "loss": 0.1424,
      "step": 14033
    },
    {
      "epoch": 0.40941711885174165,
      "grad_norm": 0.6584107554121822,
      "learning_rate": 6.676836518463674e-06,
      "loss": 0.1483,
      "step": 14034
    },
    {
      "epoch": 0.4094462920823852,
      "grad_norm": 0.8858738294690848,
      "learning_rate": 6.676391436687343e-06,
      "loss": 0.1567,
      "step": 14035
    },
    {
      "epoch": 0.40947546531302875,
      "grad_norm": 0.881739182410847,
      "learning_rate": 6.67594633994459e-06,
      "loss": 0.1418,
      "step": 14036
    },
    {
      "epoch": 0.4095046385436723,
      "grad_norm": 0.870955021038668,
      "learning_rate": 6.67550122823939e-06,
      "loss": 0.1675,
      "step": 14037
    },
    {
      "epoch": 0.4095338117743159,
      "grad_norm": 1.054631251684788,
      "learning_rate": 6.675056101575711e-06,
      "loss": 0.146,
      "step": 14038
    },
    {
      "epoch": 0.40956298500495947,
      "grad_norm": 0.7255648835603584,
      "learning_rate": 6.674610959957535e-06,
      "loss": 0.1225,
      "step": 14039
    },
    {
      "epoch": 0.409592158235603,
      "grad_norm": 0.9959082762794741,
      "learning_rate": 6.67416580338883e-06,
      "loss": 0.1515,
      "step": 14040
    },
    {
      "epoch": 0.4096213314662466,
      "grad_norm": 0.7235494032116029,
      "learning_rate": 6.673720631873572e-06,
      "loss": 0.1448,
      "step": 14041
    },
    {
      "epoch": 0.4096505046968901,
      "grad_norm": 0.8384810544456339,
      "learning_rate": 6.673275445415736e-06,
      "loss": 0.1511,
      "step": 14042
    },
    {
      "epoch": 0.4096796779275337,
      "grad_norm": 0.7629219454266113,
      "learning_rate": 6.672830244019297e-06,
      "loss": 0.1502,
      "step": 14043
    },
    {
      "epoch": 0.40970885115817723,
      "grad_norm": 0.7565425254724589,
      "learning_rate": 6.6723850276882285e-06,
      "loss": 0.1356,
      "step": 14044
    },
    {
      "epoch": 0.40973802438882084,
      "grad_norm": 0.8899515804339725,
      "learning_rate": 6.671939796426507e-06,
      "loss": 0.1403,
      "step": 14045
    },
    {
      "epoch": 0.4097671976194644,
      "grad_norm": 0.7793189627037008,
      "learning_rate": 6.671494550238105e-06,
      "loss": 0.1241,
      "step": 14046
    },
    {
      "epoch": 0.40979637085010795,
      "grad_norm": 0.6827247511238914,
      "learning_rate": 6.671049289126997e-06,
      "loss": 0.1348,
      "step": 14047
    },
    {
      "epoch": 0.4098255440807515,
      "grad_norm": 1.9911398154900386,
      "learning_rate": 6.670604013097162e-06,
      "loss": 0.1434,
      "step": 14048
    },
    {
      "epoch": 0.40985471731139506,
      "grad_norm": 0.9247532299116882,
      "learning_rate": 6.670158722152574e-06,
      "loss": 0.1661,
      "step": 14049
    },
    {
      "epoch": 0.4098838905420386,
      "grad_norm": 0.8381483537966381,
      "learning_rate": 6.669713416297205e-06,
      "loss": 0.1287,
      "step": 14050
    },
    {
      "epoch": 0.40991306377268216,
      "grad_norm": 0.8890645281254,
      "learning_rate": 6.669268095535035e-06,
      "loss": 0.1566,
      "step": 14051
    },
    {
      "epoch": 0.40994223700332577,
      "grad_norm": 0.7941016350632227,
      "learning_rate": 6.668822759870037e-06,
      "loss": 0.1402,
      "step": 14052
    },
    {
      "epoch": 0.4099714102339693,
      "grad_norm": 0.7829813544891336,
      "learning_rate": 6.668377409306188e-06,
      "loss": 0.1505,
      "step": 14053
    },
    {
      "epoch": 0.4100005834646129,
      "grad_norm": 0.98636950730738,
      "learning_rate": 6.6679320438474645e-06,
      "loss": 0.1397,
      "step": 14054
    },
    {
      "epoch": 0.41002975669525643,
      "grad_norm": 0.8465746512410242,
      "learning_rate": 6.667486663497842e-06,
      "loss": 0.1855,
      "step": 14055
    },
    {
      "epoch": 0.4100589299259,
      "grad_norm": 0.831950303805014,
      "learning_rate": 6.667041268261295e-06,
      "loss": 0.1486,
      "step": 14056
    },
    {
      "epoch": 0.41008810315654354,
      "grad_norm": 0.8592891237764462,
      "learning_rate": 6.6665958581418025e-06,
      "loss": 0.1418,
      "step": 14057
    },
    {
      "epoch": 0.4101172763871871,
      "grad_norm": 0.8936015891060176,
      "learning_rate": 6.66615043314334e-06,
      "loss": 0.1677,
      "step": 14058
    },
    {
      "epoch": 0.4101464496178307,
      "grad_norm": 0.7843285603489508,
      "learning_rate": 6.665704993269884e-06,
      "loss": 0.131,
      "step": 14059
    },
    {
      "epoch": 0.41017562284847425,
      "grad_norm": 1.0010218166116722,
      "learning_rate": 6.665259538525413e-06,
      "loss": 0.1392,
      "step": 14060
    },
    {
      "epoch": 0.4102047960791178,
      "grad_norm": 0.6198236187489929,
      "learning_rate": 6.664814068913901e-06,
      "loss": 0.1254,
      "step": 14061
    },
    {
      "epoch": 0.41023396930976136,
      "grad_norm": 0.8065200834730276,
      "learning_rate": 6.664368584439326e-06,
      "loss": 0.1636,
      "step": 14062
    },
    {
      "epoch": 0.4102631425404049,
      "grad_norm": 0.9592785676171229,
      "learning_rate": 6.663923085105666e-06,
      "loss": 0.1471,
      "step": 14063
    },
    {
      "epoch": 0.41029231577104847,
      "grad_norm": 0.829300522966816,
      "learning_rate": 6.663477570916898e-06,
      "loss": 0.1339,
      "step": 14064
    },
    {
      "epoch": 0.4103214890016921,
      "grad_norm": 0.685073210625489,
      "learning_rate": 6.663032041876999e-06,
      "loss": 0.127,
      "step": 14065
    },
    {
      "epoch": 0.41035066223233563,
      "grad_norm": 1.0905391998464318,
      "learning_rate": 6.662586497989948e-06,
      "loss": 0.1348,
      "step": 14066
    },
    {
      "epoch": 0.4103798354629792,
      "grad_norm": 0.8738693239342453,
      "learning_rate": 6.66214093925972e-06,
      "loss": 0.1454,
      "step": 14067
    },
    {
      "epoch": 0.41040900869362273,
      "grad_norm": 0.7284334938180899,
      "learning_rate": 6.661695365690295e-06,
      "loss": 0.1296,
      "step": 14068
    },
    {
      "epoch": 0.4104381819242663,
      "grad_norm": 0.8823968510654029,
      "learning_rate": 6.661249777285652e-06,
      "loss": 0.1495,
      "step": 14069
    },
    {
      "epoch": 0.41046735515490984,
      "grad_norm": 1.1569356128860702,
      "learning_rate": 6.6608041740497665e-06,
      "loss": 0.1479,
      "step": 14070
    },
    {
      "epoch": 0.4104965283855534,
      "grad_norm": 0.7709293904305606,
      "learning_rate": 6.660358555986617e-06,
      "loss": 0.1469,
      "step": 14071
    },
    {
      "epoch": 0.410525701616197,
      "grad_norm": 1.1563332843555503,
      "learning_rate": 6.659912923100184e-06,
      "loss": 0.1276,
      "step": 14072
    },
    {
      "epoch": 0.41055487484684056,
      "grad_norm": 1.0544475281449661,
      "learning_rate": 6.659467275394443e-06,
      "loss": 0.1611,
      "step": 14073
    },
    {
      "epoch": 0.4105840480774841,
      "grad_norm": 0.7960526210038408,
      "learning_rate": 6.659021612873375e-06,
      "loss": 0.1441,
      "step": 14074
    },
    {
      "epoch": 0.41061322130812766,
      "grad_norm": 0.6788930089530776,
      "learning_rate": 6.658575935540958e-06,
      "loss": 0.1473,
      "step": 14075
    },
    {
      "epoch": 0.4106423945387712,
      "grad_norm": 0.7889712563877609,
      "learning_rate": 6.658130243401173e-06,
      "loss": 0.1659,
      "step": 14076
    },
    {
      "epoch": 0.41067156776941477,
      "grad_norm": 0.9817376923813845,
      "learning_rate": 6.6576845364579946e-06,
      "loss": 0.1658,
      "step": 14077
    },
    {
      "epoch": 0.4107007410000583,
      "grad_norm": 0.8720322947938877,
      "learning_rate": 6.657238814715406e-06,
      "loss": 0.1385,
      "step": 14078
    },
    {
      "epoch": 0.41072991423070193,
      "grad_norm": 0.7252926333411631,
      "learning_rate": 6.656793078177384e-06,
      "loss": 0.1358,
      "step": 14079
    },
    {
      "epoch": 0.4107590874613455,
      "grad_norm": 0.9131611531639156,
      "learning_rate": 6.656347326847907e-06,
      "loss": 0.1743,
      "step": 14080
    },
    {
      "epoch": 0.41078826069198904,
      "grad_norm": 1.0655033481842031,
      "learning_rate": 6.65590156073096e-06,
      "loss": 0.134,
      "step": 14081
    },
    {
      "epoch": 0.4108174339226326,
      "grad_norm": 0.758471771459251,
      "learning_rate": 6.655455779830517e-06,
      "loss": 0.1321,
      "step": 14082
    },
    {
      "epoch": 0.41084660715327614,
      "grad_norm": 0.7336107284578407,
      "learning_rate": 6.65500998415056e-06,
      "loss": 0.1508,
      "step": 14083
    },
    {
      "epoch": 0.4108757803839197,
      "grad_norm": 0.9340191362850672,
      "learning_rate": 6.65456417369507e-06,
      "loss": 0.1252,
      "step": 14084
    },
    {
      "epoch": 0.41090495361456325,
      "grad_norm": 0.9134081318565378,
      "learning_rate": 6.654118348468026e-06,
      "loss": 0.1506,
      "step": 14085
    },
    {
      "epoch": 0.41093412684520686,
      "grad_norm": 0.8297543236724341,
      "learning_rate": 6.653672508473408e-06,
      "loss": 0.1541,
      "step": 14086
    },
    {
      "epoch": 0.4109633000758504,
      "grad_norm": 0.9345170815107622,
      "learning_rate": 6.653226653715197e-06,
      "loss": 0.1435,
      "step": 14087
    },
    {
      "epoch": 0.41099247330649397,
      "grad_norm": 0.9732913954697464,
      "learning_rate": 6.652780784197371e-06,
      "loss": 0.132,
      "step": 14088
    },
    {
      "epoch": 0.4110216465371375,
      "grad_norm": 0.8123372090511947,
      "learning_rate": 6.652334899923914e-06,
      "loss": 0.1371,
      "step": 14089
    },
    {
      "epoch": 0.4110508197677811,
      "grad_norm": 0.8663632495507103,
      "learning_rate": 6.651889000898807e-06,
      "loss": 0.1317,
      "step": 14090
    },
    {
      "epoch": 0.4110799929984246,
      "grad_norm": 0.7733858395994007,
      "learning_rate": 6.651443087126028e-06,
      "loss": 0.1443,
      "step": 14091
    },
    {
      "epoch": 0.41110916622906823,
      "grad_norm": 0.8989077850530516,
      "learning_rate": 6.650997158609559e-06,
      "loss": 0.1538,
      "step": 14092
    },
    {
      "epoch": 0.4111383394597118,
      "grad_norm": 0.8221939205733133,
      "learning_rate": 6.650551215353381e-06,
      "loss": 0.1374,
      "step": 14093
    },
    {
      "epoch": 0.41116751269035534,
      "grad_norm": 0.7128271885855784,
      "learning_rate": 6.650105257361478e-06,
      "loss": 0.1489,
      "step": 14094
    },
    {
      "epoch": 0.4111966859209989,
      "grad_norm": 0.8325895993213441,
      "learning_rate": 6.649659284637826e-06,
      "loss": 0.148,
      "step": 14095
    },
    {
      "epoch": 0.41122585915164245,
      "grad_norm": 0.7752183889670637,
      "learning_rate": 6.649213297186413e-06,
      "loss": 0.1383,
      "step": 14096
    },
    {
      "epoch": 0.411255032382286,
      "grad_norm": 1.1079170416632855,
      "learning_rate": 6.648767295011216e-06,
      "loss": 0.1705,
      "step": 14097
    },
    {
      "epoch": 0.41128420561292955,
      "grad_norm": 0.9474382692371365,
      "learning_rate": 6.648321278116216e-06,
      "loss": 0.1579,
      "step": 14098
    },
    {
      "epoch": 0.41131337884357316,
      "grad_norm": 1.0019968475051197,
      "learning_rate": 6.6478752465054005e-06,
      "loss": 0.1465,
      "step": 14099
    },
    {
      "epoch": 0.4113425520742167,
      "grad_norm": 0.8485983597306349,
      "learning_rate": 6.6474292001827475e-06,
      "loss": 0.127,
      "step": 14100
    },
    {
      "epoch": 0.41137172530486027,
      "grad_norm": 0.9700884182027817,
      "learning_rate": 6.646983139152239e-06,
      "loss": 0.1749,
      "step": 14101
    },
    {
      "epoch": 0.4114008985355038,
      "grad_norm": 1.0427712116214656,
      "learning_rate": 6.646537063417858e-06,
      "loss": 0.1512,
      "step": 14102
    },
    {
      "epoch": 0.4114300717661474,
      "grad_norm": 0.866105005706269,
      "learning_rate": 6.646090972983588e-06,
      "loss": 0.1433,
      "step": 14103
    },
    {
      "epoch": 0.41145924499679093,
      "grad_norm": 0.8438360414774141,
      "learning_rate": 6.64564486785341e-06,
      "loss": 0.1414,
      "step": 14104
    },
    {
      "epoch": 0.4114884182274345,
      "grad_norm": 1.0502680853296236,
      "learning_rate": 6.6451987480313085e-06,
      "loss": 0.1477,
      "step": 14105
    },
    {
      "epoch": 0.4115175914580781,
      "grad_norm": 0.8129688190336931,
      "learning_rate": 6.644752613521266e-06,
      "loss": 0.1629,
      "step": 14106
    },
    {
      "epoch": 0.41154676468872164,
      "grad_norm": 0.7820296113578517,
      "learning_rate": 6.644306464327261e-06,
      "loss": 0.1461,
      "step": 14107
    },
    {
      "epoch": 0.4115759379193652,
      "grad_norm": 0.7590526242828608,
      "learning_rate": 6.643860300453283e-06,
      "loss": 0.1413,
      "step": 14108
    },
    {
      "epoch": 0.41160511115000875,
      "grad_norm": 0.7411650521546309,
      "learning_rate": 6.643414121903313e-06,
      "loss": 0.1463,
      "step": 14109
    },
    {
      "epoch": 0.4116342843806523,
      "grad_norm": 0.8121287706621918,
      "learning_rate": 6.642967928681333e-06,
      "loss": 0.1591,
      "step": 14110
    },
    {
      "epoch": 0.41166345761129586,
      "grad_norm": 0.7481389329202898,
      "learning_rate": 6.64252172079133e-06,
      "loss": 0.1483,
      "step": 14111
    },
    {
      "epoch": 0.4116926308419394,
      "grad_norm": 0.7614692513847493,
      "learning_rate": 6.642075498237283e-06,
      "loss": 0.1315,
      "step": 14112
    },
    {
      "epoch": 0.411721804072583,
      "grad_norm": 0.7969165419936783,
      "learning_rate": 6.641629261023177e-06,
      "loss": 0.1459,
      "step": 14113
    },
    {
      "epoch": 0.4117509773032266,
      "grad_norm": 0.7107974912860358,
      "learning_rate": 6.6411830091529984e-06,
      "loss": 0.1466,
      "step": 14114
    },
    {
      "epoch": 0.4117801505338701,
      "grad_norm": 0.891300069827346,
      "learning_rate": 6.640736742630729e-06,
      "loss": 0.1385,
      "step": 14115
    },
    {
      "epoch": 0.4118093237645137,
      "grad_norm": 0.8454777967311086,
      "learning_rate": 6.6402904614603546e-06,
      "loss": 0.1702,
      "step": 14116
    },
    {
      "epoch": 0.41183849699515723,
      "grad_norm": 0.7000881618005538,
      "learning_rate": 6.639844165645858e-06,
      "loss": 0.149,
      "step": 14117
    },
    {
      "epoch": 0.4118676702258008,
      "grad_norm": 0.8146304024869583,
      "learning_rate": 6.639397855191223e-06,
      "loss": 0.1594,
      "step": 14118
    },
    {
      "epoch": 0.41189684345644434,
      "grad_norm": 0.7466521834770206,
      "learning_rate": 6.638951530100437e-06,
      "loss": 0.137,
      "step": 14119
    },
    {
      "epoch": 0.41192601668708795,
      "grad_norm": 0.8587020813308973,
      "learning_rate": 6.638505190377482e-06,
      "loss": 0.1641,
      "step": 14120
    },
    {
      "epoch": 0.4119551899177315,
      "grad_norm": 0.8324195692325999,
      "learning_rate": 6.6380588360263455e-06,
      "loss": 0.1492,
      "step": 14121
    },
    {
      "epoch": 0.41198436314837505,
      "grad_norm": 0.7463544353546875,
      "learning_rate": 6.637612467051008e-06,
      "loss": 0.1268,
      "step": 14122
    },
    {
      "epoch": 0.4120135363790186,
      "grad_norm": 0.8463558671027969,
      "learning_rate": 6.6371660834554586e-06,
      "loss": 0.1402,
      "step": 14123
    },
    {
      "epoch": 0.41204270960966216,
      "grad_norm": 0.8271932931175923,
      "learning_rate": 6.6367196852436826e-06,
      "loss": 0.1337,
      "step": 14124
    },
    {
      "epoch": 0.4120718828403057,
      "grad_norm": 0.7450770554727912,
      "learning_rate": 6.636273272419661e-06,
      "loss": 0.1491,
      "step": 14125
    },
    {
      "epoch": 0.4121010560709493,
      "grad_norm": 0.757882374639118,
      "learning_rate": 6.635826844987385e-06,
      "loss": 0.1154,
      "step": 14126
    },
    {
      "epoch": 0.4121302293015929,
      "grad_norm": 0.7802016560566676,
      "learning_rate": 6.6353804029508376e-06,
      "loss": 0.1556,
      "step": 14127
    },
    {
      "epoch": 0.41215940253223643,
      "grad_norm": 0.7780001687262968,
      "learning_rate": 6.634933946314002e-06,
      "loss": 0.1251,
      "step": 14128
    },
    {
      "epoch": 0.41218857576288,
      "grad_norm": 1.3313772094344312,
      "learning_rate": 6.634487475080867e-06,
      "loss": 0.1476,
      "step": 14129
    },
    {
      "epoch": 0.41221774899352354,
      "grad_norm": 1.0211085561052402,
      "learning_rate": 6.634040989255419e-06,
      "loss": 0.1289,
      "step": 14130
    },
    {
      "epoch": 0.4122469222241671,
      "grad_norm": 0.966672387384863,
      "learning_rate": 6.633594488841642e-06,
      "loss": 0.1565,
      "step": 14131
    },
    {
      "epoch": 0.41227609545481064,
      "grad_norm": 0.8019155350181045,
      "learning_rate": 6.633147973843525e-06,
      "loss": 0.1419,
      "step": 14132
    },
    {
      "epoch": 0.41230526868545425,
      "grad_norm": 0.8361654518518425,
      "learning_rate": 6.632701444265052e-06,
      "loss": 0.1281,
      "step": 14133
    },
    {
      "epoch": 0.4123344419160978,
      "grad_norm": 1.112056236003766,
      "learning_rate": 6.632254900110209e-06,
      "loss": 0.1575,
      "step": 14134
    },
    {
      "epoch": 0.41236361514674136,
      "grad_norm": 0.8007026380726106,
      "learning_rate": 6.631808341382986e-06,
      "loss": 0.1367,
      "step": 14135
    },
    {
      "epoch": 0.4123927883773849,
      "grad_norm": 0.8146151167565929,
      "learning_rate": 6.631361768087368e-06,
      "loss": 0.1343,
      "step": 14136
    },
    {
      "epoch": 0.41242196160802846,
      "grad_norm": 0.8897923106760964,
      "learning_rate": 6.630915180227338e-06,
      "loss": 0.1306,
      "step": 14137
    },
    {
      "epoch": 0.412451134838672,
      "grad_norm": 0.705878373741667,
      "learning_rate": 6.630468577806889e-06,
      "loss": 0.1419,
      "step": 14138
    },
    {
      "epoch": 0.41248030806931557,
      "grad_norm": 0.6555732784778344,
      "learning_rate": 6.630021960830007e-06,
      "loss": 0.1403,
      "step": 14139
    },
    {
      "epoch": 0.4125094812999592,
      "grad_norm": 0.7100573139655991,
      "learning_rate": 6.6295753293006745e-06,
      "loss": 0.1513,
      "step": 14140
    },
    {
      "epoch": 0.41253865453060273,
      "grad_norm": 0.7408558010398478,
      "learning_rate": 6.629128683222886e-06,
      "loss": 0.1658,
      "step": 14141
    },
    {
      "epoch": 0.4125678277612463,
      "grad_norm": 0.8762724704427244,
      "learning_rate": 6.628682022600624e-06,
      "loss": 0.1433,
      "step": 14142
    },
    {
      "epoch": 0.41259700099188984,
      "grad_norm": 0.8278080142316044,
      "learning_rate": 6.628235347437878e-06,
      "loss": 0.1414,
      "step": 14143
    },
    {
      "epoch": 0.4126261742225334,
      "grad_norm": 0.7474656768972003,
      "learning_rate": 6.627788657738635e-06,
      "loss": 0.1421,
      "step": 14144
    },
    {
      "epoch": 0.41265534745317695,
      "grad_norm": 0.8289650710922033,
      "learning_rate": 6.627341953506884e-06,
      "loss": 0.149,
      "step": 14145
    },
    {
      "epoch": 0.4126845206838205,
      "grad_norm": 0.7614300428490463,
      "learning_rate": 6.6268952347466124e-06,
      "loss": 0.1202,
      "step": 14146
    },
    {
      "epoch": 0.4127136939144641,
      "grad_norm": 0.7006325906898112,
      "learning_rate": 6.6264485014618086e-06,
      "loss": 0.1455,
      "step": 14147
    },
    {
      "epoch": 0.41274286714510766,
      "grad_norm": 0.7188461142267647,
      "learning_rate": 6.62600175365646e-06,
      "loss": 0.1263,
      "step": 14148
    },
    {
      "epoch": 0.4127720403757512,
      "grad_norm": 0.8691992835050434,
      "learning_rate": 6.6255549913345564e-06,
      "loss": 0.1604,
      "step": 14149
    },
    {
      "epoch": 0.41280121360639477,
      "grad_norm": 0.7837695638760147,
      "learning_rate": 6.625108214500086e-06,
      "loss": 0.1374,
      "step": 14150
    },
    {
      "epoch": 0.4128303868370383,
      "grad_norm": 0.7637990297262751,
      "learning_rate": 6.624661423157038e-06,
      "loss": 0.1407,
      "step": 14151
    },
    {
      "epoch": 0.4128595600676819,
      "grad_norm": 0.9888144909637261,
      "learning_rate": 6.624214617309399e-06,
      "loss": 0.1278,
      "step": 14152
    },
    {
      "epoch": 0.4128887332983255,
      "grad_norm": 0.9697521391268682,
      "learning_rate": 6.623767796961161e-06,
      "loss": 0.1527,
      "step": 14153
    },
    {
      "epoch": 0.41291790652896904,
      "grad_norm": 0.9551915536592764,
      "learning_rate": 6.623320962116312e-06,
      "loss": 0.152,
      "step": 14154
    },
    {
      "epoch": 0.4129470797596126,
      "grad_norm": 0.8308831093437837,
      "learning_rate": 6.62287411277884e-06,
      "loss": 0.1397,
      "step": 14155
    },
    {
      "epoch": 0.41297625299025614,
      "grad_norm": 1.2305136064429363,
      "learning_rate": 6.622427248952736e-06,
      "loss": 0.1501,
      "step": 14156
    },
    {
      "epoch": 0.4130054262208997,
      "grad_norm": 1.108882331570643,
      "learning_rate": 6.621980370641988e-06,
      "loss": 0.1297,
      "step": 14157
    },
    {
      "epoch": 0.41303459945154325,
      "grad_norm": 0.8715651073385032,
      "learning_rate": 6.621533477850588e-06,
      "loss": 0.1454,
      "step": 14158
    },
    {
      "epoch": 0.4130637726821868,
      "grad_norm": 0.9190812515719307,
      "learning_rate": 6.621086570582523e-06,
      "loss": 0.1424,
      "step": 14159
    },
    {
      "epoch": 0.4130929459128304,
      "grad_norm": 0.8071557816911274,
      "learning_rate": 6.6206396488417835e-06,
      "loss": 0.1179,
      "step": 14160
    },
    {
      "epoch": 0.41312211914347396,
      "grad_norm": 0.9930843256093073,
      "learning_rate": 6.620192712632361e-06,
      "loss": 0.1329,
      "step": 14161
    },
    {
      "epoch": 0.4131512923741175,
      "grad_norm": 0.7460166055800551,
      "learning_rate": 6.619745761958245e-06,
      "loss": 0.1516,
      "step": 14162
    },
    {
      "epoch": 0.41318046560476107,
      "grad_norm": 0.914321139545343,
      "learning_rate": 6.619298796823426e-06,
      "loss": 0.1269,
      "step": 14163
    },
    {
      "epoch": 0.4132096388354046,
      "grad_norm": 0.808775444438091,
      "learning_rate": 6.6188518172318925e-06,
      "loss": 0.1492,
      "step": 14164
    },
    {
      "epoch": 0.4132388120660482,
      "grad_norm": 0.8998984093268817,
      "learning_rate": 6.6184048231876375e-06,
      "loss": 0.1353,
      "step": 14165
    },
    {
      "epoch": 0.41326798529669173,
      "grad_norm": 0.8507902726391359,
      "learning_rate": 6.61795781469465e-06,
      "loss": 0.1577,
      "step": 14166
    },
    {
      "epoch": 0.41329715852733534,
      "grad_norm": 0.9064598430984236,
      "learning_rate": 6.61751079175692e-06,
      "loss": 0.1189,
      "step": 14167
    },
    {
      "epoch": 0.4133263317579789,
      "grad_norm": 0.7571406762091855,
      "learning_rate": 6.617063754378442e-06,
      "loss": 0.1302,
      "step": 14168
    },
    {
      "epoch": 0.41335550498862245,
      "grad_norm": 0.8348164600558217,
      "learning_rate": 6.616616702563204e-06,
      "loss": 0.147,
      "step": 14169
    },
    {
      "epoch": 0.413384678219266,
      "grad_norm": 0.933216345129164,
      "learning_rate": 6.6161696363151986e-06,
      "loss": 0.1358,
      "step": 14170
    },
    {
      "epoch": 0.41341385144990955,
      "grad_norm": 0.9152142623685499,
      "learning_rate": 6.615722555638416e-06,
      "loss": 0.1344,
      "step": 14171
    },
    {
      "epoch": 0.4134430246805531,
      "grad_norm": 0.9600133495594002,
      "learning_rate": 6.615275460536847e-06,
      "loss": 0.1343,
      "step": 14172
    },
    {
      "epoch": 0.41347219791119666,
      "grad_norm": 1.096416770061301,
      "learning_rate": 6.614828351014487e-06,
      "loss": 0.1331,
      "step": 14173
    },
    {
      "epoch": 0.41350137114184027,
      "grad_norm": 0.8320915241840282,
      "learning_rate": 6.614381227075323e-06,
      "loss": 0.1345,
      "step": 14174
    },
    {
      "epoch": 0.4135305443724838,
      "grad_norm": 0.88374329390917,
      "learning_rate": 6.613934088723349e-06,
      "loss": 0.1344,
      "step": 14175
    },
    {
      "epoch": 0.4135597176031274,
      "grad_norm": 1.157412923880576,
      "learning_rate": 6.613486935962556e-06,
      "loss": 0.142,
      "step": 14176
    },
    {
      "epoch": 0.41358889083377093,
      "grad_norm": 1.0243470901503362,
      "learning_rate": 6.613039768796938e-06,
      "loss": 0.1504,
      "step": 14177
    },
    {
      "epoch": 0.4136180640644145,
      "grad_norm": 0.8814405839444529,
      "learning_rate": 6.6125925872304865e-06,
      "loss": 0.1441,
      "step": 14178
    },
    {
      "epoch": 0.41364723729505803,
      "grad_norm": 1.2422971184799716,
      "learning_rate": 6.612145391267192e-06,
      "loss": 0.1579,
      "step": 14179
    },
    {
      "epoch": 0.41367641052570164,
      "grad_norm": 1.04550647310779,
      "learning_rate": 6.611698180911048e-06,
      "loss": 0.1321,
      "step": 14180
    },
    {
      "epoch": 0.4137055837563452,
      "grad_norm": 1.1812782521171592,
      "learning_rate": 6.611250956166049e-06,
      "loss": 0.1289,
      "step": 14181
    },
    {
      "epoch": 0.41373475698698875,
      "grad_norm": 0.8090381359730179,
      "learning_rate": 6.610803717036185e-06,
      "loss": 0.1316,
      "step": 14182
    },
    {
      "epoch": 0.4137639302176323,
      "grad_norm": 0.9203050921883699,
      "learning_rate": 6.6103564635254505e-06,
      "loss": 0.1409,
      "step": 14183
    },
    {
      "epoch": 0.41379310344827586,
      "grad_norm": 0.8137031224583076,
      "learning_rate": 6.609909195637837e-06,
      "loss": 0.1416,
      "step": 14184
    },
    {
      "epoch": 0.4138222766789194,
      "grad_norm": 0.8290285424710755,
      "learning_rate": 6.60946191337734e-06,
      "loss": 0.1514,
      "step": 14185
    },
    {
      "epoch": 0.41385144990956296,
      "grad_norm": 0.7854435898787894,
      "learning_rate": 6.609014616747951e-06,
      "loss": 0.1499,
      "step": 14186
    },
    {
      "epoch": 0.41388062314020657,
      "grad_norm": 0.7491527552813726,
      "learning_rate": 6.608567305753661e-06,
      "loss": 0.1494,
      "step": 14187
    },
    {
      "epoch": 0.4139097963708501,
      "grad_norm": 0.8043017259004319,
      "learning_rate": 6.60811998039847e-06,
      "loss": 0.1351,
      "step": 14188
    },
    {
      "epoch": 0.4139389696014937,
      "grad_norm": 0.6789911228331924,
      "learning_rate": 6.607672640686365e-06,
      "loss": 0.1221,
      "step": 14189
    },
    {
      "epoch": 0.41396814283213723,
      "grad_norm": 0.8191645199722037,
      "learning_rate": 6.607225286621342e-06,
      "loss": 0.1367,
      "step": 14190
    },
    {
      "epoch": 0.4139973160627808,
      "grad_norm": 0.7379204395749995,
      "learning_rate": 6.6067779182073974e-06,
      "loss": 0.1267,
      "step": 14191
    },
    {
      "epoch": 0.41402648929342434,
      "grad_norm": 0.951248367738476,
      "learning_rate": 6.606330535448523e-06,
      "loss": 0.1474,
      "step": 14192
    },
    {
      "epoch": 0.4140556625240679,
      "grad_norm": 0.7327792998003283,
      "learning_rate": 6.605883138348712e-06,
      "loss": 0.1197,
      "step": 14193
    },
    {
      "epoch": 0.4140848357547115,
      "grad_norm": 0.7894826381083477,
      "learning_rate": 6.605435726911959e-06,
      "loss": 0.1226,
      "step": 14194
    },
    {
      "epoch": 0.41411400898535505,
      "grad_norm": 0.9321632495459524,
      "learning_rate": 6.604988301142261e-06,
      "loss": 0.1353,
      "step": 14195
    },
    {
      "epoch": 0.4141431822159986,
      "grad_norm": 0.7572130205053703,
      "learning_rate": 6.604540861043609e-06,
      "loss": 0.1309,
      "step": 14196
    },
    {
      "epoch": 0.41417235544664216,
      "grad_norm": 0.7593881376595455,
      "learning_rate": 6.60409340662e-06,
      "loss": 0.1374,
      "step": 14197
    },
    {
      "epoch": 0.4142015286772857,
      "grad_norm": 0.7482341088492079,
      "learning_rate": 6.603645937875428e-06,
      "loss": 0.129,
      "step": 14198
    },
    {
      "epoch": 0.41423070190792927,
      "grad_norm": 0.8130791401643425,
      "learning_rate": 6.603198454813888e-06,
      "loss": 0.1208,
      "step": 14199
    },
    {
      "epoch": 0.4142598751385728,
      "grad_norm": 0.8647814715625625,
      "learning_rate": 6.602750957439374e-06,
      "loss": 0.151,
      "step": 14200
    },
    {
      "epoch": 0.41428904836921643,
      "grad_norm": 0.7238282092367369,
      "learning_rate": 6.6023034457558846e-06,
      "loss": 0.1615,
      "step": 14201
    },
    {
      "epoch": 0.41431822159986,
      "grad_norm": 0.6866369237153953,
      "learning_rate": 6.6018559197674094e-06,
      "loss": 0.1333,
      "step": 14202
    },
    {
      "epoch": 0.41434739483050353,
      "grad_norm": 0.7287680754131481,
      "learning_rate": 6.601408379477949e-06,
      "loss": 0.1366,
      "step": 14203
    },
    {
      "epoch": 0.4143765680611471,
      "grad_norm": 0.6889348026106697,
      "learning_rate": 6.600960824891496e-06,
      "loss": 0.1266,
      "step": 14204
    },
    {
      "epoch": 0.41440574129179064,
      "grad_norm": 0.7416949954049658,
      "learning_rate": 6.600513256012047e-06,
      "loss": 0.1528,
      "step": 14205
    },
    {
      "epoch": 0.4144349145224342,
      "grad_norm": 0.9181624808884554,
      "learning_rate": 6.600065672843597e-06,
      "loss": 0.1505,
      "step": 14206
    },
    {
      "epoch": 0.4144640877530778,
      "grad_norm": 0.7794913680494193,
      "learning_rate": 6.599618075390144e-06,
      "loss": 0.1306,
      "step": 14207
    },
    {
      "epoch": 0.41449326098372136,
      "grad_norm": 0.9401234257825831,
      "learning_rate": 6.599170463655682e-06,
      "loss": 0.1277,
      "step": 14208
    },
    {
      "epoch": 0.4145224342143649,
      "grad_norm": 0.7856146179718172,
      "learning_rate": 6.598722837644208e-06,
      "loss": 0.1561,
      "step": 14209
    },
    {
      "epoch": 0.41455160744500846,
      "grad_norm": 0.8748548482321112,
      "learning_rate": 6.5982751973597185e-06,
      "loss": 0.1282,
      "step": 14210
    },
    {
      "epoch": 0.414580780675652,
      "grad_norm": 1.0139285038810066,
      "learning_rate": 6.597827542806209e-06,
      "loss": 0.1159,
      "step": 14211
    },
    {
      "epoch": 0.41460995390629557,
      "grad_norm": 0.9835177770005753,
      "learning_rate": 6.597379873987677e-06,
      "loss": 0.1689,
      "step": 14212
    },
    {
      "epoch": 0.4146391271369391,
      "grad_norm": 0.8719525758696153,
      "learning_rate": 6.596932190908119e-06,
      "loss": 0.133,
      "step": 14213
    },
    {
      "epoch": 0.41466830036758273,
      "grad_norm": 0.9193789647211665,
      "learning_rate": 6.59648449357153e-06,
      "loss": 0.129,
      "step": 14214
    },
    {
      "epoch": 0.4146974735982263,
      "grad_norm": 0.8156922230844491,
      "learning_rate": 6.596036781981909e-06,
      "loss": 0.1466,
      "step": 14215
    },
    {
      "epoch": 0.41472664682886984,
      "grad_norm": 0.6906246025773246,
      "learning_rate": 6.595589056143255e-06,
      "loss": 0.1423,
      "step": 14216
    },
    {
      "epoch": 0.4147558200595134,
      "grad_norm": 0.777090503405836,
      "learning_rate": 6.59514131605956e-06,
      "loss": 0.1397,
      "step": 14217
    },
    {
      "epoch": 0.41478499329015694,
      "grad_norm": 0.8214239367704292,
      "learning_rate": 6.594693561734826e-06,
      "loss": 0.1652,
      "step": 14218
    },
    {
      "epoch": 0.4148141665208005,
      "grad_norm": 0.8431376353961676,
      "learning_rate": 6.594245793173049e-06,
      "loss": 0.1308,
      "step": 14219
    },
    {
      "epoch": 0.41484333975144405,
      "grad_norm": 0.7942742018087572,
      "learning_rate": 6.593798010378223e-06,
      "loss": 0.1349,
      "step": 14220
    },
    {
      "epoch": 0.41487251298208766,
      "grad_norm": 0.8673458762180498,
      "learning_rate": 6.593350213354353e-06,
      "loss": 0.1354,
      "step": 14221
    },
    {
      "epoch": 0.4149016862127312,
      "grad_norm": 0.7163674722069435,
      "learning_rate": 6.59290240210543e-06,
      "loss": 0.1356,
      "step": 14222
    },
    {
      "epoch": 0.41493085944337477,
      "grad_norm": 0.7939042595607231,
      "learning_rate": 6.592454576635454e-06,
      "loss": 0.1536,
      "step": 14223
    },
    {
      "epoch": 0.4149600326740183,
      "grad_norm": 0.7638234883573528,
      "learning_rate": 6.592006736948425e-06,
      "loss": 0.1406,
      "step": 14224
    },
    {
      "epoch": 0.4149892059046619,
      "grad_norm": 0.6860195885204661,
      "learning_rate": 6.59155888304834e-06,
      "loss": 0.1451,
      "step": 14225
    },
    {
      "epoch": 0.4150183791353054,
      "grad_norm": 0.6717922327951451,
      "learning_rate": 6.5911110149391976e-06,
      "loss": 0.1486,
      "step": 14226
    },
    {
      "epoch": 0.415047552365949,
      "grad_norm": 0.7714242591097691,
      "learning_rate": 6.590663132624995e-06,
      "loss": 0.1322,
      "step": 14227
    },
    {
      "epoch": 0.4150767255965926,
      "grad_norm": 0.71453038723994,
      "learning_rate": 6.590215236109731e-06,
      "loss": 0.1491,
      "step": 14228
    },
    {
      "epoch": 0.41510589882723614,
      "grad_norm": 0.8724219654697126,
      "learning_rate": 6.589767325397407e-06,
      "loss": 0.134,
      "step": 14229
    },
    {
      "epoch": 0.4151350720578797,
      "grad_norm": 0.9593656382652455,
      "learning_rate": 6.589319400492018e-06,
      "loss": 0.152,
      "step": 14230
    },
    {
      "epoch": 0.41516424528852325,
      "grad_norm": 0.9514974888951511,
      "learning_rate": 6.588871461397567e-06,
      "loss": 0.13,
      "step": 14231
    },
    {
      "epoch": 0.4151934185191668,
      "grad_norm": 1.1342312671634571,
      "learning_rate": 6.588423508118048e-06,
      "loss": 0.1552,
      "step": 14232
    },
    {
      "epoch": 0.41522259174981035,
      "grad_norm": 1.0483684124579047,
      "learning_rate": 6.587975540657465e-06,
      "loss": 0.142,
      "step": 14233
    },
    {
      "epoch": 0.4152517649804539,
      "grad_norm": 1.0360670981700808,
      "learning_rate": 6.587527559019815e-06,
      "loss": 0.1457,
      "step": 14234
    },
    {
      "epoch": 0.4152809382110975,
      "grad_norm": 0.9073539394442789,
      "learning_rate": 6.5870795632090965e-06,
      "loss": 0.1359,
      "step": 14235
    },
    {
      "epoch": 0.41531011144174107,
      "grad_norm": 1.0480011025979166,
      "learning_rate": 6.586631553229313e-06,
      "loss": 0.1496,
      "step": 14236
    },
    {
      "epoch": 0.4153392846723846,
      "grad_norm": 0.7141874492710977,
      "learning_rate": 6.5861835290844615e-06,
      "loss": 0.1516,
      "step": 14237
    },
    {
      "epoch": 0.4153684579030282,
      "grad_norm": 0.7419194638758977,
      "learning_rate": 6.585735490778541e-06,
      "loss": 0.136,
      "step": 14238
    },
    {
      "epoch": 0.41539763113367173,
      "grad_norm": 0.6361997929822755,
      "learning_rate": 6.585287438315553e-06,
      "loss": 0.1412,
      "step": 14239
    },
    {
      "epoch": 0.4154268043643153,
      "grad_norm": 1.7003927530082934,
      "learning_rate": 6.5848393716994966e-06,
      "loss": 0.1341,
      "step": 14240
    },
    {
      "epoch": 0.4154559775949589,
      "grad_norm": 0.8609963655650889,
      "learning_rate": 6.5843912909343734e-06,
      "loss": 0.1401,
      "step": 14241
    },
    {
      "epoch": 0.41548515082560245,
      "grad_norm": 0.8800828096276959,
      "learning_rate": 6.583943196024182e-06,
      "loss": 0.1525,
      "step": 14242
    },
    {
      "epoch": 0.415514324056246,
      "grad_norm": 0.735639549049615,
      "learning_rate": 6.583495086972924e-06,
      "loss": 0.1498,
      "step": 14243
    },
    {
      "epoch": 0.41554349728688955,
      "grad_norm": 1.018300855877259,
      "learning_rate": 6.5830469637846e-06,
      "loss": 0.1597,
      "step": 14244
    },
    {
      "epoch": 0.4155726705175331,
      "grad_norm": 0.79913859611663,
      "learning_rate": 6.582598826463211e-06,
      "loss": 0.1591,
      "step": 14245
    },
    {
      "epoch": 0.41560184374817666,
      "grad_norm": 0.827353692461927,
      "learning_rate": 6.58215067501276e-06,
      "loss": 0.1286,
      "step": 14246
    },
    {
      "epoch": 0.4156310169788202,
      "grad_norm": 0.67302947878698,
      "learning_rate": 6.5817025094372415e-06,
      "loss": 0.135,
      "step": 14247
    },
    {
      "epoch": 0.4156601902094638,
      "grad_norm": 1.0326698294250154,
      "learning_rate": 6.581254329740663e-06,
      "loss": 0.1242,
      "step": 14248
    },
    {
      "epoch": 0.4156893634401074,
      "grad_norm": 0.7923780509932484,
      "learning_rate": 6.580806135927021e-06,
      "loss": 0.1433,
      "step": 14249
    },
    {
      "epoch": 0.4157185366707509,
      "grad_norm": 0.8635468660717607,
      "learning_rate": 6.580357928000321e-06,
      "loss": 0.1393,
      "step": 14250
    },
    {
      "epoch": 0.4157477099013945,
      "grad_norm": 1.1185468153332023,
      "learning_rate": 6.579909705964562e-06,
      "loss": 0.1444,
      "step": 14251
    },
    {
      "epoch": 0.41577688313203803,
      "grad_norm": 0.9428650205713817,
      "learning_rate": 6.5794614698237465e-06,
      "loss": 0.1466,
      "step": 14252
    },
    {
      "epoch": 0.4158060563626816,
      "grad_norm": 0.7958016525600325,
      "learning_rate": 6.579013219581876e-06,
      "loss": 0.1425,
      "step": 14253
    },
    {
      "epoch": 0.41583522959332514,
      "grad_norm": 1.004094928846351,
      "learning_rate": 6.578564955242952e-06,
      "loss": 0.1507,
      "step": 14254
    },
    {
      "epoch": 0.41586440282396875,
      "grad_norm": 0.7654458813658859,
      "learning_rate": 6.578116676810979e-06,
      "loss": 0.1497,
      "step": 14255
    },
    {
      "epoch": 0.4158935760546123,
      "grad_norm": 1.0215519857910305,
      "learning_rate": 6.577668384289955e-06,
      "loss": 0.1243,
      "step": 14256
    },
    {
      "epoch": 0.41592274928525585,
      "grad_norm": 0.8578334702034305,
      "learning_rate": 6.577220077683884e-06,
      "loss": 0.13,
      "step": 14257
    },
    {
      "epoch": 0.4159519225158994,
      "grad_norm": 0.7564010945740879,
      "learning_rate": 6.57677175699677e-06,
      "loss": 0.1471,
      "step": 14258
    },
    {
      "epoch": 0.41598109574654296,
      "grad_norm": 0.9085508334254474,
      "learning_rate": 6.576323422232612e-06,
      "loss": 0.1234,
      "step": 14259
    },
    {
      "epoch": 0.4160102689771865,
      "grad_norm": 0.8234747830665352,
      "learning_rate": 6.575875073395417e-06,
      "loss": 0.1196,
      "step": 14260
    },
    {
      "epoch": 0.41603944220783007,
      "grad_norm": 0.7241153612141866,
      "learning_rate": 6.5754267104891855e-06,
      "loss": 0.1273,
      "step": 14261
    },
    {
      "epoch": 0.4160686154384737,
      "grad_norm": 0.696367475586553,
      "learning_rate": 6.574978333517918e-06,
      "loss": 0.1653,
      "step": 14262
    },
    {
      "epoch": 0.41609778866911723,
      "grad_norm": 0.7856989032450266,
      "learning_rate": 6.574529942485623e-06,
      "loss": 0.1326,
      "step": 14263
    },
    {
      "epoch": 0.4161269618997608,
      "grad_norm": 0.9594314337708004,
      "learning_rate": 6.574081537396299e-06,
      "loss": 0.1446,
      "step": 14264
    },
    {
      "epoch": 0.41615613513040434,
      "grad_norm": 0.7745130245349363,
      "learning_rate": 6.573633118253951e-06,
      "loss": 0.1201,
      "step": 14265
    },
    {
      "epoch": 0.4161853083610479,
      "grad_norm": 0.8539250642295578,
      "learning_rate": 6.5731846850625824e-06,
      "loss": 0.1628,
      "step": 14266
    },
    {
      "epoch": 0.41621448159169144,
      "grad_norm": 0.8183359594746717,
      "learning_rate": 6.572736237826196e-06,
      "loss": 0.1347,
      "step": 14267
    },
    {
      "epoch": 0.41624365482233505,
      "grad_norm": 0.8686788464309594,
      "learning_rate": 6.572287776548797e-06,
      "loss": 0.1516,
      "step": 14268
    },
    {
      "epoch": 0.4162728280529786,
      "grad_norm": 0.9002621558730742,
      "learning_rate": 6.571839301234386e-06,
      "loss": 0.1465,
      "step": 14269
    },
    {
      "epoch": 0.41630200128362216,
      "grad_norm": 0.8924838825693661,
      "learning_rate": 6.571390811886971e-06,
      "loss": 0.129,
      "step": 14270
    },
    {
      "epoch": 0.4163311745142657,
      "grad_norm": 0.7981411053326004,
      "learning_rate": 6.570942308510553e-06,
      "loss": 0.1241,
      "step": 14271
    },
    {
      "epoch": 0.41636034774490926,
      "grad_norm": 1.0778965375656522,
      "learning_rate": 6.570493791109137e-06,
      "loss": 0.1224,
      "step": 14272
    },
    {
      "epoch": 0.4163895209755528,
      "grad_norm": 1.0137178303005545,
      "learning_rate": 6.570045259686728e-06,
      "loss": 0.1176,
      "step": 14273
    },
    {
      "epoch": 0.41641869420619637,
      "grad_norm": 0.8599627669058896,
      "learning_rate": 6.569596714247328e-06,
      "loss": 0.1361,
      "step": 14274
    },
    {
      "epoch": 0.41644786743684,
      "grad_norm": 1.2350963687471905,
      "learning_rate": 6.569148154794945e-06,
      "loss": 0.1503,
      "step": 14275
    },
    {
      "epoch": 0.41647704066748353,
      "grad_norm": 1.1456342598343054,
      "learning_rate": 6.568699581333583e-06,
      "loss": 0.1484,
      "step": 14276
    },
    {
      "epoch": 0.4165062138981271,
      "grad_norm": 0.896590792772618,
      "learning_rate": 6.568250993867242e-06,
      "loss": 0.15,
      "step": 14277
    },
    {
      "epoch": 0.41653538712877064,
      "grad_norm": 0.9495093826242231,
      "learning_rate": 6.567802392399934e-06,
      "loss": 0.1306,
      "step": 14278
    },
    {
      "epoch": 0.4165645603594142,
      "grad_norm": 2.4680776091761025,
      "learning_rate": 6.567353776935659e-06,
      "loss": 0.117,
      "step": 14279
    },
    {
      "epoch": 0.41659373359005775,
      "grad_norm": 1.0618009436803515,
      "learning_rate": 6.566905147478422e-06,
      "loss": 0.1496,
      "step": 14280
    },
    {
      "epoch": 0.4166229068207013,
      "grad_norm": 0.9761818292036695,
      "learning_rate": 6.5664565040322325e-06,
      "loss": 0.1631,
      "step": 14281
    },
    {
      "epoch": 0.4166520800513449,
      "grad_norm": 0.8427616603059416,
      "learning_rate": 6.566007846601092e-06,
      "loss": 0.145,
      "step": 14282
    },
    {
      "epoch": 0.41668125328198846,
      "grad_norm": 1.1511932611745979,
      "learning_rate": 6.565559175189008e-06,
      "loss": 0.1511,
      "step": 14283
    },
    {
      "epoch": 0.416710426512632,
      "grad_norm": 1.165155613581096,
      "learning_rate": 6.565110489799985e-06,
      "loss": 0.1333,
      "step": 14284
    },
    {
      "epoch": 0.41673959974327557,
      "grad_norm": 0.8810995836888235,
      "learning_rate": 6.564661790438029e-06,
      "loss": 0.1482,
      "step": 14285
    },
    {
      "epoch": 0.4167687729739191,
      "grad_norm": 0.9295967316792687,
      "learning_rate": 6.564213077107147e-06,
      "loss": 0.1291,
      "step": 14286
    },
    {
      "epoch": 0.4167979462045627,
      "grad_norm": 1.298575314455602,
      "learning_rate": 6.563764349811342e-06,
      "loss": 0.1496,
      "step": 14287
    },
    {
      "epoch": 0.41682711943520623,
      "grad_norm": 0.8940517436562201,
      "learning_rate": 6.563315608554624e-06,
      "loss": 0.1503,
      "step": 14288
    },
    {
      "epoch": 0.41685629266584984,
      "grad_norm": 0.949590246522986,
      "learning_rate": 6.562866853340997e-06,
      "loss": 0.1398,
      "step": 14289
    },
    {
      "epoch": 0.4168854658964934,
      "grad_norm": 0.7319174477402393,
      "learning_rate": 6.562418084174467e-06,
      "loss": 0.1338,
      "step": 14290
    },
    {
      "epoch": 0.41691463912713694,
      "grad_norm": 0.8444589471790982,
      "learning_rate": 6.561969301059044e-06,
      "loss": 0.1373,
      "step": 14291
    },
    {
      "epoch": 0.4169438123577805,
      "grad_norm": 0.9852698717575947,
      "learning_rate": 6.561520503998728e-06,
      "loss": 0.1312,
      "step": 14292
    },
    {
      "epoch": 0.41697298558842405,
      "grad_norm": 0.8153453148167985,
      "learning_rate": 6.561071692997533e-06,
      "loss": 0.1603,
      "step": 14293
    },
    {
      "epoch": 0.4170021588190676,
      "grad_norm": 0.8232336892224774,
      "learning_rate": 6.560622868059461e-06,
      "loss": 0.1622,
      "step": 14294
    },
    {
      "epoch": 0.4170313320497112,
      "grad_norm": 0.912693790723682,
      "learning_rate": 6.56017402918852e-06,
      "loss": 0.176,
      "step": 14295
    },
    {
      "epoch": 0.41706050528035477,
      "grad_norm": 0.7135774955610511,
      "learning_rate": 6.559725176388719e-06,
      "loss": 0.1303,
      "step": 14296
    },
    {
      "epoch": 0.4170896785109983,
      "grad_norm": 0.8108366963627028,
      "learning_rate": 6.559276309664064e-06,
      "loss": 0.1459,
      "step": 14297
    },
    {
      "epoch": 0.41711885174164187,
      "grad_norm": 0.8494333748366347,
      "learning_rate": 6.558827429018562e-06,
      "loss": 0.1343,
      "step": 14298
    },
    {
      "epoch": 0.4171480249722854,
      "grad_norm": 0.66545406291422,
      "learning_rate": 6.5583785344562204e-06,
      "loss": 0.1463,
      "step": 14299
    },
    {
      "epoch": 0.417177198202929,
      "grad_norm": 1.0578226466632783,
      "learning_rate": 6.557929625981048e-06,
      "loss": 0.1539,
      "step": 14300
    },
    {
      "epoch": 0.41720637143357253,
      "grad_norm": 0.7599889971263503,
      "learning_rate": 6.557480703597051e-06,
      "loss": 0.1684,
      "step": 14301
    },
    {
      "epoch": 0.41723554466421614,
      "grad_norm": 0.7508250181418441,
      "learning_rate": 6.5570317673082385e-06,
      "loss": 0.1659,
      "step": 14302
    },
    {
      "epoch": 0.4172647178948597,
      "grad_norm": 0.8196772922829928,
      "learning_rate": 6.5565828171186175e-06,
      "loss": 0.1467,
      "step": 14303
    },
    {
      "epoch": 0.41729389112550325,
      "grad_norm": 0.8000356929982854,
      "learning_rate": 6.556133853032197e-06,
      "loss": 0.1399,
      "step": 14304
    },
    {
      "epoch": 0.4173230643561468,
      "grad_norm": 0.8587873979531085,
      "learning_rate": 6.555684875052985e-06,
      "loss": 0.1308,
      "step": 14305
    },
    {
      "epoch": 0.41735223758679035,
      "grad_norm": 0.7785555743013844,
      "learning_rate": 6.555235883184991e-06,
      "loss": 0.1305,
      "step": 14306
    },
    {
      "epoch": 0.4173814108174339,
      "grad_norm": 0.7203983435626452,
      "learning_rate": 6.55478687743222e-06,
      "loss": 0.1392,
      "step": 14307
    },
    {
      "epoch": 0.41741058404807746,
      "grad_norm": 0.8982224874390651,
      "learning_rate": 6.554337857798686e-06,
      "loss": 0.1395,
      "step": 14308
    },
    {
      "epoch": 0.41743975727872107,
      "grad_norm": 0.9033788887895698,
      "learning_rate": 6.553888824288393e-06,
      "loss": 0.1334,
      "step": 14309
    },
    {
      "epoch": 0.4174689305093646,
      "grad_norm": 0.880630137005619,
      "learning_rate": 6.55343977690535e-06,
      "loss": 0.1487,
      "step": 14310
    },
    {
      "epoch": 0.4174981037400082,
      "grad_norm": 0.7806417589295644,
      "learning_rate": 6.55299071565357e-06,
      "loss": 0.1225,
      "step": 14311
    },
    {
      "epoch": 0.41752727697065173,
      "grad_norm": 0.9294498284452497,
      "learning_rate": 6.552541640537058e-06,
      "loss": 0.1645,
      "step": 14312
    },
    {
      "epoch": 0.4175564502012953,
      "grad_norm": 0.9978673562227008,
      "learning_rate": 6.552092551559825e-06,
      "loss": 0.1535,
      "step": 14313
    },
    {
      "epoch": 0.41758562343193883,
      "grad_norm": 0.9282543665195457,
      "learning_rate": 6.55164344872588e-06,
      "loss": 0.1527,
      "step": 14314
    },
    {
      "epoch": 0.4176147966625824,
      "grad_norm": 0.8826576051342357,
      "learning_rate": 6.551194332039235e-06,
      "loss": 0.129,
      "step": 14315
    },
    {
      "epoch": 0.417643969893226,
      "grad_norm": 0.8600713731233143,
      "learning_rate": 6.550745201503894e-06,
      "loss": 0.1515,
      "step": 14316
    },
    {
      "epoch": 0.41767314312386955,
      "grad_norm": 1.198180508330796,
      "learning_rate": 6.550296057123872e-06,
      "loss": 0.1312,
      "step": 14317
    },
    {
      "epoch": 0.4177023163545131,
      "grad_norm": 0.94208104984621,
      "learning_rate": 6.549846898903176e-06,
      "loss": 0.1515,
      "step": 14318
    },
    {
      "epoch": 0.41773148958515666,
      "grad_norm": 0.8037888861901492,
      "learning_rate": 6.549397726845817e-06,
      "loss": 0.156,
      "step": 14319
    },
    {
      "epoch": 0.4177606628158002,
      "grad_norm": 0.8551748986067956,
      "learning_rate": 6.548948540955806e-06,
      "loss": 0.143,
      "step": 14320
    },
    {
      "epoch": 0.41778983604644376,
      "grad_norm": 0.9323192723203955,
      "learning_rate": 6.548499341237152e-06,
      "loss": 0.1407,
      "step": 14321
    },
    {
      "epoch": 0.41781900927708737,
      "grad_norm": 0.7603429502452196,
      "learning_rate": 6.548050127693865e-06,
      "loss": 0.1391,
      "step": 14322
    },
    {
      "epoch": 0.4178481825077309,
      "grad_norm": 1.425711060718987,
      "learning_rate": 6.547600900329957e-06,
      "loss": 0.1343,
      "step": 14323
    },
    {
      "epoch": 0.4178773557383745,
      "grad_norm": 0.9718586473008777,
      "learning_rate": 6.547151659149435e-06,
      "loss": 0.139,
      "step": 14324
    },
    {
      "epoch": 0.41790652896901803,
      "grad_norm": 0.9740842615198593,
      "learning_rate": 6.546702404156313e-06,
      "loss": 0.1285,
      "step": 14325
    },
    {
      "epoch": 0.4179357021996616,
      "grad_norm": 0.7313133104116794,
      "learning_rate": 6.546253135354603e-06,
      "loss": 0.1466,
      "step": 14326
    },
    {
      "epoch": 0.41796487543030514,
      "grad_norm": 0.64246601816848,
      "learning_rate": 6.545803852748314e-06,
      "loss": 0.1342,
      "step": 14327
    },
    {
      "epoch": 0.4179940486609487,
      "grad_norm": 0.805222785127508,
      "learning_rate": 6.545354556341457e-06,
      "loss": 0.1233,
      "step": 14328
    },
    {
      "epoch": 0.4180232218915923,
      "grad_norm": 0.7740098512065345,
      "learning_rate": 6.544905246138042e-06,
      "loss": 0.145,
      "step": 14329
    },
    {
      "epoch": 0.41805239512223585,
      "grad_norm": 0.8370453777839455,
      "learning_rate": 6.544455922142084e-06,
      "loss": 0.136,
      "step": 14330
    },
    {
      "epoch": 0.4180815683528794,
      "grad_norm": 0.855682185482877,
      "learning_rate": 6.54400658435759e-06,
      "loss": 0.1557,
      "step": 14331
    },
    {
      "epoch": 0.41811074158352296,
      "grad_norm": 1.056530004972702,
      "learning_rate": 6.543557232788574e-06,
      "loss": 0.1391,
      "step": 14332
    },
    {
      "epoch": 0.4181399148141665,
      "grad_norm": 0.9034832075931041,
      "learning_rate": 6.543107867439049e-06,
      "loss": 0.153,
      "step": 14333
    },
    {
      "epoch": 0.41816908804481007,
      "grad_norm": 0.6181331059510731,
      "learning_rate": 6.542658488313024e-06,
      "loss": 0.1275,
      "step": 14334
    },
    {
      "epoch": 0.4181982612754536,
      "grad_norm": 0.8372381582547589,
      "learning_rate": 6.542209095414512e-06,
      "loss": 0.1293,
      "step": 14335
    },
    {
      "epoch": 0.41822743450609723,
      "grad_norm": 1.1441327183955639,
      "learning_rate": 6.541759688747528e-06,
      "loss": 0.1261,
      "step": 14336
    },
    {
      "epoch": 0.4182566077367408,
      "grad_norm": 0.8883941768455438,
      "learning_rate": 6.541310268316079e-06,
      "loss": 0.1454,
      "step": 14337
    },
    {
      "epoch": 0.41828578096738434,
      "grad_norm": 1.0577726495234712,
      "learning_rate": 6.5408608341241805e-06,
      "loss": 0.1317,
      "step": 14338
    },
    {
      "epoch": 0.4183149541980279,
      "grad_norm": 1.3522374229605751,
      "learning_rate": 6.5404113861758446e-06,
      "loss": 0.1311,
      "step": 14339
    },
    {
      "epoch": 0.41834412742867144,
      "grad_norm": 0.8630830853602386,
      "learning_rate": 6.539961924475083e-06,
      "loss": 0.1569,
      "step": 14340
    },
    {
      "epoch": 0.418373300659315,
      "grad_norm": 0.7511730437186435,
      "learning_rate": 6.53951244902591e-06,
      "loss": 0.1538,
      "step": 14341
    },
    {
      "epoch": 0.41840247388995855,
      "grad_norm": 1.1997545421136582,
      "learning_rate": 6.539062959832337e-06,
      "loss": 0.1514,
      "step": 14342
    },
    {
      "epoch": 0.41843164712060216,
      "grad_norm": 0.7480991491496013,
      "learning_rate": 6.538613456898376e-06,
      "loss": 0.1259,
      "step": 14343
    },
    {
      "epoch": 0.4184608203512457,
      "grad_norm": 0.8950531223354506,
      "learning_rate": 6.538163940228043e-06,
      "loss": 0.1465,
      "step": 14344
    },
    {
      "epoch": 0.41848999358188926,
      "grad_norm": 1.5710228081205437,
      "learning_rate": 6.537714409825349e-06,
      "loss": 0.1447,
      "step": 14345
    },
    {
      "epoch": 0.4185191668125328,
      "grad_norm": 0.8818041009882807,
      "learning_rate": 6.537264865694307e-06,
      "loss": 0.1451,
      "step": 14346
    },
    {
      "epoch": 0.41854834004317637,
      "grad_norm": 0.7327222044131102,
      "learning_rate": 6.5368153078389315e-06,
      "loss": 0.148,
      "step": 14347
    },
    {
      "epoch": 0.4185775132738199,
      "grad_norm": 0.899954505310372,
      "learning_rate": 6.536365736263236e-06,
      "loss": 0.1388,
      "step": 14348
    },
    {
      "epoch": 0.4186066865044635,
      "grad_norm": 0.9258798806308532,
      "learning_rate": 6.535916150971234e-06,
      "loss": 0.1374,
      "step": 14349
    },
    {
      "epoch": 0.4186358597351071,
      "grad_norm": 0.8946088431754494,
      "learning_rate": 6.5354665519669405e-06,
      "loss": 0.1336,
      "step": 14350
    },
    {
      "epoch": 0.41866503296575064,
      "grad_norm": 0.9430193967223919,
      "learning_rate": 6.535016939254366e-06,
      "loss": 0.1608,
      "step": 14351
    },
    {
      "epoch": 0.4186942061963942,
      "grad_norm": 0.8612935527285908,
      "learning_rate": 6.534567312837528e-06,
      "loss": 0.1399,
      "step": 14352
    },
    {
      "epoch": 0.41872337942703775,
      "grad_norm": 0.7180210667748339,
      "learning_rate": 6.53411767272044e-06,
      "loss": 0.1291,
      "step": 14353
    },
    {
      "epoch": 0.4187525526576813,
      "grad_norm": 0.7933929550154948,
      "learning_rate": 6.5336680189071135e-06,
      "loss": 0.1484,
      "step": 14354
    },
    {
      "epoch": 0.41878172588832485,
      "grad_norm": 0.7875205272562983,
      "learning_rate": 6.533218351401567e-06,
      "loss": 0.1583,
      "step": 14355
    },
    {
      "epoch": 0.41881089911896846,
      "grad_norm": 0.739509735175548,
      "learning_rate": 6.532768670207813e-06,
      "loss": 0.1323,
      "step": 14356
    },
    {
      "epoch": 0.418840072349612,
      "grad_norm": 0.7523924236931829,
      "learning_rate": 6.532318975329864e-06,
      "loss": 0.1348,
      "step": 14357
    },
    {
      "epoch": 0.41886924558025557,
      "grad_norm": 0.8304539510543331,
      "learning_rate": 6.5318692667717395e-06,
      "loss": 0.1586,
      "step": 14358
    },
    {
      "epoch": 0.4188984188108991,
      "grad_norm": 0.8434055583480368,
      "learning_rate": 6.531419544537452e-06,
      "loss": 0.1478,
      "step": 14359
    },
    {
      "epoch": 0.4189275920415427,
      "grad_norm": 0.7399589595592506,
      "learning_rate": 6.530969808631014e-06,
      "loss": 0.1321,
      "step": 14360
    },
    {
      "epoch": 0.4189567652721862,
      "grad_norm": 0.9081734487926589,
      "learning_rate": 6.530520059056446e-06,
      "loss": 0.141,
      "step": 14361
    },
    {
      "epoch": 0.4189859385028298,
      "grad_norm": 0.703386302611474,
      "learning_rate": 6.5300702958177585e-06,
      "loss": 0.1296,
      "step": 14362
    },
    {
      "epoch": 0.4190151117334734,
      "grad_norm": 0.8498679100186433,
      "learning_rate": 6.529620518918969e-06,
      "loss": 0.1273,
      "step": 14363
    },
    {
      "epoch": 0.41904428496411694,
      "grad_norm": 0.833550905480528,
      "learning_rate": 6.529170728364092e-06,
      "loss": 0.1391,
      "step": 14364
    },
    {
      "epoch": 0.4190734581947605,
      "grad_norm": 4.407525355618035,
      "learning_rate": 6.528720924157144e-06,
      "loss": 0.1446,
      "step": 14365
    },
    {
      "epoch": 0.41910263142540405,
      "grad_norm": 0.8079687573902641,
      "learning_rate": 6.528271106302141e-06,
      "loss": 0.1194,
      "step": 14366
    },
    {
      "epoch": 0.4191318046560476,
      "grad_norm": 0.8849809924407699,
      "learning_rate": 6.527821274803098e-06,
      "loss": 0.1394,
      "step": 14367
    },
    {
      "epoch": 0.41916097788669116,
      "grad_norm": 0.8365329865325013,
      "learning_rate": 6.527371429664032e-06,
      "loss": 0.1374,
      "step": 14368
    },
    {
      "epoch": 0.4191901511173347,
      "grad_norm": 0.6877693104433702,
      "learning_rate": 6.526921570888958e-06,
      "loss": 0.1422,
      "step": 14369
    },
    {
      "epoch": 0.4192193243479783,
      "grad_norm": 1.159280137860826,
      "learning_rate": 6.526471698481892e-06,
      "loss": 0.1465,
      "step": 14370
    },
    {
      "epoch": 0.41924849757862187,
      "grad_norm": 0.7981103899872733,
      "learning_rate": 6.526021812446854e-06,
      "loss": 0.1379,
      "step": 14371
    },
    {
      "epoch": 0.4192776708092654,
      "grad_norm": 0.9183917751391264,
      "learning_rate": 6.525571912787854e-06,
      "loss": 0.1441,
      "step": 14372
    },
    {
      "epoch": 0.419306844039909,
      "grad_norm": 0.7898228443070926,
      "learning_rate": 6.525121999508915e-06,
      "loss": 0.1201,
      "step": 14373
    },
    {
      "epoch": 0.41933601727055253,
      "grad_norm": 1.109406773607781,
      "learning_rate": 6.524672072614048e-06,
      "loss": 0.1493,
      "step": 14374
    },
    {
      "epoch": 0.4193651905011961,
      "grad_norm": 0.7248682319379581,
      "learning_rate": 6.524222132107273e-06,
      "loss": 0.1129,
      "step": 14375
    },
    {
      "epoch": 0.41939436373183964,
      "grad_norm": 1.0096391272567418,
      "learning_rate": 6.5237721779926086e-06,
      "loss": 0.137,
      "step": 14376
    },
    {
      "epoch": 0.41942353696248325,
      "grad_norm": 0.7742284748243854,
      "learning_rate": 6.52332221027407e-06,
      "loss": 0.1327,
      "step": 14377
    },
    {
      "epoch": 0.4194527101931268,
      "grad_norm": 1.0158524719397997,
      "learning_rate": 6.522872228955672e-06,
      "loss": 0.1391,
      "step": 14378
    },
    {
      "epoch": 0.41948188342377035,
      "grad_norm": 0.7942257032163648,
      "learning_rate": 6.522422234041436e-06,
      "loss": 0.151,
      "step": 14379
    },
    {
      "epoch": 0.4195110566544139,
      "grad_norm": 0.9800663996007741,
      "learning_rate": 6.521972225535378e-06,
      "loss": 0.1466,
      "step": 14380
    },
    {
      "epoch": 0.41954022988505746,
      "grad_norm": 0.8590362545214911,
      "learning_rate": 6.5215222034415146e-06,
      "loss": 0.1458,
      "step": 14381
    },
    {
      "epoch": 0.419569403115701,
      "grad_norm": 0.7623873539684414,
      "learning_rate": 6.521072167763864e-06,
      "loss": 0.1116,
      "step": 14382
    },
    {
      "epoch": 0.4195985763463446,
      "grad_norm": 1.0187623040401035,
      "learning_rate": 6.520622118506446e-06,
      "loss": 0.1436,
      "step": 14383
    },
    {
      "epoch": 0.4196277495769882,
      "grad_norm": 0.8541720230399411,
      "learning_rate": 6.520172055673274e-06,
      "loss": 0.1534,
      "step": 14384
    },
    {
      "epoch": 0.4196569228076317,
      "grad_norm": 0.8024418784142257,
      "learning_rate": 6.5197219792683695e-06,
      "loss": 0.15,
      "step": 14385
    },
    {
      "epoch": 0.4196860960382753,
      "grad_norm": 0.7600298330378055,
      "learning_rate": 6.519271889295752e-06,
      "loss": 0.1366,
      "step": 14386
    },
    {
      "epoch": 0.41971526926891883,
      "grad_norm": 0.9907289182929588,
      "learning_rate": 6.518821785759435e-06,
      "loss": 0.1476,
      "step": 14387
    },
    {
      "epoch": 0.4197444424995624,
      "grad_norm": 0.7972380548786816,
      "learning_rate": 6.518371668663442e-06,
      "loss": 0.1517,
      "step": 14388
    },
    {
      "epoch": 0.41977361573020594,
      "grad_norm": 1.100186635034028,
      "learning_rate": 6.517921538011789e-06,
      "loss": 0.1425,
      "step": 14389
    },
    {
      "epoch": 0.41980278896084955,
      "grad_norm": 1.0191478368419349,
      "learning_rate": 6.517471393808492e-06,
      "loss": 0.1349,
      "step": 14390
    },
    {
      "epoch": 0.4198319621914931,
      "grad_norm": 0.750193984141269,
      "learning_rate": 6.517021236057575e-06,
      "loss": 0.1436,
      "step": 14391
    },
    {
      "epoch": 0.41986113542213666,
      "grad_norm": 0.8731682549347246,
      "learning_rate": 6.516571064763055e-06,
      "loss": 0.1573,
      "step": 14392
    },
    {
      "epoch": 0.4198903086527802,
      "grad_norm": 1.0952546377060774,
      "learning_rate": 6.51612087992895e-06,
      "loss": 0.1234,
      "step": 14393
    },
    {
      "epoch": 0.41991948188342376,
      "grad_norm": 0.9693942403539806,
      "learning_rate": 6.51567068155928e-06,
      "loss": 0.1414,
      "step": 14394
    },
    {
      "epoch": 0.4199486551140673,
      "grad_norm": 0.9917884406099452,
      "learning_rate": 6.515220469658062e-06,
      "loss": 0.1447,
      "step": 14395
    },
    {
      "epoch": 0.41997782834471087,
      "grad_norm": 1.0461003193772818,
      "learning_rate": 6.514770244229319e-06,
      "loss": 0.1443,
      "step": 14396
    },
    {
      "epoch": 0.4200070015753545,
      "grad_norm": 0.7730216763770943,
      "learning_rate": 6.51432000527707e-06,
      "loss": 0.1479,
      "step": 14397
    },
    {
      "epoch": 0.42003617480599803,
      "grad_norm": 0.7271575490953223,
      "learning_rate": 6.513869752805333e-06,
      "loss": 0.1598,
      "step": 14398
    },
    {
      "epoch": 0.4200653480366416,
      "grad_norm": 0.7973071694822216,
      "learning_rate": 6.513419486818125e-06,
      "loss": 0.152,
      "step": 14399
    },
    {
      "epoch": 0.42009452126728514,
      "grad_norm": 0.9407392053654383,
      "learning_rate": 6.512969207319472e-06,
      "loss": 0.1625,
      "step": 14400
    },
    {
      "epoch": 0.4201236944979287,
      "grad_norm": 0.807174221705208,
      "learning_rate": 6.512518914313392e-06,
      "loss": 0.1352,
      "step": 14401
    },
    {
      "epoch": 0.42015286772857224,
      "grad_norm": 0.7364112184600825,
      "learning_rate": 6.512068607803901e-06,
      "loss": 0.1457,
      "step": 14402
    },
    {
      "epoch": 0.4201820409592158,
      "grad_norm": 0.7971310429385652,
      "learning_rate": 6.5116182877950255e-06,
      "loss": 0.1461,
      "step": 14403
    },
    {
      "epoch": 0.4202112141898594,
      "grad_norm": 0.8979404807664068,
      "learning_rate": 6.511167954290781e-06,
      "loss": 0.138,
      "step": 14404
    },
    {
      "epoch": 0.42024038742050296,
      "grad_norm": 0.8636420617153241,
      "learning_rate": 6.5107176072951895e-06,
      "loss": 0.1515,
      "step": 14405
    },
    {
      "epoch": 0.4202695606511465,
      "grad_norm": 0.8233094600114146,
      "learning_rate": 6.510267246812274e-06,
      "loss": 0.1355,
      "step": 14406
    },
    {
      "epoch": 0.42029873388179007,
      "grad_norm": 0.8062573146011515,
      "learning_rate": 6.5098168728460505e-06,
      "loss": 0.1086,
      "step": 14407
    },
    {
      "epoch": 0.4203279071124336,
      "grad_norm": 0.7171526571763346,
      "learning_rate": 6.509366485400544e-06,
      "loss": 0.1325,
      "step": 14408
    },
    {
      "epoch": 0.42035708034307717,
      "grad_norm": 0.7503885416778169,
      "learning_rate": 6.508916084479774e-06,
      "loss": 0.1545,
      "step": 14409
    },
    {
      "epoch": 0.4203862535737208,
      "grad_norm": 0.8119253575625889,
      "learning_rate": 6.50846567008776e-06,
      "loss": 0.1856,
      "step": 14410
    },
    {
      "epoch": 0.42041542680436433,
      "grad_norm": 1.0800479061082418,
      "learning_rate": 6.5080152422285255e-06,
      "loss": 0.1501,
      "step": 14411
    },
    {
      "epoch": 0.4204446000350079,
      "grad_norm": 0.9561561455476036,
      "learning_rate": 6.507564800906091e-06,
      "loss": 0.1566,
      "step": 14412
    },
    {
      "epoch": 0.42047377326565144,
      "grad_norm": 0.7765485467964146,
      "learning_rate": 6.507114346124479e-06,
      "loss": 0.1282,
      "step": 14413
    },
    {
      "epoch": 0.420502946496295,
      "grad_norm": 0.7646190425731896,
      "learning_rate": 6.506663877887707e-06,
      "loss": 0.1249,
      "step": 14414
    },
    {
      "epoch": 0.42053211972693855,
      "grad_norm": 0.8564671391059466,
      "learning_rate": 6.506213396199801e-06,
      "loss": 0.1429,
      "step": 14415
    },
    {
      "epoch": 0.4205612929575821,
      "grad_norm": 1.058010102157088,
      "learning_rate": 6.505762901064782e-06,
      "loss": 0.1376,
      "step": 14416
    },
    {
      "epoch": 0.4205904661882257,
      "grad_norm": 0.82715533223091,
      "learning_rate": 6.50531239248667e-06,
      "loss": 0.1318,
      "step": 14417
    },
    {
      "epoch": 0.42061963941886926,
      "grad_norm": 0.7235873882819607,
      "learning_rate": 6.50486187046949e-06,
      "loss": 0.1342,
      "step": 14418
    },
    {
      "epoch": 0.4206488126495128,
      "grad_norm": 0.877946213738441,
      "learning_rate": 6.504411335017263e-06,
      "loss": 0.1493,
      "step": 14419
    },
    {
      "epoch": 0.42067798588015637,
      "grad_norm": 1.0487731356546257,
      "learning_rate": 6.503960786134007e-06,
      "loss": 0.1396,
      "step": 14420
    },
    {
      "epoch": 0.4207071591107999,
      "grad_norm": 0.7336313983219429,
      "learning_rate": 6.503510223823751e-06,
      "loss": 0.1184,
      "step": 14421
    },
    {
      "epoch": 0.4207363323414435,
      "grad_norm": 0.9128619470625778,
      "learning_rate": 6.503059648090514e-06,
      "loss": 0.1426,
      "step": 14422
    },
    {
      "epoch": 0.42076550557208703,
      "grad_norm": 0.9919108847115311,
      "learning_rate": 6.502609058938319e-06,
      "loss": 0.1635,
      "step": 14423
    },
    {
      "epoch": 0.42079467880273064,
      "grad_norm": 0.701259220471712,
      "learning_rate": 6.50215845637119e-06,
      "loss": 0.1226,
      "step": 14424
    },
    {
      "epoch": 0.4208238520333742,
      "grad_norm": 0.8996779412217872,
      "learning_rate": 6.501707840393147e-06,
      "loss": 0.1307,
      "step": 14425
    },
    {
      "epoch": 0.42085302526401774,
      "grad_norm": 0.7019403756544518,
      "learning_rate": 6.501257211008216e-06,
      "loss": 0.1554,
      "step": 14426
    },
    {
      "epoch": 0.4208821984946613,
      "grad_norm": 1.04852831197307,
      "learning_rate": 6.500806568220419e-06,
      "loss": 0.1465,
      "step": 14427
    },
    {
      "epoch": 0.42091137172530485,
      "grad_norm": 0.7910520105499192,
      "learning_rate": 6.500355912033781e-06,
      "loss": 0.1607,
      "step": 14428
    },
    {
      "epoch": 0.4209405449559484,
      "grad_norm": 0.8815956341747757,
      "learning_rate": 6.49990524245232e-06,
      "loss": 0.1665,
      "step": 14429
    },
    {
      "epoch": 0.42096971818659196,
      "grad_norm": 0.790428077954022,
      "learning_rate": 6.4994545594800655e-06,
      "loss": 0.128,
      "step": 14430
    },
    {
      "epoch": 0.42099889141723557,
      "grad_norm": 0.8413021886541057,
      "learning_rate": 6.499003863121039e-06,
      "loss": 0.1562,
      "step": 14431
    },
    {
      "epoch": 0.4210280646478791,
      "grad_norm": 0.8960692385578597,
      "learning_rate": 6.498553153379262e-06,
      "loss": 0.1362,
      "step": 14432
    },
    {
      "epoch": 0.42105723787852267,
      "grad_norm": 1.081311930001512,
      "learning_rate": 6.498102430258761e-06,
      "loss": 0.1454,
      "step": 14433
    },
    {
      "epoch": 0.4210864111091662,
      "grad_norm": 0.876389351039299,
      "learning_rate": 6.49765169376356e-06,
      "loss": 0.1357,
      "step": 14434
    },
    {
      "epoch": 0.4211155843398098,
      "grad_norm": 0.875251342324052,
      "learning_rate": 6.49720094389768e-06,
      "loss": 0.1289,
      "step": 14435
    },
    {
      "epoch": 0.42114475757045333,
      "grad_norm": 0.810552797688318,
      "learning_rate": 6.49675018066515e-06,
      "loss": 0.1378,
      "step": 14436
    },
    {
      "epoch": 0.42117393080109694,
      "grad_norm": 0.8625619733077718,
      "learning_rate": 6.496299404069991e-06,
      "loss": 0.1612,
      "step": 14437
    },
    {
      "epoch": 0.4212031040317405,
      "grad_norm": 1.0399405499631094,
      "learning_rate": 6.4958486141162266e-06,
      "loss": 0.1672,
      "step": 14438
    },
    {
      "epoch": 0.42123227726238405,
      "grad_norm": 0.9267043636752254,
      "learning_rate": 6.495397810807884e-06,
      "loss": 0.1235,
      "step": 14439
    },
    {
      "epoch": 0.4212614504930276,
      "grad_norm": 0.7273239851740533,
      "learning_rate": 6.4949469941489874e-06,
      "loss": 0.1309,
      "step": 14440
    },
    {
      "epoch": 0.42129062372367115,
      "grad_norm": 0.9197924239614961,
      "learning_rate": 6.49449616414356e-06,
      "loss": 0.1402,
      "step": 14441
    },
    {
      "epoch": 0.4213197969543147,
      "grad_norm": 1.041942525152656,
      "learning_rate": 6.4940453207956274e-06,
      "loss": 0.1296,
      "step": 14442
    },
    {
      "epoch": 0.42134897018495826,
      "grad_norm": 0.8788726373225624,
      "learning_rate": 6.493594464109217e-06,
      "loss": 0.1253,
      "step": 14443
    },
    {
      "epoch": 0.42137814341560187,
      "grad_norm": 0.7326265049777108,
      "learning_rate": 6.493143594088348e-06,
      "loss": 0.14,
      "step": 14444
    },
    {
      "epoch": 0.4214073166462454,
      "grad_norm": 0.9904664601160023,
      "learning_rate": 6.492692710737052e-06,
      "loss": 0.1328,
      "step": 14445
    },
    {
      "epoch": 0.421436489876889,
      "grad_norm": 0.9432998346701613,
      "learning_rate": 6.492241814059351e-06,
      "loss": 0.1454,
      "step": 14446
    },
    {
      "epoch": 0.42146566310753253,
      "grad_norm": 0.7541833112111631,
      "learning_rate": 6.491790904059271e-06,
      "loss": 0.1219,
      "step": 14447
    },
    {
      "epoch": 0.4214948363381761,
      "grad_norm": 0.9898427213653226,
      "learning_rate": 6.491339980740839e-06,
      "loss": 0.1599,
      "step": 14448
    },
    {
      "epoch": 0.42152400956881964,
      "grad_norm": 1.103395260444594,
      "learning_rate": 6.490889044108079e-06,
      "loss": 0.1725,
      "step": 14449
    },
    {
      "epoch": 0.4215531827994632,
      "grad_norm": 0.8698384097384274,
      "learning_rate": 6.490438094165017e-06,
      "loss": 0.1362,
      "step": 14450
    },
    {
      "epoch": 0.4215823560301068,
      "grad_norm": 0.7955909545275446,
      "learning_rate": 6.48998713091568e-06,
      "loss": 0.1436,
      "step": 14451
    },
    {
      "epoch": 0.42161152926075035,
      "grad_norm": 0.9972732986336026,
      "learning_rate": 6.4895361543640945e-06,
      "loss": 0.1428,
      "step": 14452
    },
    {
      "epoch": 0.4216407024913939,
      "grad_norm": 0.8758179208475545,
      "learning_rate": 6.489085164514285e-06,
      "loss": 0.1189,
      "step": 14453
    },
    {
      "epoch": 0.42166987572203746,
      "grad_norm": 0.8987846600605,
      "learning_rate": 6.4886341613702785e-06,
      "loss": 0.1247,
      "step": 14454
    },
    {
      "epoch": 0.421699048952681,
      "grad_norm": 0.8682519836965628,
      "learning_rate": 6.4881831449361025e-06,
      "loss": 0.1225,
      "step": 14455
    },
    {
      "epoch": 0.42172822218332456,
      "grad_norm": 1.0115723125029012,
      "learning_rate": 6.487732115215781e-06,
      "loss": 0.1617,
      "step": 14456
    },
    {
      "epoch": 0.4217573954139681,
      "grad_norm": 0.7916856556748872,
      "learning_rate": 6.487281072213343e-06,
      "loss": 0.1344,
      "step": 14457
    },
    {
      "epoch": 0.4217865686446117,
      "grad_norm": 0.9089706188771904,
      "learning_rate": 6.486830015932816e-06,
      "loss": 0.1487,
      "step": 14458
    },
    {
      "epoch": 0.4218157418752553,
      "grad_norm": 0.8129103123432081,
      "learning_rate": 6.486378946378222e-06,
      "loss": 0.1182,
      "step": 14459
    },
    {
      "epoch": 0.42184491510589883,
      "grad_norm": 0.8407381672554703,
      "learning_rate": 6.485927863553595e-06,
      "loss": 0.1509,
      "step": 14460
    },
    {
      "epoch": 0.4218740883365424,
      "grad_norm": 0.7697383827971408,
      "learning_rate": 6.485476767462958e-06,
      "loss": 0.1277,
      "step": 14461
    },
    {
      "epoch": 0.42190326156718594,
      "grad_norm": 0.7826646804597619,
      "learning_rate": 6.485025658110337e-06,
      "loss": 0.1479,
      "step": 14462
    },
    {
      "epoch": 0.4219324347978295,
      "grad_norm": 1.0353386333869485,
      "learning_rate": 6.484574535499766e-06,
      "loss": 0.1557,
      "step": 14463
    },
    {
      "epoch": 0.4219616080284731,
      "grad_norm": 0.8493185436058729,
      "learning_rate": 6.484123399635264e-06,
      "loss": 0.1557,
      "step": 14464
    },
    {
      "epoch": 0.42199078125911665,
      "grad_norm": 0.8082244009837359,
      "learning_rate": 6.483672250520863e-06,
      "loss": 0.1512,
      "step": 14465
    },
    {
      "epoch": 0.4220199544897602,
      "grad_norm": 1.1399350390091791,
      "learning_rate": 6.483221088160592e-06,
      "loss": 0.1488,
      "step": 14466
    },
    {
      "epoch": 0.42204912772040376,
      "grad_norm": 1.1131730149976795,
      "learning_rate": 6.482769912558475e-06,
      "loss": 0.1312,
      "step": 14467
    },
    {
      "epoch": 0.4220783009510473,
      "grad_norm": 0.8989369341205465,
      "learning_rate": 6.482318723718544e-06,
      "loss": 0.1116,
      "step": 14468
    },
    {
      "epoch": 0.42210747418169087,
      "grad_norm": 0.901983020841205,
      "learning_rate": 6.481867521644825e-06,
      "loss": 0.1527,
      "step": 14469
    },
    {
      "epoch": 0.4221366474123344,
      "grad_norm": 1.1364380199037103,
      "learning_rate": 6.481416306341346e-06,
      "loss": 0.161,
      "step": 14470
    },
    {
      "epoch": 0.42216582064297803,
      "grad_norm": 0.8151538578658158,
      "learning_rate": 6.480965077812136e-06,
      "loss": 0.1235,
      "step": 14471
    },
    {
      "epoch": 0.4221949938736216,
      "grad_norm": 0.7338103446092081,
      "learning_rate": 6.480513836061223e-06,
      "loss": 0.1301,
      "step": 14472
    },
    {
      "epoch": 0.42222416710426514,
      "grad_norm": 0.7493153678674694,
      "learning_rate": 6.480062581092638e-06,
      "loss": 0.1287,
      "step": 14473
    },
    {
      "epoch": 0.4222533403349087,
      "grad_norm": 0.9227149055644112,
      "learning_rate": 6.479611312910405e-06,
      "loss": 0.1377,
      "step": 14474
    },
    {
      "epoch": 0.42228251356555224,
      "grad_norm": 1.029601597535191,
      "learning_rate": 6.479160031518555e-06,
      "loss": 0.1528,
      "step": 14475
    },
    {
      "epoch": 0.4223116867961958,
      "grad_norm": 0.8191931311640929,
      "learning_rate": 6.47870873692112e-06,
      "loss": 0.1482,
      "step": 14476
    },
    {
      "epoch": 0.42234086002683935,
      "grad_norm": 0.7787834985340119,
      "learning_rate": 6.4782574291221234e-06,
      "loss": 0.1396,
      "step": 14477
    },
    {
      "epoch": 0.42237003325748296,
      "grad_norm": 1.0113480106310186,
      "learning_rate": 6.4778061081256e-06,
      "loss": 0.1599,
      "step": 14478
    },
    {
      "epoch": 0.4223992064881265,
      "grad_norm": 0.8803704920654022,
      "learning_rate": 6.477354773935576e-06,
      "loss": 0.131,
      "step": 14479
    },
    {
      "epoch": 0.42242837971877006,
      "grad_norm": 0.8194151344060884,
      "learning_rate": 6.476903426556079e-06,
      "loss": 0.1369,
      "step": 14480
    },
    {
      "epoch": 0.4224575529494136,
      "grad_norm": 1.0973736802943928,
      "learning_rate": 6.4764520659911436e-06,
      "loss": 0.1289,
      "step": 14481
    },
    {
      "epoch": 0.42248672618005717,
      "grad_norm": 0.7909811108443876,
      "learning_rate": 6.476000692244795e-06,
      "loss": 0.1429,
      "step": 14482
    },
    {
      "epoch": 0.4225158994107007,
      "grad_norm": 0.8807801299318434,
      "learning_rate": 6.475549305321065e-06,
      "loss": 0.1467,
      "step": 14483
    },
    {
      "epoch": 0.4225450726413443,
      "grad_norm": 0.8934675700711427,
      "learning_rate": 6.475097905223984e-06,
      "loss": 0.1458,
      "step": 14484
    },
    {
      "epoch": 0.4225742458719879,
      "grad_norm": 1.0066656807002248,
      "learning_rate": 6.474646491957579e-06,
      "loss": 0.1418,
      "step": 14485
    },
    {
      "epoch": 0.42260341910263144,
      "grad_norm": 0.7798560813803963,
      "learning_rate": 6.474195065525884e-06,
      "loss": 0.1267,
      "step": 14486
    },
    {
      "epoch": 0.422632592333275,
      "grad_norm": 0.8455626917926337,
      "learning_rate": 6.473743625932926e-06,
      "loss": 0.1555,
      "step": 14487
    },
    {
      "epoch": 0.42266176556391855,
      "grad_norm": 1.2057873204756218,
      "learning_rate": 6.473292173182738e-06,
      "loss": 0.1396,
      "step": 14488
    },
    {
      "epoch": 0.4226909387945621,
      "grad_norm": 0.7783230851033105,
      "learning_rate": 6.472840707279348e-06,
      "loss": 0.1434,
      "step": 14489
    },
    {
      "epoch": 0.42272011202520565,
      "grad_norm": 0.9383578860812518,
      "learning_rate": 6.4723892282267875e-06,
      "loss": 0.1389,
      "step": 14490
    },
    {
      "epoch": 0.4227492852558492,
      "grad_norm": 0.9187106361483238,
      "learning_rate": 6.47193773602909e-06,
      "loss": 0.139,
      "step": 14491
    },
    {
      "epoch": 0.4227784584864928,
      "grad_norm": 1.0385874121281264,
      "learning_rate": 6.47148623069028e-06,
      "loss": 0.1322,
      "step": 14492
    },
    {
      "epoch": 0.42280763171713637,
      "grad_norm": 1.0229260498219677,
      "learning_rate": 6.471034712214396e-06,
      "loss": 0.1401,
      "step": 14493
    },
    {
      "epoch": 0.4228368049477799,
      "grad_norm": 0.7925077642592967,
      "learning_rate": 6.470583180605463e-06,
      "loss": 0.1258,
      "step": 14494
    },
    {
      "epoch": 0.4228659781784235,
      "grad_norm": 1.0985345433187705,
      "learning_rate": 6.470131635867515e-06,
      "loss": 0.1682,
      "step": 14495
    },
    {
      "epoch": 0.422895151409067,
      "grad_norm": 0.9135708400307052,
      "learning_rate": 6.4696800780045825e-06,
      "loss": 0.1395,
      "step": 14496
    },
    {
      "epoch": 0.4229243246397106,
      "grad_norm": 1.0214609418602645,
      "learning_rate": 6.469228507020697e-06,
      "loss": 0.1427,
      "step": 14497
    },
    {
      "epoch": 0.4229534978703542,
      "grad_norm": 2.079836989571655,
      "learning_rate": 6.46877692291989e-06,
      "loss": 0.1438,
      "step": 14498
    },
    {
      "epoch": 0.42298267110099774,
      "grad_norm": 0.9745922787004977,
      "learning_rate": 6.468325325706194e-06,
      "loss": 0.1397,
      "step": 14499
    },
    {
      "epoch": 0.4230118443316413,
      "grad_norm": 0.8420242500977787,
      "learning_rate": 6.467873715383639e-06,
      "loss": 0.1297,
      "step": 14500
    },
    {
      "epoch": 0.42304101756228485,
      "grad_norm": 0.9924336625137476,
      "learning_rate": 6.4674220919562594e-06,
      "loss": 0.1465,
      "step": 14501
    },
    {
      "epoch": 0.4230701907929284,
      "grad_norm": 0.9787686309765826,
      "learning_rate": 6.466970455428085e-06,
      "loss": 0.1493,
      "step": 14502
    },
    {
      "epoch": 0.42309936402357196,
      "grad_norm": 0.91428197702099,
      "learning_rate": 6.466518805803148e-06,
      "loss": 0.161,
      "step": 14503
    },
    {
      "epoch": 0.4231285372542155,
      "grad_norm": 1.0981507514878934,
      "learning_rate": 6.466067143085481e-06,
      "loss": 0.1687,
      "step": 14504
    },
    {
      "epoch": 0.4231577104848591,
      "grad_norm": 0.8314098051543142,
      "learning_rate": 6.465615467279116e-06,
      "loss": 0.159,
      "step": 14505
    },
    {
      "epoch": 0.42318688371550267,
      "grad_norm": 0.9551817398368962,
      "learning_rate": 6.4651637783880885e-06,
      "loss": 0.141,
      "step": 14506
    },
    {
      "epoch": 0.4232160569461462,
      "grad_norm": 0.6840919919862214,
      "learning_rate": 6.464712076416426e-06,
      "loss": 0.113,
      "step": 14507
    },
    {
      "epoch": 0.4232452301767898,
      "grad_norm": 0.7784800829644861,
      "learning_rate": 6.464260361368165e-06,
      "loss": 0.1543,
      "step": 14508
    },
    {
      "epoch": 0.42327440340743333,
      "grad_norm": 1.1386175511507357,
      "learning_rate": 6.463808633247337e-06,
      "loss": 0.1512,
      "step": 14509
    },
    {
      "epoch": 0.4233035766380769,
      "grad_norm": 0.8722414067316532,
      "learning_rate": 6.463356892057975e-06,
      "loss": 0.1558,
      "step": 14510
    },
    {
      "epoch": 0.42333274986872044,
      "grad_norm": 0.7786503128579964,
      "learning_rate": 6.462905137804112e-06,
      "loss": 0.1371,
      "step": 14511
    },
    {
      "epoch": 0.42336192309936405,
      "grad_norm": 0.9231178955877767,
      "learning_rate": 6.462453370489781e-06,
      "loss": 0.1693,
      "step": 14512
    },
    {
      "epoch": 0.4233910963300076,
      "grad_norm": 1.0801509375056284,
      "learning_rate": 6.462001590119015e-06,
      "loss": 0.1686,
      "step": 14513
    },
    {
      "epoch": 0.42342026956065115,
      "grad_norm": 0.9346433310084615,
      "learning_rate": 6.461549796695847e-06,
      "loss": 0.1345,
      "step": 14514
    },
    {
      "epoch": 0.4234494427912947,
      "grad_norm": 0.9262236702603318,
      "learning_rate": 6.461097990224313e-06,
      "loss": 0.1633,
      "step": 14515
    },
    {
      "epoch": 0.42347861602193826,
      "grad_norm": 0.8092654480157292,
      "learning_rate": 6.460646170708445e-06,
      "loss": 0.1399,
      "step": 14516
    },
    {
      "epoch": 0.4235077892525818,
      "grad_norm": 0.9282686039633216,
      "learning_rate": 6.460194338152276e-06,
      "loss": 0.1293,
      "step": 14517
    },
    {
      "epoch": 0.42353696248322537,
      "grad_norm": 0.7488744299677202,
      "learning_rate": 6.459742492559842e-06,
      "loss": 0.1208,
      "step": 14518
    },
    {
      "epoch": 0.423566135713869,
      "grad_norm": 0.919009376017385,
      "learning_rate": 6.459290633935172e-06,
      "loss": 0.1464,
      "step": 14519
    },
    {
      "epoch": 0.4235953089445125,
      "grad_norm": 0.8989568180018619,
      "learning_rate": 6.458838762282306e-06,
      "loss": 0.1453,
      "step": 14520
    },
    {
      "epoch": 0.4236244821751561,
      "grad_norm": 0.7095194367777338,
      "learning_rate": 6.458386877605276e-06,
      "loss": 0.1225,
      "step": 14521
    },
    {
      "epoch": 0.42365365540579963,
      "grad_norm": 0.8215149469125433,
      "learning_rate": 6.457934979908115e-06,
      "loss": 0.1393,
      "step": 14522
    },
    {
      "epoch": 0.4236828286364432,
      "grad_norm": 0.7256366165278979,
      "learning_rate": 6.45748306919486e-06,
      "loss": 0.1368,
      "step": 14523
    },
    {
      "epoch": 0.42371200186708674,
      "grad_norm": 0.7054796777177985,
      "learning_rate": 6.457031145469543e-06,
      "loss": 0.1318,
      "step": 14524
    },
    {
      "epoch": 0.42374117509773035,
      "grad_norm": 0.8383542805823619,
      "learning_rate": 6.4565792087362e-06,
      "loss": 0.1462,
      "step": 14525
    },
    {
      "epoch": 0.4237703483283739,
      "grad_norm": 0.6398447224104609,
      "learning_rate": 6.456127258998866e-06,
      "loss": 0.1517,
      "step": 14526
    },
    {
      "epoch": 0.42379952155901746,
      "grad_norm": 0.7920031033802036,
      "learning_rate": 6.455675296261574e-06,
      "loss": 0.1441,
      "step": 14527
    },
    {
      "epoch": 0.423828694789661,
      "grad_norm": 0.8937732273513436,
      "learning_rate": 6.455223320528361e-06,
      "loss": 0.1362,
      "step": 14528
    },
    {
      "epoch": 0.42385786802030456,
      "grad_norm": 0.9071346261385165,
      "learning_rate": 6.454771331803262e-06,
      "loss": 0.1426,
      "step": 14529
    },
    {
      "epoch": 0.4238870412509481,
      "grad_norm": 0.734115501364661,
      "learning_rate": 6.454319330090313e-06,
      "loss": 0.1337,
      "step": 14530
    },
    {
      "epoch": 0.42391621448159167,
      "grad_norm": 0.9098881502519791,
      "learning_rate": 6.453867315393546e-06,
      "loss": 0.1339,
      "step": 14531
    },
    {
      "epoch": 0.4239453877122353,
      "grad_norm": 1.0463011771926354,
      "learning_rate": 6.453415287717e-06,
      "loss": 0.11,
      "step": 14532
    },
    {
      "epoch": 0.42397456094287883,
      "grad_norm": 0.8406661020750411,
      "learning_rate": 6.45296324706471e-06,
      "loss": 0.1252,
      "step": 14533
    },
    {
      "epoch": 0.4240037341735224,
      "grad_norm": 0.7746094591515468,
      "learning_rate": 6.452511193440708e-06,
      "loss": 0.1392,
      "step": 14534
    },
    {
      "epoch": 0.42403290740416594,
      "grad_norm": 1.0186412532074003,
      "learning_rate": 6.452059126849035e-06,
      "loss": 0.1379,
      "step": 14535
    },
    {
      "epoch": 0.4240620806348095,
      "grad_norm": 0.9899846271410743,
      "learning_rate": 6.451607047293726e-06,
      "loss": 0.1355,
      "step": 14536
    },
    {
      "epoch": 0.42409125386545304,
      "grad_norm": 0.7079457275795795,
      "learning_rate": 6.451154954778813e-06,
      "loss": 0.1294,
      "step": 14537
    },
    {
      "epoch": 0.4241204270960966,
      "grad_norm": 0.8899821635349057,
      "learning_rate": 6.4507028493083365e-06,
      "loss": 0.1384,
      "step": 14538
    },
    {
      "epoch": 0.4241496003267402,
      "grad_norm": 1.3216708054820439,
      "learning_rate": 6.4502507308863316e-06,
      "loss": 0.1489,
      "step": 14539
    },
    {
      "epoch": 0.42417877355738376,
      "grad_norm": 0.7976546901755325,
      "learning_rate": 6.449798599516833e-06,
      "loss": 0.1283,
      "step": 14540
    },
    {
      "epoch": 0.4242079467880273,
      "grad_norm": 0.8192831887327774,
      "learning_rate": 6.44934645520388e-06,
      "loss": 0.1446,
      "step": 14541
    },
    {
      "epoch": 0.42423712001867087,
      "grad_norm": 1.0154037535246627,
      "learning_rate": 6.448894297951507e-06,
      "loss": 0.1393,
      "step": 14542
    },
    {
      "epoch": 0.4242662932493144,
      "grad_norm": 0.8159730911855505,
      "learning_rate": 6.448442127763752e-06,
      "loss": 0.1407,
      "step": 14543
    },
    {
      "epoch": 0.424295466479958,
      "grad_norm": 1.1149308617173088,
      "learning_rate": 6.447989944644651e-06,
      "loss": 0.1503,
      "step": 14544
    },
    {
      "epoch": 0.4243246397106015,
      "grad_norm": 0.6467155633479809,
      "learning_rate": 6.447537748598241e-06,
      "loss": 0.1495,
      "step": 14545
    },
    {
      "epoch": 0.42435381294124513,
      "grad_norm": 1.0370129458736717,
      "learning_rate": 6.447085539628562e-06,
      "loss": 0.141,
      "step": 14546
    },
    {
      "epoch": 0.4243829861718887,
      "grad_norm": 0.8784085698016646,
      "learning_rate": 6.446633317739646e-06,
      "loss": 0.1542,
      "step": 14547
    },
    {
      "epoch": 0.42441215940253224,
      "grad_norm": 0.8093690792754681,
      "learning_rate": 6.446181082935534e-06,
      "loss": 0.1434,
      "step": 14548
    },
    {
      "epoch": 0.4244413326331758,
      "grad_norm": 1.4952964465652518,
      "learning_rate": 6.445728835220262e-06,
      "loss": 0.1478,
      "step": 14549
    },
    {
      "epoch": 0.42447050586381935,
      "grad_norm": 0.6586631728253155,
      "learning_rate": 6.44527657459787e-06,
      "loss": 0.1148,
      "step": 14550
    },
    {
      "epoch": 0.4244996790944629,
      "grad_norm": 0.7988990831046745,
      "learning_rate": 6.444824301072391e-06,
      "loss": 0.1425,
      "step": 14551
    },
    {
      "epoch": 0.4245288523251065,
      "grad_norm": 0.7756335620708912,
      "learning_rate": 6.4443720146478675e-06,
      "loss": 0.149,
      "step": 14552
    },
    {
      "epoch": 0.42455802555575006,
      "grad_norm": 0.7363998507458365,
      "learning_rate": 6.443919715328336e-06,
      "loss": 0.1458,
      "step": 14553
    },
    {
      "epoch": 0.4245871987863936,
      "grad_norm": 0.8815222354786677,
      "learning_rate": 6.4434674031178314e-06,
      "loss": 0.1527,
      "step": 14554
    },
    {
      "epoch": 0.42461637201703717,
      "grad_norm": 0.924317890620181,
      "learning_rate": 6.443015078020397e-06,
      "loss": 0.1362,
      "step": 14555
    },
    {
      "epoch": 0.4246455452476807,
      "grad_norm": 0.6482306932868144,
      "learning_rate": 6.442562740040067e-06,
      "loss": 0.1226,
      "step": 14556
    },
    {
      "epoch": 0.4246747184783243,
      "grad_norm": 0.8306777726432536,
      "learning_rate": 6.442110389180881e-06,
      "loss": 0.1348,
      "step": 14557
    },
    {
      "epoch": 0.42470389170896783,
      "grad_norm": 0.8132825556688807,
      "learning_rate": 6.4416580254468795e-06,
      "loss": 0.1212,
      "step": 14558
    },
    {
      "epoch": 0.42473306493961144,
      "grad_norm": 0.8170276126492356,
      "learning_rate": 6.441205648842097e-06,
      "loss": 0.1524,
      "step": 14559
    },
    {
      "epoch": 0.424762238170255,
      "grad_norm": 0.8744322554012237,
      "learning_rate": 6.440753259370575e-06,
      "loss": 0.1311,
      "step": 14560
    },
    {
      "epoch": 0.42479141140089854,
      "grad_norm": 1.0156791192303587,
      "learning_rate": 6.440300857036354e-06,
      "loss": 0.1267,
      "step": 14561
    },
    {
      "epoch": 0.4248205846315421,
      "grad_norm": 0.977216342020889,
      "learning_rate": 6.439848441843469e-06,
      "loss": 0.1473,
      "step": 14562
    },
    {
      "epoch": 0.42484975786218565,
      "grad_norm": 0.9484923266452084,
      "learning_rate": 6.439396013795961e-06,
      "loss": 0.1433,
      "step": 14563
    },
    {
      "epoch": 0.4248789310928292,
      "grad_norm": 0.9721265502642883,
      "learning_rate": 6.438943572897869e-06,
      "loss": 0.1459,
      "step": 14564
    },
    {
      "epoch": 0.42490810432347276,
      "grad_norm": 0.8066516855911635,
      "learning_rate": 6.4384911191532316e-06,
      "loss": 0.169,
      "step": 14565
    },
    {
      "epoch": 0.42493727755411637,
      "grad_norm": 1.0033952960392376,
      "learning_rate": 6.43803865256609e-06,
      "loss": 0.1259,
      "step": 14566
    },
    {
      "epoch": 0.4249664507847599,
      "grad_norm": 1.0417315083259715,
      "learning_rate": 6.437586173140482e-06,
      "loss": 0.165,
      "step": 14567
    },
    {
      "epoch": 0.4249956240154035,
      "grad_norm": 0.9037656702375,
      "learning_rate": 6.43713368088045e-06,
      "loss": 0.1449,
      "step": 14568
    },
    {
      "epoch": 0.425024797246047,
      "grad_norm": 0.7885990323875087,
      "learning_rate": 6.436681175790028e-06,
      "loss": 0.1257,
      "step": 14569
    },
    {
      "epoch": 0.4250539704766906,
      "grad_norm": 0.8877062285620676,
      "learning_rate": 6.4362286578732626e-06,
      "loss": 0.1671,
      "step": 14570
    },
    {
      "epoch": 0.42508314370733413,
      "grad_norm": 0.9317443602253012,
      "learning_rate": 6.4357761271341876e-06,
      "loss": 0.1491,
      "step": 14571
    },
    {
      "epoch": 0.4251123169379777,
      "grad_norm": 1.0402581857014654,
      "learning_rate": 6.435323583576847e-06,
      "loss": 0.1796,
      "step": 14572
    },
    {
      "epoch": 0.4251414901686213,
      "grad_norm": 0.9707473246841538,
      "learning_rate": 6.434871027205282e-06,
      "loss": 0.1152,
      "step": 14573
    },
    {
      "epoch": 0.42517066339926485,
      "grad_norm": 0.8164273297113386,
      "learning_rate": 6.434418458023529e-06,
      "loss": 0.1311,
      "step": 14574
    },
    {
      "epoch": 0.4251998366299084,
      "grad_norm": 0.8475827553944394,
      "learning_rate": 6.433965876035631e-06,
      "loss": 0.1492,
      "step": 14575
    },
    {
      "epoch": 0.42522900986055195,
      "grad_norm": 0.7173770494645649,
      "learning_rate": 6.433513281245628e-06,
      "loss": 0.1274,
      "step": 14576
    },
    {
      "epoch": 0.4252581830911955,
      "grad_norm": 0.8127496231813877,
      "learning_rate": 6.43306067365756e-06,
      "loss": 0.1228,
      "step": 14577
    },
    {
      "epoch": 0.42528735632183906,
      "grad_norm": 0.6928186794485005,
      "learning_rate": 6.43260805327547e-06,
      "loss": 0.1198,
      "step": 14578
    },
    {
      "epoch": 0.42531652955248267,
      "grad_norm": 0.7726298241645552,
      "learning_rate": 6.432155420103396e-06,
      "loss": 0.1304,
      "step": 14579
    },
    {
      "epoch": 0.4253457027831262,
      "grad_norm": 0.7400268183665162,
      "learning_rate": 6.431702774145381e-06,
      "loss": 0.1569,
      "step": 14580
    },
    {
      "epoch": 0.4253748760137698,
      "grad_norm": 0.9906871144526957,
      "learning_rate": 6.4312501154054655e-06,
      "loss": 0.1326,
      "step": 14581
    },
    {
      "epoch": 0.42540404924441333,
      "grad_norm": 0.7263777764166814,
      "learning_rate": 6.430797443887689e-06,
      "loss": 0.132,
      "step": 14582
    },
    {
      "epoch": 0.4254332224750569,
      "grad_norm": 0.7966484002152375,
      "learning_rate": 6.430344759596096e-06,
      "loss": 0.1349,
      "step": 14583
    },
    {
      "epoch": 0.42546239570570044,
      "grad_norm": 0.8534298329644914,
      "learning_rate": 6.429892062534726e-06,
      "loss": 0.15,
      "step": 14584
    },
    {
      "epoch": 0.425491568936344,
      "grad_norm": 1.0236583563235566,
      "learning_rate": 6.429439352707623e-06,
      "loss": 0.1386,
      "step": 14585
    },
    {
      "epoch": 0.4255207421669876,
      "grad_norm": 0.7415576548091419,
      "learning_rate": 6.428986630118824e-06,
      "loss": 0.1505,
      "step": 14586
    },
    {
      "epoch": 0.42554991539763115,
      "grad_norm": 0.7181493695752493,
      "learning_rate": 6.428533894772373e-06,
      "loss": 0.133,
      "step": 14587
    },
    {
      "epoch": 0.4255790886282747,
      "grad_norm": 0.7921312425054876,
      "learning_rate": 6.428081146672315e-06,
      "loss": 0.1403,
      "step": 14588
    },
    {
      "epoch": 0.42560826185891826,
      "grad_norm": 0.8902890918129877,
      "learning_rate": 6.427628385822688e-06,
      "loss": 0.1396,
      "step": 14589
    },
    {
      "epoch": 0.4256374350895618,
      "grad_norm": 0.8471636344004445,
      "learning_rate": 6.427175612227535e-06,
      "loss": 0.1617,
      "step": 14590
    },
    {
      "epoch": 0.42566660832020536,
      "grad_norm": 0.9145104986876964,
      "learning_rate": 6.4267228258909e-06,
      "loss": 0.1448,
      "step": 14591
    },
    {
      "epoch": 0.4256957815508489,
      "grad_norm": 1.274479869068229,
      "learning_rate": 6.426270026816824e-06,
      "loss": 0.1206,
      "step": 14592
    },
    {
      "epoch": 0.4257249547814925,
      "grad_norm": 0.6782055025083349,
      "learning_rate": 6.425817215009349e-06,
      "loss": 0.1352,
      "step": 14593
    },
    {
      "epoch": 0.4257541280121361,
      "grad_norm": 0.8963187453287486,
      "learning_rate": 6.425364390472518e-06,
      "loss": 0.1496,
      "step": 14594
    },
    {
      "epoch": 0.42578330124277963,
      "grad_norm": 0.7295762114599001,
      "learning_rate": 6.424911553210376e-06,
      "loss": 0.1437,
      "step": 14595
    },
    {
      "epoch": 0.4258124744734232,
      "grad_norm": 0.7534925172133695,
      "learning_rate": 6.4244587032269615e-06,
      "loss": 0.122,
      "step": 14596
    },
    {
      "epoch": 0.42584164770406674,
      "grad_norm": 0.7328190284111709,
      "learning_rate": 6.424005840526321e-06,
      "loss": 0.1509,
      "step": 14597
    },
    {
      "epoch": 0.4258708209347103,
      "grad_norm": 0.7813073881683321,
      "learning_rate": 6.423552965112496e-06,
      "loss": 0.1511,
      "step": 14598
    },
    {
      "epoch": 0.42589999416535385,
      "grad_norm": 0.9991050549661694,
      "learning_rate": 6.42310007698953e-06,
      "loss": 0.1505,
      "step": 14599
    },
    {
      "epoch": 0.42592916739599745,
      "grad_norm": 0.8524195923266077,
      "learning_rate": 6.4226471761614675e-06,
      "loss": 0.1314,
      "step": 14600
    },
    {
      "epoch": 0.425958340626641,
      "grad_norm": 0.9428984302763539,
      "learning_rate": 6.422194262632349e-06,
      "loss": 0.1393,
      "step": 14601
    },
    {
      "epoch": 0.42598751385728456,
      "grad_norm": 0.7506115387752303,
      "learning_rate": 6.421741336406218e-06,
      "loss": 0.1408,
      "step": 14602
    },
    {
      "epoch": 0.4260166870879281,
      "grad_norm": 0.8425262154277137,
      "learning_rate": 6.4212883974871236e-06,
      "loss": 0.1451,
      "step": 14603
    },
    {
      "epoch": 0.42604586031857167,
      "grad_norm": 0.8511437934435212,
      "learning_rate": 6.4208354458791035e-06,
      "loss": 0.1551,
      "step": 14604
    },
    {
      "epoch": 0.4260750335492152,
      "grad_norm": 0.881979714026643,
      "learning_rate": 6.420382481586203e-06,
      "loss": 0.1641,
      "step": 14605
    },
    {
      "epoch": 0.4261042067798588,
      "grad_norm": 0.9978972110053544,
      "learning_rate": 6.419929504612469e-06,
      "loss": 0.1662,
      "step": 14606
    },
    {
      "epoch": 0.4261333800105024,
      "grad_norm": 0.8786849002723329,
      "learning_rate": 6.419476514961942e-06,
      "loss": 0.1579,
      "step": 14607
    },
    {
      "epoch": 0.42616255324114594,
      "grad_norm": 1.0140316415914195,
      "learning_rate": 6.419023512638667e-06,
      "loss": 0.1448,
      "step": 14608
    },
    {
      "epoch": 0.4261917264717895,
      "grad_norm": 0.9005428459177359,
      "learning_rate": 6.41857049764669e-06,
      "loss": 0.1406,
      "step": 14609
    },
    {
      "epoch": 0.42622089970243304,
      "grad_norm": 0.7864340508166872,
      "learning_rate": 6.418117469990053e-06,
      "loss": 0.1507,
      "step": 14610
    },
    {
      "epoch": 0.4262500729330766,
      "grad_norm": 0.8805637404277392,
      "learning_rate": 6.417664429672803e-06,
      "loss": 0.1246,
      "step": 14611
    },
    {
      "epoch": 0.42627924616372015,
      "grad_norm": 0.8653322469341848,
      "learning_rate": 6.417211376698982e-06,
      "loss": 0.157,
      "step": 14612
    },
    {
      "epoch": 0.42630841939436376,
      "grad_norm": 0.7611503716150947,
      "learning_rate": 6.416758311072638e-06,
      "loss": 0.1357,
      "step": 14613
    },
    {
      "epoch": 0.4263375926250073,
      "grad_norm": 0.7678749486288232,
      "learning_rate": 6.416305232797813e-06,
      "loss": 0.1477,
      "step": 14614
    },
    {
      "epoch": 0.42636676585565086,
      "grad_norm": 0.7823079024797566,
      "learning_rate": 6.415852141878553e-06,
      "loss": 0.1613,
      "step": 14615
    },
    {
      "epoch": 0.4263959390862944,
      "grad_norm": 0.8640111274829947,
      "learning_rate": 6.415399038318903e-06,
      "loss": 0.1425,
      "step": 14616
    },
    {
      "epoch": 0.42642511231693797,
      "grad_norm": 0.9216988519330479,
      "learning_rate": 6.414945922122908e-06,
      "loss": 0.1532,
      "step": 14617
    },
    {
      "epoch": 0.4264542855475815,
      "grad_norm": 0.791367072581288,
      "learning_rate": 6.414492793294615e-06,
      "loss": 0.1167,
      "step": 14618
    },
    {
      "epoch": 0.4264834587782251,
      "grad_norm": 0.879186370276052,
      "learning_rate": 6.414039651838066e-06,
      "loss": 0.1591,
      "step": 14619
    },
    {
      "epoch": 0.4265126320088687,
      "grad_norm": 0.976453014064819,
      "learning_rate": 6.41358649775731e-06,
      "loss": 0.1443,
      "step": 14620
    },
    {
      "epoch": 0.42654180523951224,
      "grad_norm": 0.7046471432503829,
      "learning_rate": 6.413133331056391e-06,
      "loss": 0.1367,
      "step": 14621
    },
    {
      "epoch": 0.4265709784701558,
      "grad_norm": 1.0266429249007463,
      "learning_rate": 6.412680151739354e-06,
      "loss": 0.1399,
      "step": 14622
    },
    {
      "epoch": 0.42660015170079935,
      "grad_norm": 0.7657002313823391,
      "learning_rate": 6.412226959810246e-06,
      "loss": 0.1357,
      "step": 14623
    },
    {
      "epoch": 0.4266293249314429,
      "grad_norm": 0.9034638933677264,
      "learning_rate": 6.411773755273114e-06,
      "loss": 0.126,
      "step": 14624
    },
    {
      "epoch": 0.42665849816208645,
      "grad_norm": 0.7096191941575555,
      "learning_rate": 6.411320538132002e-06,
      "loss": 0.1174,
      "step": 14625
    },
    {
      "epoch": 0.42668767139273,
      "grad_norm": 0.6205600714182188,
      "learning_rate": 6.410867308390958e-06,
      "loss": 0.1377,
      "step": 14626
    },
    {
      "epoch": 0.4267168446233736,
      "grad_norm": 0.830338251694489,
      "learning_rate": 6.410414066054026e-06,
      "loss": 0.1359,
      "step": 14627
    },
    {
      "epoch": 0.42674601785401717,
      "grad_norm": 0.8140714428121494,
      "learning_rate": 6.409960811125256e-06,
      "loss": 0.1316,
      "step": 14628
    },
    {
      "epoch": 0.4267751910846607,
      "grad_norm": 1.0873122870007383,
      "learning_rate": 6.40950754360869e-06,
      "loss": 0.1445,
      "step": 14629
    },
    {
      "epoch": 0.4268043643153043,
      "grad_norm": 0.7581253703358832,
      "learning_rate": 6.40905426350838e-06,
      "loss": 0.132,
      "step": 14630
    },
    {
      "epoch": 0.4268335375459478,
      "grad_norm": 0.8040443130093091,
      "learning_rate": 6.408600970828367e-06,
      "loss": 0.1509,
      "step": 14631
    },
    {
      "epoch": 0.4268627107765914,
      "grad_norm": 0.8831884676715346,
      "learning_rate": 6.408147665572701e-06,
      "loss": 0.1342,
      "step": 14632
    },
    {
      "epoch": 0.42689188400723493,
      "grad_norm": 0.9123740565031518,
      "learning_rate": 6.407694347745431e-06,
      "loss": 0.1393,
      "step": 14633
    },
    {
      "epoch": 0.42692105723787854,
      "grad_norm": 2.6416471712195997,
      "learning_rate": 6.407241017350601e-06,
      "loss": 0.1462,
      "step": 14634
    },
    {
      "epoch": 0.4269502304685221,
      "grad_norm": 0.8061560738824025,
      "learning_rate": 6.406787674392259e-06,
      "loss": 0.1611,
      "step": 14635
    },
    {
      "epoch": 0.42697940369916565,
      "grad_norm": 0.9363533775038286,
      "learning_rate": 6.406334318874452e-06,
      "loss": 0.1451,
      "step": 14636
    },
    {
      "epoch": 0.4270085769298092,
      "grad_norm": 0.867880207892632,
      "learning_rate": 6.4058809508012285e-06,
      "loss": 0.1448,
      "step": 14637
    },
    {
      "epoch": 0.42703775016045276,
      "grad_norm": 0.8735382344438637,
      "learning_rate": 6.405427570176635e-06,
      "loss": 0.1204,
      "step": 14638
    },
    {
      "epoch": 0.4270669233910963,
      "grad_norm": 0.9445295126908109,
      "learning_rate": 6.40497417700472e-06,
      "loss": 0.145,
      "step": 14639
    },
    {
      "epoch": 0.4270960966217399,
      "grad_norm": 0.8897433233589293,
      "learning_rate": 6.404520771289531e-06,
      "loss": 0.1316,
      "step": 14640
    },
    {
      "epoch": 0.42712526985238347,
      "grad_norm": 1.2586564701146228,
      "learning_rate": 6.404067353035115e-06,
      "loss": 0.1382,
      "step": 14641
    },
    {
      "epoch": 0.427154443083027,
      "grad_norm": 0.9972580081860094,
      "learning_rate": 6.403613922245522e-06,
      "loss": 0.1609,
      "step": 14642
    },
    {
      "epoch": 0.4271836163136706,
      "grad_norm": 0.8452344557746346,
      "learning_rate": 6.403160478924799e-06,
      "loss": 0.1524,
      "step": 14643
    },
    {
      "epoch": 0.42721278954431413,
      "grad_norm": 1.3583397959402321,
      "learning_rate": 6.402707023076993e-06,
      "loss": 0.1462,
      "step": 14644
    },
    {
      "epoch": 0.4272419627749577,
      "grad_norm": 1.063121484670113,
      "learning_rate": 6.402253554706155e-06,
      "loss": 0.1483,
      "step": 14645
    },
    {
      "epoch": 0.42727113600560124,
      "grad_norm": 0.8858529375043536,
      "learning_rate": 6.401800073816331e-06,
      "loss": 0.1359,
      "step": 14646
    },
    {
      "epoch": 0.42730030923624485,
      "grad_norm": 1.106632331475958,
      "learning_rate": 6.401346580411571e-06,
      "loss": 0.1426,
      "step": 14647
    },
    {
      "epoch": 0.4273294824668884,
      "grad_norm": 1.1434067015461586,
      "learning_rate": 6.400893074495923e-06,
      "loss": 0.1442,
      "step": 14648
    },
    {
      "epoch": 0.42735865569753195,
      "grad_norm": 0.961129580601906,
      "learning_rate": 6.4004395560734366e-06,
      "loss": 0.1307,
      "step": 14649
    },
    {
      "epoch": 0.4273878289281755,
      "grad_norm": 1.217218499848346,
      "learning_rate": 6.39998602514816e-06,
      "loss": 0.1172,
      "step": 14650
    },
    {
      "epoch": 0.42741700215881906,
      "grad_norm": 0.9946967120904634,
      "learning_rate": 6.399532481724142e-06,
      "loss": 0.1245,
      "step": 14651
    },
    {
      "epoch": 0.4274461753894626,
      "grad_norm": 0.7041848791125103,
      "learning_rate": 6.399078925805432e-06,
      "loss": 0.123,
      "step": 14652
    },
    {
      "epoch": 0.42747534862010617,
      "grad_norm": 1.0551097008424255,
      "learning_rate": 6.398625357396079e-06,
      "loss": 0.1329,
      "step": 14653
    },
    {
      "epoch": 0.4275045218507498,
      "grad_norm": 1.2890936070936188,
      "learning_rate": 6.398171776500132e-06,
      "loss": 0.1304,
      "step": 14654
    },
    {
      "epoch": 0.42753369508139333,
      "grad_norm": 0.7244104729640806,
      "learning_rate": 6.397718183121644e-06,
      "loss": 0.1334,
      "step": 14655
    },
    {
      "epoch": 0.4275628683120369,
      "grad_norm": 0.7180189347348244,
      "learning_rate": 6.397264577264659e-06,
      "loss": 0.1486,
      "step": 14656
    },
    {
      "epoch": 0.42759204154268043,
      "grad_norm": 0.7370659794570515,
      "learning_rate": 6.396810958933231e-06,
      "loss": 0.1218,
      "step": 14657
    },
    {
      "epoch": 0.427621214773324,
      "grad_norm": 1.2249162954026172,
      "learning_rate": 6.396357328131408e-06,
      "loss": 0.1288,
      "step": 14658
    },
    {
      "epoch": 0.42765038800396754,
      "grad_norm": 0.7801917318751426,
      "learning_rate": 6.3959036848632395e-06,
      "loss": 0.1374,
      "step": 14659
    },
    {
      "epoch": 0.4276795612346111,
      "grad_norm": 0.6327083136322517,
      "learning_rate": 6.395450029132777e-06,
      "loss": 0.1112,
      "step": 14660
    },
    {
      "epoch": 0.4277087344652547,
      "grad_norm": 0.9239410078834523,
      "learning_rate": 6.39499636094407e-06,
      "loss": 0.1371,
      "step": 14661
    },
    {
      "epoch": 0.42773790769589826,
      "grad_norm": 0.7501179303141055,
      "learning_rate": 6.394542680301165e-06,
      "loss": 0.1303,
      "step": 14662
    },
    {
      "epoch": 0.4277670809265418,
      "grad_norm": 0.7814820705795176,
      "learning_rate": 6.3940889872081205e-06,
      "loss": 0.1485,
      "step": 14663
    },
    {
      "epoch": 0.42779625415718536,
      "grad_norm": 0.7109996794925011,
      "learning_rate": 6.39363528166898e-06,
      "loss": 0.1244,
      "step": 14664
    },
    {
      "epoch": 0.4278254273878289,
      "grad_norm": 0.907689957657468,
      "learning_rate": 6.393181563687798e-06,
      "loss": 0.1306,
      "step": 14665
    },
    {
      "epoch": 0.42785460061847247,
      "grad_norm": 0.7765637906615048,
      "learning_rate": 6.3927278332686215e-06,
      "loss": 0.1283,
      "step": 14666
    },
    {
      "epoch": 0.4278837738491161,
      "grad_norm": 0.8711063592658128,
      "learning_rate": 6.392274090415505e-06,
      "loss": 0.1428,
      "step": 14667
    },
    {
      "epoch": 0.42791294707975963,
      "grad_norm": 0.9415437304505929,
      "learning_rate": 6.391820335132497e-06,
      "loss": 0.1452,
      "step": 14668
    },
    {
      "epoch": 0.4279421203104032,
      "grad_norm": 0.6453696198741923,
      "learning_rate": 6.391366567423649e-06,
      "loss": 0.1427,
      "step": 14669
    },
    {
      "epoch": 0.42797129354104674,
      "grad_norm": 1.0611456114369375,
      "learning_rate": 6.390912787293012e-06,
      "loss": 0.1602,
      "step": 14670
    },
    {
      "epoch": 0.4280004667716903,
      "grad_norm": 0.8196304221773512,
      "learning_rate": 6.390458994744638e-06,
      "loss": 0.131,
      "step": 14671
    },
    {
      "epoch": 0.42802964000233384,
      "grad_norm": 0.9389071162102353,
      "learning_rate": 6.390005189782579e-06,
      "loss": 0.1619,
      "step": 14672
    },
    {
      "epoch": 0.4280588132329774,
      "grad_norm": 0.920338543674002,
      "learning_rate": 6.389551372410886e-06,
      "loss": 0.1505,
      "step": 14673
    },
    {
      "epoch": 0.428087986463621,
      "grad_norm": 0.8812668029713562,
      "learning_rate": 6.389097542633608e-06,
      "loss": 0.1366,
      "step": 14674
    },
    {
      "epoch": 0.42811715969426456,
      "grad_norm": 0.8265439211426261,
      "learning_rate": 6.388643700454801e-06,
      "loss": 0.1411,
      "step": 14675
    },
    {
      "epoch": 0.4281463329249081,
      "grad_norm": 1.0629184186352492,
      "learning_rate": 6.388189845878513e-06,
      "loss": 0.1625,
      "step": 14676
    },
    {
      "epoch": 0.42817550615555167,
      "grad_norm": 0.9247386992323844,
      "learning_rate": 6.387735978908797e-06,
      "loss": 0.1404,
      "step": 14677
    },
    {
      "epoch": 0.4282046793861952,
      "grad_norm": 0.9480840655829462,
      "learning_rate": 6.387282099549707e-06,
      "loss": 0.1414,
      "step": 14678
    },
    {
      "epoch": 0.4282338526168388,
      "grad_norm": 0.6551231763411762,
      "learning_rate": 6.386828207805292e-06,
      "loss": 0.1297,
      "step": 14679
    },
    {
      "epoch": 0.4282630258474823,
      "grad_norm": 0.7151633617029399,
      "learning_rate": 6.386374303679607e-06,
      "loss": 0.1209,
      "step": 14680
    },
    {
      "epoch": 0.42829219907812593,
      "grad_norm": 1.0121783239850635,
      "learning_rate": 6.385920387176703e-06,
      "loss": 0.1431,
      "step": 14681
    },
    {
      "epoch": 0.4283213723087695,
      "grad_norm": 1.028630375965957,
      "learning_rate": 6.385466458300632e-06,
      "loss": 0.1454,
      "step": 14682
    },
    {
      "epoch": 0.42835054553941304,
      "grad_norm": 0.9145442814616266,
      "learning_rate": 6.385012517055448e-06,
      "loss": 0.1477,
      "step": 14683
    },
    {
      "epoch": 0.4283797187700566,
      "grad_norm": 0.8999939217874624,
      "learning_rate": 6.384558563445203e-06,
      "loss": 0.17,
      "step": 14684
    },
    {
      "epoch": 0.42840889200070015,
      "grad_norm": 0.8934297591872845,
      "learning_rate": 6.384104597473948e-06,
      "loss": 0.1426,
      "step": 14685
    },
    {
      "epoch": 0.4284380652313437,
      "grad_norm": 0.9772242512820827,
      "learning_rate": 6.383650619145738e-06,
      "loss": 0.1505,
      "step": 14686
    },
    {
      "epoch": 0.42846723846198725,
      "grad_norm": 0.7706815869965222,
      "learning_rate": 6.383196628464627e-06,
      "loss": 0.1528,
      "step": 14687
    },
    {
      "epoch": 0.42849641169263086,
      "grad_norm": 0.8005157520564964,
      "learning_rate": 6.382742625434667e-06,
      "loss": 0.1521,
      "step": 14688
    },
    {
      "epoch": 0.4285255849232744,
      "grad_norm": 1.0033328094096647,
      "learning_rate": 6.382288610059908e-06,
      "loss": 0.1391,
      "step": 14689
    },
    {
      "epoch": 0.42855475815391797,
      "grad_norm": 0.8924927853595284,
      "learning_rate": 6.3818345823444094e-06,
      "loss": 0.1515,
      "step": 14690
    },
    {
      "epoch": 0.4285839313845615,
      "grad_norm": 0.8280280209418701,
      "learning_rate": 6.38138054229222e-06,
      "loss": 0.1419,
      "step": 14691
    },
    {
      "epoch": 0.4286131046152051,
      "grad_norm": 0.8792603523785534,
      "learning_rate": 6.380926489907394e-06,
      "loss": 0.1614,
      "step": 14692
    },
    {
      "epoch": 0.42864227784584863,
      "grad_norm": 0.9324739732655796,
      "learning_rate": 6.380472425193989e-06,
      "loss": 0.1259,
      "step": 14693
    },
    {
      "epoch": 0.42867145107649224,
      "grad_norm": 0.7579055524898174,
      "learning_rate": 6.380018348156054e-06,
      "loss": 0.1331,
      "step": 14694
    },
    {
      "epoch": 0.4287006243071358,
      "grad_norm": 0.7546100364855088,
      "learning_rate": 6.379564258797644e-06,
      "loss": 0.1603,
      "step": 14695
    },
    {
      "epoch": 0.42872979753777934,
      "grad_norm": 0.9605596683417,
      "learning_rate": 6.379110157122815e-06,
      "loss": 0.1404,
      "step": 14696
    },
    {
      "epoch": 0.4287589707684229,
      "grad_norm": 0.9019800145363126,
      "learning_rate": 6.378656043135618e-06,
      "loss": 0.1459,
      "step": 14697
    },
    {
      "epoch": 0.42878814399906645,
      "grad_norm": 1.2597422167241183,
      "learning_rate": 6.37820191684011e-06,
      "loss": 0.1395,
      "step": 14698
    },
    {
      "epoch": 0.42881731722971,
      "grad_norm": 0.7761258828569346,
      "learning_rate": 6.377747778240344e-06,
      "loss": 0.1407,
      "step": 14699
    },
    {
      "epoch": 0.42884649046035356,
      "grad_norm": 0.6569514069900847,
      "learning_rate": 6.377293627340374e-06,
      "loss": 0.1284,
      "step": 14700
    },
    {
      "epoch": 0.42887566369099717,
      "grad_norm": 0.7491732766116863,
      "learning_rate": 6.376839464144257e-06,
      "loss": 0.1438,
      "step": 14701
    },
    {
      "epoch": 0.4289048369216407,
      "grad_norm": 0.9238875908563786,
      "learning_rate": 6.376385288656044e-06,
      "loss": 0.1654,
      "step": 14702
    },
    {
      "epoch": 0.4289340101522843,
      "grad_norm": 0.7969852178845906,
      "learning_rate": 6.3759311008797945e-06,
      "loss": 0.1444,
      "step": 14703
    },
    {
      "epoch": 0.4289631833829278,
      "grad_norm": 0.7045271765828216,
      "learning_rate": 6.3754769008195576e-06,
      "loss": 0.1431,
      "step": 14704
    },
    {
      "epoch": 0.4289923566135714,
      "grad_norm": 1.0154308113320514,
      "learning_rate": 6.375022688479393e-06,
      "loss": 0.151,
      "step": 14705
    },
    {
      "epoch": 0.42902152984421493,
      "grad_norm": 0.9667412064910351,
      "learning_rate": 6.374568463863353e-06,
      "loss": 0.1422,
      "step": 14706
    },
    {
      "epoch": 0.4290507030748585,
      "grad_norm": 0.7824217836663594,
      "learning_rate": 6.374114226975494e-06,
      "loss": 0.1273,
      "step": 14707
    },
    {
      "epoch": 0.4290798763055021,
      "grad_norm": 1.1880867361015324,
      "learning_rate": 6.3736599778198725e-06,
      "loss": 0.1203,
      "step": 14708
    },
    {
      "epoch": 0.42910904953614565,
      "grad_norm": 1.0002914702130004,
      "learning_rate": 6.373205716400543e-06,
      "loss": 0.1455,
      "step": 14709
    },
    {
      "epoch": 0.4291382227667892,
      "grad_norm": 1.0439607342833814,
      "learning_rate": 6.372751442721559e-06,
      "loss": 0.1555,
      "step": 14710
    },
    {
      "epoch": 0.42916739599743275,
      "grad_norm": 1.0342263842925121,
      "learning_rate": 6.372297156786978e-06,
      "loss": 0.1154,
      "step": 14711
    },
    {
      "epoch": 0.4291965692280763,
      "grad_norm": 0.9284556281035707,
      "learning_rate": 6.371842858600856e-06,
      "loss": 0.1457,
      "step": 14712
    },
    {
      "epoch": 0.42922574245871986,
      "grad_norm": 0.9488620328940125,
      "learning_rate": 6.3713885481672476e-06,
      "loss": 0.135,
      "step": 14713
    },
    {
      "epoch": 0.4292549156893634,
      "grad_norm": 0.9318273083737971,
      "learning_rate": 6.37093422549021e-06,
      "loss": 0.1406,
      "step": 14714
    },
    {
      "epoch": 0.429284088920007,
      "grad_norm": 0.6390769448191116,
      "learning_rate": 6.3704798905737995e-06,
      "loss": 0.1315,
      "step": 14715
    },
    {
      "epoch": 0.4293132621506506,
      "grad_norm": 0.8955550043126647,
      "learning_rate": 6.3700255434220714e-06,
      "loss": 0.144,
      "step": 14716
    },
    {
      "epoch": 0.42934243538129413,
      "grad_norm": 0.8678171250181997,
      "learning_rate": 6.3695711840390826e-06,
      "loss": 0.1457,
      "step": 14717
    },
    {
      "epoch": 0.4293716086119377,
      "grad_norm": 0.797499210866375,
      "learning_rate": 6.36911681242889e-06,
      "loss": 0.1627,
      "step": 14718
    },
    {
      "epoch": 0.42940078184258124,
      "grad_norm": 0.9453525502046212,
      "learning_rate": 6.368662428595548e-06,
      "loss": 0.1451,
      "step": 14719
    },
    {
      "epoch": 0.4294299550732248,
      "grad_norm": 0.6540759317187861,
      "learning_rate": 6.368208032543115e-06,
      "loss": 0.1609,
      "step": 14720
    },
    {
      "epoch": 0.42945912830386834,
      "grad_norm": 0.8733203342358636,
      "learning_rate": 6.367753624275648e-06,
      "loss": 0.1464,
      "step": 14721
    },
    {
      "epoch": 0.42948830153451195,
      "grad_norm": 0.8924426659836293,
      "learning_rate": 6.367299203797202e-06,
      "loss": 0.1104,
      "step": 14722
    },
    {
      "epoch": 0.4295174747651555,
      "grad_norm": 0.7593034303139613,
      "learning_rate": 6.366844771111835e-06,
      "loss": 0.1509,
      "step": 14723
    },
    {
      "epoch": 0.42954664799579906,
      "grad_norm": 0.8137304725602087,
      "learning_rate": 6.366390326223605e-06,
      "loss": 0.1425,
      "step": 14724
    },
    {
      "epoch": 0.4295758212264426,
      "grad_norm": 1.0327459850613934,
      "learning_rate": 6.365935869136568e-06,
      "loss": 0.1279,
      "step": 14725
    },
    {
      "epoch": 0.42960499445708616,
      "grad_norm": 0.6340026185915157,
      "learning_rate": 6.365481399854782e-06,
      "loss": 0.1454,
      "step": 14726
    },
    {
      "epoch": 0.4296341676877297,
      "grad_norm": 0.7648429592184687,
      "learning_rate": 6.365026918382303e-06,
      "loss": 0.1585,
      "step": 14727
    },
    {
      "epoch": 0.4296633409183733,
      "grad_norm": 1.2101507521140271,
      "learning_rate": 6.36457242472319e-06,
      "loss": 0.1483,
      "step": 14728
    },
    {
      "epoch": 0.4296925141490169,
      "grad_norm": 0.7692745981042659,
      "learning_rate": 6.3641179188815e-06,
      "loss": 0.1363,
      "step": 14729
    },
    {
      "epoch": 0.42972168737966043,
      "grad_norm": 0.6070084951386207,
      "learning_rate": 6.363663400861291e-06,
      "loss": 0.1166,
      "step": 14730
    },
    {
      "epoch": 0.429750860610304,
      "grad_norm": 0.9497120536580956,
      "learning_rate": 6.363208870666621e-06,
      "loss": 0.1697,
      "step": 14731
    },
    {
      "epoch": 0.42978003384094754,
      "grad_norm": 0.9847011205316754,
      "learning_rate": 6.362754328301548e-06,
      "loss": 0.133,
      "step": 14732
    },
    {
      "epoch": 0.4298092070715911,
      "grad_norm": 0.7350037064593803,
      "learning_rate": 6.36229977377013e-06,
      "loss": 0.1421,
      "step": 14733
    },
    {
      "epoch": 0.42983838030223465,
      "grad_norm": 0.8404109498205895,
      "learning_rate": 6.361845207076423e-06,
      "loss": 0.1665,
      "step": 14734
    },
    {
      "epoch": 0.42986755353287825,
      "grad_norm": 0.829169797032689,
      "learning_rate": 6.361390628224488e-06,
      "loss": 0.1252,
      "step": 14735
    },
    {
      "epoch": 0.4298967267635218,
      "grad_norm": 0.9767726647577462,
      "learning_rate": 6.3609360372183834e-06,
      "loss": 0.141,
      "step": 14736
    },
    {
      "epoch": 0.42992589999416536,
      "grad_norm": 1.0249799083053788,
      "learning_rate": 6.360481434062164e-06,
      "loss": 0.1345,
      "step": 14737
    },
    {
      "epoch": 0.4299550732248089,
      "grad_norm": 1.0543286085668138,
      "learning_rate": 6.360026818759894e-06,
      "loss": 0.1477,
      "step": 14738
    },
    {
      "epoch": 0.42998424645545247,
      "grad_norm": 0.8235070795515166,
      "learning_rate": 6.359572191315629e-06,
      "loss": 0.1285,
      "step": 14739
    },
    {
      "epoch": 0.430013419686096,
      "grad_norm": 0.7234196639753296,
      "learning_rate": 6.359117551733427e-06,
      "loss": 0.1303,
      "step": 14740
    },
    {
      "epoch": 0.4300425929167396,
      "grad_norm": 0.7910670223734967,
      "learning_rate": 6.358662900017348e-06,
      "loss": 0.1565,
      "step": 14741
    },
    {
      "epoch": 0.4300717661473832,
      "grad_norm": 1.1594702431343762,
      "learning_rate": 6.358208236171451e-06,
      "loss": 0.1157,
      "step": 14742
    },
    {
      "epoch": 0.43010093937802674,
      "grad_norm": 0.7541241378096104,
      "learning_rate": 6.357753560199795e-06,
      "loss": 0.1321,
      "step": 14743
    },
    {
      "epoch": 0.4301301126086703,
      "grad_norm": 0.7988039846351058,
      "learning_rate": 6.35729887210644e-06,
      "loss": 0.1454,
      "step": 14744
    },
    {
      "epoch": 0.43015928583931384,
      "grad_norm": 0.8143874563860272,
      "learning_rate": 6.356844171895444e-06,
      "loss": 0.1423,
      "step": 14745
    },
    {
      "epoch": 0.4301884590699574,
      "grad_norm": 0.8809231822137538,
      "learning_rate": 6.356389459570868e-06,
      "loss": 0.1291,
      "step": 14746
    },
    {
      "epoch": 0.43021763230060095,
      "grad_norm": 0.7402927413175663,
      "learning_rate": 6.35593473513677e-06,
      "loss": 0.1326,
      "step": 14747
    },
    {
      "epoch": 0.4302468055312445,
      "grad_norm": 0.8641214382003821,
      "learning_rate": 6.355479998597211e-06,
      "loss": 0.1348,
      "step": 14748
    },
    {
      "epoch": 0.4302759787618881,
      "grad_norm": 1.1837464571437748,
      "learning_rate": 6.355025249956249e-06,
      "loss": 0.1471,
      "step": 14749
    },
    {
      "epoch": 0.43030515199253166,
      "grad_norm": 0.7054205118269635,
      "learning_rate": 6.354570489217946e-06,
      "loss": 0.1292,
      "step": 14750
    },
    {
      "epoch": 0.4303343252231752,
      "grad_norm": 0.9815155821544417,
      "learning_rate": 6.35411571638636e-06,
      "loss": 0.1482,
      "step": 14751
    },
    {
      "epoch": 0.43036349845381877,
      "grad_norm": 0.7517246662679853,
      "learning_rate": 6.353660931465553e-06,
      "loss": 0.1495,
      "step": 14752
    },
    {
      "epoch": 0.4303926716844623,
      "grad_norm": 0.8042249174324039,
      "learning_rate": 6.353206134459585e-06,
      "loss": 0.1376,
      "step": 14753
    },
    {
      "epoch": 0.4304218449151059,
      "grad_norm": 0.7419311244258114,
      "learning_rate": 6.352751325372515e-06,
      "loss": 0.1348,
      "step": 14754
    },
    {
      "epoch": 0.4304510181457495,
      "grad_norm": 0.7243243445421994,
      "learning_rate": 6.352296504208404e-06,
      "loss": 0.1289,
      "step": 14755
    },
    {
      "epoch": 0.43048019137639304,
      "grad_norm": 0.7399725796332371,
      "learning_rate": 6.351841670971313e-06,
      "loss": 0.1236,
      "step": 14756
    },
    {
      "epoch": 0.4305093646070366,
      "grad_norm": 0.7262352627221309,
      "learning_rate": 6.3513868256653e-06,
      "loss": 0.1465,
      "step": 14757
    },
    {
      "epoch": 0.43053853783768015,
      "grad_norm": 0.982726738830854,
      "learning_rate": 6.350931968294432e-06,
      "loss": 0.1353,
      "step": 14758
    },
    {
      "epoch": 0.4305677110683237,
      "grad_norm": 0.8203260531758041,
      "learning_rate": 6.3504770988627625e-06,
      "loss": 0.1286,
      "step": 14759
    },
    {
      "epoch": 0.43059688429896725,
      "grad_norm": 0.8721819696136724,
      "learning_rate": 6.350022217374358e-06,
      "loss": 0.1239,
      "step": 14760
    },
    {
      "epoch": 0.4306260575296108,
      "grad_norm": 0.924114420605603,
      "learning_rate": 6.349567323833277e-06,
      "loss": 0.125,
      "step": 14761
    },
    {
      "epoch": 0.4306552307602544,
      "grad_norm": 0.9240299533530848,
      "learning_rate": 6.349112418243579e-06,
      "loss": 0.143,
      "step": 14762
    },
    {
      "epoch": 0.43068440399089797,
      "grad_norm": 0.7282105079622296,
      "learning_rate": 6.3486575006093295e-06,
      "loss": 0.133,
      "step": 14763
    },
    {
      "epoch": 0.4307135772215415,
      "grad_norm": 0.8709789885973791,
      "learning_rate": 6.348202570934588e-06,
      "loss": 0.1231,
      "step": 14764
    },
    {
      "epoch": 0.4307427504521851,
      "grad_norm": 1.1936752809308744,
      "learning_rate": 6.347747629223415e-06,
      "loss": 0.1609,
      "step": 14765
    },
    {
      "epoch": 0.43077192368282863,
      "grad_norm": 0.9580558806630077,
      "learning_rate": 6.347292675479872e-06,
      "loss": 0.1489,
      "step": 14766
    },
    {
      "epoch": 0.4308010969134722,
      "grad_norm": 0.8337521198576897,
      "learning_rate": 6.346837709708023e-06,
      "loss": 0.1415,
      "step": 14767
    },
    {
      "epoch": 0.43083027014411573,
      "grad_norm": 0.9152428122636086,
      "learning_rate": 6.34638273191193e-06,
      "loss": 0.1323,
      "step": 14768
    },
    {
      "epoch": 0.43085944337475934,
      "grad_norm": 1.162065846525339,
      "learning_rate": 6.34592774209565e-06,
      "loss": 0.1692,
      "step": 14769
    },
    {
      "epoch": 0.4308886166054029,
      "grad_norm": 0.8715189540513543,
      "learning_rate": 6.345472740263251e-06,
      "loss": 0.1455,
      "step": 14770
    },
    {
      "epoch": 0.43091778983604645,
      "grad_norm": 0.9425556467260457,
      "learning_rate": 6.345017726418792e-06,
      "loss": 0.1382,
      "step": 14771
    },
    {
      "epoch": 0.43094696306669,
      "grad_norm": 1.1258620194234759,
      "learning_rate": 6.344562700566334e-06,
      "loss": 0.1311,
      "step": 14772
    },
    {
      "epoch": 0.43097613629733356,
      "grad_norm": 0.9633974929046847,
      "learning_rate": 6.344107662709943e-06,
      "loss": 0.1332,
      "step": 14773
    },
    {
      "epoch": 0.4310053095279771,
      "grad_norm": 0.7794259895237825,
      "learning_rate": 6.343652612853679e-06,
      "loss": 0.1058,
      "step": 14774
    },
    {
      "epoch": 0.43103448275862066,
      "grad_norm": 1.0422691345915542,
      "learning_rate": 6.343197551001605e-06,
      "loss": 0.1167,
      "step": 14775
    },
    {
      "epoch": 0.43106365598926427,
      "grad_norm": 0.9600283141702454,
      "learning_rate": 6.342742477157784e-06,
      "loss": 0.1456,
      "step": 14776
    },
    {
      "epoch": 0.4310928292199078,
      "grad_norm": 0.8345360247156994,
      "learning_rate": 6.3422873913262796e-06,
      "loss": 0.1537,
      "step": 14777
    },
    {
      "epoch": 0.4311220024505514,
      "grad_norm": 0.9786213226755879,
      "learning_rate": 6.341832293511152e-06,
      "loss": 0.1425,
      "step": 14778
    },
    {
      "epoch": 0.43115117568119493,
      "grad_norm": 0.9059194181708891,
      "learning_rate": 6.341377183716469e-06,
      "loss": 0.1461,
      "step": 14779
    },
    {
      "epoch": 0.4311803489118385,
      "grad_norm": 0.6796709303229915,
      "learning_rate": 6.340922061946288e-06,
      "loss": 0.1464,
      "step": 14780
    },
    {
      "epoch": 0.43120952214248204,
      "grad_norm": 0.788282698161525,
      "learning_rate": 6.3404669282046745e-06,
      "loss": 0.1427,
      "step": 14781
    },
    {
      "epoch": 0.43123869537312565,
      "grad_norm": 0.9207172393032127,
      "learning_rate": 6.340011782495694e-06,
      "loss": 0.1189,
      "step": 14782
    },
    {
      "epoch": 0.4312678686037692,
      "grad_norm": 0.7501648475307601,
      "learning_rate": 6.339556624823409e-06,
      "loss": 0.1365,
      "step": 14783
    },
    {
      "epoch": 0.43129704183441275,
      "grad_norm": 0.7625215407292547,
      "learning_rate": 6.339101455191881e-06,
      "loss": 0.1503,
      "step": 14784
    },
    {
      "epoch": 0.4313262150650563,
      "grad_norm": 0.8133516307255715,
      "learning_rate": 6.338646273605175e-06,
      "loss": 0.1318,
      "step": 14785
    },
    {
      "epoch": 0.43135538829569986,
      "grad_norm": 0.6745848108498254,
      "learning_rate": 6.338191080067354e-06,
      "loss": 0.1322,
      "step": 14786
    },
    {
      "epoch": 0.4313845615263434,
      "grad_norm": 0.6268664280390904,
      "learning_rate": 6.337735874582482e-06,
      "loss": 0.1542,
      "step": 14787
    },
    {
      "epoch": 0.43141373475698697,
      "grad_norm": 0.7997227880190497,
      "learning_rate": 6.337280657154625e-06,
      "loss": 0.1309,
      "step": 14788
    },
    {
      "epoch": 0.4314429079876306,
      "grad_norm": 0.703119308180586,
      "learning_rate": 6.336825427787845e-06,
      "loss": 0.1286,
      "step": 14789
    },
    {
      "epoch": 0.43147208121827413,
      "grad_norm": 0.7206278096767809,
      "learning_rate": 6.336370186486207e-06,
      "loss": 0.1321,
      "step": 14790
    },
    {
      "epoch": 0.4315012544489177,
      "grad_norm": 0.7961418270278572,
      "learning_rate": 6.335914933253775e-06,
      "loss": 0.1137,
      "step": 14791
    },
    {
      "epoch": 0.43153042767956123,
      "grad_norm": 1.242814905356728,
      "learning_rate": 6.335459668094612e-06,
      "loss": 0.1316,
      "step": 14792
    },
    {
      "epoch": 0.4315596009102048,
      "grad_norm": 0.774466874132768,
      "learning_rate": 6.335004391012786e-06,
      "loss": 0.1186,
      "step": 14793
    },
    {
      "epoch": 0.43158877414084834,
      "grad_norm": 2.4898668528798806,
      "learning_rate": 6.334549102012357e-06,
      "loss": 0.135,
      "step": 14794
    },
    {
      "epoch": 0.4316179473714919,
      "grad_norm": 0.8596545447807837,
      "learning_rate": 6.334093801097395e-06,
      "loss": 0.1519,
      "step": 14795
    },
    {
      "epoch": 0.4316471206021355,
      "grad_norm": 0.844686921865426,
      "learning_rate": 6.333638488271961e-06,
      "loss": 0.1436,
      "step": 14796
    },
    {
      "epoch": 0.43167629383277906,
      "grad_norm": 0.9872144068219241,
      "learning_rate": 6.33318316354012e-06,
      "loss": 0.1603,
      "step": 14797
    },
    {
      "epoch": 0.4317054670634226,
      "grad_norm": 0.749878854173997,
      "learning_rate": 6.332727826905939e-06,
      "loss": 0.1258,
      "step": 14798
    },
    {
      "epoch": 0.43173464029406616,
      "grad_norm": 0.7914791480108483,
      "learning_rate": 6.33227247837348e-06,
      "loss": 0.156,
      "step": 14799
    },
    {
      "epoch": 0.4317638135247097,
      "grad_norm": 0.7703284842657782,
      "learning_rate": 6.331817117946814e-06,
      "loss": 0.1167,
      "step": 14800
    },
    {
      "epoch": 0.43179298675535327,
      "grad_norm": 0.7347550592703608,
      "learning_rate": 6.33136174563e-06,
      "loss": 0.1318,
      "step": 14801
    },
    {
      "epoch": 0.4318221599859968,
      "grad_norm": 0.8049458505427863,
      "learning_rate": 6.330906361427106e-06,
      "loss": 0.1351,
      "step": 14802
    },
    {
      "epoch": 0.43185133321664043,
      "grad_norm": 0.8281217131378312,
      "learning_rate": 6.330450965342199e-06,
      "loss": 0.1359,
      "step": 14803
    },
    {
      "epoch": 0.431880506447284,
      "grad_norm": 0.8203016360463499,
      "learning_rate": 6.329995557379344e-06,
      "loss": 0.1542,
      "step": 14804
    },
    {
      "epoch": 0.43190967967792754,
      "grad_norm": 0.748088965862053,
      "learning_rate": 6.329540137542605e-06,
      "loss": 0.152,
      "step": 14805
    },
    {
      "epoch": 0.4319388529085711,
      "grad_norm": 0.8582098892345322,
      "learning_rate": 6.329084705836049e-06,
      "loss": 0.1433,
      "step": 14806
    },
    {
      "epoch": 0.43196802613921464,
      "grad_norm": 0.8470891127566365,
      "learning_rate": 6.328629262263741e-06,
      "loss": 0.1301,
      "step": 14807
    },
    {
      "epoch": 0.4319971993698582,
      "grad_norm": 0.7133027560523214,
      "learning_rate": 6.328173806829751e-06,
      "loss": 0.1521,
      "step": 14808
    },
    {
      "epoch": 0.4320263726005018,
      "grad_norm": 0.9062881346168233,
      "learning_rate": 6.3277183395381405e-06,
      "loss": 0.1244,
      "step": 14809
    },
    {
      "epoch": 0.43205554583114536,
      "grad_norm": 0.7081085915181456,
      "learning_rate": 6.3272628603929775e-06,
      "loss": 0.123,
      "step": 14810
    },
    {
      "epoch": 0.4320847190617889,
      "grad_norm": 0.7425128005166141,
      "learning_rate": 6.3268073693983275e-06,
      "loss": 0.1323,
      "step": 14811
    },
    {
      "epoch": 0.43211389229243247,
      "grad_norm": 0.8403564542402145,
      "learning_rate": 6.3263518665582606e-06,
      "loss": 0.1224,
      "step": 14812
    },
    {
      "epoch": 0.432143065523076,
      "grad_norm": 0.7591884647532012,
      "learning_rate": 6.32589635187684e-06,
      "loss": 0.1504,
      "step": 14813
    },
    {
      "epoch": 0.4321722387537196,
      "grad_norm": 0.7700852059272205,
      "learning_rate": 6.325440825358131e-06,
      "loss": 0.1317,
      "step": 14814
    },
    {
      "epoch": 0.4322014119843631,
      "grad_norm": 0.79680827071898,
      "learning_rate": 6.324985287006206e-06,
      "loss": 0.1345,
      "step": 14815
    },
    {
      "epoch": 0.43223058521500674,
      "grad_norm": 0.9241577630626693,
      "learning_rate": 6.324529736825127e-06,
      "loss": 0.125,
      "step": 14816
    },
    {
      "epoch": 0.4322597584456503,
      "grad_norm": 0.8675621657450447,
      "learning_rate": 6.324074174818961e-06,
      "loss": 0.1365,
      "step": 14817
    },
    {
      "epoch": 0.43228893167629384,
      "grad_norm": 0.709867319033397,
      "learning_rate": 6.323618600991781e-06,
      "loss": 0.1414,
      "step": 14818
    },
    {
      "epoch": 0.4323181049069374,
      "grad_norm": 0.7159572321632133,
      "learning_rate": 6.323163015347648e-06,
      "loss": 0.1476,
      "step": 14819
    },
    {
      "epoch": 0.43234727813758095,
      "grad_norm": 0.7680476626555585,
      "learning_rate": 6.322707417890631e-06,
      "loss": 0.1443,
      "step": 14820
    },
    {
      "epoch": 0.4323764513682245,
      "grad_norm": 0.8288063399065261,
      "learning_rate": 6.322251808624799e-06,
      "loss": 0.1141,
      "step": 14821
    },
    {
      "epoch": 0.43240562459886805,
      "grad_norm": 0.6191127988079769,
      "learning_rate": 6.321796187554217e-06,
      "loss": 0.1378,
      "step": 14822
    },
    {
      "epoch": 0.43243479782951166,
      "grad_norm": 1.4845953736268853,
      "learning_rate": 6.321340554682955e-06,
      "loss": 0.1489,
      "step": 14823
    },
    {
      "epoch": 0.4324639710601552,
      "grad_norm": 1.2885937859330068,
      "learning_rate": 6.320884910015079e-06,
      "loss": 0.1437,
      "step": 14824
    },
    {
      "epoch": 0.43249314429079877,
      "grad_norm": 0.8138087163816917,
      "learning_rate": 6.320429253554661e-06,
      "loss": 0.1098,
      "step": 14825
    },
    {
      "epoch": 0.4325223175214423,
      "grad_norm": 0.8907509065064294,
      "learning_rate": 6.319973585305762e-06,
      "loss": 0.1451,
      "step": 14826
    },
    {
      "epoch": 0.4325514907520859,
      "grad_norm": 0.8669764743772568,
      "learning_rate": 6.319517905272455e-06,
      "loss": 0.1552,
      "step": 14827
    },
    {
      "epoch": 0.43258066398272943,
      "grad_norm": 1.0586071764642637,
      "learning_rate": 6.319062213458808e-06,
      "loss": 0.1477,
      "step": 14828
    },
    {
      "epoch": 0.432609837213373,
      "grad_norm": 0.8915242741557635,
      "learning_rate": 6.318606509868888e-06,
      "loss": 0.1427,
      "step": 14829
    },
    {
      "epoch": 0.4326390104440166,
      "grad_norm": 0.7399806161453074,
      "learning_rate": 6.318150794506765e-06,
      "loss": 0.13,
      "step": 14830
    },
    {
      "epoch": 0.43266818367466015,
      "grad_norm": 0.939475842359072,
      "learning_rate": 6.317695067376506e-06,
      "loss": 0.1449,
      "step": 14831
    },
    {
      "epoch": 0.4326973569053037,
      "grad_norm": 0.7400291112559871,
      "learning_rate": 6.3172393284821775e-06,
      "loss": 0.114,
      "step": 14832
    },
    {
      "epoch": 0.43272653013594725,
      "grad_norm": 0.7331330322028462,
      "learning_rate": 6.316783577827854e-06,
      "loss": 0.1268,
      "step": 14833
    },
    {
      "epoch": 0.4327557033665908,
      "grad_norm": 1.0497645154772755,
      "learning_rate": 6.3163278154176e-06,
      "loss": 0.1552,
      "step": 14834
    },
    {
      "epoch": 0.43278487659723436,
      "grad_norm": 0.9624530482482117,
      "learning_rate": 6.315872041255484e-06,
      "loss": 0.1213,
      "step": 14835
    },
    {
      "epoch": 0.43281404982787797,
      "grad_norm": 1.0323126643004623,
      "learning_rate": 6.3154162553455775e-06,
      "loss": 0.142,
      "step": 14836
    },
    {
      "epoch": 0.4328432230585215,
      "grad_norm": 0.980677544834582,
      "learning_rate": 6.31496045769195e-06,
      "loss": 0.1549,
      "step": 14837
    },
    {
      "epoch": 0.4328723962891651,
      "grad_norm": 0.8966870514937749,
      "learning_rate": 6.314504648298667e-06,
      "loss": 0.1512,
      "step": 14838
    },
    {
      "epoch": 0.4329015695198086,
      "grad_norm": 0.7708401591962507,
      "learning_rate": 6.3140488271698015e-06,
      "loss": 0.1196,
      "step": 14839
    },
    {
      "epoch": 0.4329307427504522,
      "grad_norm": 0.9611765463011959,
      "learning_rate": 6.3135929943094235e-06,
      "loss": 0.152,
      "step": 14840
    },
    {
      "epoch": 0.43295991598109573,
      "grad_norm": 0.7808004179677782,
      "learning_rate": 6.313137149721597e-06,
      "loss": 0.1158,
      "step": 14841
    },
    {
      "epoch": 0.4329890892117393,
      "grad_norm": 0.7507308527471663,
      "learning_rate": 6.312681293410399e-06,
      "loss": 0.135,
      "step": 14842
    },
    {
      "epoch": 0.4330182624423829,
      "grad_norm": 0.7333275014877074,
      "learning_rate": 6.312225425379896e-06,
      "loss": 0.1319,
      "step": 14843
    },
    {
      "epoch": 0.43304743567302645,
      "grad_norm": 0.7817531313835508,
      "learning_rate": 6.311769545634154e-06,
      "loss": 0.1585,
      "step": 14844
    },
    {
      "epoch": 0.43307660890367,
      "grad_norm": 0.8548138312330888,
      "learning_rate": 6.311313654177249e-06,
      "loss": 0.1431,
      "step": 14845
    },
    {
      "epoch": 0.43310578213431355,
      "grad_norm": 0.7211573466060593,
      "learning_rate": 6.310857751013248e-06,
      "loss": 0.1541,
      "step": 14846
    },
    {
      "epoch": 0.4331349553649571,
      "grad_norm": 0.645913438630516,
      "learning_rate": 6.3104018361462225e-06,
      "loss": 0.1229,
      "step": 14847
    },
    {
      "epoch": 0.43316412859560066,
      "grad_norm": 0.9447276864129122,
      "learning_rate": 6.309945909580243e-06,
      "loss": 0.139,
      "step": 14848
    },
    {
      "epoch": 0.4331933018262442,
      "grad_norm": 0.7900679300978004,
      "learning_rate": 6.309489971319378e-06,
      "loss": 0.1307,
      "step": 14849
    },
    {
      "epoch": 0.4332224750568878,
      "grad_norm": 0.8623032168405524,
      "learning_rate": 6.309034021367699e-06,
      "loss": 0.156,
      "step": 14850
    },
    {
      "epoch": 0.4332516482875314,
      "grad_norm": 1.346691040609426,
      "learning_rate": 6.308578059729278e-06,
      "loss": 0.1723,
      "step": 14851
    },
    {
      "epoch": 0.43328082151817493,
      "grad_norm": 1.0181550369805636,
      "learning_rate": 6.308122086408184e-06,
      "loss": 0.1391,
      "step": 14852
    },
    {
      "epoch": 0.4333099947488185,
      "grad_norm": 0.66617601220997,
      "learning_rate": 6.307666101408487e-06,
      "loss": 0.1405,
      "step": 14853
    },
    {
      "epoch": 0.43333916797946204,
      "grad_norm": 1.0803410521516597,
      "learning_rate": 6.30721010473426e-06,
      "loss": 0.1717,
      "step": 14854
    },
    {
      "epoch": 0.4333683412101056,
      "grad_norm": 0.9935671643146751,
      "learning_rate": 6.306754096389575e-06,
      "loss": 0.1304,
      "step": 14855
    },
    {
      "epoch": 0.43339751444074914,
      "grad_norm": 0.9995445759687146,
      "learning_rate": 6.306298076378499e-06,
      "loss": 0.1719,
      "step": 14856
    },
    {
      "epoch": 0.43342668767139275,
      "grad_norm": 0.8930670840373487,
      "learning_rate": 6.305842044705105e-06,
      "loss": 0.1458,
      "step": 14857
    },
    {
      "epoch": 0.4334558609020363,
      "grad_norm": 0.729476831124679,
      "learning_rate": 6.305386001373468e-06,
      "loss": 0.1315,
      "step": 14858
    },
    {
      "epoch": 0.43348503413267986,
      "grad_norm": 0.8435005806158672,
      "learning_rate": 6.3049299463876535e-06,
      "loss": 0.1395,
      "step": 14859
    },
    {
      "epoch": 0.4335142073633234,
      "grad_norm": 0.9560165677874352,
      "learning_rate": 6.304473879751738e-06,
      "loss": 0.1497,
      "step": 14860
    },
    {
      "epoch": 0.43354338059396696,
      "grad_norm": 0.8345507564547471,
      "learning_rate": 6.3040178014697905e-06,
      "loss": 0.1444,
      "step": 14861
    },
    {
      "epoch": 0.4335725538246105,
      "grad_norm": 1.1152359866359134,
      "learning_rate": 6.303561711545883e-06,
      "loss": 0.1392,
      "step": 14862
    },
    {
      "epoch": 0.43360172705525407,
      "grad_norm": 0.8206332388495481,
      "learning_rate": 6.303105609984087e-06,
      "loss": 0.1363,
      "step": 14863
    },
    {
      "epoch": 0.4336309002858977,
      "grad_norm": 0.7987801003957105,
      "learning_rate": 6.302649496788476e-06,
      "loss": 0.141,
      "step": 14864
    },
    {
      "epoch": 0.43366007351654123,
      "grad_norm": 0.8427239224657411,
      "learning_rate": 6.3021933719631215e-06,
      "loss": 0.1303,
      "step": 14865
    },
    {
      "epoch": 0.4336892467471848,
      "grad_norm": 0.851809512482157,
      "learning_rate": 6.301737235512096e-06,
      "loss": 0.1272,
      "step": 14866
    },
    {
      "epoch": 0.43371841997782834,
      "grad_norm": 0.760481169540387,
      "learning_rate": 6.301281087439469e-06,
      "loss": 0.1356,
      "step": 14867
    },
    {
      "epoch": 0.4337475932084719,
      "grad_norm": 0.8702583160884054,
      "learning_rate": 6.300824927749317e-06,
      "loss": 0.1262,
      "step": 14868
    },
    {
      "epoch": 0.43377676643911545,
      "grad_norm": 0.9830728223049329,
      "learning_rate": 6.300368756445709e-06,
      "loss": 0.1411,
      "step": 14869
    },
    {
      "epoch": 0.43380593966975906,
      "grad_norm": 0.7699137388317029,
      "learning_rate": 6.299912573532723e-06,
      "loss": 0.1248,
      "step": 14870
    },
    {
      "epoch": 0.4338351129004026,
      "grad_norm": 0.8193503189857513,
      "learning_rate": 6.299456379014424e-06,
      "loss": 0.1584,
      "step": 14871
    },
    {
      "epoch": 0.43386428613104616,
      "grad_norm": 0.7466214930730316,
      "learning_rate": 6.299000172894889e-06,
      "loss": 0.1465,
      "step": 14872
    },
    {
      "epoch": 0.4338934593616897,
      "grad_norm": 0.9512890462432595,
      "learning_rate": 6.298543955178192e-06,
      "loss": 0.1463,
      "step": 14873
    },
    {
      "epoch": 0.43392263259233327,
      "grad_norm": 0.9847601613888907,
      "learning_rate": 6.298087725868403e-06,
      "loss": 0.1606,
      "step": 14874
    },
    {
      "epoch": 0.4339518058229768,
      "grad_norm": 0.678758495208975,
      "learning_rate": 6.2976314849695985e-06,
      "loss": 0.1286,
      "step": 14875
    },
    {
      "epoch": 0.4339809790536204,
      "grad_norm": 0.8780937813226559,
      "learning_rate": 6.297175232485849e-06,
      "loss": 0.132,
      "step": 14876
    },
    {
      "epoch": 0.434010152284264,
      "grad_norm": 1.015683950220094,
      "learning_rate": 6.296718968421228e-06,
      "loss": 0.1433,
      "step": 14877
    },
    {
      "epoch": 0.43403932551490754,
      "grad_norm": 0.7916238415866884,
      "learning_rate": 6.296262692779811e-06,
      "loss": 0.1223,
      "step": 14878
    },
    {
      "epoch": 0.4340684987455511,
      "grad_norm": 0.5392514728974622,
      "learning_rate": 6.295806405565668e-06,
      "loss": 0.1095,
      "step": 14879
    },
    {
      "epoch": 0.43409767197619464,
      "grad_norm": 0.8330809840747785,
      "learning_rate": 6.295350106782877e-06,
      "loss": 0.1337,
      "step": 14880
    },
    {
      "epoch": 0.4341268452068382,
      "grad_norm": 1.262089795742695,
      "learning_rate": 6.294893796435508e-06,
      "loss": 0.14,
      "step": 14881
    },
    {
      "epoch": 0.43415601843748175,
      "grad_norm": 0.866794026353123,
      "learning_rate": 6.294437474527637e-06,
      "loss": 0.1564,
      "step": 14882
    },
    {
      "epoch": 0.4341851916681253,
      "grad_norm": 0.7113868590294488,
      "learning_rate": 6.293981141063336e-06,
      "loss": 0.1326,
      "step": 14883
    },
    {
      "epoch": 0.4342143648987689,
      "grad_norm": 0.9040129207199873,
      "learning_rate": 6.293524796046683e-06,
      "loss": 0.1393,
      "step": 14884
    },
    {
      "epoch": 0.43424353812941247,
      "grad_norm": 0.9098606553163996,
      "learning_rate": 6.293068439481749e-06,
      "loss": 0.1279,
      "step": 14885
    },
    {
      "epoch": 0.434272711360056,
      "grad_norm": 0.9560544364486673,
      "learning_rate": 6.2926120713726055e-06,
      "loss": 0.1355,
      "step": 14886
    },
    {
      "epoch": 0.43430188459069957,
      "grad_norm": 0.9017628328457967,
      "learning_rate": 6.292155691723331e-06,
      "loss": 0.1274,
      "step": 14887
    },
    {
      "epoch": 0.4343310578213431,
      "grad_norm": 1.0174906176820488,
      "learning_rate": 6.291699300538001e-06,
      "loss": 0.1381,
      "step": 14888
    },
    {
      "epoch": 0.4343602310519867,
      "grad_norm": 0.7896600965768121,
      "learning_rate": 6.291242897820686e-06,
      "loss": 0.1125,
      "step": 14889
    },
    {
      "epoch": 0.43438940428263023,
      "grad_norm": 0.8736816177417097,
      "learning_rate": 6.290786483575465e-06,
      "loss": 0.141,
      "step": 14890
    },
    {
      "epoch": 0.43441857751327384,
      "grad_norm": 0.8666296258123337,
      "learning_rate": 6.290330057806408e-06,
      "loss": 0.1431,
      "step": 14891
    },
    {
      "epoch": 0.4344477507439174,
      "grad_norm": 0.9022593673584454,
      "learning_rate": 6.289873620517594e-06,
      "loss": 0.1285,
      "step": 14892
    },
    {
      "epoch": 0.43447692397456095,
      "grad_norm": 0.7726111119294767,
      "learning_rate": 6.289417171713095e-06,
      "loss": 0.1192,
      "step": 14893
    },
    {
      "epoch": 0.4345060972052045,
      "grad_norm": 0.9764752659765082,
      "learning_rate": 6.288960711396987e-06,
      "loss": 0.1531,
      "step": 14894
    },
    {
      "epoch": 0.43453527043584805,
      "grad_norm": 1.1802016135698505,
      "learning_rate": 6.288504239573348e-06,
      "loss": 0.1348,
      "step": 14895
    },
    {
      "epoch": 0.4345644436664916,
      "grad_norm": 0.7652544979387879,
      "learning_rate": 6.2880477562462475e-06,
      "loss": 0.1306,
      "step": 14896
    },
    {
      "epoch": 0.4345936168971352,
      "grad_norm": 0.9221428203868074,
      "learning_rate": 6.287591261419765e-06,
      "loss": 0.1353,
      "step": 14897
    },
    {
      "epoch": 0.43462279012777877,
      "grad_norm": 0.9333945401115743,
      "learning_rate": 6.287134755097977e-06,
      "loss": 0.1694,
      "step": 14898
    },
    {
      "epoch": 0.4346519633584223,
      "grad_norm": 0.997088890826675,
      "learning_rate": 6.2866782372849555e-06,
      "loss": 0.1511,
      "step": 14899
    },
    {
      "epoch": 0.4346811365890659,
      "grad_norm": 0.7862373800922435,
      "learning_rate": 6.286221707984778e-06,
      "loss": 0.1322,
      "step": 14900
    },
    {
      "epoch": 0.43471030981970943,
      "grad_norm": 0.8032756680491813,
      "learning_rate": 6.28576516720152e-06,
      "loss": 0.1375,
      "step": 14901
    },
    {
      "epoch": 0.434739483050353,
      "grad_norm": 0.8915256699252233,
      "learning_rate": 6.285308614939259e-06,
      "loss": 0.1441,
      "step": 14902
    },
    {
      "epoch": 0.43476865628099653,
      "grad_norm": 0.8143586950469169,
      "learning_rate": 6.284852051202069e-06,
      "loss": 0.1562,
      "step": 14903
    },
    {
      "epoch": 0.43479782951164014,
      "grad_norm": 0.8097365289970733,
      "learning_rate": 6.284395475994024e-06,
      "loss": 0.1588,
      "step": 14904
    },
    {
      "epoch": 0.4348270027422837,
      "grad_norm": 0.6316064871648317,
      "learning_rate": 6.283938889319205e-06,
      "loss": 0.1258,
      "step": 14905
    },
    {
      "epoch": 0.43485617597292725,
      "grad_norm": 0.7199241791446711,
      "learning_rate": 6.283482291181686e-06,
      "loss": 0.1332,
      "step": 14906
    },
    {
      "epoch": 0.4348853492035708,
      "grad_norm": 0.9549830821817287,
      "learning_rate": 6.283025681585544e-06,
      "loss": 0.1295,
      "step": 14907
    },
    {
      "epoch": 0.43491452243421436,
      "grad_norm": 0.5998824774171313,
      "learning_rate": 6.282569060534854e-06,
      "loss": 0.132,
      "step": 14908
    },
    {
      "epoch": 0.4349436956648579,
      "grad_norm": 0.8522325645392712,
      "learning_rate": 6.2821124280336934e-06,
      "loss": 0.1778,
      "step": 14909
    },
    {
      "epoch": 0.43497286889550146,
      "grad_norm": 0.9510984597363902,
      "learning_rate": 6.28165578408614e-06,
      "loss": 0.1334,
      "step": 14910
    },
    {
      "epoch": 0.43500204212614507,
      "grad_norm": 0.7994132652116095,
      "learning_rate": 6.281199128696269e-06,
      "loss": 0.1362,
      "step": 14911
    },
    {
      "epoch": 0.4350312153567886,
      "grad_norm": 0.7392170295309389,
      "learning_rate": 6.280742461868159e-06,
      "loss": 0.1265,
      "step": 14912
    },
    {
      "epoch": 0.4350603885874322,
      "grad_norm": 0.7661820989637711,
      "learning_rate": 6.280285783605885e-06,
      "loss": 0.1386,
      "step": 14913
    },
    {
      "epoch": 0.43508956181807573,
      "grad_norm": 0.815470895969181,
      "learning_rate": 6.279829093913525e-06,
      "loss": 0.1496,
      "step": 14914
    },
    {
      "epoch": 0.4351187350487193,
      "grad_norm": 0.9107718400772307,
      "learning_rate": 6.2793723927951575e-06,
      "loss": 0.1439,
      "step": 14915
    },
    {
      "epoch": 0.43514790827936284,
      "grad_norm": 0.9852545037025268,
      "learning_rate": 6.278915680254858e-06,
      "loss": 0.1523,
      "step": 14916
    },
    {
      "epoch": 0.4351770815100064,
      "grad_norm": 0.844753075866623,
      "learning_rate": 6.2784589562967045e-06,
      "loss": 0.1344,
      "step": 14917
    },
    {
      "epoch": 0.43520625474065,
      "grad_norm": 0.7164684216736642,
      "learning_rate": 6.278002220924776e-06,
      "loss": 0.1148,
      "step": 14918
    },
    {
      "epoch": 0.43523542797129355,
      "grad_norm": 0.8886839598601174,
      "learning_rate": 6.277545474143146e-06,
      "loss": 0.1262,
      "step": 14919
    },
    {
      "epoch": 0.4352646012019371,
      "grad_norm": 0.9947075280271497,
      "learning_rate": 6.277088715955898e-06,
      "loss": 0.1341,
      "step": 14920
    },
    {
      "epoch": 0.43529377443258066,
      "grad_norm": 1.2448171324132684,
      "learning_rate": 6.276631946367106e-06,
      "loss": 0.1653,
      "step": 14921
    },
    {
      "epoch": 0.4353229476632242,
      "grad_norm": 0.9888601040175079,
      "learning_rate": 6.276175165380847e-06,
      "loss": 0.1308,
      "step": 14922
    },
    {
      "epoch": 0.43535212089386777,
      "grad_norm": 0.9215285039816433,
      "learning_rate": 6.275718373001203e-06,
      "loss": 0.1321,
      "step": 14923
    },
    {
      "epoch": 0.4353812941245114,
      "grad_norm": 0.7205768417382196,
      "learning_rate": 6.2752615692322485e-06,
      "loss": 0.1367,
      "step": 14924
    },
    {
      "epoch": 0.43541046735515493,
      "grad_norm": 0.7417452238793568,
      "learning_rate": 6.274804754078063e-06,
      "loss": 0.1131,
      "step": 14925
    },
    {
      "epoch": 0.4354396405857985,
      "grad_norm": 1.022941663769502,
      "learning_rate": 6.2743479275427255e-06,
      "loss": 0.1343,
      "step": 14926
    },
    {
      "epoch": 0.43546881381644204,
      "grad_norm": 1.143967356093502,
      "learning_rate": 6.273891089630313e-06,
      "loss": 0.1701,
      "step": 14927
    },
    {
      "epoch": 0.4354979870470856,
      "grad_norm": 0.7368982907513386,
      "learning_rate": 6.273434240344906e-06,
      "loss": 0.1265,
      "step": 14928
    },
    {
      "epoch": 0.43552716027772914,
      "grad_norm": 0.9170885634221387,
      "learning_rate": 6.272977379690583e-06,
      "loss": 0.128,
      "step": 14929
    },
    {
      "epoch": 0.4355563335083727,
      "grad_norm": 1.0144873266327827,
      "learning_rate": 6.2725205076714215e-06,
      "loss": 0.1514,
      "step": 14930
    },
    {
      "epoch": 0.4355855067390163,
      "grad_norm": 0.8171141688562251,
      "learning_rate": 6.272063624291498e-06,
      "loss": 0.1571,
      "step": 14931
    },
    {
      "epoch": 0.43561467996965986,
      "grad_norm": 1.1620975943386103,
      "learning_rate": 6.271606729554897e-06,
      "loss": 0.1408,
      "step": 14932
    },
    {
      "epoch": 0.4356438532003034,
      "grad_norm": 1.1871283517654905,
      "learning_rate": 6.271149823465693e-06,
      "loss": 0.1349,
      "step": 14933
    },
    {
      "epoch": 0.43567302643094696,
      "grad_norm": 0.847428614997877,
      "learning_rate": 6.270692906027968e-06,
      "loss": 0.1352,
      "step": 14934
    },
    {
      "epoch": 0.4357021996615905,
      "grad_norm": 1.0326812778059362,
      "learning_rate": 6.2702359772458e-06,
      "loss": 0.1473,
      "step": 14935
    },
    {
      "epoch": 0.43573137289223407,
      "grad_norm": 0.9176456457317715,
      "learning_rate": 6.269779037123267e-06,
      "loss": 0.1451,
      "step": 14936
    },
    {
      "epoch": 0.4357605461228776,
      "grad_norm": 0.7759409629178708,
      "learning_rate": 6.269322085664452e-06,
      "loss": 0.1432,
      "step": 14937
    },
    {
      "epoch": 0.43578971935352123,
      "grad_norm": 1.2262564725803873,
      "learning_rate": 6.268865122873431e-06,
      "loss": 0.171,
      "step": 14938
    },
    {
      "epoch": 0.4358188925841648,
      "grad_norm": 0.829829756502998,
      "learning_rate": 6.268408148754285e-06,
      "loss": 0.1361,
      "step": 14939
    },
    {
      "epoch": 0.43584806581480834,
      "grad_norm": 0.8094543078533318,
      "learning_rate": 6.267951163311095e-06,
      "loss": 0.1466,
      "step": 14940
    },
    {
      "epoch": 0.4358772390454519,
      "grad_norm": 0.7789330195323289,
      "learning_rate": 6.267494166547938e-06,
      "loss": 0.1413,
      "step": 14941
    },
    {
      "epoch": 0.43590641227609545,
      "grad_norm": 0.8812221258860311,
      "learning_rate": 6.267037158468897e-06,
      "loss": 0.1695,
      "step": 14942
    },
    {
      "epoch": 0.435935585506739,
      "grad_norm": 0.9116048139269639,
      "learning_rate": 6.266580139078051e-06,
      "loss": 0.1426,
      "step": 14943
    },
    {
      "epoch": 0.43596475873738255,
      "grad_norm": 0.7102903023901466,
      "learning_rate": 6.266123108379478e-06,
      "loss": 0.1274,
      "step": 14944
    },
    {
      "epoch": 0.43599393196802616,
      "grad_norm": 0.8306328790161557,
      "learning_rate": 6.265666066377262e-06,
      "loss": 0.1406,
      "step": 14945
    },
    {
      "epoch": 0.4360231051986697,
      "grad_norm": 1.0817500740617516,
      "learning_rate": 6.265209013075481e-06,
      "loss": 0.1409,
      "step": 14946
    },
    {
      "epoch": 0.43605227842931327,
      "grad_norm": 0.8373492525705621,
      "learning_rate": 6.264751948478216e-06,
      "loss": 0.141,
      "step": 14947
    },
    {
      "epoch": 0.4360814516599568,
      "grad_norm": 1.006839374873911,
      "learning_rate": 6.264294872589547e-06,
      "loss": 0.1501,
      "step": 14948
    },
    {
      "epoch": 0.4361106248906004,
      "grad_norm": 1.2613639955868652,
      "learning_rate": 6.263837785413556e-06,
      "loss": 0.1353,
      "step": 14949
    },
    {
      "epoch": 0.4361397981212439,
      "grad_norm": 0.844220750536089,
      "learning_rate": 6.263380686954324e-06,
      "loss": 0.1376,
      "step": 14950
    },
    {
      "epoch": 0.43616897135188754,
      "grad_norm": 0.8256806551009317,
      "learning_rate": 6.2629235772159266e-06,
      "loss": 0.137,
      "step": 14951
    },
    {
      "epoch": 0.4361981445825311,
      "grad_norm": 0.8015607269478686,
      "learning_rate": 6.262466456202453e-06,
      "loss": 0.1512,
      "step": 14952
    },
    {
      "epoch": 0.43622731781317464,
      "grad_norm": 0.8865343343345692,
      "learning_rate": 6.262009323917979e-06,
      "loss": 0.1394,
      "step": 14953
    },
    {
      "epoch": 0.4362564910438182,
      "grad_norm": 0.7404050238691539,
      "learning_rate": 6.261552180366586e-06,
      "loss": 0.1637,
      "step": 14954
    },
    {
      "epoch": 0.43628566427446175,
      "grad_norm": 0.8766041674094689,
      "learning_rate": 6.261095025552359e-06,
      "loss": 0.1636,
      "step": 14955
    },
    {
      "epoch": 0.4363148375051053,
      "grad_norm": 0.9401005495181551,
      "learning_rate": 6.260637859479374e-06,
      "loss": 0.1458,
      "step": 14956
    },
    {
      "epoch": 0.43634401073574886,
      "grad_norm": 0.7294483064104669,
      "learning_rate": 6.260180682151716e-06,
      "loss": 0.1086,
      "step": 14957
    },
    {
      "epoch": 0.43637318396639246,
      "grad_norm": 0.8767594795626354,
      "learning_rate": 6.259723493573467e-06,
      "loss": 0.1393,
      "step": 14958
    },
    {
      "epoch": 0.436402357197036,
      "grad_norm": 0.8694135397666469,
      "learning_rate": 6.259266293748705e-06,
      "loss": 0.1594,
      "step": 14959
    },
    {
      "epoch": 0.43643153042767957,
      "grad_norm": 1.0462151480352384,
      "learning_rate": 6.258809082681515e-06,
      "loss": 0.1464,
      "step": 14960
    },
    {
      "epoch": 0.4364607036583231,
      "grad_norm": 0.9366276317687989,
      "learning_rate": 6.258351860375979e-06,
      "loss": 0.109,
      "step": 14961
    },
    {
      "epoch": 0.4364898768889667,
      "grad_norm": 0.9656065547459961,
      "learning_rate": 6.257894626836176e-06,
      "loss": 0.1393,
      "step": 14962
    },
    {
      "epoch": 0.43651905011961023,
      "grad_norm": 0.9458351499044808,
      "learning_rate": 6.257437382066191e-06,
      "loss": 0.1341,
      "step": 14963
    },
    {
      "epoch": 0.4365482233502538,
      "grad_norm": 0.7728687151152849,
      "learning_rate": 6.256980126070107e-06,
      "loss": 0.1462,
      "step": 14964
    },
    {
      "epoch": 0.4365773965808974,
      "grad_norm": 0.7792620701017975,
      "learning_rate": 6.256522858852003e-06,
      "loss": 0.1398,
      "step": 14965
    },
    {
      "epoch": 0.43660656981154095,
      "grad_norm": 0.795409314591477,
      "learning_rate": 6.256065580415962e-06,
      "loss": 0.164,
      "step": 14966
    },
    {
      "epoch": 0.4366357430421845,
      "grad_norm": 0.9598838014143705,
      "learning_rate": 6.2556082907660685e-06,
      "loss": 0.1268,
      "step": 14967
    },
    {
      "epoch": 0.43666491627282805,
      "grad_norm": 0.8031767049728477,
      "learning_rate": 6.255150989906405e-06,
      "loss": 0.1431,
      "step": 14968
    },
    {
      "epoch": 0.4366940895034716,
      "grad_norm": 1.1100118664682679,
      "learning_rate": 6.254693677841051e-06,
      "loss": 0.145,
      "step": 14969
    },
    {
      "epoch": 0.43672326273411516,
      "grad_norm": 0.7209447122471424,
      "learning_rate": 6.254236354574092e-06,
      "loss": 0.1401,
      "step": 14970
    },
    {
      "epoch": 0.4367524359647587,
      "grad_norm": 0.7933848661987428,
      "learning_rate": 6.25377902010961e-06,
      "loss": 0.1217,
      "step": 14971
    },
    {
      "epoch": 0.4367816091954023,
      "grad_norm": 0.8697372108140659,
      "learning_rate": 6.253321674451689e-06,
      "loss": 0.1393,
      "step": 14972
    },
    {
      "epoch": 0.4368107824260459,
      "grad_norm": 0.8834304271180028,
      "learning_rate": 6.252864317604411e-06,
      "loss": 0.1579,
      "step": 14973
    },
    {
      "epoch": 0.4368399556566894,
      "grad_norm": 0.8534089619224059,
      "learning_rate": 6.252406949571858e-06,
      "loss": 0.1436,
      "step": 14974
    },
    {
      "epoch": 0.436869128887333,
      "grad_norm": 0.8595474014569607,
      "learning_rate": 6.2519495703581165e-06,
      "loss": 0.1305,
      "step": 14975
    },
    {
      "epoch": 0.43689830211797653,
      "grad_norm": 1.116247587712783,
      "learning_rate": 6.2514921799672675e-06,
      "loss": 0.1445,
      "step": 14976
    },
    {
      "epoch": 0.4369274753486201,
      "grad_norm": 1.06846417250696,
      "learning_rate": 6.251034778403396e-06,
      "loss": 0.1363,
      "step": 14977
    },
    {
      "epoch": 0.43695664857926364,
      "grad_norm": 0.9021355009698474,
      "learning_rate": 6.250577365670584e-06,
      "loss": 0.1361,
      "step": 14978
    },
    {
      "epoch": 0.43698582180990725,
      "grad_norm": 0.878252703424149,
      "learning_rate": 6.250119941772915e-06,
      "loss": 0.1362,
      "step": 14979
    },
    {
      "epoch": 0.4370149950405508,
      "grad_norm": 0.961434453542702,
      "learning_rate": 6.2496625067144755e-06,
      "loss": 0.1505,
      "step": 14980
    },
    {
      "epoch": 0.43704416827119436,
      "grad_norm": 0.6862118257027884,
      "learning_rate": 6.249205060499345e-06,
      "loss": 0.1371,
      "step": 14981
    },
    {
      "epoch": 0.4370733415018379,
      "grad_norm": 0.9255086249462283,
      "learning_rate": 6.248747603131612e-06,
      "loss": 0.1137,
      "step": 14982
    },
    {
      "epoch": 0.43710251473248146,
      "grad_norm": 0.9150855053642007,
      "learning_rate": 6.2482901346153575e-06,
      "loss": 0.1457,
      "step": 14983
    },
    {
      "epoch": 0.437131687963125,
      "grad_norm": 0.8467982806994376,
      "learning_rate": 6.247832654954666e-06,
      "loss": 0.1751,
      "step": 14984
    },
    {
      "epoch": 0.4371608611937686,
      "grad_norm": 0.8952665039433834,
      "learning_rate": 6.247375164153624e-06,
      "loss": 0.1263,
      "step": 14985
    },
    {
      "epoch": 0.4371900344244122,
      "grad_norm": 0.698666395867167,
      "learning_rate": 6.246917662216314e-06,
      "loss": 0.1576,
      "step": 14986
    },
    {
      "epoch": 0.43721920765505573,
      "grad_norm": 0.9601414635502714,
      "learning_rate": 6.24646014914682e-06,
      "loss": 0.1541,
      "step": 14987
    },
    {
      "epoch": 0.4372483808856993,
      "grad_norm": 0.8327388951772458,
      "learning_rate": 6.246002624949228e-06,
      "loss": 0.1543,
      "step": 14988
    },
    {
      "epoch": 0.43727755411634284,
      "grad_norm": 0.7883870737055048,
      "learning_rate": 6.245545089627622e-06,
      "loss": 0.1337,
      "step": 14989
    },
    {
      "epoch": 0.4373067273469864,
      "grad_norm": 0.7564155832898626,
      "learning_rate": 6.2450875431860855e-06,
      "loss": 0.1703,
      "step": 14990
    },
    {
      "epoch": 0.43733590057762994,
      "grad_norm": 0.8815126459544893,
      "learning_rate": 6.244629985628706e-06,
      "loss": 0.1471,
      "step": 14991
    },
    {
      "epoch": 0.43736507380827355,
      "grad_norm": 0.8876949536195939,
      "learning_rate": 6.2441724169595665e-06,
      "loss": 0.1385,
      "step": 14992
    },
    {
      "epoch": 0.4373942470389171,
      "grad_norm": 0.7380826998864822,
      "learning_rate": 6.243714837182753e-06,
      "loss": 0.148,
      "step": 14993
    },
    {
      "epoch": 0.43742342026956066,
      "grad_norm": 0.8595716256583968,
      "learning_rate": 6.24325724630235e-06,
      "loss": 0.1449,
      "step": 14994
    },
    {
      "epoch": 0.4374525935002042,
      "grad_norm": 0.8102512183599935,
      "learning_rate": 6.242799644322445e-06,
      "loss": 0.1439,
      "step": 14995
    },
    {
      "epoch": 0.43748176673084777,
      "grad_norm": 0.7982252801663071,
      "learning_rate": 6.2423420312471185e-06,
      "loss": 0.1231,
      "step": 14996
    },
    {
      "epoch": 0.4375109399614913,
      "grad_norm": 0.7994109582589674,
      "learning_rate": 6.241884407080461e-06,
      "loss": 0.1628,
      "step": 14997
    },
    {
      "epoch": 0.43754011319213487,
      "grad_norm": 0.7772701160558678,
      "learning_rate": 6.241426771826555e-06,
      "loss": 0.1458,
      "step": 14998
    },
    {
      "epoch": 0.4375692864227785,
      "grad_norm": 0.8368242603943941,
      "learning_rate": 6.240969125489486e-06,
      "loss": 0.1534,
      "step": 14999
    },
    {
      "epoch": 0.43759845965342203,
      "grad_norm": 0.8074603876020473,
      "learning_rate": 6.240511468073343e-06,
      "loss": 0.1478,
      "step": 15000
    },
    {
      "epoch": 0.4376276328840656,
      "grad_norm": 0.7153335338167677,
      "learning_rate": 6.2400537995822085e-06,
      "loss": 0.1383,
      "step": 15001
    },
    {
      "epoch": 0.43765680611470914,
      "grad_norm": 0.7662948335841253,
      "learning_rate": 6.23959612002017e-06,
      "loss": 0.1434,
      "step": 15002
    },
    {
      "epoch": 0.4376859793453527,
      "grad_norm": 0.9111369640201166,
      "learning_rate": 6.239138429391314e-06,
      "loss": 0.1436,
      "step": 15003
    },
    {
      "epoch": 0.43771515257599625,
      "grad_norm": 0.8663095010290788,
      "learning_rate": 6.238680727699726e-06,
      "loss": 0.1317,
      "step": 15004
    },
    {
      "epoch": 0.4377443258066398,
      "grad_norm": 1.058155303477025,
      "learning_rate": 6.2382230149494906e-06,
      "loss": 0.1717,
      "step": 15005
    },
    {
      "epoch": 0.4377734990372834,
      "grad_norm": 0.8210054234280479,
      "learning_rate": 6.237765291144696e-06,
      "loss": 0.1501,
      "step": 15006
    },
    {
      "epoch": 0.43780267226792696,
      "grad_norm": 0.8199647565725456,
      "learning_rate": 6.237307556289429e-06,
      "loss": 0.1314,
      "step": 15007
    },
    {
      "epoch": 0.4378318454985705,
      "grad_norm": 1.223085026269827,
      "learning_rate": 6.236849810387776e-06,
      "loss": 0.1625,
      "step": 15008
    },
    {
      "epoch": 0.43786101872921407,
      "grad_norm": 1.1614711613469009,
      "learning_rate": 6.236392053443822e-06,
      "loss": 0.1234,
      "step": 15009
    },
    {
      "epoch": 0.4378901919598576,
      "grad_norm": 0.9369550405747571,
      "learning_rate": 6.235934285461656e-06,
      "loss": 0.1158,
      "step": 15010
    },
    {
      "epoch": 0.4379193651905012,
      "grad_norm": 0.8493194095409426,
      "learning_rate": 6.235476506445362e-06,
      "loss": 0.1246,
      "step": 15011
    },
    {
      "epoch": 0.4379485384211448,
      "grad_norm": 1.3030959463256946,
      "learning_rate": 6.2350187163990314e-06,
      "loss": 0.1556,
      "step": 15012
    },
    {
      "epoch": 0.43797771165178834,
      "grad_norm": 0.9238553779865656,
      "learning_rate": 6.234560915326747e-06,
      "loss": 0.1468,
      "step": 15013
    },
    {
      "epoch": 0.4380068848824319,
      "grad_norm": 0.9696903616097787,
      "learning_rate": 6.234103103232597e-06,
      "loss": 0.1363,
      "step": 15014
    },
    {
      "epoch": 0.43803605811307544,
      "grad_norm": 1.2887541279913717,
      "learning_rate": 6.233645280120671e-06,
      "loss": 0.1583,
      "step": 15015
    },
    {
      "epoch": 0.438065231343719,
      "grad_norm": 1.113477237477694,
      "learning_rate": 6.233187445995053e-06,
      "loss": 0.1368,
      "step": 15016
    },
    {
      "epoch": 0.43809440457436255,
      "grad_norm": 0.9901409573743811,
      "learning_rate": 6.232729600859832e-06,
      "loss": 0.1444,
      "step": 15017
    },
    {
      "epoch": 0.4381235778050061,
      "grad_norm": 0.9425912909861479,
      "learning_rate": 6.232271744719094e-06,
      "loss": 0.1431,
      "step": 15018
    },
    {
      "epoch": 0.4381527510356497,
      "grad_norm": 0.8408452784656811,
      "learning_rate": 6.23181387757693e-06,
      "loss": 0.1307,
      "step": 15019
    },
    {
      "epoch": 0.43818192426629327,
      "grad_norm": 0.8291973248767233,
      "learning_rate": 6.231355999437425e-06,
      "loss": 0.1542,
      "step": 15020
    },
    {
      "epoch": 0.4382110974969368,
      "grad_norm": 0.887432534102003,
      "learning_rate": 6.230898110304668e-06,
      "loss": 0.1206,
      "step": 15021
    },
    {
      "epoch": 0.43824027072758037,
      "grad_norm": 0.8618449996123551,
      "learning_rate": 6.230440210182745e-06,
      "loss": 0.1462,
      "step": 15022
    },
    {
      "epoch": 0.4382694439582239,
      "grad_norm": 0.8350368418007432,
      "learning_rate": 6.2299822990757475e-06,
      "loss": 0.1338,
      "step": 15023
    },
    {
      "epoch": 0.4382986171888675,
      "grad_norm": 0.7501682509218951,
      "learning_rate": 6.22952437698776e-06,
      "loss": 0.1292,
      "step": 15024
    },
    {
      "epoch": 0.43832779041951103,
      "grad_norm": 0.7891398829649721,
      "learning_rate": 6.229066443922874e-06,
      "loss": 0.1189,
      "step": 15025
    },
    {
      "epoch": 0.43835696365015464,
      "grad_norm": 0.8053139780836996,
      "learning_rate": 6.228608499885174e-06,
      "loss": 0.148,
      "step": 15026
    },
    {
      "epoch": 0.4383861368807982,
      "grad_norm": 0.7746703807058757,
      "learning_rate": 6.228150544878754e-06,
      "loss": 0.1347,
      "step": 15027
    },
    {
      "epoch": 0.43841531011144175,
      "grad_norm": 0.7696282133756824,
      "learning_rate": 6.227692578907697e-06,
      "loss": 0.1438,
      "step": 15028
    },
    {
      "epoch": 0.4384444833420853,
      "grad_norm": 0.7889312279455797,
      "learning_rate": 6.2272346019760936e-06,
      "loss": 0.149,
      "step": 15029
    },
    {
      "epoch": 0.43847365657272885,
      "grad_norm": 0.8572841859126313,
      "learning_rate": 6.2267766140880325e-06,
      "loss": 0.1316,
      "step": 15030
    },
    {
      "epoch": 0.4385028298033724,
      "grad_norm": 0.9308072763704206,
      "learning_rate": 6.226318615247604e-06,
      "loss": 0.1376,
      "step": 15031
    },
    {
      "epoch": 0.43853200303401596,
      "grad_norm": 0.9821851734638208,
      "learning_rate": 6.225860605458895e-06,
      "loss": 0.1418,
      "step": 15032
    },
    {
      "epoch": 0.43856117626465957,
      "grad_norm": 0.6586206659882248,
      "learning_rate": 6.225402584725993e-06,
      "loss": 0.1475,
      "step": 15033
    },
    {
      "epoch": 0.4385903494953031,
      "grad_norm": 0.8719938041849924,
      "learning_rate": 6.224944553052992e-06,
      "loss": 0.1403,
      "step": 15034
    },
    {
      "epoch": 0.4386195227259467,
      "grad_norm": 0.7686687697514415,
      "learning_rate": 6.224486510443978e-06,
      "loss": 0.1354,
      "step": 15035
    },
    {
      "epoch": 0.43864869595659023,
      "grad_norm": 0.7872009238631145,
      "learning_rate": 6.2240284569030395e-06,
      "loss": 0.1254,
      "step": 15036
    },
    {
      "epoch": 0.4386778691872338,
      "grad_norm": 0.6871719476135176,
      "learning_rate": 6.223570392434268e-06,
      "loss": 0.1316,
      "step": 15037
    },
    {
      "epoch": 0.43870704241787734,
      "grad_norm": 1.059686281698016,
      "learning_rate": 6.223112317041751e-06,
      "loss": 0.1475,
      "step": 15038
    },
    {
      "epoch": 0.43873621564852094,
      "grad_norm": 0.7529910432543049,
      "learning_rate": 6.222654230729582e-06,
      "loss": 0.1137,
      "step": 15039
    },
    {
      "epoch": 0.4387653888791645,
      "grad_norm": 1.4046174304789645,
      "learning_rate": 6.2221961335018464e-06,
      "loss": 0.1392,
      "step": 15040
    },
    {
      "epoch": 0.43879456210980805,
      "grad_norm": 0.7354673971840441,
      "learning_rate": 6.2217380253626346e-06,
      "loss": 0.1434,
      "step": 15041
    },
    {
      "epoch": 0.4388237353404516,
      "grad_norm": 0.7909726582936715,
      "learning_rate": 6.221279906316039e-06,
      "loss": 0.129,
      "step": 15042
    },
    {
      "epoch": 0.43885290857109516,
      "grad_norm": 0.7945483450078731,
      "learning_rate": 6.220821776366146e-06,
      "loss": 0.1347,
      "step": 15043
    },
    {
      "epoch": 0.4388820818017387,
      "grad_norm": 0.7010766125587274,
      "learning_rate": 6.2203636355170485e-06,
      "loss": 0.1185,
      "step": 15044
    },
    {
      "epoch": 0.43891125503238226,
      "grad_norm": 0.77055252530375,
      "learning_rate": 6.219905483772837e-06,
      "loss": 0.1544,
      "step": 15045
    },
    {
      "epoch": 0.4389404282630259,
      "grad_norm": 0.6051215517554468,
      "learning_rate": 6.2194473211376e-06,
      "loss": 0.1216,
      "step": 15046
    },
    {
      "epoch": 0.4389696014936694,
      "grad_norm": 0.7614440524907657,
      "learning_rate": 6.218989147615426e-06,
      "loss": 0.1486,
      "step": 15047
    },
    {
      "epoch": 0.438998774724313,
      "grad_norm": 0.7310503216162159,
      "learning_rate": 6.218530963210411e-06,
      "loss": 0.1438,
      "step": 15048
    },
    {
      "epoch": 0.43902794795495653,
      "grad_norm": 0.8824424699031387,
      "learning_rate": 6.21807276792664e-06,
      "loss": 0.15,
      "step": 15049
    },
    {
      "epoch": 0.4390571211856001,
      "grad_norm": 1.1171730097733568,
      "learning_rate": 6.217614561768208e-06,
      "loss": 0.1269,
      "step": 15050
    },
    {
      "epoch": 0.43908629441624364,
      "grad_norm": 0.6888003281689934,
      "learning_rate": 6.217156344739203e-06,
      "loss": 0.1411,
      "step": 15051
    },
    {
      "epoch": 0.4391154676468872,
      "grad_norm": 0.6657787649455748,
      "learning_rate": 6.2166981168437165e-06,
      "loss": 0.1229,
      "step": 15052
    },
    {
      "epoch": 0.4391446408775308,
      "grad_norm": 0.7578786141444442,
      "learning_rate": 6.21623987808584e-06,
      "loss": 0.1442,
      "step": 15053
    },
    {
      "epoch": 0.43917381410817435,
      "grad_norm": 0.8051741284438174,
      "learning_rate": 6.215781628469663e-06,
      "loss": 0.1484,
      "step": 15054
    },
    {
      "epoch": 0.4392029873388179,
      "grad_norm": 0.8568323410142273,
      "learning_rate": 6.2153233679992805e-06,
      "loss": 0.1431,
      "step": 15055
    },
    {
      "epoch": 0.43923216056946146,
      "grad_norm": 0.7701121083595855,
      "learning_rate": 6.214865096678779e-06,
      "loss": 0.1355,
      "step": 15056
    },
    {
      "epoch": 0.439261333800105,
      "grad_norm": 0.9624893288209205,
      "learning_rate": 6.214406814512254e-06,
      "loss": 0.1419,
      "step": 15057
    },
    {
      "epoch": 0.43929050703074857,
      "grad_norm": 0.7941603210473188,
      "learning_rate": 6.213948521503793e-06,
      "loss": 0.1653,
      "step": 15058
    },
    {
      "epoch": 0.4393196802613921,
      "grad_norm": 0.7443878015701055,
      "learning_rate": 6.2134902176574884e-06,
      "loss": 0.1404,
      "step": 15059
    },
    {
      "epoch": 0.43934885349203573,
      "grad_norm": 0.8533354162846383,
      "learning_rate": 6.213031902977436e-06,
      "loss": 0.1166,
      "step": 15060
    },
    {
      "epoch": 0.4393780267226793,
      "grad_norm": 0.9405274831200605,
      "learning_rate": 6.212573577467722e-06,
      "loss": 0.1437,
      "step": 15061
    },
    {
      "epoch": 0.43940719995332284,
      "grad_norm": 0.7169491241073812,
      "learning_rate": 6.212115241132441e-06,
      "loss": 0.1453,
      "step": 15062
    },
    {
      "epoch": 0.4394363731839664,
      "grad_norm": 1.0823337820424184,
      "learning_rate": 6.211656893975685e-06,
      "loss": 0.1327,
      "step": 15063
    },
    {
      "epoch": 0.43946554641460994,
      "grad_norm": 0.8252949874601155,
      "learning_rate": 6.211198536001545e-06,
      "loss": 0.1303,
      "step": 15064
    },
    {
      "epoch": 0.4394947196452535,
      "grad_norm": 1.031682793059374,
      "learning_rate": 6.210740167214114e-06,
      "loss": 0.1398,
      "step": 15065
    },
    {
      "epoch": 0.4395238928758971,
      "grad_norm": 0.7706924572131718,
      "learning_rate": 6.210281787617483e-06,
      "loss": 0.15,
      "step": 15066
    },
    {
      "epoch": 0.43955306610654066,
      "grad_norm": 0.832935142749437,
      "learning_rate": 6.209823397215746e-06,
      "loss": 0.1258,
      "step": 15067
    },
    {
      "epoch": 0.4395822393371842,
      "grad_norm": 0.9778245354120296,
      "learning_rate": 6.209364996012994e-06,
      "loss": 0.1274,
      "step": 15068
    },
    {
      "epoch": 0.43961141256782776,
      "grad_norm": 0.89526369817899,
      "learning_rate": 6.20890658401332e-06,
      "loss": 0.1301,
      "step": 15069
    },
    {
      "epoch": 0.4396405857984713,
      "grad_norm": 0.6959084421482379,
      "learning_rate": 6.208448161220818e-06,
      "loss": 0.1338,
      "step": 15070
    },
    {
      "epoch": 0.43966975902911487,
      "grad_norm": 1.5306102217470179,
      "learning_rate": 6.207989727639577e-06,
      "loss": 0.1257,
      "step": 15071
    },
    {
      "epoch": 0.4396989322597584,
      "grad_norm": 0.9210846705347121,
      "learning_rate": 6.2075312832736945e-06,
      "loss": 0.157,
      "step": 15072
    },
    {
      "epoch": 0.43972810549040203,
      "grad_norm": 0.8404251767950718,
      "learning_rate": 6.2070728281272594e-06,
      "loss": 0.1411,
      "step": 15073
    },
    {
      "epoch": 0.4397572787210456,
      "grad_norm": 0.763211868240203,
      "learning_rate": 6.206614362204366e-06,
      "loss": 0.1296,
      "step": 15074
    },
    {
      "epoch": 0.43978645195168914,
      "grad_norm": 0.883705407748208,
      "learning_rate": 6.206155885509108e-06,
      "loss": 0.1219,
      "step": 15075
    },
    {
      "epoch": 0.4398156251823327,
      "grad_norm": 0.9266157241004488,
      "learning_rate": 6.2056973980455795e-06,
      "loss": 0.1237,
      "step": 15076
    },
    {
      "epoch": 0.43984479841297625,
      "grad_norm": 0.9700898960791888,
      "learning_rate": 6.2052388998178705e-06,
      "loss": 0.1742,
      "step": 15077
    },
    {
      "epoch": 0.4398739716436198,
      "grad_norm": 0.9618272210553835,
      "learning_rate": 6.2047803908300776e-06,
      "loss": 0.1336,
      "step": 15078
    },
    {
      "epoch": 0.43990314487426335,
      "grad_norm": 0.863116055024015,
      "learning_rate": 6.204321871086292e-06,
      "loss": 0.133,
      "step": 15079
    },
    {
      "epoch": 0.43993231810490696,
      "grad_norm": 1.0521571374995566,
      "learning_rate": 6.203863340590609e-06,
      "loss": 0.1354,
      "step": 15080
    },
    {
      "epoch": 0.4399614913355505,
      "grad_norm": 0.9063771646807343,
      "learning_rate": 6.203404799347122e-06,
      "loss": 0.1264,
      "step": 15081
    },
    {
      "epoch": 0.43999066456619407,
      "grad_norm": 0.9149711879709472,
      "learning_rate": 6.202946247359922e-06,
      "loss": 0.1477,
      "step": 15082
    },
    {
      "epoch": 0.4400198377968376,
      "grad_norm": 0.979774442979104,
      "learning_rate": 6.202487684633107e-06,
      "loss": 0.141,
      "step": 15083
    },
    {
      "epoch": 0.4400490110274812,
      "grad_norm": 0.9123487969392395,
      "learning_rate": 6.202029111170769e-06,
      "loss": 0.1393,
      "step": 15084
    },
    {
      "epoch": 0.4400781842581247,
      "grad_norm": 0.8849756322792716,
      "learning_rate": 6.201570526977001e-06,
      "loss": 0.1287,
      "step": 15085
    },
    {
      "epoch": 0.4401073574887683,
      "grad_norm": 0.8109589019185897,
      "learning_rate": 6.2011119320558986e-06,
      "loss": 0.1462,
      "step": 15086
    },
    {
      "epoch": 0.4401365307194119,
      "grad_norm": 0.8302390639730577,
      "learning_rate": 6.2006533264115564e-06,
      "loss": 0.1255,
      "step": 15087
    },
    {
      "epoch": 0.44016570395005544,
      "grad_norm": 0.7300103239122315,
      "learning_rate": 6.2001947100480675e-06,
      "loss": 0.1481,
      "step": 15088
    },
    {
      "epoch": 0.440194877180699,
      "grad_norm": 0.9699959274058704,
      "learning_rate": 6.199736082969525e-06,
      "loss": 0.1314,
      "step": 15089
    },
    {
      "epoch": 0.44022405041134255,
      "grad_norm": 0.7641958932511366,
      "learning_rate": 6.199277445180028e-06,
      "loss": 0.1622,
      "step": 15090
    },
    {
      "epoch": 0.4402532236419861,
      "grad_norm": 0.7577420220487728,
      "learning_rate": 6.198818796683666e-06,
      "loss": 0.1461,
      "step": 15091
    },
    {
      "epoch": 0.44028239687262966,
      "grad_norm": 0.7869450527540598,
      "learning_rate": 6.198360137484537e-06,
      "loss": 0.1396,
      "step": 15092
    },
    {
      "epoch": 0.4403115701032732,
      "grad_norm": 0.7217383889627569,
      "learning_rate": 6.1979014675867345e-06,
      "loss": 0.1396,
      "step": 15093
    },
    {
      "epoch": 0.4403407433339168,
      "grad_norm": 0.7495436122585886,
      "learning_rate": 6.197442786994354e-06,
      "loss": 0.1183,
      "step": 15094
    },
    {
      "epoch": 0.44036991656456037,
      "grad_norm": 0.8044067727692116,
      "learning_rate": 6.1969840957114904e-06,
      "loss": 0.157,
      "step": 15095
    },
    {
      "epoch": 0.4403990897952039,
      "grad_norm": 0.7255909826283716,
      "learning_rate": 6.196525393742238e-06,
      "loss": 0.1701,
      "step": 15096
    },
    {
      "epoch": 0.4404282630258475,
      "grad_norm": 0.8239930130991369,
      "learning_rate": 6.196066681090692e-06,
      "loss": 0.1358,
      "step": 15097
    },
    {
      "epoch": 0.44045743625649103,
      "grad_norm": 0.8686482362650536,
      "learning_rate": 6.1956079577609485e-06,
      "loss": 0.143,
      "step": 15098
    },
    {
      "epoch": 0.4404866094871346,
      "grad_norm": 0.8572453656024628,
      "learning_rate": 6.195149223757103e-06,
      "loss": 0.1392,
      "step": 15099
    },
    {
      "epoch": 0.4405157827177782,
      "grad_norm": 0.7974897270986314,
      "learning_rate": 6.194690479083251e-06,
      "loss": 0.1489,
      "step": 15100
    },
    {
      "epoch": 0.44054495594842175,
      "grad_norm": 1.0348902345146171,
      "learning_rate": 6.194231723743486e-06,
      "loss": 0.1479,
      "step": 15101
    },
    {
      "epoch": 0.4405741291790653,
      "grad_norm": 0.9820780283957675,
      "learning_rate": 6.193772957741907e-06,
      "loss": 0.1756,
      "step": 15102
    },
    {
      "epoch": 0.44060330240970885,
      "grad_norm": 0.8434465511246421,
      "learning_rate": 6.193314181082607e-06,
      "loss": 0.14,
      "step": 15103
    },
    {
      "epoch": 0.4406324756403524,
      "grad_norm": 0.6533601682398729,
      "learning_rate": 6.192855393769683e-06,
      "loss": 0.1337,
      "step": 15104
    },
    {
      "epoch": 0.44066164887099596,
      "grad_norm": 0.8235956027060124,
      "learning_rate": 6.192396595807231e-06,
      "loss": 0.1576,
      "step": 15105
    },
    {
      "epoch": 0.4406908221016395,
      "grad_norm": 1.0340137000191478,
      "learning_rate": 6.191937787199347e-06,
      "loss": 0.1479,
      "step": 15106
    },
    {
      "epoch": 0.4407199953322831,
      "grad_norm": 0.9728982450315318,
      "learning_rate": 6.1914789679501266e-06,
      "loss": 0.1302,
      "step": 15107
    },
    {
      "epoch": 0.4407491685629267,
      "grad_norm": 0.8074227064702215,
      "learning_rate": 6.191020138063666e-06,
      "loss": 0.1589,
      "step": 15108
    },
    {
      "epoch": 0.4407783417935702,
      "grad_norm": 0.9898810044190635,
      "learning_rate": 6.190561297544063e-06,
      "loss": 0.1296,
      "step": 15109
    },
    {
      "epoch": 0.4408075150242138,
      "grad_norm": 0.7838370141602546,
      "learning_rate": 6.190102446395412e-06,
      "loss": 0.1513,
      "step": 15110
    },
    {
      "epoch": 0.44083668825485733,
      "grad_norm": 0.8351440670730054,
      "learning_rate": 6.189643584621811e-06,
      "loss": 0.1408,
      "step": 15111
    },
    {
      "epoch": 0.4408658614855009,
      "grad_norm": 0.9429585268779143,
      "learning_rate": 6.189184712227356e-06,
      "loss": 0.1598,
      "step": 15112
    },
    {
      "epoch": 0.44089503471614444,
      "grad_norm": 0.8872677081948187,
      "learning_rate": 6.1887258292161435e-06,
      "loss": 0.1627,
      "step": 15113
    },
    {
      "epoch": 0.44092420794678805,
      "grad_norm": 0.7317606862709696,
      "learning_rate": 6.1882669355922706e-06,
      "loss": 0.1216,
      "step": 15114
    },
    {
      "epoch": 0.4409533811774316,
      "grad_norm": 0.8599470645369262,
      "learning_rate": 6.187808031359835e-06,
      "loss": 0.1377,
      "step": 15115
    },
    {
      "epoch": 0.44098255440807516,
      "grad_norm": 0.9091108097425813,
      "learning_rate": 6.187349116522932e-06,
      "loss": 0.1308,
      "step": 15116
    },
    {
      "epoch": 0.4410117276387187,
      "grad_norm": 0.6868528400456547,
      "learning_rate": 6.186890191085659e-06,
      "loss": 0.1224,
      "step": 15117
    },
    {
      "epoch": 0.44104090086936226,
      "grad_norm": 0.7895111323825614,
      "learning_rate": 6.1864312550521156e-06,
      "loss": 0.1134,
      "step": 15118
    },
    {
      "epoch": 0.4410700741000058,
      "grad_norm": 0.9170317658962038,
      "learning_rate": 6.185972308426394e-06,
      "loss": 0.1425,
      "step": 15119
    },
    {
      "epoch": 0.44109924733064937,
      "grad_norm": 0.8898762215953443,
      "learning_rate": 6.185513351212599e-06,
      "loss": 0.1339,
      "step": 15120
    },
    {
      "epoch": 0.441128420561293,
      "grad_norm": 0.7120990645692094,
      "learning_rate": 6.185054383414821e-06,
      "loss": 0.128,
      "step": 15121
    },
    {
      "epoch": 0.44115759379193653,
      "grad_norm": 0.9098797685642345,
      "learning_rate": 6.18459540503716e-06,
      "loss": 0.1396,
      "step": 15122
    },
    {
      "epoch": 0.4411867670225801,
      "grad_norm": 0.9099002629450361,
      "learning_rate": 6.184136416083716e-06,
      "loss": 0.1754,
      "step": 15123
    },
    {
      "epoch": 0.44121594025322364,
      "grad_norm": 0.8491899787753415,
      "learning_rate": 6.1836774165585835e-06,
      "loss": 0.1255,
      "step": 15124
    },
    {
      "epoch": 0.4412451134838672,
      "grad_norm": 1.0133237976787148,
      "learning_rate": 6.183218406465861e-06,
      "loss": 0.1295,
      "step": 15125
    },
    {
      "epoch": 0.44127428671451074,
      "grad_norm": 0.937482166158476,
      "learning_rate": 6.182759385809648e-06,
      "loss": 0.1598,
      "step": 15126
    },
    {
      "epoch": 0.44130345994515435,
      "grad_norm": 0.8495937070330164,
      "learning_rate": 6.182300354594041e-06,
      "loss": 0.1587,
      "step": 15127
    },
    {
      "epoch": 0.4413326331757979,
      "grad_norm": 0.9365884837859367,
      "learning_rate": 6.181841312823139e-06,
      "loss": 0.1495,
      "step": 15128
    },
    {
      "epoch": 0.44136180640644146,
      "grad_norm": 0.828917931816457,
      "learning_rate": 6.18138226050104e-06,
      "loss": 0.1354,
      "step": 15129
    },
    {
      "epoch": 0.441390979637085,
      "grad_norm": 1.2121680427116477,
      "learning_rate": 6.1809231976318414e-06,
      "loss": 0.1461,
      "step": 15130
    },
    {
      "epoch": 0.44142015286772857,
      "grad_norm": 0.8267442464136356,
      "learning_rate": 6.1804641242196435e-06,
      "loss": 0.1297,
      "step": 15131
    },
    {
      "epoch": 0.4414493260983721,
      "grad_norm": 0.8669047273309362,
      "learning_rate": 6.180005040268544e-06,
      "loss": 0.1639,
      "step": 15132
    },
    {
      "epoch": 0.4414784993290157,
      "grad_norm": 0.9900346816148035,
      "learning_rate": 6.179545945782639e-06,
      "loss": 0.1352,
      "step": 15133
    },
    {
      "epoch": 0.4415076725596593,
      "grad_norm": 0.8302777317742639,
      "learning_rate": 6.179086840766031e-06,
      "loss": 0.1545,
      "step": 15134
    },
    {
      "epoch": 0.44153684579030283,
      "grad_norm": 0.8540477899928897,
      "learning_rate": 6.178627725222819e-06,
      "loss": 0.1528,
      "step": 15135
    },
    {
      "epoch": 0.4415660190209464,
      "grad_norm": 0.7911456172678053,
      "learning_rate": 6.178168599157096e-06,
      "loss": 0.1047,
      "step": 15136
    },
    {
      "epoch": 0.44159519225158994,
      "grad_norm": 0.9700265681787416,
      "learning_rate": 6.177709462572969e-06,
      "loss": 0.1471,
      "step": 15137
    },
    {
      "epoch": 0.4416243654822335,
      "grad_norm": 0.7729885089326664,
      "learning_rate": 6.17725031547453e-06,
      "loss": 0.157,
      "step": 15138
    },
    {
      "epoch": 0.44165353871287705,
      "grad_norm": 1.0104582272958682,
      "learning_rate": 6.176791157865881e-06,
      "loss": 0.1337,
      "step": 15139
    },
    {
      "epoch": 0.4416827119435206,
      "grad_norm": 0.775961171060146,
      "learning_rate": 6.176331989751125e-06,
      "loss": 0.1253,
      "step": 15140
    },
    {
      "epoch": 0.4417118851741642,
      "grad_norm": 1.0101315435786071,
      "learning_rate": 6.175872811134355e-06,
      "loss": 0.1335,
      "step": 15141
    },
    {
      "epoch": 0.44174105840480776,
      "grad_norm": 0.756935795113428,
      "learning_rate": 6.175413622019674e-06,
      "loss": 0.1401,
      "step": 15142
    },
    {
      "epoch": 0.4417702316354513,
      "grad_norm": 0.8103766556796341,
      "learning_rate": 6.1749544224111805e-06,
      "loss": 0.1261,
      "step": 15143
    },
    {
      "epoch": 0.44179940486609487,
      "grad_norm": 0.7480765217196009,
      "learning_rate": 6.174495212312974e-06,
      "loss": 0.1561,
      "step": 15144
    },
    {
      "epoch": 0.4418285780967384,
      "grad_norm": 0.7150165037411563,
      "learning_rate": 6.174035991729155e-06,
      "loss": 0.1752,
      "step": 15145
    },
    {
      "epoch": 0.441857751327382,
      "grad_norm": 0.9834870920906477,
      "learning_rate": 6.173576760663823e-06,
      "loss": 0.15,
      "step": 15146
    },
    {
      "epoch": 0.44188692455802553,
      "grad_norm": 0.9306883206375008,
      "learning_rate": 6.173117519121079e-06,
      "loss": 0.1472,
      "step": 15147
    },
    {
      "epoch": 0.44191609778866914,
      "grad_norm": 0.6708625275650458,
      "learning_rate": 6.172658267105019e-06,
      "loss": 0.1182,
      "step": 15148
    },
    {
      "epoch": 0.4419452710193127,
      "grad_norm": 0.6782386383396968,
      "learning_rate": 6.172199004619748e-06,
      "loss": 0.1478,
      "step": 15149
    },
    {
      "epoch": 0.44197444424995624,
      "grad_norm": 0.7840701456064665,
      "learning_rate": 6.171739731669365e-06,
      "loss": 0.1354,
      "step": 15150
    },
    {
      "epoch": 0.4420036174805998,
      "grad_norm": 0.8921163707375518,
      "learning_rate": 6.171280448257967e-06,
      "loss": 0.1115,
      "step": 15151
    },
    {
      "epoch": 0.44203279071124335,
      "grad_norm": 0.7350397465064393,
      "learning_rate": 6.170821154389659e-06,
      "loss": 0.1397,
      "step": 15152
    },
    {
      "epoch": 0.4420619639418869,
      "grad_norm": 0.8706135341382112,
      "learning_rate": 6.170361850068538e-06,
      "loss": 0.1366,
      "step": 15153
    },
    {
      "epoch": 0.4420911371725305,
      "grad_norm": 0.8786752834643791,
      "learning_rate": 6.169902535298704e-06,
      "loss": 0.1148,
      "step": 15154
    },
    {
      "epoch": 0.44212031040317407,
      "grad_norm": 0.9376217221559763,
      "learning_rate": 6.169443210084262e-06,
      "loss": 0.1354,
      "step": 15155
    },
    {
      "epoch": 0.4421494836338176,
      "grad_norm": 0.7440517010432058,
      "learning_rate": 6.1689838744293105e-06,
      "loss": 0.1375,
      "step": 15156
    },
    {
      "epoch": 0.4421786568644612,
      "grad_norm": 0.7252953866774031,
      "learning_rate": 6.168524528337949e-06,
      "loss": 0.1439,
      "step": 15157
    },
    {
      "epoch": 0.4422078300951047,
      "grad_norm": 0.8294662727384108,
      "learning_rate": 6.168065171814279e-06,
      "loss": 0.1416,
      "step": 15158
    },
    {
      "epoch": 0.4422370033257483,
      "grad_norm": 1.006490711539657,
      "learning_rate": 6.1676058048624035e-06,
      "loss": 0.1264,
      "step": 15159
    },
    {
      "epoch": 0.44226617655639183,
      "grad_norm": 0.8004298319702237,
      "learning_rate": 6.167146427486421e-06,
      "loss": 0.1325,
      "step": 15160
    },
    {
      "epoch": 0.44229534978703544,
      "grad_norm": 0.6268073336564143,
      "learning_rate": 6.166687039690433e-06,
      "loss": 0.1251,
      "step": 15161
    },
    {
      "epoch": 0.442324523017679,
      "grad_norm": 0.8880811293595925,
      "learning_rate": 6.166227641478544e-06,
      "loss": 0.1372,
      "step": 15162
    },
    {
      "epoch": 0.44235369624832255,
      "grad_norm": 0.9086839478739694,
      "learning_rate": 6.1657682328548505e-06,
      "loss": 0.1435,
      "step": 15163
    },
    {
      "epoch": 0.4423828694789661,
      "grad_norm": 0.873657569457763,
      "learning_rate": 6.165308813823457e-06,
      "loss": 0.1293,
      "step": 15164
    },
    {
      "epoch": 0.44241204270960965,
      "grad_norm": 0.833198728873711,
      "learning_rate": 6.164849384388467e-06,
      "loss": 0.141,
      "step": 15165
    },
    {
      "epoch": 0.4424412159402532,
      "grad_norm": 0.7669766276065918,
      "learning_rate": 6.164389944553977e-06,
      "loss": 0.1584,
      "step": 15166
    },
    {
      "epoch": 0.44247038917089676,
      "grad_norm": 1.2068899831680615,
      "learning_rate": 6.163930494324093e-06,
      "loss": 0.1478,
      "step": 15167
    },
    {
      "epoch": 0.44249956240154037,
      "grad_norm": 0.7778010498215501,
      "learning_rate": 6.163471033702914e-06,
      "loss": 0.1153,
      "step": 15168
    },
    {
      "epoch": 0.4425287356321839,
      "grad_norm": 0.770797837270307,
      "learning_rate": 6.1630115626945445e-06,
      "loss": 0.1552,
      "step": 15169
    },
    {
      "epoch": 0.4425579088628275,
      "grad_norm": 1.055067195964919,
      "learning_rate": 6.1625520813030855e-06,
      "loss": 0.1304,
      "step": 15170
    },
    {
      "epoch": 0.44258708209347103,
      "grad_norm": 0.790305789887117,
      "learning_rate": 6.162092589532639e-06,
      "loss": 0.1389,
      "step": 15171
    },
    {
      "epoch": 0.4426162553241146,
      "grad_norm": 0.7926830874783952,
      "learning_rate": 6.1616330873873065e-06,
      "loss": 0.1296,
      "step": 15172
    },
    {
      "epoch": 0.44264542855475814,
      "grad_norm": 0.8228868513259496,
      "learning_rate": 6.161173574871192e-06,
      "loss": 0.1313,
      "step": 15173
    },
    {
      "epoch": 0.4426746017854017,
      "grad_norm": 0.7739986899820893,
      "learning_rate": 6.160714051988396e-06,
      "loss": 0.1407,
      "step": 15174
    },
    {
      "epoch": 0.4427037750160453,
      "grad_norm": 0.7870682635859886,
      "learning_rate": 6.160254518743023e-06,
      "loss": 0.1022,
      "step": 15175
    },
    {
      "epoch": 0.44273294824668885,
      "grad_norm": 0.8602714911088228,
      "learning_rate": 6.159794975139174e-06,
      "loss": 0.1296,
      "step": 15176
    },
    {
      "epoch": 0.4427621214773324,
      "grad_norm": 0.8641677714563666,
      "learning_rate": 6.159335421180954e-06,
      "loss": 0.1355,
      "step": 15177
    },
    {
      "epoch": 0.44279129470797596,
      "grad_norm": 0.8789323711322151,
      "learning_rate": 6.158875856872462e-06,
      "loss": 0.1198,
      "step": 15178
    },
    {
      "epoch": 0.4428204679386195,
      "grad_norm": 0.948326178954136,
      "learning_rate": 6.158416282217803e-06,
      "loss": 0.1767,
      "step": 15179
    },
    {
      "epoch": 0.44284964116926306,
      "grad_norm": 1.11965728484572,
      "learning_rate": 6.157956697221082e-06,
      "loss": 0.1376,
      "step": 15180
    },
    {
      "epoch": 0.4428788143999067,
      "grad_norm": 0.8302308190233277,
      "learning_rate": 6.157497101886397e-06,
      "loss": 0.1307,
      "step": 15181
    },
    {
      "epoch": 0.4429079876305502,
      "grad_norm": 1.045664337871547,
      "learning_rate": 6.157037496217857e-06,
      "loss": 0.1194,
      "step": 15182
    },
    {
      "epoch": 0.4429371608611938,
      "grad_norm": 0.7982028729623004,
      "learning_rate": 6.156577880219561e-06,
      "loss": 0.1474,
      "step": 15183
    },
    {
      "epoch": 0.44296633409183733,
      "grad_norm": 0.9772078210105002,
      "learning_rate": 6.156118253895613e-06,
      "loss": 0.1386,
      "step": 15184
    },
    {
      "epoch": 0.4429955073224809,
      "grad_norm": 0.8324946152115834,
      "learning_rate": 6.15565861725012e-06,
      "loss": 0.1312,
      "step": 15185
    },
    {
      "epoch": 0.44302468055312444,
      "grad_norm": 0.8117971938902022,
      "learning_rate": 6.155198970287181e-06,
      "loss": 0.1317,
      "step": 15186
    },
    {
      "epoch": 0.443053853783768,
      "grad_norm": 0.9119895557336309,
      "learning_rate": 6.154739313010901e-06,
      "loss": 0.1308,
      "step": 15187
    },
    {
      "epoch": 0.4430830270144116,
      "grad_norm": 0.8644022003527182,
      "learning_rate": 6.154279645425385e-06,
      "loss": 0.1339,
      "step": 15188
    },
    {
      "epoch": 0.44311220024505515,
      "grad_norm": 0.6803353772697368,
      "learning_rate": 6.153819967534734e-06,
      "loss": 0.1451,
      "step": 15189
    },
    {
      "epoch": 0.4431413734756987,
      "grad_norm": 0.879070882971226,
      "learning_rate": 6.153360279343056e-06,
      "loss": 0.1267,
      "step": 15190
    },
    {
      "epoch": 0.44317054670634226,
      "grad_norm": 0.766632869208845,
      "learning_rate": 6.152900580854452e-06,
      "loss": 0.1361,
      "step": 15191
    },
    {
      "epoch": 0.4431997199369858,
      "grad_norm": 0.9171909654778313,
      "learning_rate": 6.1524408720730276e-06,
      "loss": 0.1592,
      "step": 15192
    },
    {
      "epoch": 0.44322889316762937,
      "grad_norm": 1.0116900483386435,
      "learning_rate": 6.1519811530028836e-06,
      "loss": 0.1347,
      "step": 15193
    },
    {
      "epoch": 0.4432580663982729,
      "grad_norm": 0.9236343921387395,
      "learning_rate": 6.151521423648129e-06,
      "loss": 0.1317,
      "step": 15194
    },
    {
      "epoch": 0.44328723962891653,
      "grad_norm": 0.8200229888697481,
      "learning_rate": 6.151061684012867e-06,
      "loss": 0.1476,
      "step": 15195
    },
    {
      "epoch": 0.4433164128595601,
      "grad_norm": 1.0263558504613906,
      "learning_rate": 6.150601934101198e-06,
      "loss": 0.1367,
      "step": 15196
    },
    {
      "epoch": 0.44334558609020364,
      "grad_norm": 1.0179915279890752,
      "learning_rate": 6.150142173917233e-06,
      "loss": 0.1562,
      "step": 15197
    },
    {
      "epoch": 0.4433747593208472,
      "grad_norm": 0.8193260872849141,
      "learning_rate": 6.1496824034650715e-06,
      "loss": 0.1457,
      "step": 15198
    },
    {
      "epoch": 0.44340393255149074,
      "grad_norm": 0.8125875373992769,
      "learning_rate": 6.149222622748818e-06,
      "loss": 0.1672,
      "step": 15199
    },
    {
      "epoch": 0.4434331057821343,
      "grad_norm": 0.9401759401314863,
      "learning_rate": 6.148762831772582e-06,
      "loss": 0.1409,
      "step": 15200
    },
    {
      "epoch": 0.44346227901277785,
      "grad_norm": 1.061541647071786,
      "learning_rate": 6.148303030540466e-06,
      "loss": 0.1317,
      "step": 15201
    },
    {
      "epoch": 0.44349145224342146,
      "grad_norm": 0.9208910349135103,
      "learning_rate": 6.1478432190565725e-06,
      "loss": 0.1472,
      "step": 15202
    },
    {
      "epoch": 0.443520625474065,
      "grad_norm": 0.7130989810499107,
      "learning_rate": 6.14738339732501e-06,
      "loss": 0.1403,
      "step": 15203
    },
    {
      "epoch": 0.44354979870470856,
      "grad_norm": 1.1571797061398832,
      "learning_rate": 6.146923565349882e-06,
      "loss": 0.163,
      "step": 15204
    },
    {
      "epoch": 0.4435789719353521,
      "grad_norm": 0.8491187005356661,
      "learning_rate": 6.146463723135295e-06,
      "loss": 0.1467,
      "step": 15205
    },
    {
      "epoch": 0.44360814516599567,
      "grad_norm": 0.7907530667174092,
      "learning_rate": 6.146003870685353e-06,
      "loss": 0.1215,
      "step": 15206
    },
    {
      "epoch": 0.4436373183966392,
      "grad_norm": 0.8576470547447197,
      "learning_rate": 6.145544008004163e-06,
      "loss": 0.1469,
      "step": 15207
    },
    {
      "epoch": 0.44366649162728283,
      "grad_norm": 0.6210393668457042,
      "learning_rate": 6.145084135095827e-06,
      "loss": 0.13,
      "step": 15208
    },
    {
      "epoch": 0.4436956648579264,
      "grad_norm": 0.9519110825112453,
      "learning_rate": 6.144624251964455e-06,
      "loss": 0.1293,
      "step": 15209
    },
    {
      "epoch": 0.44372483808856994,
      "grad_norm": 0.8792011471834411,
      "learning_rate": 6.144164358614152e-06,
      "loss": 0.1432,
      "step": 15210
    },
    {
      "epoch": 0.4437540113192135,
      "grad_norm": 0.7657075243259581,
      "learning_rate": 6.14370445504902e-06,
      "loss": 0.1162,
      "step": 15211
    },
    {
      "epoch": 0.44378318454985705,
      "grad_norm": 0.8340099443319311,
      "learning_rate": 6.14324454127317e-06,
      "loss": 0.144,
      "step": 15212
    },
    {
      "epoch": 0.4438123577805006,
      "grad_norm": 0.8752137407083753,
      "learning_rate": 6.1427846172907045e-06,
      "loss": 0.1488,
      "step": 15213
    },
    {
      "epoch": 0.44384153101114415,
      "grad_norm": 0.9318068773304626,
      "learning_rate": 6.14232468310573e-06,
      "loss": 0.1284,
      "step": 15214
    },
    {
      "epoch": 0.44387070424178776,
      "grad_norm": 0.8736516655055431,
      "learning_rate": 6.141864738722356e-06,
      "loss": 0.1607,
      "step": 15215
    },
    {
      "epoch": 0.4438998774724313,
      "grad_norm": 0.8076426652809794,
      "learning_rate": 6.141404784144685e-06,
      "loss": 0.1443,
      "step": 15216
    },
    {
      "epoch": 0.44392905070307487,
      "grad_norm": 0.9168196815559632,
      "learning_rate": 6.140944819376824e-06,
      "loss": 0.1482,
      "step": 15217
    },
    {
      "epoch": 0.4439582239337184,
      "grad_norm": 0.7397749370047907,
      "learning_rate": 6.140484844422879e-06,
      "loss": 0.1244,
      "step": 15218
    },
    {
      "epoch": 0.443987397164362,
      "grad_norm": 0.7476313378941652,
      "learning_rate": 6.14002485928696e-06,
      "loss": 0.1526,
      "step": 15219
    },
    {
      "epoch": 0.4440165703950055,
      "grad_norm": 0.6667299942179602,
      "learning_rate": 6.139564863973169e-06,
      "loss": 0.1326,
      "step": 15220
    },
    {
      "epoch": 0.4440457436256491,
      "grad_norm": 0.9066358944844822,
      "learning_rate": 6.139104858485616e-06,
      "loss": 0.1348,
      "step": 15221
    },
    {
      "epoch": 0.4440749168562927,
      "grad_norm": 0.7812964435545219,
      "learning_rate": 6.138644842828407e-06,
      "loss": 0.138,
      "step": 15222
    },
    {
      "epoch": 0.44410409008693624,
      "grad_norm": 0.8349967115734126,
      "learning_rate": 6.138184817005648e-06,
      "loss": 0.1524,
      "step": 15223
    },
    {
      "epoch": 0.4441332633175798,
      "grad_norm": 1.1972323678106125,
      "learning_rate": 6.1377247810214466e-06,
      "loss": 0.1452,
      "step": 15224
    },
    {
      "epoch": 0.44416243654822335,
      "grad_norm": 0.9094728032049704,
      "learning_rate": 6.137264734879912e-06,
      "loss": 0.132,
      "step": 15225
    },
    {
      "epoch": 0.4441916097788669,
      "grad_norm": 0.8020759584277014,
      "learning_rate": 6.136804678585146e-06,
      "loss": 0.1341,
      "step": 15226
    },
    {
      "epoch": 0.44422078300951046,
      "grad_norm": 1.1431151200320264,
      "learning_rate": 6.136344612141262e-06,
      "loss": 0.1435,
      "step": 15227
    },
    {
      "epoch": 0.444249956240154,
      "grad_norm": 0.9889349789349048,
      "learning_rate": 6.135884535552363e-06,
      "loss": 0.1479,
      "step": 15228
    },
    {
      "epoch": 0.4442791294707976,
      "grad_norm": 0.7478560976135962,
      "learning_rate": 6.135424448822559e-06,
      "loss": 0.1271,
      "step": 15229
    },
    {
      "epoch": 0.44430830270144117,
      "grad_norm": 0.7186266917342702,
      "learning_rate": 6.134964351955955e-06,
      "loss": 0.1181,
      "step": 15230
    },
    {
      "epoch": 0.4443374759320847,
      "grad_norm": 0.8726330738217674,
      "learning_rate": 6.134504244956662e-06,
      "loss": 0.1172,
      "step": 15231
    },
    {
      "epoch": 0.4443666491627283,
      "grad_norm": 0.9163240605568302,
      "learning_rate": 6.134044127828785e-06,
      "loss": 0.124,
      "step": 15232
    },
    {
      "epoch": 0.44439582239337183,
      "grad_norm": 0.96480251333729,
      "learning_rate": 6.133584000576433e-06,
      "loss": 0.1463,
      "step": 15233
    },
    {
      "epoch": 0.4444249956240154,
      "grad_norm": 0.7747088266092722,
      "learning_rate": 6.133123863203714e-06,
      "loss": 0.13,
      "step": 15234
    },
    {
      "epoch": 0.44445416885465894,
      "grad_norm": 1.0229218733197905,
      "learning_rate": 6.132663715714735e-06,
      "loss": 0.131,
      "step": 15235
    },
    {
      "epoch": 0.44448334208530255,
      "grad_norm": 0.8384492580311871,
      "learning_rate": 6.132203558113604e-06,
      "loss": 0.1064,
      "step": 15236
    },
    {
      "epoch": 0.4445125153159461,
      "grad_norm": 0.7906416335983961,
      "learning_rate": 6.131743390404432e-06,
      "loss": 0.1413,
      "step": 15237
    },
    {
      "epoch": 0.44454168854658965,
      "grad_norm": 0.7826541865543741,
      "learning_rate": 6.131283212591324e-06,
      "loss": 0.1527,
      "step": 15238
    },
    {
      "epoch": 0.4445708617772332,
      "grad_norm": 0.9590447953863227,
      "learning_rate": 6.130823024678388e-06,
      "loss": 0.1454,
      "step": 15239
    },
    {
      "epoch": 0.44460003500787676,
      "grad_norm": 0.9056538663663815,
      "learning_rate": 6.1303628266697365e-06,
      "loss": 0.1467,
      "step": 15240
    },
    {
      "epoch": 0.4446292082385203,
      "grad_norm": 0.8059288224470611,
      "learning_rate": 6.129902618569474e-06,
      "loss": 0.1477,
      "step": 15241
    },
    {
      "epoch": 0.4446583814691639,
      "grad_norm": 0.8979235488551794,
      "learning_rate": 6.129442400381712e-06,
      "loss": 0.1207,
      "step": 15242
    },
    {
      "epoch": 0.4446875546998075,
      "grad_norm": 0.8475314209268942,
      "learning_rate": 6.128982172110558e-06,
      "loss": 0.1386,
      "step": 15243
    },
    {
      "epoch": 0.44471672793045103,
      "grad_norm": 0.5986386792810503,
      "learning_rate": 6.128521933760119e-06,
      "loss": 0.1282,
      "step": 15244
    },
    {
      "epoch": 0.4447459011610946,
      "grad_norm": 0.8971110629639041,
      "learning_rate": 6.1280616853345065e-06,
      "loss": 0.1489,
      "step": 15245
    },
    {
      "epoch": 0.44477507439173813,
      "grad_norm": 0.9085603557417934,
      "learning_rate": 6.127601426837828e-06,
      "loss": 0.1624,
      "step": 15246
    },
    {
      "epoch": 0.4448042476223817,
      "grad_norm": 0.8271277986371625,
      "learning_rate": 6.127141158274194e-06,
      "loss": 0.1638,
      "step": 15247
    },
    {
      "epoch": 0.44483342085302524,
      "grad_norm": 1.1073249084395338,
      "learning_rate": 6.126680879647712e-06,
      "loss": 0.1424,
      "step": 15248
    },
    {
      "epoch": 0.44486259408366885,
      "grad_norm": 0.9205586686240007,
      "learning_rate": 6.126220590962493e-06,
      "loss": 0.1486,
      "step": 15249
    },
    {
      "epoch": 0.4448917673143124,
      "grad_norm": 0.7290816364217954,
      "learning_rate": 6.1257602922226445e-06,
      "loss": 0.1311,
      "step": 15250
    },
    {
      "epoch": 0.44492094054495596,
      "grad_norm": 1.2122255502811066,
      "learning_rate": 6.1252999834322766e-06,
      "loss": 0.16,
      "step": 15251
    },
    {
      "epoch": 0.4449501137755995,
      "grad_norm": 1.0996832928390419,
      "learning_rate": 6.124839664595501e-06,
      "loss": 0.1589,
      "step": 15252
    },
    {
      "epoch": 0.44497928700624306,
      "grad_norm": 0.9662578582152325,
      "learning_rate": 6.1243793357164224e-06,
      "loss": 0.1348,
      "step": 15253
    },
    {
      "epoch": 0.4450084602368866,
      "grad_norm": 0.8472291855870548,
      "learning_rate": 6.123918996799155e-06,
      "loss": 0.1675,
      "step": 15254
    },
    {
      "epoch": 0.44503763346753017,
      "grad_norm": 1.1205126844670723,
      "learning_rate": 6.123458647847808e-06,
      "loss": 0.1689,
      "step": 15255
    },
    {
      "epoch": 0.4450668066981738,
      "grad_norm": 0.9057560283475344,
      "learning_rate": 6.1229982888664895e-06,
      "loss": 0.161,
      "step": 15256
    },
    {
      "epoch": 0.44509597992881733,
      "grad_norm": 0.8111224915309679,
      "learning_rate": 6.122537919859312e-06,
      "loss": 0.1204,
      "step": 15257
    },
    {
      "epoch": 0.4451251531594609,
      "grad_norm": 0.752264863469283,
      "learning_rate": 6.1220775408303825e-06,
      "loss": 0.1355,
      "step": 15258
    },
    {
      "epoch": 0.44515432639010444,
      "grad_norm": 0.9918192620855522,
      "learning_rate": 6.121617151783812e-06,
      "loss": 0.1696,
      "step": 15259
    },
    {
      "epoch": 0.445183499620748,
      "grad_norm": 0.7428392567570353,
      "learning_rate": 6.1211567527237115e-06,
      "loss": 0.1471,
      "step": 15260
    },
    {
      "epoch": 0.44521267285139154,
      "grad_norm": 0.8730197701897332,
      "learning_rate": 6.120696343654191e-06,
      "loss": 0.1305,
      "step": 15261
    },
    {
      "epoch": 0.4452418460820351,
      "grad_norm": 0.7914013776401143,
      "learning_rate": 6.120235924579362e-06,
      "loss": 0.1259,
      "step": 15262
    },
    {
      "epoch": 0.4452710193126787,
      "grad_norm": 0.7101655196438709,
      "learning_rate": 6.119775495503334e-06,
      "loss": 0.142,
      "step": 15263
    },
    {
      "epoch": 0.44530019254332226,
      "grad_norm": 0.8127695382551665,
      "learning_rate": 6.119315056430217e-06,
      "loss": 0.1323,
      "step": 15264
    },
    {
      "epoch": 0.4453293657739658,
      "grad_norm": 0.8752157719510197,
      "learning_rate": 6.118854607364122e-06,
      "loss": 0.1405,
      "step": 15265
    },
    {
      "epoch": 0.44535853900460937,
      "grad_norm": 0.8859459423157212,
      "learning_rate": 6.118394148309161e-06,
      "loss": 0.1511,
      "step": 15266
    },
    {
      "epoch": 0.4453877122352529,
      "grad_norm": 0.9430807259674541,
      "learning_rate": 6.117933679269446e-06,
      "loss": 0.1602,
      "step": 15267
    },
    {
      "epoch": 0.4454168854658965,
      "grad_norm": 0.8101264308156917,
      "learning_rate": 6.117473200249082e-06,
      "loss": 0.1251,
      "step": 15268
    },
    {
      "epoch": 0.4454460586965401,
      "grad_norm": 0.6454273830947004,
      "learning_rate": 6.117012711252186e-06,
      "loss": 0.1311,
      "step": 15269
    },
    {
      "epoch": 0.44547523192718363,
      "grad_norm": 1.0266229742097204,
      "learning_rate": 6.116552212282868e-06,
      "loss": 0.1252,
      "step": 15270
    },
    {
      "epoch": 0.4455044051578272,
      "grad_norm": 0.8443968042739745,
      "learning_rate": 6.116091703345236e-06,
      "loss": 0.1326,
      "step": 15271
    },
    {
      "epoch": 0.44553357838847074,
      "grad_norm": 0.7291060015392791,
      "learning_rate": 6.1156311844434065e-06,
      "loss": 0.1311,
      "step": 15272
    },
    {
      "epoch": 0.4455627516191143,
      "grad_norm": 0.8763559814127485,
      "learning_rate": 6.115170655581486e-06,
      "loss": 0.1279,
      "step": 15273
    },
    {
      "epoch": 0.44559192484975785,
      "grad_norm": 0.7304809459449753,
      "learning_rate": 6.114710116763589e-06,
      "loss": 0.1284,
      "step": 15274
    },
    {
      "epoch": 0.4456210980804014,
      "grad_norm": 0.788802497349413,
      "learning_rate": 6.114249567993826e-06,
      "loss": 0.1394,
      "step": 15275
    },
    {
      "epoch": 0.445650271311045,
      "grad_norm": 1.0010644044930095,
      "learning_rate": 6.11378900927631e-06,
      "loss": 0.1526,
      "step": 15276
    },
    {
      "epoch": 0.44567944454168856,
      "grad_norm": 0.9800887845963783,
      "learning_rate": 6.1133284406151494e-06,
      "loss": 0.1468,
      "step": 15277
    },
    {
      "epoch": 0.4457086177723321,
      "grad_norm": 0.8837972629123051,
      "learning_rate": 6.11286786201446e-06,
      "loss": 0.1246,
      "step": 15278
    },
    {
      "epoch": 0.44573779100297567,
      "grad_norm": 0.756446040358204,
      "learning_rate": 6.112407273478351e-06,
      "loss": 0.1459,
      "step": 15279
    },
    {
      "epoch": 0.4457669642336192,
      "grad_norm": 0.8495668524065141,
      "learning_rate": 6.111946675010936e-06,
      "loss": 0.1448,
      "step": 15280
    },
    {
      "epoch": 0.4457961374642628,
      "grad_norm": 1.01471428766146,
      "learning_rate": 6.111486066616326e-06,
      "loss": 0.1664,
      "step": 15281
    },
    {
      "epoch": 0.44582531069490633,
      "grad_norm": 0.916121638870755,
      "learning_rate": 6.1110254482986354e-06,
      "loss": 0.1574,
      "step": 15282
    },
    {
      "epoch": 0.44585448392554994,
      "grad_norm": 0.7760158580921451,
      "learning_rate": 6.110564820061972e-06,
      "loss": 0.1525,
      "step": 15283
    },
    {
      "epoch": 0.4458836571561935,
      "grad_norm": 0.853142412517895,
      "learning_rate": 6.110104181910452e-06,
      "loss": 0.1295,
      "step": 15284
    },
    {
      "epoch": 0.44591283038683704,
      "grad_norm": 0.9426743939597242,
      "learning_rate": 6.1096435338481885e-06,
      "loss": 0.1505,
      "step": 15285
    },
    {
      "epoch": 0.4459420036174806,
      "grad_norm": 0.7478221107434188,
      "learning_rate": 6.10918287587929e-06,
      "loss": 0.1502,
      "step": 15286
    },
    {
      "epoch": 0.44597117684812415,
      "grad_norm": 0.7782289003441114,
      "learning_rate": 6.108722208007875e-06,
      "loss": 0.1607,
      "step": 15287
    },
    {
      "epoch": 0.4460003500787677,
      "grad_norm": 0.904992190235612,
      "learning_rate": 6.10826153023805e-06,
      "loss": 0.1362,
      "step": 15288
    },
    {
      "epoch": 0.44602952330941126,
      "grad_norm": 0.8852852530556897,
      "learning_rate": 6.107800842573931e-06,
      "loss": 0.1436,
      "step": 15289
    },
    {
      "epoch": 0.44605869654005487,
      "grad_norm": 1.429744324549573,
      "learning_rate": 6.10734014501963e-06,
      "loss": 0.1426,
      "step": 15290
    },
    {
      "epoch": 0.4460878697706984,
      "grad_norm": 0.9704718485114926,
      "learning_rate": 6.106879437579262e-06,
      "loss": 0.1535,
      "step": 15291
    },
    {
      "epoch": 0.446117043001342,
      "grad_norm": 0.866394347707187,
      "learning_rate": 6.106418720256938e-06,
      "loss": 0.1261,
      "step": 15292
    },
    {
      "epoch": 0.4461462162319855,
      "grad_norm": 0.8273542824415847,
      "learning_rate": 6.105957993056772e-06,
      "loss": 0.1243,
      "step": 15293
    },
    {
      "epoch": 0.4461753894626291,
      "grad_norm": 1.5233395354131498,
      "learning_rate": 6.105497255982876e-06,
      "loss": 0.1381,
      "step": 15294
    },
    {
      "epoch": 0.44620456269327263,
      "grad_norm": 1.0572255648397941,
      "learning_rate": 6.105036509039365e-06,
      "loss": 0.1579,
      "step": 15295
    },
    {
      "epoch": 0.44623373592391624,
      "grad_norm": 0.7290005488993977,
      "learning_rate": 6.1045757522303516e-06,
      "loss": 0.154,
      "step": 15296
    },
    {
      "epoch": 0.4462629091545598,
      "grad_norm": 0.7555598067273647,
      "learning_rate": 6.104114985559952e-06,
      "loss": 0.1504,
      "step": 15297
    },
    {
      "epoch": 0.44629208238520335,
      "grad_norm": 1.0388390205532618,
      "learning_rate": 6.1036542090322736e-06,
      "loss": 0.1504,
      "step": 15298
    },
    {
      "epoch": 0.4463212556158469,
      "grad_norm": 0.890318762127317,
      "learning_rate": 6.103193422651436e-06,
      "loss": 0.144,
      "step": 15299
    },
    {
      "epoch": 0.44635042884649045,
      "grad_norm": 0.6257069102607349,
      "learning_rate": 6.102732626421552e-06,
      "loss": 0.129,
      "step": 15300
    },
    {
      "epoch": 0.446379602077134,
      "grad_norm": 0.8375940792515587,
      "learning_rate": 6.102271820346731e-06,
      "loss": 0.1485,
      "step": 15301
    },
    {
      "epoch": 0.44640877530777756,
      "grad_norm": 1.1803825090593871,
      "learning_rate": 6.101811004431093e-06,
      "loss": 0.1412,
      "step": 15302
    },
    {
      "epoch": 0.44643794853842117,
      "grad_norm": 1.0694821347337167,
      "learning_rate": 6.101350178678749e-06,
      "loss": 0.1196,
      "step": 15303
    },
    {
      "epoch": 0.4464671217690647,
      "grad_norm": 0.7030966777568456,
      "learning_rate": 6.100889343093812e-06,
      "loss": 0.1313,
      "step": 15304
    },
    {
      "epoch": 0.4464962949997083,
      "grad_norm": 0.8758459983348651,
      "learning_rate": 6.1004284976804e-06,
      "loss": 0.1426,
      "step": 15305
    },
    {
      "epoch": 0.44652546823035183,
      "grad_norm": 0.8054310547956597,
      "learning_rate": 6.099967642442623e-06,
      "loss": 0.1436,
      "step": 15306
    },
    {
      "epoch": 0.4465546414609954,
      "grad_norm": 0.9332237157358919,
      "learning_rate": 6.099506777384598e-06,
      "loss": 0.1381,
      "step": 15307
    },
    {
      "epoch": 0.44658381469163894,
      "grad_norm": 0.8506058449099817,
      "learning_rate": 6.09904590251044e-06,
      "loss": 0.1403,
      "step": 15308
    },
    {
      "epoch": 0.4466129879222825,
      "grad_norm": 0.7043375782536109,
      "learning_rate": 6.098585017824261e-06,
      "loss": 0.1346,
      "step": 15309
    },
    {
      "epoch": 0.4466421611529261,
      "grad_norm": 0.9915756333596731,
      "learning_rate": 6.098124123330178e-06,
      "loss": 0.1271,
      "step": 15310
    },
    {
      "epoch": 0.44667133438356965,
      "grad_norm": 0.7555900798427305,
      "learning_rate": 6.097663219032306e-06,
      "loss": 0.1277,
      "step": 15311
    },
    {
      "epoch": 0.4467005076142132,
      "grad_norm": 0.7811465571733777,
      "learning_rate": 6.097202304934758e-06,
      "loss": 0.1369,
      "step": 15312
    },
    {
      "epoch": 0.44672968084485676,
      "grad_norm": 0.7760424626545825,
      "learning_rate": 6.096741381041649e-06,
      "loss": 0.1393,
      "step": 15313
    },
    {
      "epoch": 0.4467588540755003,
      "grad_norm": 0.8137295964514707,
      "learning_rate": 6.096280447357095e-06,
      "loss": 0.1242,
      "step": 15314
    },
    {
      "epoch": 0.44678802730614386,
      "grad_norm": 0.8190671346470227,
      "learning_rate": 6.0958195038852115e-06,
      "loss": 0.1736,
      "step": 15315
    },
    {
      "epoch": 0.4468172005367874,
      "grad_norm": 0.7415349402083039,
      "learning_rate": 6.095358550630113e-06,
      "loss": 0.1358,
      "step": 15316
    },
    {
      "epoch": 0.446846373767431,
      "grad_norm": 0.7279555472391306,
      "learning_rate": 6.0948975875959145e-06,
      "loss": 0.1279,
      "step": 15317
    },
    {
      "epoch": 0.4468755469980746,
      "grad_norm": 0.8911639321268853,
      "learning_rate": 6.094436614786733e-06,
      "loss": 0.1519,
      "step": 15318
    },
    {
      "epoch": 0.44690472022871813,
      "grad_norm": 0.9726188669800133,
      "learning_rate": 6.093975632206681e-06,
      "loss": 0.1252,
      "step": 15319
    },
    {
      "epoch": 0.4469338934593617,
      "grad_norm": 0.8049216253395035,
      "learning_rate": 6.093514639859877e-06,
      "loss": 0.1671,
      "step": 15320
    },
    {
      "epoch": 0.44696306669000524,
      "grad_norm": 0.8248019570433789,
      "learning_rate": 6.093053637750433e-06,
      "loss": 0.1335,
      "step": 15321
    },
    {
      "epoch": 0.4469922399206488,
      "grad_norm": 0.9190140038585972,
      "learning_rate": 6.09259262588247e-06,
      "loss": 0.1481,
      "step": 15322
    },
    {
      "epoch": 0.4470214131512924,
      "grad_norm": 1.0802199683662481,
      "learning_rate": 6.092131604260099e-06,
      "loss": 0.1237,
      "step": 15323
    },
    {
      "epoch": 0.44705058638193595,
      "grad_norm": 1.9020566396685108,
      "learning_rate": 6.091670572887438e-06,
      "loss": 0.1477,
      "step": 15324
    },
    {
      "epoch": 0.4470797596125795,
      "grad_norm": 0.7611700628300063,
      "learning_rate": 6.091209531768603e-06,
      "loss": 0.1403,
      "step": 15325
    },
    {
      "epoch": 0.44710893284322306,
      "grad_norm": 0.8753853301393958,
      "learning_rate": 6.09074848090771e-06,
      "loss": 0.13,
      "step": 15326
    },
    {
      "epoch": 0.4471381060738666,
      "grad_norm": 0.9155815837711926,
      "learning_rate": 6.0902874203088744e-06,
      "loss": 0.1358,
      "step": 15327
    },
    {
      "epoch": 0.44716727930451017,
      "grad_norm": 1.1288010289736823,
      "learning_rate": 6.089826349976213e-06,
      "loss": 0.1342,
      "step": 15328
    },
    {
      "epoch": 0.4471964525351537,
      "grad_norm": 0.7041431192759933,
      "learning_rate": 6.0893652699138425e-06,
      "loss": 0.135,
      "step": 15329
    },
    {
      "epoch": 0.44722562576579733,
      "grad_norm": 0.916718058298528,
      "learning_rate": 6.088904180125878e-06,
      "loss": 0.1359,
      "step": 15330
    },
    {
      "epoch": 0.4472547989964409,
      "grad_norm": 0.8518468216049682,
      "learning_rate": 6.088443080616439e-06,
      "loss": 0.1593,
      "step": 15331
    },
    {
      "epoch": 0.44728397222708444,
      "grad_norm": 1.1255361767521337,
      "learning_rate": 6.087981971389639e-06,
      "loss": 0.1362,
      "step": 15332
    },
    {
      "epoch": 0.447313145457728,
      "grad_norm": 0.6877519479258729,
      "learning_rate": 6.0875208524495945e-06,
      "loss": 0.1262,
      "step": 15333
    },
    {
      "epoch": 0.44734231868837154,
      "grad_norm": 0.8847023564377745,
      "learning_rate": 6.087059723800426e-06,
      "loss": 0.1368,
      "step": 15334
    },
    {
      "epoch": 0.4473714919190151,
      "grad_norm": 0.7625876272799125,
      "learning_rate": 6.086598585446245e-06,
      "loss": 0.1436,
      "step": 15335
    },
    {
      "epoch": 0.44740066514965865,
      "grad_norm": 0.78945406976093,
      "learning_rate": 6.086137437391172e-06,
      "loss": 0.1253,
      "step": 15336
    },
    {
      "epoch": 0.44742983838030226,
      "grad_norm": 0.7537447445197407,
      "learning_rate": 6.0856762796393244e-06,
      "loss": 0.149,
      "step": 15337
    },
    {
      "epoch": 0.4474590116109458,
      "grad_norm": 0.7916157298811274,
      "learning_rate": 6.085215112194818e-06,
      "loss": 0.1312,
      "step": 15338
    },
    {
      "epoch": 0.44748818484158936,
      "grad_norm": 0.9057345129733425,
      "learning_rate": 6.084753935061769e-06,
      "loss": 0.1543,
      "step": 15339
    },
    {
      "epoch": 0.4475173580722329,
      "grad_norm": 0.6811350085174357,
      "learning_rate": 6.084292748244296e-06,
      "loss": 0.1302,
      "step": 15340
    },
    {
      "epoch": 0.44754653130287647,
      "grad_norm": 0.7061628425165356,
      "learning_rate": 6.083831551746516e-06,
      "loss": 0.1418,
      "step": 15341
    },
    {
      "epoch": 0.44757570453352,
      "grad_norm": 0.9942950514244173,
      "learning_rate": 6.083370345572548e-06,
      "loss": 0.1333,
      "step": 15342
    },
    {
      "epoch": 0.4476048777641636,
      "grad_norm": 0.8480920884999015,
      "learning_rate": 6.082909129726506e-06,
      "loss": 0.1311,
      "step": 15343
    },
    {
      "epoch": 0.4476340509948072,
      "grad_norm": 0.7649499289265241,
      "learning_rate": 6.082447904212512e-06,
      "loss": 0.1371,
      "step": 15344
    },
    {
      "epoch": 0.44766322422545074,
      "grad_norm": 0.7993715485804653,
      "learning_rate": 6.081986669034681e-06,
      "loss": 0.14,
      "step": 15345
    },
    {
      "epoch": 0.4476923974560943,
      "grad_norm": 0.8205791858500433,
      "learning_rate": 6.08152542419713e-06,
      "loss": 0.1245,
      "step": 15346
    },
    {
      "epoch": 0.44772157068673785,
      "grad_norm": 0.7218935117331445,
      "learning_rate": 6.081064169703981e-06,
      "loss": 0.1305,
      "step": 15347
    },
    {
      "epoch": 0.4477507439173814,
      "grad_norm": 0.8263799578193723,
      "learning_rate": 6.080602905559346e-06,
      "loss": 0.1597,
      "step": 15348
    },
    {
      "epoch": 0.44777991714802495,
      "grad_norm": 0.8179836136294776,
      "learning_rate": 6.080141631767349e-06,
      "loss": 0.1604,
      "step": 15349
    },
    {
      "epoch": 0.4478090903786685,
      "grad_norm": 0.9099487724906116,
      "learning_rate": 6.079680348332103e-06,
      "loss": 0.1421,
      "step": 15350
    },
    {
      "epoch": 0.4478382636093121,
      "grad_norm": 1.0850840366906596,
      "learning_rate": 6.079219055257729e-06,
      "loss": 0.1333,
      "step": 15351
    },
    {
      "epoch": 0.44786743683995567,
      "grad_norm": 0.789915599834623,
      "learning_rate": 6.078757752548346e-06,
      "loss": 0.132,
      "step": 15352
    },
    {
      "epoch": 0.4478966100705992,
      "grad_norm": 0.8338508408773504,
      "learning_rate": 6.07829644020807e-06,
      "loss": 0.1281,
      "step": 15353
    },
    {
      "epoch": 0.4479257833012428,
      "grad_norm": 0.7019238584494384,
      "learning_rate": 6.0778351182410226e-06,
      "loss": 0.109,
      "step": 15354
    },
    {
      "epoch": 0.44795495653188633,
      "grad_norm": 0.8044855984225002,
      "learning_rate": 6.077373786651319e-06,
      "loss": 0.1535,
      "step": 15355
    },
    {
      "epoch": 0.4479841297625299,
      "grad_norm": 0.7373770750377117,
      "learning_rate": 6.076912445443079e-06,
      "loss": 0.1124,
      "step": 15356
    },
    {
      "epoch": 0.4480133029931735,
      "grad_norm": 0.7168772588691699,
      "learning_rate": 6.076451094620424e-06,
      "loss": 0.1466,
      "step": 15357
    },
    {
      "epoch": 0.44804247622381704,
      "grad_norm": 0.95791986581113,
      "learning_rate": 6.075989734187469e-06,
      "loss": 0.1563,
      "step": 15358
    },
    {
      "epoch": 0.4480716494544606,
      "grad_norm": 0.7106563143561379,
      "learning_rate": 6.075528364148335e-06,
      "loss": 0.1061,
      "step": 15359
    },
    {
      "epoch": 0.44810082268510415,
      "grad_norm": 0.8339195536507266,
      "learning_rate": 6.07506698450714e-06,
      "loss": 0.1729,
      "step": 15360
    },
    {
      "epoch": 0.4481299959157477,
      "grad_norm": 0.8239540782182683,
      "learning_rate": 6.074605595268002e-06,
      "loss": 0.1284,
      "step": 15361
    },
    {
      "epoch": 0.44815916914639126,
      "grad_norm": 0.7366551213719733,
      "learning_rate": 6.074144196435045e-06,
      "loss": 0.1367,
      "step": 15362
    },
    {
      "epoch": 0.4481883423770348,
      "grad_norm": 0.7667253677875338,
      "learning_rate": 6.073682788012384e-06,
      "loss": 0.1103,
      "step": 15363
    },
    {
      "epoch": 0.4482175156076784,
      "grad_norm": 0.7472561908775801,
      "learning_rate": 6.073221370004139e-06,
      "loss": 0.1373,
      "step": 15364
    },
    {
      "epoch": 0.44824668883832197,
      "grad_norm": 0.9541954510364338,
      "learning_rate": 6.07275994241443e-06,
      "loss": 0.1223,
      "step": 15365
    },
    {
      "epoch": 0.4482758620689655,
      "grad_norm": 0.8713491107684442,
      "learning_rate": 6.072298505247376e-06,
      "loss": 0.1183,
      "step": 15366
    },
    {
      "epoch": 0.4483050352996091,
      "grad_norm": 1.3063701456978865,
      "learning_rate": 6.071837058507097e-06,
      "loss": 0.1942,
      "step": 15367
    },
    {
      "epoch": 0.44833420853025263,
      "grad_norm": 0.8573021583863663,
      "learning_rate": 6.071375602197713e-06,
      "loss": 0.1376,
      "step": 15368
    },
    {
      "epoch": 0.4483633817608962,
      "grad_norm": 0.9923513666614028,
      "learning_rate": 6.070914136323342e-06,
      "loss": 0.1429,
      "step": 15369
    },
    {
      "epoch": 0.44839255499153974,
      "grad_norm": 0.9075965650298964,
      "learning_rate": 6.070452660888108e-06,
      "loss": 0.1554,
      "step": 15370
    },
    {
      "epoch": 0.44842172822218335,
      "grad_norm": 0.7643302118184612,
      "learning_rate": 6.069991175896126e-06,
      "loss": 0.1473,
      "step": 15371
    },
    {
      "epoch": 0.4484509014528269,
      "grad_norm": 1.0499260942359514,
      "learning_rate": 6.069529681351518e-06,
      "loss": 0.16,
      "step": 15372
    },
    {
      "epoch": 0.44848007468347045,
      "grad_norm": 0.8892435689751551,
      "learning_rate": 6.069068177258406e-06,
      "loss": 0.1542,
      "step": 15373
    },
    {
      "epoch": 0.448509247914114,
      "grad_norm": 0.8745325863367627,
      "learning_rate": 6.068606663620907e-06,
      "loss": 0.1316,
      "step": 15374
    },
    {
      "epoch": 0.44853842114475756,
      "grad_norm": 1.2117473936439547,
      "learning_rate": 6.068145140443143e-06,
      "loss": 0.1457,
      "step": 15375
    },
    {
      "epoch": 0.4485675943754011,
      "grad_norm": 0.9842029551190973,
      "learning_rate": 6.067683607729234e-06,
      "loss": 0.1585,
      "step": 15376
    },
    {
      "epoch": 0.44859676760604467,
      "grad_norm": 0.942666837965468,
      "learning_rate": 6.067222065483303e-06,
      "loss": 0.1406,
      "step": 15377
    },
    {
      "epoch": 0.4486259408366883,
      "grad_norm": 0.8178630689461366,
      "learning_rate": 6.066760513709466e-06,
      "loss": 0.1281,
      "step": 15378
    },
    {
      "epoch": 0.44865511406733183,
      "grad_norm": 0.7872626468120314,
      "learning_rate": 6.066298952411846e-06,
      "loss": 0.14,
      "step": 15379
    },
    {
      "epoch": 0.4486842872979754,
      "grad_norm": 0.9733597246995307,
      "learning_rate": 6.065837381594563e-06,
      "loss": 0.1416,
      "step": 15380
    },
    {
      "epoch": 0.44871346052861893,
      "grad_norm": 0.8273228500542338,
      "learning_rate": 6.065375801261739e-06,
      "loss": 0.153,
      "step": 15381
    },
    {
      "epoch": 0.4487426337592625,
      "grad_norm": 1.01081477485419,
      "learning_rate": 6.064914211417495e-06,
      "loss": 0.1465,
      "step": 15382
    },
    {
      "epoch": 0.44877180698990604,
      "grad_norm": 0.7620129358509248,
      "learning_rate": 6.06445261206595e-06,
      "loss": 0.1337,
      "step": 15383
    },
    {
      "epoch": 0.44880098022054965,
      "grad_norm": 0.875167098626487,
      "learning_rate": 6.063991003211227e-06,
      "loss": 0.1402,
      "step": 15384
    },
    {
      "epoch": 0.4488301534511932,
      "grad_norm": 0.8485254629721719,
      "learning_rate": 6.063529384857445e-06,
      "loss": 0.1427,
      "step": 15385
    },
    {
      "epoch": 0.44885932668183676,
      "grad_norm": 0.874625989164001,
      "learning_rate": 6.063067757008727e-06,
      "loss": 0.1343,
      "step": 15386
    },
    {
      "epoch": 0.4488884999124803,
      "grad_norm": 0.8749592803137143,
      "learning_rate": 6.062606119669194e-06,
      "loss": 0.1499,
      "step": 15387
    },
    {
      "epoch": 0.44891767314312386,
      "grad_norm": 0.9409782433805018,
      "learning_rate": 6.0621444728429675e-06,
      "loss": 0.1678,
      "step": 15388
    },
    {
      "epoch": 0.4489468463737674,
      "grad_norm": 0.9233812981784898,
      "learning_rate": 6.061682816534169e-06,
      "loss": 0.1501,
      "step": 15389
    },
    {
      "epoch": 0.44897601960441097,
      "grad_norm": 0.9928736377413834,
      "learning_rate": 6.061221150746919e-06,
      "loss": 0.1468,
      "step": 15390
    },
    {
      "epoch": 0.4490051928350546,
      "grad_norm": 0.8676091112081428,
      "learning_rate": 6.060759475485341e-06,
      "loss": 0.1581,
      "step": 15391
    },
    {
      "epoch": 0.44903436606569813,
      "grad_norm": 0.8149235286502625,
      "learning_rate": 6.060297790753555e-06,
      "loss": 0.1564,
      "step": 15392
    },
    {
      "epoch": 0.4490635392963417,
      "grad_norm": 0.7952708965899153,
      "learning_rate": 6.059836096555682e-06,
      "loss": 0.1651,
      "step": 15393
    },
    {
      "epoch": 0.44909271252698524,
      "grad_norm": 0.774273371444286,
      "learning_rate": 6.059374392895847e-06,
      "loss": 0.1432,
      "step": 15394
    },
    {
      "epoch": 0.4491218857576288,
      "grad_norm": 1.064555635610097,
      "learning_rate": 6.0589126797781705e-06,
      "loss": 0.166,
      "step": 15395
    },
    {
      "epoch": 0.44915105898827234,
      "grad_norm": 0.8378755092136079,
      "learning_rate": 6.058450957206773e-06,
      "loss": 0.1344,
      "step": 15396
    },
    {
      "epoch": 0.4491802322189159,
      "grad_norm": 0.8085456034294426,
      "learning_rate": 6.057989225185779e-06,
      "loss": 0.1393,
      "step": 15397
    },
    {
      "epoch": 0.4492094054495595,
      "grad_norm": 0.7754635374313775,
      "learning_rate": 6.0575274837193096e-06,
      "loss": 0.1318,
      "step": 15398
    },
    {
      "epoch": 0.44923857868020306,
      "grad_norm": 0.8527890777382867,
      "learning_rate": 6.057065732811488e-06,
      "loss": 0.1442,
      "step": 15399
    },
    {
      "epoch": 0.4492677519108466,
      "grad_norm": 0.9897797469581112,
      "learning_rate": 6.056603972466435e-06,
      "loss": 0.1585,
      "step": 15400
    },
    {
      "epoch": 0.44929692514149017,
      "grad_norm": 0.7479714463271162,
      "learning_rate": 6.0561422026882735e-06,
      "loss": 0.169,
      "step": 15401
    },
    {
      "epoch": 0.4493260983721337,
      "grad_norm": 0.8982032075909928,
      "learning_rate": 6.0556804234811276e-06,
      "loss": 0.1232,
      "step": 15402
    },
    {
      "epoch": 0.4493552716027773,
      "grad_norm": 0.7703823612855346,
      "learning_rate": 6.055218634849118e-06,
      "loss": 0.1442,
      "step": 15403
    },
    {
      "epoch": 0.4493844448334208,
      "grad_norm": 0.9600556990766124,
      "learning_rate": 6.054756836796369e-06,
      "loss": 0.1059,
      "step": 15404
    },
    {
      "epoch": 0.44941361806406444,
      "grad_norm": 1.0048974647856885,
      "learning_rate": 6.054295029327002e-06,
      "loss": 0.1536,
      "step": 15405
    },
    {
      "epoch": 0.449442791294708,
      "grad_norm": 0.7310083617419373,
      "learning_rate": 6.053833212445141e-06,
      "loss": 0.1471,
      "step": 15406
    },
    {
      "epoch": 0.44947196452535154,
      "grad_norm": 0.7867274235152153,
      "learning_rate": 6.05337138615491e-06,
      "loss": 0.1298,
      "step": 15407
    },
    {
      "epoch": 0.4495011377559951,
      "grad_norm": 0.7945308302893132,
      "learning_rate": 6.052909550460429e-06,
      "loss": 0.1391,
      "step": 15408
    },
    {
      "epoch": 0.44953031098663865,
      "grad_norm": 0.8331005019096529,
      "learning_rate": 6.052447705365824e-06,
      "loss": 0.1677,
      "step": 15409
    },
    {
      "epoch": 0.4495594842172822,
      "grad_norm": 0.763065824096036,
      "learning_rate": 6.051985850875216e-06,
      "loss": 0.1639,
      "step": 15410
    },
    {
      "epoch": 0.4495886574479258,
      "grad_norm": 0.6980665358403694,
      "learning_rate": 6.0515239869927285e-06,
      "loss": 0.1118,
      "step": 15411
    },
    {
      "epoch": 0.44961783067856936,
      "grad_norm": 0.8724358091090383,
      "learning_rate": 6.051062113722489e-06,
      "loss": 0.1447,
      "step": 15412
    },
    {
      "epoch": 0.4496470039092129,
      "grad_norm": 0.6836028132907334,
      "learning_rate": 6.050600231068616e-06,
      "loss": 0.1302,
      "step": 15413
    },
    {
      "epoch": 0.44967617713985647,
      "grad_norm": 0.7380583610704688,
      "learning_rate": 6.050138339035235e-06,
      "loss": 0.1333,
      "step": 15414
    },
    {
      "epoch": 0.4497053503705,
      "grad_norm": 0.7250387437012618,
      "learning_rate": 6.0496764376264705e-06,
      "loss": 0.1405,
      "step": 15415
    },
    {
      "epoch": 0.4497345236011436,
      "grad_norm": 0.7031926994293658,
      "learning_rate": 6.049214526846444e-06,
      "loss": 0.147,
      "step": 15416
    },
    {
      "epoch": 0.44976369683178713,
      "grad_norm": 0.8165137002136926,
      "learning_rate": 6.048752606699282e-06,
      "loss": 0.135,
      "step": 15417
    },
    {
      "epoch": 0.44979287006243074,
      "grad_norm": 0.7543408656527488,
      "learning_rate": 6.048290677189106e-06,
      "loss": 0.1364,
      "step": 15418
    },
    {
      "epoch": 0.4498220432930743,
      "grad_norm": 0.6675726524186767,
      "learning_rate": 6.047828738320041e-06,
      "loss": 0.1324,
      "step": 15419
    },
    {
      "epoch": 0.44985121652371785,
      "grad_norm": 0.9602264692579462,
      "learning_rate": 6.047366790096212e-06,
      "loss": 0.169,
      "step": 15420
    },
    {
      "epoch": 0.4498803897543614,
      "grad_norm": 0.8502253876920634,
      "learning_rate": 6.046904832521742e-06,
      "loss": 0.1487,
      "step": 15421
    },
    {
      "epoch": 0.44990956298500495,
      "grad_norm": 0.7192279901324837,
      "learning_rate": 6.046442865600756e-06,
      "loss": 0.1191,
      "step": 15422
    },
    {
      "epoch": 0.4499387362156485,
      "grad_norm": 0.9532467202734655,
      "learning_rate": 6.0459808893373764e-06,
      "loss": 0.1439,
      "step": 15423
    },
    {
      "epoch": 0.44996790944629206,
      "grad_norm": 0.7877536771062995,
      "learning_rate": 6.045518903735731e-06,
      "loss": 0.1374,
      "step": 15424
    },
    {
      "epoch": 0.44999708267693567,
      "grad_norm": 0.763803614463936,
      "learning_rate": 6.045056908799941e-06,
      "loss": 0.1274,
      "step": 15425
    },
    {
      "epoch": 0.4500262559075792,
      "grad_norm": 0.684652609615203,
      "learning_rate": 6.044594904534132e-06,
      "loss": 0.1227,
      "step": 15426
    },
    {
      "epoch": 0.4500554291382228,
      "grad_norm": 0.9331517680282322,
      "learning_rate": 6.044132890942432e-06,
      "loss": 0.1402,
      "step": 15427
    },
    {
      "epoch": 0.4500846023688663,
      "grad_norm": 0.8575296634774847,
      "learning_rate": 6.04367086802896e-06,
      "loss": 0.1065,
      "step": 15428
    },
    {
      "epoch": 0.4501137755995099,
      "grad_norm": 0.8340571847404953,
      "learning_rate": 6.043208835797845e-06,
      "loss": 0.1389,
      "step": 15429
    },
    {
      "epoch": 0.45014294883015343,
      "grad_norm": 0.805375479888196,
      "learning_rate": 6.042746794253209e-06,
      "loss": 0.1324,
      "step": 15430
    },
    {
      "epoch": 0.450172122060797,
      "grad_norm": 0.7207822573273216,
      "learning_rate": 6.0422847433991795e-06,
      "loss": 0.1489,
      "step": 15431
    },
    {
      "epoch": 0.4502012952914406,
      "grad_norm": 0.911818155425748,
      "learning_rate": 6.041822683239881e-06,
      "loss": 0.1296,
      "step": 15432
    },
    {
      "epoch": 0.45023046852208415,
      "grad_norm": 0.674039306822005,
      "learning_rate": 6.041360613779438e-06,
      "loss": 0.1305,
      "step": 15433
    },
    {
      "epoch": 0.4502596417527277,
      "grad_norm": 0.8040425280619382,
      "learning_rate": 6.040898535021975e-06,
      "loss": 0.1427,
      "step": 15434
    },
    {
      "epoch": 0.45028881498337125,
      "grad_norm": 0.8042811515528271,
      "learning_rate": 6.040436446971619e-06,
      "loss": 0.1344,
      "step": 15435
    },
    {
      "epoch": 0.4503179882140148,
      "grad_norm": 0.8620850881804475,
      "learning_rate": 6.039974349632496e-06,
      "loss": 0.1272,
      "step": 15436
    },
    {
      "epoch": 0.45034716144465836,
      "grad_norm": 0.842513368101869,
      "learning_rate": 6.03951224300873e-06,
      "loss": 0.1622,
      "step": 15437
    },
    {
      "epoch": 0.45037633467530197,
      "grad_norm": 0.7177630781376846,
      "learning_rate": 6.0390501271044455e-06,
      "loss": 0.1204,
      "step": 15438
    },
    {
      "epoch": 0.4504055079059455,
      "grad_norm": 0.7727972733442917,
      "learning_rate": 6.038588001923771e-06,
      "loss": 0.1323,
      "step": 15439
    },
    {
      "epoch": 0.4504346811365891,
      "grad_norm": 1.0554661299495012,
      "learning_rate": 6.03812586747083e-06,
      "loss": 0.1363,
      "step": 15440
    },
    {
      "epoch": 0.45046385436723263,
      "grad_norm": 0.7942799378197438,
      "learning_rate": 6.0376637237497474e-06,
      "loss": 0.1341,
      "step": 15441
    },
    {
      "epoch": 0.4504930275978762,
      "grad_norm": 0.8396266384549794,
      "learning_rate": 6.037201570764654e-06,
      "loss": 0.1325,
      "step": 15442
    },
    {
      "epoch": 0.45052220082851974,
      "grad_norm": 0.805519465081484,
      "learning_rate": 6.036739408519671e-06,
      "loss": 0.1034,
      "step": 15443
    },
    {
      "epoch": 0.4505513740591633,
      "grad_norm": 0.8864674384806074,
      "learning_rate": 6.036277237018926e-06,
      "loss": 0.119,
      "step": 15444
    },
    {
      "epoch": 0.4505805472898069,
      "grad_norm": 0.9880335570734815,
      "learning_rate": 6.0358150562665455e-06,
      "loss": 0.1416,
      "step": 15445
    },
    {
      "epoch": 0.45060972052045045,
      "grad_norm": 1.2902869860420179,
      "learning_rate": 6.035352866266655e-06,
      "loss": 0.1648,
      "step": 15446
    },
    {
      "epoch": 0.450638893751094,
      "grad_norm": 1.052955135958026,
      "learning_rate": 6.034890667023381e-06,
      "loss": 0.1499,
      "step": 15447
    },
    {
      "epoch": 0.45066806698173756,
      "grad_norm": 0.9574731079353955,
      "learning_rate": 6.034428458540851e-06,
      "loss": 0.1356,
      "step": 15448
    },
    {
      "epoch": 0.4506972402123811,
      "grad_norm": 1.2152176449831125,
      "learning_rate": 6.03396624082319e-06,
      "loss": 0.1404,
      "step": 15449
    },
    {
      "epoch": 0.45072641344302466,
      "grad_norm": 1.2169092351149746,
      "learning_rate": 6.033504013874525e-06,
      "loss": 0.1334,
      "step": 15450
    },
    {
      "epoch": 0.4507555866736682,
      "grad_norm": 0.7316672657289102,
      "learning_rate": 6.033041777698983e-06,
      "loss": 0.1448,
      "step": 15451
    },
    {
      "epoch": 0.4507847599043118,
      "grad_norm": 0.8576953274679225,
      "learning_rate": 6.032579532300693e-06,
      "loss": 0.1356,
      "step": 15452
    },
    {
      "epoch": 0.4508139331349554,
      "grad_norm": 1.192208792342409,
      "learning_rate": 6.032117277683776e-06,
      "loss": 0.1185,
      "step": 15453
    },
    {
      "epoch": 0.45084310636559893,
      "grad_norm": 0.6967262956605862,
      "learning_rate": 6.0316550138523646e-06,
      "loss": 0.1477,
      "step": 15454
    },
    {
      "epoch": 0.4508722795962425,
      "grad_norm": 0.6481728147418251,
      "learning_rate": 6.031192740810583e-06,
      "loss": 0.1276,
      "step": 15455
    },
    {
      "epoch": 0.45090145282688604,
      "grad_norm": 0.8024521820210737,
      "learning_rate": 6.030730458562557e-06,
      "loss": 0.117,
      "step": 15456
    },
    {
      "epoch": 0.4509306260575296,
      "grad_norm": 0.9684911976346853,
      "learning_rate": 6.030268167112419e-06,
      "loss": 0.1476,
      "step": 15457
    },
    {
      "epoch": 0.45095979928817315,
      "grad_norm": 0.7871994175419291,
      "learning_rate": 6.02980586646429e-06,
      "loss": 0.1215,
      "step": 15458
    },
    {
      "epoch": 0.45098897251881676,
      "grad_norm": 0.8654542181243517,
      "learning_rate": 6.0293435566223e-06,
      "loss": 0.134,
      "step": 15459
    },
    {
      "epoch": 0.4510181457494603,
      "grad_norm": 1.2935168187353963,
      "learning_rate": 6.028881237590578e-06,
      "loss": 0.1403,
      "step": 15460
    },
    {
      "epoch": 0.45104731898010386,
      "grad_norm": 0.9120213480978883,
      "learning_rate": 6.028418909373249e-06,
      "loss": 0.135,
      "step": 15461
    },
    {
      "epoch": 0.4510764922107474,
      "grad_norm": 0.8344562021187523,
      "learning_rate": 6.027956571974442e-06,
      "loss": 0.1437,
      "step": 15462
    },
    {
      "epoch": 0.45110566544139097,
      "grad_norm": 0.8154161122264343,
      "learning_rate": 6.0274942253982825e-06,
      "loss": 0.1398,
      "step": 15463
    },
    {
      "epoch": 0.4511348386720345,
      "grad_norm": 0.9741081672251926,
      "learning_rate": 6.027031869648901e-06,
      "loss": 0.1248,
      "step": 15464
    },
    {
      "epoch": 0.4511640119026781,
      "grad_norm": 0.8620278803679651,
      "learning_rate": 6.026569504730425e-06,
      "loss": 0.1573,
      "step": 15465
    },
    {
      "epoch": 0.4511931851333217,
      "grad_norm": 0.9282741921915446,
      "learning_rate": 6.026107130646981e-06,
      "loss": 0.1241,
      "step": 15466
    },
    {
      "epoch": 0.45122235836396524,
      "grad_norm": 1.0156400767179663,
      "learning_rate": 6.025644747402698e-06,
      "loss": 0.1425,
      "step": 15467
    },
    {
      "epoch": 0.4512515315946088,
      "grad_norm": 0.9510747137714097,
      "learning_rate": 6.025182355001702e-06,
      "loss": 0.1329,
      "step": 15468
    },
    {
      "epoch": 0.45128070482525234,
      "grad_norm": 0.7098683350119727,
      "learning_rate": 6.024719953448124e-06,
      "loss": 0.1367,
      "step": 15469
    },
    {
      "epoch": 0.4513098780558959,
      "grad_norm": 0.9384021723867909,
      "learning_rate": 6.02425754274609e-06,
      "loss": 0.1586,
      "step": 15470
    },
    {
      "epoch": 0.45133905128653945,
      "grad_norm": 1.0093393459392084,
      "learning_rate": 6.023795122899729e-06,
      "loss": 0.1176,
      "step": 15471
    },
    {
      "epoch": 0.45136822451718306,
      "grad_norm": 0.736763983910399,
      "learning_rate": 6.023332693913171e-06,
      "loss": 0.1392,
      "step": 15472
    },
    {
      "epoch": 0.4513973977478266,
      "grad_norm": 1.0968694045293141,
      "learning_rate": 6.0228702557905415e-06,
      "loss": 0.1444,
      "step": 15473
    },
    {
      "epoch": 0.45142657097847017,
      "grad_norm": 1.007510007736189,
      "learning_rate": 6.022407808535972e-06,
      "loss": 0.142,
      "step": 15474
    },
    {
      "epoch": 0.4514557442091137,
      "grad_norm": 0.7977697991769906,
      "learning_rate": 6.0219453521535875e-06,
      "loss": 0.1291,
      "step": 15475
    },
    {
      "epoch": 0.45148491743975727,
      "grad_norm": 0.9444930378608256,
      "learning_rate": 6.021482886647521e-06,
      "loss": 0.1555,
      "step": 15476
    },
    {
      "epoch": 0.4515140906704008,
      "grad_norm": 0.7666298926815919,
      "learning_rate": 6.021020412021897e-06,
      "loss": 0.1339,
      "step": 15477
    },
    {
      "epoch": 0.4515432639010444,
      "grad_norm": 0.9371666251757395,
      "learning_rate": 6.020557928280848e-06,
      "loss": 0.1174,
      "step": 15478
    },
    {
      "epoch": 0.451572437131688,
      "grad_norm": 0.7594950748927057,
      "learning_rate": 6.020095435428501e-06,
      "loss": 0.1194,
      "step": 15479
    },
    {
      "epoch": 0.45160161036233154,
      "grad_norm": 0.8172497911967067,
      "learning_rate": 6.019632933468986e-06,
      "loss": 0.1396,
      "step": 15480
    },
    {
      "epoch": 0.4516307835929751,
      "grad_norm": 0.7670550947442615,
      "learning_rate": 6.0191704224064305e-06,
      "loss": 0.1405,
      "step": 15481
    },
    {
      "epoch": 0.45165995682361865,
      "grad_norm": 0.7604196341090894,
      "learning_rate": 6.018707902244967e-06,
      "loss": 0.1279,
      "step": 15482
    },
    {
      "epoch": 0.4516891300542622,
      "grad_norm": 0.8268719505091074,
      "learning_rate": 6.0182453729887205e-06,
      "loss": 0.1349,
      "step": 15483
    },
    {
      "epoch": 0.45171830328490575,
      "grad_norm": 0.7436341402224729,
      "learning_rate": 6.0177828346418235e-06,
      "loss": 0.1178,
      "step": 15484
    },
    {
      "epoch": 0.4517474765155493,
      "grad_norm": 0.8496856208825444,
      "learning_rate": 6.0173202872084035e-06,
      "loss": 0.1266,
      "step": 15485
    },
    {
      "epoch": 0.4517766497461929,
      "grad_norm": 0.7986128202583314,
      "learning_rate": 6.01685773069259e-06,
      "loss": 0.1241,
      "step": 15486
    },
    {
      "epoch": 0.45180582297683647,
      "grad_norm": 0.7725657820741384,
      "learning_rate": 6.016395165098516e-06,
      "loss": 0.1323,
      "step": 15487
    },
    {
      "epoch": 0.45183499620748,
      "grad_norm": 0.7673420715874265,
      "learning_rate": 6.0159325904303064e-06,
      "loss": 0.1361,
      "step": 15488
    },
    {
      "epoch": 0.4518641694381236,
      "grad_norm": 0.8167424355679127,
      "learning_rate": 6.015470006692095e-06,
      "loss": 0.1518,
      "step": 15489
    },
    {
      "epoch": 0.45189334266876713,
      "grad_norm": 0.9149063800333057,
      "learning_rate": 6.015007413888008e-06,
      "loss": 0.1391,
      "step": 15490
    },
    {
      "epoch": 0.4519225158994107,
      "grad_norm": 0.7660604220663534,
      "learning_rate": 6.014544812022177e-06,
      "loss": 0.1487,
      "step": 15491
    },
    {
      "epoch": 0.45195168913005423,
      "grad_norm": 0.7932425885698281,
      "learning_rate": 6.014082201098733e-06,
      "loss": 0.13,
      "step": 15492
    },
    {
      "epoch": 0.45198086236069784,
      "grad_norm": 0.8012751187484937,
      "learning_rate": 6.013619581121806e-06,
      "loss": 0.1417,
      "step": 15493
    },
    {
      "epoch": 0.4520100355913414,
      "grad_norm": 0.8094050479653967,
      "learning_rate": 6.013156952095523e-06,
      "loss": 0.1362,
      "step": 15494
    },
    {
      "epoch": 0.45203920882198495,
      "grad_norm": 0.8730183129639572,
      "learning_rate": 6.012694314024018e-06,
      "loss": 0.1355,
      "step": 15495
    },
    {
      "epoch": 0.4520683820526285,
      "grad_norm": 0.7518413162940071,
      "learning_rate": 6.01223166691142e-06,
      "loss": 0.1276,
      "step": 15496
    },
    {
      "epoch": 0.45209755528327206,
      "grad_norm": 0.855937178667111,
      "learning_rate": 6.011769010761861e-06,
      "loss": 0.13,
      "step": 15497
    },
    {
      "epoch": 0.4521267285139156,
      "grad_norm": 0.7668697609911654,
      "learning_rate": 6.011306345579466e-06,
      "loss": 0.1356,
      "step": 15498
    },
    {
      "epoch": 0.4521559017445592,
      "grad_norm": 0.8047188374888111,
      "learning_rate": 6.010843671368373e-06,
      "loss": 0.1378,
      "step": 15499
    },
    {
      "epoch": 0.45218507497520277,
      "grad_norm": 0.8493618000262269,
      "learning_rate": 6.0103809881327065e-06,
      "loss": 0.1479,
      "step": 15500
    },
    {
      "epoch": 0.4522142482058463,
      "grad_norm": 0.6987696427403324,
      "learning_rate": 6.0099182958766e-06,
      "loss": 0.1301,
      "step": 15501
    },
    {
      "epoch": 0.4522434214364899,
      "grad_norm": 0.6967339943179401,
      "learning_rate": 6.0094555946041855e-06,
      "loss": 0.1226,
      "step": 15502
    },
    {
      "epoch": 0.45227259466713343,
      "grad_norm": 0.7669631552971107,
      "learning_rate": 6.008992884319591e-06,
      "loss": 0.154,
      "step": 15503
    },
    {
      "epoch": 0.452301767897777,
      "grad_norm": 0.6731993615570382,
      "learning_rate": 6.00853016502695e-06,
      "loss": 0.1545,
      "step": 15504
    },
    {
      "epoch": 0.45233094112842054,
      "grad_norm": 7.223912458215729,
      "learning_rate": 6.008067436730392e-06,
      "loss": 0.1505,
      "step": 15505
    },
    {
      "epoch": 0.45236011435906415,
      "grad_norm": 1.0159335650326058,
      "learning_rate": 6.0076046994340486e-06,
      "loss": 0.1614,
      "step": 15506
    },
    {
      "epoch": 0.4523892875897077,
      "grad_norm": 0.886501189483512,
      "learning_rate": 6.0071419531420505e-06,
      "loss": 0.1175,
      "step": 15507
    },
    {
      "epoch": 0.45241846082035125,
      "grad_norm": 0.6803573396212004,
      "learning_rate": 6.006679197858529e-06,
      "loss": 0.1308,
      "step": 15508
    },
    {
      "epoch": 0.4524476340509948,
      "grad_norm": 0.7105691467647167,
      "learning_rate": 6.006216433587617e-06,
      "loss": 0.1336,
      "step": 15509
    },
    {
      "epoch": 0.45247680728163836,
      "grad_norm": 1.0094452391803512,
      "learning_rate": 6.005753660333446e-06,
      "loss": 0.1536,
      "step": 15510
    },
    {
      "epoch": 0.4525059805122819,
      "grad_norm": 0.8377821686328103,
      "learning_rate": 6.005290878100144e-06,
      "loss": 0.1503,
      "step": 15511
    },
    {
      "epoch": 0.45253515374292547,
      "grad_norm": 0.889393134243688,
      "learning_rate": 6.004828086891847e-06,
      "loss": 0.1549,
      "step": 15512
    },
    {
      "epoch": 0.4525643269735691,
      "grad_norm": 0.8069271483880135,
      "learning_rate": 6.0043652867126835e-06,
      "loss": 0.1406,
      "step": 15513
    },
    {
      "epoch": 0.45259350020421263,
      "grad_norm": 0.9104707480265944,
      "learning_rate": 6.003902477566788e-06,
      "loss": 0.1286,
      "step": 15514
    },
    {
      "epoch": 0.4526226734348562,
      "grad_norm": 0.9262352416055278,
      "learning_rate": 6.003439659458288e-06,
      "loss": 0.147,
      "step": 15515
    },
    {
      "epoch": 0.45265184666549974,
      "grad_norm": 0.8160715212386537,
      "learning_rate": 6.00297683239132e-06,
      "loss": 0.1469,
      "step": 15516
    },
    {
      "epoch": 0.4526810198961433,
      "grad_norm": 0.9547087787979797,
      "learning_rate": 6.002513996370014e-06,
      "loss": 0.1373,
      "step": 15517
    },
    {
      "epoch": 0.45271019312678684,
      "grad_norm": 0.78882374032233,
      "learning_rate": 6.002051151398503e-06,
      "loss": 0.1406,
      "step": 15518
    },
    {
      "epoch": 0.4527393663574304,
      "grad_norm": 0.8128759934170584,
      "learning_rate": 6.001588297480918e-06,
      "loss": 0.146,
      "step": 15519
    },
    {
      "epoch": 0.452768539588074,
      "grad_norm": 0.8322631443153432,
      "learning_rate": 6.0011254346213924e-06,
      "loss": 0.1505,
      "step": 15520
    },
    {
      "epoch": 0.45279771281871756,
      "grad_norm": 0.6798064602547464,
      "learning_rate": 6.000662562824056e-06,
      "loss": 0.1224,
      "step": 15521
    },
    {
      "epoch": 0.4528268860493611,
      "grad_norm": 0.8639071412444207,
      "learning_rate": 6.000199682093045e-06,
      "loss": 0.1521,
      "step": 15522
    },
    {
      "epoch": 0.45285605928000466,
      "grad_norm": 0.8478110353943761,
      "learning_rate": 5.999736792432489e-06,
      "loss": 0.1428,
      "step": 15523
    },
    {
      "epoch": 0.4528852325106482,
      "grad_norm": 0.704310892555951,
      "learning_rate": 5.9992738938465226e-06,
      "loss": 0.1189,
      "step": 15524
    },
    {
      "epoch": 0.45291440574129177,
      "grad_norm": 0.6830878620903953,
      "learning_rate": 5.998810986339276e-06,
      "loss": 0.1302,
      "step": 15525
    },
    {
      "epoch": 0.4529435789719354,
      "grad_norm": 0.6755613588284448,
      "learning_rate": 5.998348069914884e-06,
      "loss": 0.1466,
      "step": 15526
    },
    {
      "epoch": 0.45297275220257893,
      "grad_norm": 0.754726292745728,
      "learning_rate": 5.99788514457748e-06,
      "loss": 0.1368,
      "step": 15527
    },
    {
      "epoch": 0.4530019254332225,
      "grad_norm": 0.8819122489878344,
      "learning_rate": 5.997422210331194e-06,
      "loss": 0.1646,
      "step": 15528
    },
    {
      "epoch": 0.45303109866386604,
      "grad_norm": 0.9737874095863003,
      "learning_rate": 5.996959267180162e-06,
      "loss": 0.1405,
      "step": 15529
    },
    {
      "epoch": 0.4530602718945096,
      "grad_norm": 0.7185403255339305,
      "learning_rate": 5.996496315128514e-06,
      "loss": 0.1293,
      "step": 15530
    },
    {
      "epoch": 0.45308944512515315,
      "grad_norm": 0.808329124481926,
      "learning_rate": 5.996033354180386e-06,
      "loss": 0.1442,
      "step": 15531
    },
    {
      "epoch": 0.4531186183557967,
      "grad_norm": 0.876900716347898,
      "learning_rate": 5.99557038433991e-06,
      "loss": 0.1421,
      "step": 15532
    },
    {
      "epoch": 0.4531477915864403,
      "grad_norm": 0.9997237214924847,
      "learning_rate": 5.995107405611218e-06,
      "loss": 0.1238,
      "step": 15533
    },
    {
      "epoch": 0.45317696481708386,
      "grad_norm": 0.8329400431096406,
      "learning_rate": 5.994644417998447e-06,
      "loss": 0.1166,
      "step": 15534
    },
    {
      "epoch": 0.4532061380477274,
      "grad_norm": 0.9260749046529769,
      "learning_rate": 5.994181421505726e-06,
      "loss": 0.121,
      "step": 15535
    },
    {
      "epoch": 0.45323531127837097,
      "grad_norm": 0.9736342829476802,
      "learning_rate": 5.993718416137191e-06,
      "loss": 0.1253,
      "step": 15536
    },
    {
      "epoch": 0.4532644845090145,
      "grad_norm": 1.1189328167263761,
      "learning_rate": 5.993255401896976e-06,
      "loss": 0.1448,
      "step": 15537
    },
    {
      "epoch": 0.4532936577396581,
      "grad_norm": 1.0059683400281096,
      "learning_rate": 5.9927923787892125e-06,
      "loss": 0.137,
      "step": 15538
    },
    {
      "epoch": 0.4533228309703016,
      "grad_norm": 0.8726715932419563,
      "learning_rate": 5.992329346818036e-06,
      "loss": 0.1438,
      "step": 15539
    },
    {
      "epoch": 0.45335200420094524,
      "grad_norm": 0.9672239353443517,
      "learning_rate": 5.991866305987581e-06,
      "loss": 0.1713,
      "step": 15540
    },
    {
      "epoch": 0.4533811774315888,
      "grad_norm": 1.1397650317352954,
      "learning_rate": 5.99140325630198e-06,
      "loss": 0.1413,
      "step": 15541
    },
    {
      "epoch": 0.45341035066223234,
      "grad_norm": 0.7483326709503136,
      "learning_rate": 5.990940197765367e-06,
      "loss": 0.141,
      "step": 15542
    },
    {
      "epoch": 0.4534395238928759,
      "grad_norm": 0.7700113396276715,
      "learning_rate": 5.990477130381877e-06,
      "loss": 0.1207,
      "step": 15543
    },
    {
      "epoch": 0.45346869712351945,
      "grad_norm": 1.1356080666950594,
      "learning_rate": 5.990014054155644e-06,
      "loss": 0.128,
      "step": 15544
    },
    {
      "epoch": 0.453497870354163,
      "grad_norm": 0.9938686487025937,
      "learning_rate": 5.989550969090801e-06,
      "loss": 0.1549,
      "step": 15545
    },
    {
      "epoch": 0.45352704358480656,
      "grad_norm": 1.2927614805086236,
      "learning_rate": 5.989087875191481e-06,
      "loss": 0.131,
      "step": 15546
    },
    {
      "epoch": 0.45355621681545016,
      "grad_norm": 0.7277551171650158,
      "learning_rate": 5.9886247724618255e-06,
      "loss": 0.1438,
      "step": 15547
    },
    {
      "epoch": 0.4535853900460937,
      "grad_norm": 0.9748458948991484,
      "learning_rate": 5.98816166090596e-06,
      "loss": 0.1412,
      "step": 15548
    },
    {
      "epoch": 0.45361456327673727,
      "grad_norm": 0.8023881371892535,
      "learning_rate": 5.987698540528026e-06,
      "loss": 0.1378,
      "step": 15549
    },
    {
      "epoch": 0.4536437365073808,
      "grad_norm": 0.8176600544026086,
      "learning_rate": 5.987235411332153e-06,
      "loss": 0.1076,
      "step": 15550
    },
    {
      "epoch": 0.4536729097380244,
      "grad_norm": 0.8899976618069858,
      "learning_rate": 5.986772273322478e-06,
      "loss": 0.138,
      "step": 15551
    },
    {
      "epoch": 0.45370208296866793,
      "grad_norm": 0.7308522708829461,
      "learning_rate": 5.986309126503137e-06,
      "loss": 0.1376,
      "step": 15552
    },
    {
      "epoch": 0.45373125619931154,
      "grad_norm": 0.7756023275048722,
      "learning_rate": 5.985845970878263e-06,
      "loss": 0.1553,
      "step": 15553
    },
    {
      "epoch": 0.4537604294299551,
      "grad_norm": 1.15371829833908,
      "learning_rate": 5.985382806451991e-06,
      "loss": 0.1332,
      "step": 15554
    },
    {
      "epoch": 0.45378960266059865,
      "grad_norm": 0.6294588425665543,
      "learning_rate": 5.984919633228458e-06,
      "loss": 0.1331,
      "step": 15555
    },
    {
      "epoch": 0.4538187758912422,
      "grad_norm": 0.9856300304573363,
      "learning_rate": 5.984456451211795e-06,
      "loss": 0.1383,
      "step": 15556
    },
    {
      "epoch": 0.45384794912188575,
      "grad_norm": 0.8435417059113256,
      "learning_rate": 5.9839932604061425e-06,
      "loss": 0.1154,
      "step": 15557
    },
    {
      "epoch": 0.4538771223525293,
      "grad_norm": 0.8038887465016826,
      "learning_rate": 5.983530060815631e-06,
      "loss": 0.1333,
      "step": 15558
    },
    {
      "epoch": 0.45390629558317286,
      "grad_norm": 0.7613533098142548,
      "learning_rate": 5.9830668524444e-06,
      "loss": 0.1672,
      "step": 15559
    },
    {
      "epoch": 0.45393546881381647,
      "grad_norm": 0.8701557236316875,
      "learning_rate": 5.982603635296581e-06,
      "loss": 0.1397,
      "step": 15560
    },
    {
      "epoch": 0.45396464204446,
      "grad_norm": 0.9238254563999214,
      "learning_rate": 5.9821404093763116e-06,
      "loss": 0.1065,
      "step": 15561
    },
    {
      "epoch": 0.4539938152751036,
      "grad_norm": 0.8083320936044646,
      "learning_rate": 5.981677174687729e-06,
      "loss": 0.1197,
      "step": 15562
    },
    {
      "epoch": 0.4540229885057471,
      "grad_norm": 0.8540124986521128,
      "learning_rate": 5.981213931234964e-06,
      "loss": 0.1431,
      "step": 15563
    },
    {
      "epoch": 0.4540521617363907,
      "grad_norm": 1.0437493845420331,
      "learning_rate": 5.980750679022158e-06,
      "loss": 0.1303,
      "step": 15564
    },
    {
      "epoch": 0.45408133496703423,
      "grad_norm": 0.6804187898118352,
      "learning_rate": 5.980287418053442e-06,
      "loss": 0.1369,
      "step": 15565
    },
    {
      "epoch": 0.4541105081976778,
      "grad_norm": 0.7692563005271972,
      "learning_rate": 5.979824148332954e-06,
      "loss": 0.1495,
      "step": 15566
    },
    {
      "epoch": 0.4541396814283214,
      "grad_norm": 0.800288155652702,
      "learning_rate": 5.979360869864832e-06,
      "loss": 0.1512,
      "step": 15567
    },
    {
      "epoch": 0.45416885465896495,
      "grad_norm": 1.003529682542251,
      "learning_rate": 5.9788975826532085e-06,
      "loss": 0.1341,
      "step": 15568
    },
    {
      "epoch": 0.4541980278896085,
      "grad_norm": 0.7516609503566002,
      "learning_rate": 5.97843428670222e-06,
      "loss": 0.1413,
      "step": 15569
    },
    {
      "epoch": 0.45422720112025206,
      "grad_norm": 0.7483244607450641,
      "learning_rate": 5.977970982016006e-06,
      "loss": 0.1375,
      "step": 15570
    },
    {
      "epoch": 0.4542563743508956,
      "grad_norm": 0.6541581501805274,
      "learning_rate": 5.977507668598699e-06,
      "loss": 0.1139,
      "step": 15571
    },
    {
      "epoch": 0.45428554758153916,
      "grad_norm": 0.6464670584708954,
      "learning_rate": 5.977044346454437e-06,
      "loss": 0.1312,
      "step": 15572
    },
    {
      "epoch": 0.4543147208121827,
      "grad_norm": 0.9645176954342236,
      "learning_rate": 5.976581015587357e-06,
      "loss": 0.1507,
      "step": 15573
    },
    {
      "epoch": 0.4543438940428263,
      "grad_norm": 0.8301075641835433,
      "learning_rate": 5.9761176760015945e-06,
      "loss": 0.1429,
      "step": 15574
    },
    {
      "epoch": 0.4543730672734699,
      "grad_norm": 1.0723481299694118,
      "learning_rate": 5.975654327701286e-06,
      "loss": 0.1478,
      "step": 15575
    },
    {
      "epoch": 0.45440224050411343,
      "grad_norm": 0.8724914626221386,
      "learning_rate": 5.975190970690568e-06,
      "loss": 0.1429,
      "step": 15576
    },
    {
      "epoch": 0.454431413734757,
      "grad_norm": 0.8844472149878354,
      "learning_rate": 5.97472760497358e-06,
      "loss": 0.1346,
      "step": 15577
    },
    {
      "epoch": 0.45446058696540054,
      "grad_norm": 0.8083720723703682,
      "learning_rate": 5.974264230554454e-06,
      "loss": 0.1345,
      "step": 15578
    },
    {
      "epoch": 0.4544897601960441,
      "grad_norm": 1.3204416064588236,
      "learning_rate": 5.973800847437332e-06,
      "loss": 0.1251,
      "step": 15579
    },
    {
      "epoch": 0.4545189334266877,
      "grad_norm": 1.0738716931241754,
      "learning_rate": 5.973337455626348e-06,
      "loss": 0.1452,
      "step": 15580
    },
    {
      "epoch": 0.45454810665733125,
      "grad_norm": 1.0675443266092903,
      "learning_rate": 5.972874055125637e-06,
      "loss": 0.1411,
      "step": 15581
    },
    {
      "epoch": 0.4545772798879748,
      "grad_norm": 0.8243831145326032,
      "learning_rate": 5.972410645939342e-06,
      "loss": 0.1254,
      "step": 15582
    },
    {
      "epoch": 0.45460645311861836,
      "grad_norm": 0.7569276271537585,
      "learning_rate": 5.971947228071595e-06,
      "loss": 0.1528,
      "step": 15583
    },
    {
      "epoch": 0.4546356263492619,
      "grad_norm": 0.8531415261963556,
      "learning_rate": 5.971483801526536e-06,
      "loss": 0.1391,
      "step": 15584
    },
    {
      "epoch": 0.45466479957990547,
      "grad_norm": 0.9787891705695544,
      "learning_rate": 5.971020366308301e-06,
      "loss": 0.1334,
      "step": 15585
    },
    {
      "epoch": 0.454693972810549,
      "grad_norm": 0.7507445483480693,
      "learning_rate": 5.970556922421028e-06,
      "loss": 0.1465,
      "step": 15586
    },
    {
      "epoch": 0.4547231460411926,
      "grad_norm": 0.8299327822734983,
      "learning_rate": 5.970093469868855e-06,
      "loss": 0.1207,
      "step": 15587
    },
    {
      "epoch": 0.4547523192718362,
      "grad_norm": 0.8162947581409953,
      "learning_rate": 5.969630008655919e-06,
      "loss": 0.1715,
      "step": 15588
    },
    {
      "epoch": 0.45478149250247973,
      "grad_norm": 0.7519473468992576,
      "learning_rate": 5.969166538786357e-06,
      "loss": 0.1527,
      "step": 15589
    },
    {
      "epoch": 0.4548106657331233,
      "grad_norm": 0.8550610823679656,
      "learning_rate": 5.968703060264308e-06,
      "loss": 0.1403,
      "step": 15590
    },
    {
      "epoch": 0.45483983896376684,
      "grad_norm": 0.9089281538646847,
      "learning_rate": 5.968239573093909e-06,
      "loss": 0.1542,
      "step": 15591
    },
    {
      "epoch": 0.4548690121944104,
      "grad_norm": 0.8927215201020315,
      "learning_rate": 5.967776077279299e-06,
      "loss": 0.1516,
      "step": 15592
    },
    {
      "epoch": 0.45489818542505395,
      "grad_norm": 0.8438451780909817,
      "learning_rate": 5.9673125728246136e-06,
      "loss": 0.1618,
      "step": 15593
    },
    {
      "epoch": 0.45492735865569756,
      "grad_norm": 0.8108699552061513,
      "learning_rate": 5.966849059733994e-06,
      "loss": 0.14,
      "step": 15594
    },
    {
      "epoch": 0.4549565318863411,
      "grad_norm": 0.9168909298375204,
      "learning_rate": 5.966385538011577e-06,
      "loss": 0.1678,
      "step": 15595
    },
    {
      "epoch": 0.45498570511698466,
      "grad_norm": 0.684513498308056,
      "learning_rate": 5.9659220076614995e-06,
      "loss": 0.1509,
      "step": 15596
    },
    {
      "epoch": 0.4550148783476282,
      "grad_norm": 0.7329521844052996,
      "learning_rate": 5.965458468687902e-06,
      "loss": 0.125,
      "step": 15597
    },
    {
      "epoch": 0.45504405157827177,
      "grad_norm": 0.7877975127804708,
      "learning_rate": 5.964994921094921e-06,
      "loss": 0.1397,
      "step": 15598
    },
    {
      "epoch": 0.4550732248089153,
      "grad_norm": 0.8390761540958495,
      "learning_rate": 5.964531364886696e-06,
      "loss": 0.1449,
      "step": 15599
    },
    {
      "epoch": 0.4551023980395589,
      "grad_norm": 0.8375526024404093,
      "learning_rate": 5.964067800067366e-06,
      "loss": 0.1434,
      "step": 15600
    },
    {
      "epoch": 0.4551315712702025,
      "grad_norm": 0.784226838130666,
      "learning_rate": 5.9636042266410666e-06,
      "loss": 0.1398,
      "step": 15601
    },
    {
      "epoch": 0.45516074450084604,
      "grad_norm": 1.188310705158349,
      "learning_rate": 5.96314064461194e-06,
      "loss": 0.1507,
      "step": 15602
    },
    {
      "epoch": 0.4551899177314896,
      "grad_norm": 0.8884153249430453,
      "learning_rate": 5.962677053984124e-06,
      "loss": 0.1318,
      "step": 15603
    },
    {
      "epoch": 0.45521909096213314,
      "grad_norm": 0.8591894126257077,
      "learning_rate": 5.962213454761758e-06,
      "loss": 0.1347,
      "step": 15604
    },
    {
      "epoch": 0.4552482641927767,
      "grad_norm": 0.8814817104144005,
      "learning_rate": 5.961749846948977e-06,
      "loss": 0.1266,
      "step": 15605
    },
    {
      "epoch": 0.45527743742342025,
      "grad_norm": 0.8035971967177175,
      "learning_rate": 5.961286230549925e-06,
      "loss": 0.1422,
      "step": 15606
    },
    {
      "epoch": 0.4553066106540638,
      "grad_norm": 0.8688263422836926,
      "learning_rate": 5.96082260556874e-06,
      "loss": 0.1463,
      "step": 15607
    },
    {
      "epoch": 0.4553357838847074,
      "grad_norm": 0.868205180530539,
      "learning_rate": 5.9603589720095575e-06,
      "loss": 0.1334,
      "step": 15608
    },
    {
      "epoch": 0.45536495711535097,
      "grad_norm": 0.7593555479635258,
      "learning_rate": 5.959895329876521e-06,
      "loss": 0.1627,
      "step": 15609
    },
    {
      "epoch": 0.4553941303459945,
      "grad_norm": 0.9070878175146634,
      "learning_rate": 5.959431679173768e-06,
      "loss": 0.1438,
      "step": 15610
    },
    {
      "epoch": 0.45542330357663807,
      "grad_norm": 0.7284378881575929,
      "learning_rate": 5.958968019905438e-06,
      "loss": 0.1518,
      "step": 15611
    },
    {
      "epoch": 0.4554524768072816,
      "grad_norm": 0.8233408714281342,
      "learning_rate": 5.95850435207567e-06,
      "loss": 0.1225,
      "step": 15612
    },
    {
      "epoch": 0.4554816500379252,
      "grad_norm": 0.6495865762672907,
      "learning_rate": 5.9580406756886046e-06,
      "loss": 0.1332,
      "step": 15613
    },
    {
      "epoch": 0.4555108232685688,
      "grad_norm": 0.8377952270274468,
      "learning_rate": 5.957576990748381e-06,
      "loss": 0.1348,
      "step": 15614
    },
    {
      "epoch": 0.45553999649921234,
      "grad_norm": 0.8555159658834381,
      "learning_rate": 5.957113297259137e-06,
      "loss": 0.1294,
      "step": 15615
    },
    {
      "epoch": 0.4555691697298559,
      "grad_norm": 0.9373924405724049,
      "learning_rate": 5.956649595225015e-06,
      "loss": 0.1473,
      "step": 15616
    },
    {
      "epoch": 0.45559834296049945,
      "grad_norm": 0.8757514150161813,
      "learning_rate": 5.956185884650154e-06,
      "loss": 0.1538,
      "step": 15617
    },
    {
      "epoch": 0.455627516191143,
      "grad_norm": 0.6024764240720053,
      "learning_rate": 5.955722165538693e-06,
      "loss": 0.1267,
      "step": 15618
    },
    {
      "epoch": 0.45565668942178655,
      "grad_norm": 0.9658590661050219,
      "learning_rate": 5.9552584378947746e-06,
      "loss": 0.1291,
      "step": 15619
    },
    {
      "epoch": 0.4556858626524301,
      "grad_norm": 0.8764901413759552,
      "learning_rate": 5.954794701722534e-06,
      "loss": 0.1311,
      "step": 15620
    },
    {
      "epoch": 0.4557150358830737,
      "grad_norm": 0.780645758995893,
      "learning_rate": 5.954330957026115e-06,
      "loss": 0.1202,
      "step": 15621
    },
    {
      "epoch": 0.45574420911371727,
      "grad_norm": 0.8320018159178345,
      "learning_rate": 5.953867203809659e-06,
      "loss": 0.1486,
      "step": 15622
    },
    {
      "epoch": 0.4557733823443608,
      "grad_norm": 0.7268077402865062,
      "learning_rate": 5.953403442077302e-06,
      "loss": 0.1322,
      "step": 15623
    },
    {
      "epoch": 0.4558025555750044,
      "grad_norm": 0.770748963245814,
      "learning_rate": 5.952939671833189e-06,
      "loss": 0.1121,
      "step": 15624
    },
    {
      "epoch": 0.45583172880564793,
      "grad_norm": 0.7652247459678828,
      "learning_rate": 5.9524758930814565e-06,
      "loss": 0.1243,
      "step": 15625
    },
    {
      "epoch": 0.4558609020362915,
      "grad_norm": 0.846546008252921,
      "learning_rate": 5.952012105826247e-06,
      "loss": 0.1317,
      "step": 15626
    },
    {
      "epoch": 0.45589007526693504,
      "grad_norm": 0.6920386970271211,
      "learning_rate": 5.9515483100716994e-06,
      "loss": 0.1261,
      "step": 15627
    },
    {
      "epoch": 0.45591924849757864,
      "grad_norm": 0.8808606251055751,
      "learning_rate": 5.951084505821957e-06,
      "loss": 0.1725,
      "step": 15628
    },
    {
      "epoch": 0.4559484217282222,
      "grad_norm": 0.6640518970320024,
      "learning_rate": 5.950620693081159e-06,
      "loss": 0.123,
      "step": 15629
    },
    {
      "epoch": 0.45597759495886575,
      "grad_norm": 0.8132760484031268,
      "learning_rate": 5.950156871853446e-06,
      "loss": 0.1639,
      "step": 15630
    },
    {
      "epoch": 0.4560067681895093,
      "grad_norm": 0.8180047225855253,
      "learning_rate": 5.94969304214296e-06,
      "loss": 0.1458,
      "step": 15631
    },
    {
      "epoch": 0.45603594142015286,
      "grad_norm": 0.7903443282540956,
      "learning_rate": 5.94922920395384e-06,
      "loss": 0.1364,
      "step": 15632
    },
    {
      "epoch": 0.4560651146507964,
      "grad_norm": 0.7793112046755744,
      "learning_rate": 5.948765357290229e-06,
      "loss": 0.1368,
      "step": 15633
    },
    {
      "epoch": 0.45609428788143996,
      "grad_norm": 0.7479085223042332,
      "learning_rate": 5.94830150215627e-06,
      "loss": 0.1462,
      "step": 15634
    },
    {
      "epoch": 0.4561234611120836,
      "grad_norm": 0.7091872238254096,
      "learning_rate": 5.947837638556096e-06,
      "loss": 0.1318,
      "step": 15635
    },
    {
      "epoch": 0.4561526343427271,
      "grad_norm": 0.9630085534828409,
      "learning_rate": 5.947373766493858e-06,
      "loss": 0.1337,
      "step": 15636
    },
    {
      "epoch": 0.4561818075733707,
      "grad_norm": 0.9419381849430555,
      "learning_rate": 5.946909885973693e-06,
      "loss": 0.1222,
      "step": 15637
    },
    {
      "epoch": 0.45621098080401423,
      "grad_norm": 0.8737622678445661,
      "learning_rate": 5.94644599699974e-06,
      "loss": 0.1392,
      "step": 15638
    },
    {
      "epoch": 0.4562401540346578,
      "grad_norm": 0.7275142853586954,
      "learning_rate": 5.945982099576147e-06,
      "loss": 0.1472,
      "step": 15639
    },
    {
      "epoch": 0.45626932726530134,
      "grad_norm": 0.8130554862107516,
      "learning_rate": 5.945518193707048e-06,
      "loss": 0.1315,
      "step": 15640
    },
    {
      "epoch": 0.45629850049594495,
      "grad_norm": 0.6153955434577857,
      "learning_rate": 5.945054279396589e-06,
      "loss": 0.1294,
      "step": 15641
    },
    {
      "epoch": 0.4563276737265885,
      "grad_norm": 1.0260667782830692,
      "learning_rate": 5.944590356648913e-06,
      "loss": 0.161,
      "step": 15642
    },
    {
      "epoch": 0.45635684695723205,
      "grad_norm": 0.6957108945306344,
      "learning_rate": 5.944126425468158e-06,
      "loss": 0.123,
      "step": 15643
    },
    {
      "epoch": 0.4563860201878756,
      "grad_norm": 0.7159912722493332,
      "learning_rate": 5.943662485858468e-06,
      "loss": 0.1206,
      "step": 15644
    },
    {
      "epoch": 0.45641519341851916,
      "grad_norm": 1.1752349501025643,
      "learning_rate": 5.9431985378239845e-06,
      "loss": 0.1787,
      "step": 15645
    },
    {
      "epoch": 0.4564443666491627,
      "grad_norm": 0.9045465807891037,
      "learning_rate": 5.94273458136885e-06,
      "loss": 0.1486,
      "step": 15646
    },
    {
      "epoch": 0.45647353987980627,
      "grad_norm": 0.6638173725878435,
      "learning_rate": 5.942270616497206e-06,
      "loss": 0.1113,
      "step": 15647
    },
    {
      "epoch": 0.4565027131104499,
      "grad_norm": 1.0599652223154115,
      "learning_rate": 5.941806643213194e-06,
      "loss": 0.1386,
      "step": 15648
    },
    {
      "epoch": 0.45653188634109343,
      "grad_norm": 1.108494409632205,
      "learning_rate": 5.941342661520959e-06,
      "loss": 0.1446,
      "step": 15649
    },
    {
      "epoch": 0.456561059571737,
      "grad_norm": 0.8771978366714083,
      "learning_rate": 5.940878671424639e-06,
      "loss": 0.1388,
      "step": 15650
    },
    {
      "epoch": 0.45659023280238054,
      "grad_norm": 0.8269749943895522,
      "learning_rate": 5.940414672928381e-06,
      "loss": 0.1236,
      "step": 15651
    },
    {
      "epoch": 0.4566194060330241,
      "grad_norm": 0.8759997309413657,
      "learning_rate": 5.9399506660363244e-06,
      "loss": 0.1394,
      "step": 15652
    },
    {
      "epoch": 0.45664857926366764,
      "grad_norm": 0.9488383640892364,
      "learning_rate": 5.939486650752612e-06,
      "loss": 0.136,
      "step": 15653
    },
    {
      "epoch": 0.4566777524943112,
      "grad_norm": 0.6794040859282786,
      "learning_rate": 5.939022627081389e-06,
      "loss": 0.123,
      "step": 15654
    },
    {
      "epoch": 0.4567069257249548,
      "grad_norm": 0.7957088297622816,
      "learning_rate": 5.938558595026794e-06,
      "loss": 0.1195,
      "step": 15655
    },
    {
      "epoch": 0.45673609895559836,
      "grad_norm": 0.8379369819921373,
      "learning_rate": 5.938094554592973e-06,
      "loss": 0.1637,
      "step": 15656
    },
    {
      "epoch": 0.4567652721862419,
      "grad_norm": 0.8238980349361563,
      "learning_rate": 5.937630505784068e-06,
      "loss": 0.1355,
      "step": 15657
    },
    {
      "epoch": 0.45679444541688546,
      "grad_norm": 0.8688347774364377,
      "learning_rate": 5.9371664486042216e-06,
      "loss": 0.132,
      "step": 15658
    },
    {
      "epoch": 0.456823618647529,
      "grad_norm": 1.095856481281604,
      "learning_rate": 5.936702383057576e-06,
      "loss": 0.1301,
      "step": 15659
    },
    {
      "epoch": 0.45685279187817257,
      "grad_norm": 0.8897872583146305,
      "learning_rate": 5.936238309148276e-06,
      "loss": 0.1311,
      "step": 15660
    },
    {
      "epoch": 0.4568819651088161,
      "grad_norm": 0.7323225377747237,
      "learning_rate": 5.935774226880463e-06,
      "loss": 0.1243,
      "step": 15661
    },
    {
      "epoch": 0.45691113833945973,
      "grad_norm": 0.8574268429481138,
      "learning_rate": 5.9353101362582825e-06,
      "loss": 0.1275,
      "step": 15662
    },
    {
      "epoch": 0.4569403115701033,
      "grad_norm": 1.0035717491324825,
      "learning_rate": 5.934846037285875e-06,
      "loss": 0.1274,
      "step": 15663
    },
    {
      "epoch": 0.45696948480074684,
      "grad_norm": 0.6340515572348439,
      "learning_rate": 5.9343819299673865e-06,
      "loss": 0.1032,
      "step": 15664
    },
    {
      "epoch": 0.4569986580313904,
      "grad_norm": 0.9438652048879773,
      "learning_rate": 5.933917814306958e-06,
      "loss": 0.1508,
      "step": 15665
    },
    {
      "epoch": 0.45702783126203395,
      "grad_norm": 0.9742379780115008,
      "learning_rate": 5.933453690308734e-06,
      "loss": 0.1412,
      "step": 15666
    },
    {
      "epoch": 0.4570570044926775,
      "grad_norm": 0.8079002900725311,
      "learning_rate": 5.93298955797686e-06,
      "loss": 0.1339,
      "step": 15667
    },
    {
      "epoch": 0.4570861777233211,
      "grad_norm": 1.1181298450483161,
      "learning_rate": 5.9325254173154754e-06,
      "loss": 0.1351,
      "step": 15668
    },
    {
      "epoch": 0.45711535095396466,
      "grad_norm": 1.2047272065936931,
      "learning_rate": 5.932061268328729e-06,
      "loss": 0.126,
      "step": 15669
    },
    {
      "epoch": 0.4571445241846082,
      "grad_norm": 0.9596218433201393,
      "learning_rate": 5.931597111020762e-06,
      "loss": 0.1295,
      "step": 15670
    },
    {
      "epoch": 0.45717369741525177,
      "grad_norm": 1.0413839676500838,
      "learning_rate": 5.931132945395717e-06,
      "loss": 0.1467,
      "step": 15671
    },
    {
      "epoch": 0.4572028706458953,
      "grad_norm": 1.3019729448171455,
      "learning_rate": 5.930668771457739e-06,
      "loss": 0.1244,
      "step": 15672
    },
    {
      "epoch": 0.4572320438765389,
      "grad_norm": 0.8658591039474676,
      "learning_rate": 5.930204589210974e-06,
      "loss": 0.1481,
      "step": 15673
    },
    {
      "epoch": 0.4572612171071824,
      "grad_norm": 0.7759193174158013,
      "learning_rate": 5.929740398659563e-06,
      "loss": 0.1408,
      "step": 15674
    },
    {
      "epoch": 0.45729039033782604,
      "grad_norm": 0.8641358135923433,
      "learning_rate": 5.929276199807652e-06,
      "loss": 0.151,
      "step": 15675
    },
    {
      "epoch": 0.4573195635684696,
      "grad_norm": 0.7434838348542243,
      "learning_rate": 5.928811992659386e-06,
      "loss": 0.1453,
      "step": 15676
    },
    {
      "epoch": 0.45734873679911314,
      "grad_norm": 0.9680447752688965,
      "learning_rate": 5.928347777218907e-06,
      "loss": 0.1489,
      "step": 15677
    },
    {
      "epoch": 0.4573779100297567,
      "grad_norm": 0.7862911694994094,
      "learning_rate": 5.927883553490361e-06,
      "loss": 0.1265,
      "step": 15678
    },
    {
      "epoch": 0.45740708326040025,
      "grad_norm": 1.045671477489225,
      "learning_rate": 5.927419321477893e-06,
      "loss": 0.1559,
      "step": 15679
    },
    {
      "epoch": 0.4574362564910438,
      "grad_norm": 0.8921272670391884,
      "learning_rate": 5.926955081185646e-06,
      "loss": 0.1695,
      "step": 15680
    },
    {
      "epoch": 0.45746542972168736,
      "grad_norm": 0.7146774925535057,
      "learning_rate": 5.926490832617764e-06,
      "loss": 0.1307,
      "step": 15681
    },
    {
      "epoch": 0.45749460295233096,
      "grad_norm": 0.9267503736800597,
      "learning_rate": 5.926026575778396e-06,
      "loss": 0.1105,
      "step": 15682
    },
    {
      "epoch": 0.4575237761829745,
      "grad_norm": 0.7048558503763448,
      "learning_rate": 5.9255623106716805e-06,
      "loss": 0.1403,
      "step": 15683
    },
    {
      "epoch": 0.45755294941361807,
      "grad_norm": 0.9110532148569493,
      "learning_rate": 5.925098037301769e-06,
      "loss": 0.1727,
      "step": 15684
    },
    {
      "epoch": 0.4575821226442616,
      "grad_norm": 1.1119534675833322,
      "learning_rate": 5.9246337556728005e-06,
      "loss": 0.1407,
      "step": 15685
    },
    {
      "epoch": 0.4576112958749052,
      "grad_norm": 0.6629367221488919,
      "learning_rate": 5.9241694657889236e-06,
      "loss": 0.1429,
      "step": 15686
    },
    {
      "epoch": 0.45764046910554873,
      "grad_norm": 0.6451570080377812,
      "learning_rate": 5.9237051676542825e-06,
      "loss": 0.1166,
      "step": 15687
    },
    {
      "epoch": 0.4576696423361923,
      "grad_norm": 0.8167856611009412,
      "learning_rate": 5.923240861273021e-06,
      "loss": 0.1632,
      "step": 15688
    },
    {
      "epoch": 0.4576988155668359,
      "grad_norm": 1.0186215312827995,
      "learning_rate": 5.922776546649287e-06,
      "loss": 0.1269,
      "step": 15689
    },
    {
      "epoch": 0.45772798879747945,
      "grad_norm": 0.7110533075572291,
      "learning_rate": 5.922312223787223e-06,
      "loss": 0.1395,
      "step": 15690
    },
    {
      "epoch": 0.457757162028123,
      "grad_norm": 0.7325905083465877,
      "learning_rate": 5.921847892690976e-06,
      "loss": 0.1408,
      "step": 15691
    },
    {
      "epoch": 0.45778633525876655,
      "grad_norm": 1.1060383046376248,
      "learning_rate": 5.9213835533646914e-06,
      "loss": 0.1358,
      "step": 15692
    },
    {
      "epoch": 0.4578155084894101,
      "grad_norm": 0.8946239487513469,
      "learning_rate": 5.920919205812514e-06,
      "loss": 0.1249,
      "step": 15693
    },
    {
      "epoch": 0.45784468172005366,
      "grad_norm": 0.8759308957921003,
      "learning_rate": 5.920454850038591e-06,
      "loss": 0.1302,
      "step": 15694
    },
    {
      "epoch": 0.45787385495069727,
      "grad_norm": 1.1367852553850144,
      "learning_rate": 5.919990486047065e-06,
      "loss": 0.1378,
      "step": 15695
    },
    {
      "epoch": 0.4579030281813408,
      "grad_norm": 0.7844432128423668,
      "learning_rate": 5.919526113842085e-06,
      "loss": 0.1208,
      "step": 15696
    },
    {
      "epoch": 0.4579322014119844,
      "grad_norm": 0.8387595450575478,
      "learning_rate": 5.9190617334277955e-06,
      "loss": 0.1366,
      "step": 15697
    },
    {
      "epoch": 0.4579613746426279,
      "grad_norm": 0.9436747595291688,
      "learning_rate": 5.91859734480834e-06,
      "loss": 0.1487,
      "step": 15698
    },
    {
      "epoch": 0.4579905478732715,
      "grad_norm": 0.8176453567039192,
      "learning_rate": 5.9181329479878694e-06,
      "loss": 0.1326,
      "step": 15699
    },
    {
      "epoch": 0.45801972110391503,
      "grad_norm": 0.8249520224108696,
      "learning_rate": 5.917668542970525e-06,
      "loss": 0.1346,
      "step": 15700
    },
    {
      "epoch": 0.4580488943345586,
      "grad_norm": 1.1771261961324997,
      "learning_rate": 5.917204129760457e-06,
      "loss": 0.1258,
      "step": 15701
    },
    {
      "epoch": 0.4580780675652022,
      "grad_norm": 0.922435303808003,
      "learning_rate": 5.916739708361807e-06,
      "loss": 0.1468,
      "step": 15702
    },
    {
      "epoch": 0.45810724079584575,
      "grad_norm": 0.7783907730184846,
      "learning_rate": 5.916275278778725e-06,
      "loss": 0.1308,
      "step": 15703
    },
    {
      "epoch": 0.4581364140264893,
      "grad_norm": 0.8477210818925518,
      "learning_rate": 5.915810841015356e-06,
      "loss": 0.1288,
      "step": 15704
    },
    {
      "epoch": 0.45816558725713286,
      "grad_norm": 0.9263544934521192,
      "learning_rate": 5.9153463950758465e-06,
      "loss": 0.1392,
      "step": 15705
    },
    {
      "epoch": 0.4581947604877764,
      "grad_norm": 1.1589657547471184,
      "learning_rate": 5.914881940964343e-06,
      "loss": 0.1556,
      "step": 15706
    },
    {
      "epoch": 0.45822393371841996,
      "grad_norm": 0.7878431455735818,
      "learning_rate": 5.914417478684992e-06,
      "loss": 0.1244,
      "step": 15707
    },
    {
      "epoch": 0.4582531069490635,
      "grad_norm": 1.048494819227112,
      "learning_rate": 5.913953008241939e-06,
      "loss": 0.1429,
      "step": 15708
    },
    {
      "epoch": 0.4582822801797071,
      "grad_norm": 1.0324088380933156,
      "learning_rate": 5.913488529639334e-06,
      "loss": 0.1603,
      "step": 15709
    },
    {
      "epoch": 0.4583114534103507,
      "grad_norm": 0.7533032521400135,
      "learning_rate": 5.913024042881319e-06,
      "loss": 0.1267,
      "step": 15710
    },
    {
      "epoch": 0.45834062664099423,
      "grad_norm": 0.7113361607782436,
      "learning_rate": 5.912559547972043e-06,
      "loss": 0.1301,
      "step": 15711
    },
    {
      "epoch": 0.4583697998716378,
      "grad_norm": 0.9468588685317267,
      "learning_rate": 5.912095044915655e-06,
      "loss": 0.1429,
      "step": 15712
    },
    {
      "epoch": 0.45839897310228134,
      "grad_norm": 1.0343857302662343,
      "learning_rate": 5.911630533716299e-06,
      "loss": 0.1347,
      "step": 15713
    },
    {
      "epoch": 0.4584281463329249,
      "grad_norm": 0.7442153617635787,
      "learning_rate": 5.911166014378126e-06,
      "loss": 0.134,
      "step": 15714
    },
    {
      "epoch": 0.45845731956356844,
      "grad_norm": 1.0059846109109405,
      "learning_rate": 5.910701486905277e-06,
      "loss": 0.1327,
      "step": 15715
    },
    {
      "epoch": 0.45848649279421205,
      "grad_norm": 0.8944237191137988,
      "learning_rate": 5.910236951301904e-06,
      "loss": 0.1669,
      "step": 15716
    },
    {
      "epoch": 0.4585156660248556,
      "grad_norm": 0.7624367383167346,
      "learning_rate": 5.909772407572153e-06,
      "loss": 0.136,
      "step": 15717
    },
    {
      "epoch": 0.45854483925549916,
      "grad_norm": 0.84192067433435,
      "learning_rate": 5.90930785572017e-06,
      "loss": 0.1112,
      "step": 15718
    },
    {
      "epoch": 0.4585740124861427,
      "grad_norm": 0.7057893376643394,
      "learning_rate": 5.908843295750104e-06,
      "loss": 0.1616,
      "step": 15719
    },
    {
      "epoch": 0.45860318571678627,
      "grad_norm": 0.8740791571990643,
      "learning_rate": 5.908378727666103e-06,
      "loss": 0.1485,
      "step": 15720
    },
    {
      "epoch": 0.4586323589474298,
      "grad_norm": 0.8135321402168436,
      "learning_rate": 5.907914151472312e-06,
      "loss": 0.1325,
      "step": 15721
    },
    {
      "epoch": 0.4586615321780734,
      "grad_norm": 0.848164326095268,
      "learning_rate": 5.9074495671728814e-06,
      "loss": 0.1317,
      "step": 15722
    },
    {
      "epoch": 0.458690705408717,
      "grad_norm": 0.704790213205819,
      "learning_rate": 5.9069849747719565e-06,
      "loss": 0.1221,
      "step": 15723
    },
    {
      "epoch": 0.45871987863936053,
      "grad_norm": 0.7766729929604375,
      "learning_rate": 5.906520374273688e-06,
      "loss": 0.1364,
      "step": 15724
    },
    {
      "epoch": 0.4587490518700041,
      "grad_norm": 0.7079896371676835,
      "learning_rate": 5.90605576568222e-06,
      "loss": 0.1366,
      "step": 15725
    },
    {
      "epoch": 0.45877822510064764,
      "grad_norm": 0.7778515559501419,
      "learning_rate": 5.905591149001704e-06,
      "loss": 0.1276,
      "step": 15726
    },
    {
      "epoch": 0.4588073983312912,
      "grad_norm": 0.9684472097472696,
      "learning_rate": 5.9051265242362854e-06,
      "loss": 0.1346,
      "step": 15727
    },
    {
      "epoch": 0.45883657156193475,
      "grad_norm": 0.676124760181125,
      "learning_rate": 5.904661891390114e-06,
      "loss": 0.1197,
      "step": 15728
    },
    {
      "epoch": 0.45886574479257836,
      "grad_norm": 0.6906927877819188,
      "learning_rate": 5.904197250467339e-06,
      "loss": 0.1425,
      "step": 15729
    },
    {
      "epoch": 0.4588949180232219,
      "grad_norm": 1.2108509483220204,
      "learning_rate": 5.903732601472102e-06,
      "loss": 0.1395,
      "step": 15730
    },
    {
      "epoch": 0.45892409125386546,
      "grad_norm": 1.0535901221585215,
      "learning_rate": 5.903267944408561e-06,
      "loss": 0.1449,
      "step": 15731
    },
    {
      "epoch": 0.458953264484509,
      "grad_norm": 0.730594325168327,
      "learning_rate": 5.902803279280857e-06,
      "loss": 0.106,
      "step": 15732
    },
    {
      "epoch": 0.45898243771515257,
      "grad_norm": 0.9053460795820518,
      "learning_rate": 5.902338606093139e-06,
      "loss": 0.1285,
      "step": 15733
    },
    {
      "epoch": 0.4590116109457961,
      "grad_norm": 0.7754003592857962,
      "learning_rate": 5.9018739248495605e-06,
      "loss": 0.1299,
      "step": 15734
    },
    {
      "epoch": 0.4590407841764397,
      "grad_norm": 0.7470854924206519,
      "learning_rate": 5.901409235554265e-06,
      "loss": 0.1078,
      "step": 15735
    },
    {
      "epoch": 0.4590699574070833,
      "grad_norm": 0.8360333458535162,
      "learning_rate": 5.900944538211404e-06,
      "loss": 0.1367,
      "step": 15736
    },
    {
      "epoch": 0.45909913063772684,
      "grad_norm": 0.8861071433531529,
      "learning_rate": 5.9004798328251255e-06,
      "loss": 0.1485,
      "step": 15737
    },
    {
      "epoch": 0.4591283038683704,
      "grad_norm": 0.9182952557822152,
      "learning_rate": 5.900015119399577e-06,
      "loss": 0.1466,
      "step": 15738
    },
    {
      "epoch": 0.45915747709901394,
      "grad_norm": 1.1191553678586899,
      "learning_rate": 5.899550397938909e-06,
      "loss": 0.148,
      "step": 15739
    },
    {
      "epoch": 0.4591866503296575,
      "grad_norm": 0.816610906007468,
      "learning_rate": 5.89908566844727e-06,
      "loss": 0.1262,
      "step": 15740
    },
    {
      "epoch": 0.45921582356030105,
      "grad_norm": 1.1109664468265683,
      "learning_rate": 5.898620930928808e-06,
      "loss": 0.1432,
      "step": 15741
    },
    {
      "epoch": 0.4592449967909446,
      "grad_norm": 0.8793888636473848,
      "learning_rate": 5.898156185387674e-06,
      "loss": 0.156,
      "step": 15742
    },
    {
      "epoch": 0.4592741700215882,
      "grad_norm": 0.7624539990976663,
      "learning_rate": 5.897691431828014e-06,
      "loss": 0.135,
      "step": 15743
    },
    {
      "epoch": 0.45930334325223177,
      "grad_norm": 0.9667460169531502,
      "learning_rate": 5.897226670253982e-06,
      "loss": 0.1476,
      "step": 15744
    },
    {
      "epoch": 0.4593325164828753,
      "grad_norm": 0.8293488957500059,
      "learning_rate": 5.896761900669722e-06,
      "loss": 0.1422,
      "step": 15745
    },
    {
      "epoch": 0.4593616897135189,
      "grad_norm": 0.8085583621733674,
      "learning_rate": 5.896297123079388e-06,
      "loss": 0.1448,
      "step": 15746
    },
    {
      "epoch": 0.4593908629441624,
      "grad_norm": 0.9251861458750373,
      "learning_rate": 5.895832337487126e-06,
      "loss": 0.1357,
      "step": 15747
    },
    {
      "epoch": 0.459420036174806,
      "grad_norm": 0.773105611322147,
      "learning_rate": 5.895367543897086e-06,
      "loss": 0.1397,
      "step": 15748
    },
    {
      "epoch": 0.45944920940544953,
      "grad_norm": 0.9358682107421241,
      "learning_rate": 5.89490274231342e-06,
      "loss": 0.1257,
      "step": 15749
    },
    {
      "epoch": 0.45947838263609314,
      "grad_norm": 0.7510929156606305,
      "learning_rate": 5.894437932740274e-06,
      "loss": 0.127,
      "step": 15750
    },
    {
      "epoch": 0.4595075558667367,
      "grad_norm": 0.8084065674036928,
      "learning_rate": 5.893973115181801e-06,
      "loss": 0.1441,
      "step": 15751
    },
    {
      "epoch": 0.45953672909738025,
      "grad_norm": 0.822699837033059,
      "learning_rate": 5.8935082896421495e-06,
      "loss": 0.1429,
      "step": 15752
    },
    {
      "epoch": 0.4595659023280238,
      "grad_norm": 0.8620453745010047,
      "learning_rate": 5.893043456125469e-06,
      "loss": 0.1112,
      "step": 15753
    },
    {
      "epoch": 0.45959507555866735,
      "grad_norm": 1.0328251337959462,
      "learning_rate": 5.892578614635909e-06,
      "loss": 0.1393,
      "step": 15754
    },
    {
      "epoch": 0.4596242487893109,
      "grad_norm": 0.8911042434095463,
      "learning_rate": 5.892113765177621e-06,
      "loss": 0.1269,
      "step": 15755
    },
    {
      "epoch": 0.4596534220199545,
      "grad_norm": 0.7718290472463595,
      "learning_rate": 5.891648907754753e-06,
      "loss": 0.136,
      "step": 15756
    },
    {
      "epoch": 0.45968259525059807,
      "grad_norm": 0.9180483729049965,
      "learning_rate": 5.891184042371459e-06,
      "loss": 0.1252,
      "step": 15757
    },
    {
      "epoch": 0.4597117684812416,
      "grad_norm": 0.9859209708795096,
      "learning_rate": 5.890719169031885e-06,
      "loss": 0.1557,
      "step": 15758
    },
    {
      "epoch": 0.4597409417118852,
      "grad_norm": 0.8682703837180158,
      "learning_rate": 5.890254287740183e-06,
      "loss": 0.1461,
      "step": 15759
    },
    {
      "epoch": 0.45977011494252873,
      "grad_norm": 0.9547434965349666,
      "learning_rate": 5.889789398500503e-06,
      "loss": 0.1371,
      "step": 15760
    },
    {
      "epoch": 0.4597992881731723,
      "grad_norm": 1.1981820087067303,
      "learning_rate": 5.8893245013169965e-06,
      "loss": 0.1411,
      "step": 15761
    },
    {
      "epoch": 0.45982846140381584,
      "grad_norm": 1.1017722517492872,
      "learning_rate": 5.888859596193812e-06,
      "loss": 0.1496,
      "step": 15762
    },
    {
      "epoch": 0.45985763463445944,
      "grad_norm": 0.7807452502563147,
      "learning_rate": 5.8883946831351014e-06,
      "loss": 0.1224,
      "step": 15763
    },
    {
      "epoch": 0.459886807865103,
      "grad_norm": 0.8802133211082914,
      "learning_rate": 5.887929762145016e-06,
      "loss": 0.1343,
      "step": 15764
    },
    {
      "epoch": 0.45991598109574655,
      "grad_norm": 1.0183444332824991,
      "learning_rate": 5.887464833227705e-06,
      "loss": 0.1534,
      "step": 15765
    },
    {
      "epoch": 0.4599451543263901,
      "grad_norm": 1.0514384318453445,
      "learning_rate": 5.8869998963873195e-06,
      "loss": 0.1323,
      "step": 15766
    },
    {
      "epoch": 0.45997432755703366,
      "grad_norm": 0.8659569271433172,
      "learning_rate": 5.886534951628011e-06,
      "loss": 0.1539,
      "step": 15767
    },
    {
      "epoch": 0.4600035007876772,
      "grad_norm": 0.969923767980383,
      "learning_rate": 5.88606999895393e-06,
      "loss": 0.1455,
      "step": 15768
    },
    {
      "epoch": 0.46003267401832076,
      "grad_norm": 2.026311164005376,
      "learning_rate": 5.885605038369228e-06,
      "loss": 0.1345,
      "step": 15769
    },
    {
      "epoch": 0.4600618472489644,
      "grad_norm": 1.0163049865030611,
      "learning_rate": 5.885140069878056e-06,
      "loss": 0.1313,
      "step": 15770
    },
    {
      "epoch": 0.4600910204796079,
      "grad_norm": 0.7852708379640772,
      "learning_rate": 5.884675093484565e-06,
      "loss": 0.1119,
      "step": 15771
    },
    {
      "epoch": 0.4601201937102515,
      "grad_norm": 0.8553733755474103,
      "learning_rate": 5.884210109192904e-06,
      "loss": 0.1698,
      "step": 15772
    },
    {
      "epoch": 0.46014936694089503,
      "grad_norm": 0.9825994654228479,
      "learning_rate": 5.883745117007227e-06,
      "loss": 0.1414,
      "step": 15773
    },
    {
      "epoch": 0.4601785401715386,
      "grad_norm": 1.1105769379837276,
      "learning_rate": 5.883280116931687e-06,
      "loss": 0.1795,
      "step": 15774
    },
    {
      "epoch": 0.46020771340218214,
      "grad_norm": 0.7570744073486113,
      "learning_rate": 5.882815108970429e-06,
      "loss": 0.1167,
      "step": 15775
    },
    {
      "epoch": 0.4602368866328257,
      "grad_norm": 0.9461345232593066,
      "learning_rate": 5.882350093127611e-06,
      "loss": 0.1551,
      "step": 15776
    },
    {
      "epoch": 0.4602660598634693,
      "grad_norm": 0.9622667746723423,
      "learning_rate": 5.881885069407382e-06,
      "loss": 0.138,
      "step": 15777
    },
    {
      "epoch": 0.46029523309411285,
      "grad_norm": 0.7238455267781404,
      "learning_rate": 5.881420037813892e-06,
      "loss": 0.1303,
      "step": 15778
    },
    {
      "epoch": 0.4603244063247564,
      "grad_norm": 0.7767999021164391,
      "learning_rate": 5.880954998351296e-06,
      "loss": 0.1333,
      "step": 15779
    },
    {
      "epoch": 0.46035357955539996,
      "grad_norm": 0.7106336440672024,
      "learning_rate": 5.8804899510237435e-06,
      "loss": 0.1248,
      "step": 15780
    },
    {
      "epoch": 0.4603827527860435,
      "grad_norm": 0.7356084204781619,
      "learning_rate": 5.880024895835387e-06,
      "loss": 0.161,
      "step": 15781
    },
    {
      "epoch": 0.46041192601668707,
      "grad_norm": 0.7885342508501595,
      "learning_rate": 5.879559832790378e-06,
      "loss": 0.1349,
      "step": 15782
    },
    {
      "epoch": 0.4604410992473307,
      "grad_norm": 0.8843470982483292,
      "learning_rate": 5.8790947618928686e-06,
      "loss": 0.1549,
      "step": 15783
    },
    {
      "epoch": 0.46047027247797423,
      "grad_norm": 0.7950865713698176,
      "learning_rate": 5.878629683147011e-06,
      "loss": 0.1552,
      "step": 15784
    },
    {
      "epoch": 0.4604994457086178,
      "grad_norm": 0.76882081982006,
      "learning_rate": 5.878164596556958e-06,
      "loss": 0.1415,
      "step": 15785
    },
    {
      "epoch": 0.46052861893926134,
      "grad_norm": 0.7126205467770146,
      "learning_rate": 5.87769950212686e-06,
      "loss": 0.1221,
      "step": 15786
    },
    {
      "epoch": 0.4605577921699049,
      "grad_norm": 0.9999807887006671,
      "learning_rate": 5.877234399860872e-06,
      "loss": 0.1513,
      "step": 15787
    },
    {
      "epoch": 0.46058696540054844,
      "grad_norm": 0.8897769518034591,
      "learning_rate": 5.876769289763144e-06,
      "loss": 0.1587,
      "step": 15788
    },
    {
      "epoch": 0.460616138631192,
      "grad_norm": 0.8672343051128001,
      "learning_rate": 5.876304171837829e-06,
      "loss": 0.1574,
      "step": 15789
    },
    {
      "epoch": 0.4606453118618356,
      "grad_norm": 0.9092971735426206,
      "learning_rate": 5.875839046089078e-06,
      "loss": 0.1332,
      "step": 15790
    },
    {
      "epoch": 0.46067448509247916,
      "grad_norm": 0.8162992516912753,
      "learning_rate": 5.875373912521047e-06,
      "loss": 0.1321,
      "step": 15791
    },
    {
      "epoch": 0.4607036583231227,
      "grad_norm": 0.9867222396381891,
      "learning_rate": 5.874908771137887e-06,
      "loss": 0.1341,
      "step": 15792
    },
    {
      "epoch": 0.46073283155376626,
      "grad_norm": 0.7682594536658461,
      "learning_rate": 5.874443621943749e-06,
      "loss": 0.1305,
      "step": 15793
    },
    {
      "epoch": 0.4607620047844098,
      "grad_norm": 0.7447781676970837,
      "learning_rate": 5.873978464942788e-06,
      "loss": 0.1393,
      "step": 15794
    },
    {
      "epoch": 0.46079117801505337,
      "grad_norm": 0.8220847614500845,
      "learning_rate": 5.873513300139155e-06,
      "loss": 0.1376,
      "step": 15795
    },
    {
      "epoch": 0.4608203512456969,
      "grad_norm": 0.9003115680649544,
      "learning_rate": 5.873048127537005e-06,
      "loss": 0.1283,
      "step": 15796
    },
    {
      "epoch": 0.46084952447634053,
      "grad_norm": 0.6516482520777607,
      "learning_rate": 5.8725829471404884e-06,
      "loss": 0.1268,
      "step": 15797
    },
    {
      "epoch": 0.4608786977069841,
      "grad_norm": 0.8948338886054641,
      "learning_rate": 5.87211775895376e-06,
      "loss": 0.1359,
      "step": 15798
    },
    {
      "epoch": 0.46090787093762764,
      "grad_norm": 0.7926542584453049,
      "learning_rate": 5.871652562980973e-06,
      "loss": 0.1421,
      "step": 15799
    },
    {
      "epoch": 0.4609370441682712,
      "grad_norm": 1.140870125403056,
      "learning_rate": 5.871187359226279e-06,
      "loss": 0.1334,
      "step": 15800
    },
    {
      "epoch": 0.46096621739891475,
      "grad_norm": 0.7507135948161748,
      "learning_rate": 5.870722147693832e-06,
      "loss": 0.1395,
      "step": 15801
    },
    {
      "epoch": 0.4609953906295583,
      "grad_norm": 0.8021253975166392,
      "learning_rate": 5.870256928387788e-06,
      "loss": 0.1351,
      "step": 15802
    },
    {
      "epoch": 0.46102456386020185,
      "grad_norm": 0.7941479253720344,
      "learning_rate": 5.8697917013122955e-06,
      "loss": 0.1427,
      "step": 15803
    },
    {
      "epoch": 0.46105373709084546,
      "grad_norm": 0.8310011848931662,
      "learning_rate": 5.869326466471512e-06,
      "loss": 0.1563,
      "step": 15804
    },
    {
      "epoch": 0.461082910321489,
      "grad_norm": 0.7490702873320659,
      "learning_rate": 5.868861223869587e-06,
      "loss": 0.1321,
      "step": 15805
    },
    {
      "epoch": 0.46111208355213257,
      "grad_norm": 0.7299876823540122,
      "learning_rate": 5.868395973510679e-06,
      "loss": 0.1423,
      "step": 15806
    },
    {
      "epoch": 0.4611412567827761,
      "grad_norm": 0.7715733570698626,
      "learning_rate": 5.867930715398938e-06,
      "loss": 0.132,
      "step": 15807
    },
    {
      "epoch": 0.4611704300134197,
      "grad_norm": 0.7824107871791222,
      "learning_rate": 5.867465449538518e-06,
      "loss": 0.1356,
      "step": 15808
    },
    {
      "epoch": 0.4611996032440632,
      "grad_norm": 0.7929137437316226,
      "learning_rate": 5.8670001759335745e-06,
      "loss": 0.1374,
      "step": 15809
    },
    {
      "epoch": 0.46122877647470684,
      "grad_norm": 0.787361562331058,
      "learning_rate": 5.86653489458826e-06,
      "loss": 0.1274,
      "step": 15810
    },
    {
      "epoch": 0.4612579497053504,
      "grad_norm": 0.6777553009520738,
      "learning_rate": 5.866069605506729e-06,
      "loss": 0.1245,
      "step": 15811
    },
    {
      "epoch": 0.46128712293599394,
      "grad_norm": 0.7695877637104159,
      "learning_rate": 5.865604308693136e-06,
      "loss": 0.1516,
      "step": 15812
    },
    {
      "epoch": 0.4613162961666375,
      "grad_norm": 1.0323711240579274,
      "learning_rate": 5.865139004151633e-06,
      "loss": 0.1448,
      "step": 15813
    },
    {
      "epoch": 0.46134546939728105,
      "grad_norm": 0.799632101319446,
      "learning_rate": 5.864673691886375e-06,
      "loss": 0.1519,
      "step": 15814
    },
    {
      "epoch": 0.4613746426279246,
      "grad_norm": 0.7439077035334605,
      "learning_rate": 5.864208371901519e-06,
      "loss": 0.1462,
      "step": 15815
    },
    {
      "epoch": 0.46140381585856816,
      "grad_norm": 0.9327709860995128,
      "learning_rate": 5.863743044201215e-06,
      "loss": 0.1285,
      "step": 15816
    },
    {
      "epoch": 0.46143298908921176,
      "grad_norm": 0.998590255210444,
      "learning_rate": 5.8632777087896205e-06,
      "loss": 0.1448,
      "step": 15817
    },
    {
      "epoch": 0.4614621623198553,
      "grad_norm": 0.8592407937248154,
      "learning_rate": 5.862812365670888e-06,
      "loss": 0.1331,
      "step": 15818
    },
    {
      "epoch": 0.46149133555049887,
      "grad_norm": 0.9755159497312703,
      "learning_rate": 5.862347014849174e-06,
      "loss": 0.1505,
      "step": 15819
    },
    {
      "epoch": 0.4615205087811424,
      "grad_norm": 0.9038505585913441,
      "learning_rate": 5.861881656328629e-06,
      "loss": 0.135,
      "step": 15820
    },
    {
      "epoch": 0.461549682011786,
      "grad_norm": 0.7637412982771709,
      "learning_rate": 5.861416290113413e-06,
      "loss": 0.1406,
      "step": 15821
    },
    {
      "epoch": 0.46157885524242953,
      "grad_norm": 0.86363035493597,
      "learning_rate": 5.860950916207677e-06,
      "loss": 0.1316,
      "step": 15822
    },
    {
      "epoch": 0.4616080284730731,
      "grad_norm": 0.9496228424623425,
      "learning_rate": 5.8604855346155756e-06,
      "loss": 0.1151,
      "step": 15823
    },
    {
      "epoch": 0.4616372017037167,
      "grad_norm": 0.9982435813688407,
      "learning_rate": 5.860020145341267e-06,
      "loss": 0.1306,
      "step": 15824
    },
    {
      "epoch": 0.46166637493436025,
      "grad_norm": 1.0682495955333833,
      "learning_rate": 5.859554748388903e-06,
      "loss": 0.1191,
      "step": 15825
    },
    {
      "epoch": 0.4616955481650038,
      "grad_norm": 0.9238371193535599,
      "learning_rate": 5.859089343762638e-06,
      "loss": 0.1472,
      "step": 15826
    },
    {
      "epoch": 0.46172472139564735,
      "grad_norm": 0.8926023227567703,
      "learning_rate": 5.85862393146663e-06,
      "loss": 0.1351,
      "step": 15827
    },
    {
      "epoch": 0.4617538946262909,
      "grad_norm": 0.7104826765634306,
      "learning_rate": 5.858158511505032e-06,
      "loss": 0.1103,
      "step": 15828
    },
    {
      "epoch": 0.46178306785693446,
      "grad_norm": 1.5810407758998741,
      "learning_rate": 5.857693083881999e-06,
      "loss": 0.1561,
      "step": 15829
    },
    {
      "epoch": 0.461812241087578,
      "grad_norm": 0.9074428050151642,
      "learning_rate": 5.857227648601688e-06,
      "loss": 0.1765,
      "step": 15830
    },
    {
      "epoch": 0.4618414143182216,
      "grad_norm": 0.8134479443437905,
      "learning_rate": 5.856762205668253e-06,
      "loss": 0.1403,
      "step": 15831
    },
    {
      "epoch": 0.4618705875488652,
      "grad_norm": 1.0476413221189405,
      "learning_rate": 5.856296755085849e-06,
      "loss": 0.1617,
      "step": 15832
    },
    {
      "epoch": 0.46189976077950873,
      "grad_norm": 0.7207947215252731,
      "learning_rate": 5.855831296858631e-06,
      "loss": 0.1565,
      "step": 15833
    },
    {
      "epoch": 0.4619289340101523,
      "grad_norm": 0.8456209194298886,
      "learning_rate": 5.855365830990759e-06,
      "loss": 0.1342,
      "step": 15834
    },
    {
      "epoch": 0.46195810724079583,
      "grad_norm": 1.099463485103391,
      "learning_rate": 5.8549003574863815e-06,
      "loss": 0.1646,
      "step": 15835
    },
    {
      "epoch": 0.4619872804714394,
      "grad_norm": 0.7539161030504511,
      "learning_rate": 5.85443487634966e-06,
      "loss": 0.1418,
      "step": 15836
    },
    {
      "epoch": 0.46201645370208294,
      "grad_norm": 1.2446236246928624,
      "learning_rate": 5.853969387584747e-06,
      "loss": 0.1406,
      "step": 15837
    },
    {
      "epoch": 0.46204562693272655,
      "grad_norm": 1.01262732191191,
      "learning_rate": 5.853503891195797e-06,
      "loss": 0.1418,
      "step": 15838
    },
    {
      "epoch": 0.4620748001633701,
      "grad_norm": 1.0295086226658106,
      "learning_rate": 5.8530383871869725e-06,
      "loss": 0.1275,
      "step": 15839
    },
    {
      "epoch": 0.46210397339401366,
      "grad_norm": 0.8157928167959317,
      "learning_rate": 5.852572875562422e-06,
      "loss": 0.1301,
      "step": 15840
    },
    {
      "epoch": 0.4621331466246572,
      "grad_norm": 1.2160861930921467,
      "learning_rate": 5.852107356326305e-06,
      "loss": 0.1339,
      "step": 15841
    },
    {
      "epoch": 0.46216231985530076,
      "grad_norm": 1.0446637605268678,
      "learning_rate": 5.851641829482777e-06,
      "loss": 0.1403,
      "step": 15842
    },
    {
      "epoch": 0.4621914930859443,
      "grad_norm": 0.9811265382880493,
      "learning_rate": 5.851176295035994e-06,
      "loss": 0.1238,
      "step": 15843
    },
    {
      "epoch": 0.4622206663165879,
      "grad_norm": 0.9901597261186631,
      "learning_rate": 5.850710752990112e-06,
      "loss": 0.1242,
      "step": 15844
    },
    {
      "epoch": 0.4622498395472315,
      "grad_norm": 0.7981800484178719,
      "learning_rate": 5.850245203349288e-06,
      "loss": 0.1528,
      "step": 15845
    },
    {
      "epoch": 0.46227901277787503,
      "grad_norm": 0.7895154486982267,
      "learning_rate": 5.849779646117677e-06,
      "loss": 0.1567,
      "step": 15846
    },
    {
      "epoch": 0.4623081860085186,
      "grad_norm": 0.745081575095745,
      "learning_rate": 5.849314081299436e-06,
      "loss": 0.1386,
      "step": 15847
    },
    {
      "epoch": 0.46233735923916214,
      "grad_norm": 0.8394289508340748,
      "learning_rate": 5.848848508898722e-06,
      "loss": 0.1277,
      "step": 15848
    },
    {
      "epoch": 0.4623665324698057,
      "grad_norm": 0.8546716452311085,
      "learning_rate": 5.848382928919693e-06,
      "loss": 0.1425,
      "step": 15849
    },
    {
      "epoch": 0.46239570570044924,
      "grad_norm": 0.8334257354863847,
      "learning_rate": 5.847917341366501e-06,
      "loss": 0.1315,
      "step": 15850
    },
    {
      "epoch": 0.46242487893109285,
      "grad_norm": 0.9946812920536485,
      "learning_rate": 5.847451746243306e-06,
      "loss": 0.1282,
      "step": 15851
    },
    {
      "epoch": 0.4624540521617364,
      "grad_norm": 1.0914693586102404,
      "learning_rate": 5.846986143554265e-06,
      "loss": 0.1525,
      "step": 15852
    },
    {
      "epoch": 0.46248322539237996,
      "grad_norm": 1.0420059449555357,
      "learning_rate": 5.846520533303532e-06,
      "loss": 0.1485,
      "step": 15853
    },
    {
      "epoch": 0.4625123986230235,
      "grad_norm": 0.7094138955921574,
      "learning_rate": 5.846054915495269e-06,
      "loss": 0.1142,
      "step": 15854
    },
    {
      "epoch": 0.46254157185366707,
      "grad_norm": 1.051887299073645,
      "learning_rate": 5.845589290133627e-06,
      "loss": 0.1377,
      "step": 15855
    },
    {
      "epoch": 0.4625707450843106,
      "grad_norm": 1.0753670585455644,
      "learning_rate": 5.845123657222768e-06,
      "loss": 0.1548,
      "step": 15856
    },
    {
      "epoch": 0.4625999183149542,
      "grad_norm": 0.8494958539932758,
      "learning_rate": 5.844658016766845e-06,
      "loss": 0.1188,
      "step": 15857
    },
    {
      "epoch": 0.4626290915455978,
      "grad_norm": 0.6343566290332142,
      "learning_rate": 5.844192368770017e-06,
      "loss": 0.1272,
      "step": 15858
    },
    {
      "epoch": 0.46265826477624133,
      "grad_norm": 0.8215137434270036,
      "learning_rate": 5.843726713236442e-06,
      "loss": 0.1326,
      "step": 15859
    },
    {
      "epoch": 0.4626874380068849,
      "grad_norm": 1.092692961779195,
      "learning_rate": 5.843261050170274e-06,
      "loss": 0.1321,
      "step": 15860
    },
    {
      "epoch": 0.46271661123752844,
      "grad_norm": 0.7652329509303649,
      "learning_rate": 5.842795379575675e-06,
      "loss": 0.1259,
      "step": 15861
    },
    {
      "epoch": 0.462745784468172,
      "grad_norm": 0.6855271583271109,
      "learning_rate": 5.842329701456799e-06,
      "loss": 0.1304,
      "step": 15862
    },
    {
      "epoch": 0.46277495769881555,
      "grad_norm": 0.7693759880466009,
      "learning_rate": 5.841864015817804e-06,
      "loss": 0.1292,
      "step": 15863
    },
    {
      "epoch": 0.4628041309294591,
      "grad_norm": 1.107998901306329,
      "learning_rate": 5.84139832266285e-06,
      "loss": 0.1294,
      "step": 15864
    },
    {
      "epoch": 0.4628333041601027,
      "grad_norm": 0.7082320486190519,
      "learning_rate": 5.84093262199609e-06,
      "loss": 0.125,
      "step": 15865
    },
    {
      "epoch": 0.46286247739074626,
      "grad_norm": 0.6618581406917674,
      "learning_rate": 5.840466913821687e-06,
      "loss": 0.139,
      "step": 15866
    },
    {
      "epoch": 0.4628916506213898,
      "grad_norm": 0.9890547647006038,
      "learning_rate": 5.840001198143795e-06,
      "loss": 0.1392,
      "step": 15867
    },
    {
      "epoch": 0.46292082385203337,
      "grad_norm": 0.8354134840361568,
      "learning_rate": 5.8395354749665725e-06,
      "loss": 0.1412,
      "step": 15868
    },
    {
      "epoch": 0.4629499970826769,
      "grad_norm": 0.6994827796111657,
      "learning_rate": 5.839069744294178e-06,
      "loss": 0.1183,
      "step": 15869
    },
    {
      "epoch": 0.4629791703133205,
      "grad_norm": 0.7988385418865582,
      "learning_rate": 5.838604006130769e-06,
      "loss": 0.1315,
      "step": 15870
    },
    {
      "epoch": 0.4630083435439641,
      "grad_norm": 0.8536960901789566,
      "learning_rate": 5.8381382604805035e-06,
      "loss": 0.1253,
      "step": 15871
    },
    {
      "epoch": 0.46303751677460764,
      "grad_norm": 0.8348528895425051,
      "learning_rate": 5.83767250734754e-06,
      "loss": 0.1369,
      "step": 15872
    },
    {
      "epoch": 0.4630666900052512,
      "grad_norm": 0.703700913853645,
      "learning_rate": 5.837206746736036e-06,
      "loss": 0.1319,
      "step": 15873
    },
    {
      "epoch": 0.46309586323589474,
      "grad_norm": 0.721868231071233,
      "learning_rate": 5.836740978650149e-06,
      "loss": 0.1438,
      "step": 15874
    },
    {
      "epoch": 0.4631250364665383,
      "grad_norm": 0.9297929393124746,
      "learning_rate": 5.83627520309404e-06,
      "loss": 0.1312,
      "step": 15875
    },
    {
      "epoch": 0.46315420969718185,
      "grad_norm": 0.7933117896441769,
      "learning_rate": 5.835809420071865e-06,
      "loss": 0.1378,
      "step": 15876
    },
    {
      "epoch": 0.4631833829278254,
      "grad_norm": 0.8277036736973242,
      "learning_rate": 5.835343629587783e-06,
      "loss": 0.1574,
      "step": 15877
    },
    {
      "epoch": 0.463212556158469,
      "grad_norm": 0.8011762285766284,
      "learning_rate": 5.834877831645952e-06,
      "loss": 0.1508,
      "step": 15878
    },
    {
      "epoch": 0.46324172938911257,
      "grad_norm": 0.8365521808430085,
      "learning_rate": 5.8344120262505335e-06,
      "loss": 0.1466,
      "step": 15879
    },
    {
      "epoch": 0.4632709026197561,
      "grad_norm": 0.8445417585011479,
      "learning_rate": 5.8339462134056805e-06,
      "loss": 0.1448,
      "step": 15880
    },
    {
      "epoch": 0.4633000758503997,
      "grad_norm": 0.8004943882549624,
      "learning_rate": 5.833480393115556e-06,
      "loss": 0.1441,
      "step": 15881
    },
    {
      "epoch": 0.4633292490810432,
      "grad_norm": 0.7248928591422803,
      "learning_rate": 5.833014565384318e-06,
      "loss": 0.1138,
      "step": 15882
    },
    {
      "epoch": 0.4633584223116868,
      "grad_norm": 1.2993545420646264,
      "learning_rate": 5.832548730216123e-06,
      "loss": 0.1358,
      "step": 15883
    },
    {
      "epoch": 0.46338759554233033,
      "grad_norm": 0.888070530529126,
      "learning_rate": 5.832082887615134e-06,
      "loss": 0.128,
      "step": 15884
    },
    {
      "epoch": 0.46341676877297394,
      "grad_norm": 0.8489878461255642,
      "learning_rate": 5.8316170375855065e-06,
      "loss": 0.1354,
      "step": 15885
    },
    {
      "epoch": 0.4634459420036175,
      "grad_norm": 0.7668677989178522,
      "learning_rate": 5.8311511801314e-06,
      "loss": 0.1479,
      "step": 15886
    },
    {
      "epoch": 0.46347511523426105,
      "grad_norm": 0.8060949317504428,
      "learning_rate": 5.8306853152569755e-06,
      "loss": 0.1384,
      "step": 15887
    },
    {
      "epoch": 0.4635042884649046,
      "grad_norm": 0.7966606036787572,
      "learning_rate": 5.83021944296639e-06,
      "loss": 0.1749,
      "step": 15888
    },
    {
      "epoch": 0.46353346169554815,
      "grad_norm": 0.7777750236694937,
      "learning_rate": 5.829753563263803e-06,
      "loss": 0.1172,
      "step": 15889
    },
    {
      "epoch": 0.4635626349261917,
      "grad_norm": 0.830723371055366,
      "learning_rate": 5.829287676153375e-06,
      "loss": 0.1285,
      "step": 15890
    },
    {
      "epoch": 0.46359180815683526,
      "grad_norm": 0.8333860237145468,
      "learning_rate": 5.828821781639264e-06,
      "loss": 0.1498,
      "step": 15891
    },
    {
      "epoch": 0.46362098138747887,
      "grad_norm": 0.8372865646905908,
      "learning_rate": 5.828355879725632e-06,
      "loss": 0.1189,
      "step": 15892
    },
    {
      "epoch": 0.4636501546181224,
      "grad_norm": 0.8435699699315544,
      "learning_rate": 5.827889970416634e-06,
      "loss": 0.1346,
      "step": 15893
    },
    {
      "epoch": 0.463679327848766,
      "grad_norm": 0.7458663290715156,
      "learning_rate": 5.827424053716434e-06,
      "loss": 0.1518,
      "step": 15894
    },
    {
      "epoch": 0.46370850107940953,
      "grad_norm": 0.7838148068917841,
      "learning_rate": 5.826958129629187e-06,
      "loss": 0.1417,
      "step": 15895
    },
    {
      "epoch": 0.4637376743100531,
      "grad_norm": 0.888477035667017,
      "learning_rate": 5.826492198159058e-06,
      "loss": 0.126,
      "step": 15896
    },
    {
      "epoch": 0.46376684754069664,
      "grad_norm": 0.8729074064593524,
      "learning_rate": 5.826026259310202e-06,
      "loss": 0.1457,
      "step": 15897
    },
    {
      "epoch": 0.46379602077134024,
      "grad_norm": 0.9131639863595894,
      "learning_rate": 5.825560313086781e-06,
      "loss": 0.1425,
      "step": 15898
    },
    {
      "epoch": 0.4638251940019838,
      "grad_norm": 0.9122556029639202,
      "learning_rate": 5.825094359492955e-06,
      "loss": 0.1552,
      "step": 15899
    },
    {
      "epoch": 0.46385436723262735,
      "grad_norm": 0.8976113489352935,
      "learning_rate": 5.8246283985328845e-06,
      "loss": 0.1385,
      "step": 15900
    },
    {
      "epoch": 0.4638835404632709,
      "grad_norm": 0.8362597188678967,
      "learning_rate": 5.824162430210727e-06,
      "loss": 0.1447,
      "step": 15901
    },
    {
      "epoch": 0.46391271369391446,
      "grad_norm": 0.9641924428098306,
      "learning_rate": 5.823696454530645e-06,
      "loss": 0.1453,
      "step": 15902
    },
    {
      "epoch": 0.463941886924558,
      "grad_norm": 0.975611475874682,
      "learning_rate": 5.823230471496797e-06,
      "loss": 0.1274,
      "step": 15903
    },
    {
      "epoch": 0.46397106015520156,
      "grad_norm": 0.7284168076885339,
      "learning_rate": 5.822764481113345e-06,
      "loss": 0.122,
      "step": 15904
    },
    {
      "epoch": 0.4640002333858452,
      "grad_norm": 0.8004637172072524,
      "learning_rate": 5.822298483384446e-06,
      "loss": 0.1305,
      "step": 15905
    },
    {
      "epoch": 0.4640294066164887,
      "grad_norm": 1.3252533327640181,
      "learning_rate": 5.821832478314265e-06,
      "loss": 0.1376,
      "step": 15906
    },
    {
      "epoch": 0.4640585798471323,
      "grad_norm": 0.9785839268351734,
      "learning_rate": 5.821366465906958e-06,
      "loss": 0.1339,
      "step": 15907
    },
    {
      "epoch": 0.46408775307777583,
      "grad_norm": 0.8941719513667673,
      "learning_rate": 5.820900446166687e-06,
      "loss": 0.1448,
      "step": 15908
    },
    {
      "epoch": 0.4641169263084194,
      "grad_norm": 0.9483706041460341,
      "learning_rate": 5.820434419097614e-06,
      "loss": 0.1568,
      "step": 15909
    },
    {
      "epoch": 0.46414609953906294,
      "grad_norm": 1.0202428273220814,
      "learning_rate": 5.819968384703898e-06,
      "loss": 0.115,
      "step": 15910
    },
    {
      "epoch": 0.4641752727697065,
      "grad_norm": 0.7549637289971923,
      "learning_rate": 5.819502342989701e-06,
      "loss": 0.1394,
      "step": 15911
    },
    {
      "epoch": 0.4642044460003501,
      "grad_norm": 0.8714901820124208,
      "learning_rate": 5.81903629395918e-06,
      "loss": 0.1595,
      "step": 15912
    },
    {
      "epoch": 0.46423361923099365,
      "grad_norm": 0.9732469182318602,
      "learning_rate": 5.818570237616501e-06,
      "loss": 0.152,
      "step": 15913
    },
    {
      "epoch": 0.4642627924616372,
      "grad_norm": 0.842016257223515,
      "learning_rate": 5.818104173965822e-06,
      "loss": 0.1264,
      "step": 15914
    },
    {
      "epoch": 0.46429196569228076,
      "grad_norm": 0.8294549762141147,
      "learning_rate": 5.817638103011303e-06,
      "loss": 0.1548,
      "step": 15915
    },
    {
      "epoch": 0.4643211389229243,
      "grad_norm": 1.1116113134622982,
      "learning_rate": 5.817172024757107e-06,
      "loss": 0.1276,
      "step": 15916
    },
    {
      "epoch": 0.46435031215356787,
      "grad_norm": 0.8217614550077912,
      "learning_rate": 5.8167059392073945e-06,
      "loss": 0.153,
      "step": 15917
    },
    {
      "epoch": 0.4643794853842114,
      "grad_norm": 1.0098753202766408,
      "learning_rate": 5.816239846366325e-06,
      "loss": 0.1522,
      "step": 15918
    },
    {
      "epoch": 0.46440865861485503,
      "grad_norm": 0.9738587566490842,
      "learning_rate": 5.815773746238063e-06,
      "loss": 0.138,
      "step": 15919
    },
    {
      "epoch": 0.4644378318454986,
      "grad_norm": 0.7493340709827934,
      "learning_rate": 5.815307638826767e-06,
      "loss": 0.1279,
      "step": 15920
    },
    {
      "epoch": 0.46446700507614214,
      "grad_norm": 0.8826322638455888,
      "learning_rate": 5.8148415241365985e-06,
      "loss": 0.1448,
      "step": 15921
    },
    {
      "epoch": 0.4644961783067857,
      "grad_norm": 0.7779789604266115,
      "learning_rate": 5.81437540217172e-06,
      "loss": 0.1314,
      "step": 15922
    },
    {
      "epoch": 0.46452535153742924,
      "grad_norm": 0.9722819435858137,
      "learning_rate": 5.8139092729362925e-06,
      "loss": 0.1548,
      "step": 15923
    },
    {
      "epoch": 0.4645545247680728,
      "grad_norm": 0.8050178260881279,
      "learning_rate": 5.813443136434475e-06,
      "loss": 0.1209,
      "step": 15924
    },
    {
      "epoch": 0.4645836979987164,
      "grad_norm": 0.9171370105747304,
      "learning_rate": 5.812976992670434e-06,
      "loss": 0.1433,
      "step": 15925
    },
    {
      "epoch": 0.46461287122935996,
      "grad_norm": 0.8076803104860456,
      "learning_rate": 5.812510841648329e-06,
      "loss": 0.1394,
      "step": 15926
    },
    {
      "epoch": 0.4646420444600035,
      "grad_norm": 0.9973335139882092,
      "learning_rate": 5.812044683372318e-06,
      "loss": 0.1444,
      "step": 15927
    },
    {
      "epoch": 0.46467121769064706,
      "grad_norm": 0.7753992461313428,
      "learning_rate": 5.811578517846567e-06,
      "loss": 0.1407,
      "step": 15928
    },
    {
      "epoch": 0.4647003909212906,
      "grad_norm": 0.9103740934767576,
      "learning_rate": 5.81111234507524e-06,
      "loss": 0.1246,
      "step": 15929
    },
    {
      "epoch": 0.46472956415193417,
      "grad_norm": 0.9864108966598794,
      "learning_rate": 5.810646165062491e-06,
      "loss": 0.1285,
      "step": 15930
    },
    {
      "epoch": 0.4647587373825777,
      "grad_norm": 0.9102732017805926,
      "learning_rate": 5.8101799778124905e-06,
      "loss": 0.1346,
      "step": 15931
    },
    {
      "epoch": 0.46478791061322133,
      "grad_norm": 0.9376487685418402,
      "learning_rate": 5.809713783329395e-06,
      "loss": 0.1317,
      "step": 15932
    },
    {
      "epoch": 0.4648170838438649,
      "grad_norm": 1.0161822282457287,
      "learning_rate": 5.809247581617366e-06,
      "loss": 0.1367,
      "step": 15933
    },
    {
      "epoch": 0.46484625707450844,
      "grad_norm": 0.7920617594439303,
      "learning_rate": 5.808781372680571e-06,
      "loss": 0.1301,
      "step": 15934
    },
    {
      "epoch": 0.464875430305152,
      "grad_norm": 1.4013070796833542,
      "learning_rate": 5.808315156523168e-06,
      "loss": 0.1397,
      "step": 15935
    },
    {
      "epoch": 0.46490460353579555,
      "grad_norm": 0.9982127411340717,
      "learning_rate": 5.807848933149319e-06,
      "loss": 0.1309,
      "step": 15936
    },
    {
      "epoch": 0.4649337767664391,
      "grad_norm": 0.8397415543048476,
      "learning_rate": 5.807382702563188e-06,
      "loss": 0.1414,
      "step": 15937
    },
    {
      "epoch": 0.46496294999708265,
      "grad_norm": 1.347567736516576,
      "learning_rate": 5.806916464768938e-06,
      "loss": 0.1287,
      "step": 15938
    },
    {
      "epoch": 0.46499212322772626,
      "grad_norm": 1.4014554496035387,
      "learning_rate": 5.80645021977073e-06,
      "loss": 0.128,
      "step": 15939
    },
    {
      "epoch": 0.4650212964583698,
      "grad_norm": 0.8452629014948132,
      "learning_rate": 5.8059839675727255e-06,
      "loss": 0.1406,
      "step": 15940
    },
    {
      "epoch": 0.46505046968901337,
      "grad_norm": 0.8116111608551361,
      "learning_rate": 5.8055177081790916e-06,
      "loss": 0.1409,
      "step": 15941
    },
    {
      "epoch": 0.4650796429196569,
      "grad_norm": 1.0389772590121729,
      "learning_rate": 5.805051441593985e-06,
      "loss": 0.1388,
      "step": 15942
    },
    {
      "epoch": 0.4651088161503005,
      "grad_norm": 0.8691310836913704,
      "learning_rate": 5.804585167821572e-06,
      "loss": 0.118,
      "step": 15943
    },
    {
      "epoch": 0.46513798938094403,
      "grad_norm": 0.7957873510033092,
      "learning_rate": 5.804118886866016e-06,
      "loss": 0.1363,
      "step": 15944
    },
    {
      "epoch": 0.4651671626115876,
      "grad_norm": 0.9212943282096638,
      "learning_rate": 5.803652598731476e-06,
      "loss": 0.1336,
      "step": 15945
    },
    {
      "epoch": 0.4651963358422312,
      "grad_norm": 0.7731884868494172,
      "learning_rate": 5.80318630342212e-06,
      "loss": 0.1247,
      "step": 15946
    },
    {
      "epoch": 0.46522550907287474,
      "grad_norm": 0.7845451088379048,
      "learning_rate": 5.802720000942108e-06,
      "loss": 0.1445,
      "step": 15947
    },
    {
      "epoch": 0.4652546823035183,
      "grad_norm": 0.8924213228390518,
      "learning_rate": 5.802253691295602e-06,
      "loss": 0.145,
      "step": 15948
    },
    {
      "epoch": 0.46528385553416185,
      "grad_norm": 0.8146316307558824,
      "learning_rate": 5.801787374486768e-06,
      "loss": 0.1665,
      "step": 15949
    },
    {
      "epoch": 0.4653130287648054,
      "grad_norm": 0.7357533913377574,
      "learning_rate": 5.801321050519768e-06,
      "loss": 0.1299,
      "step": 15950
    },
    {
      "epoch": 0.46534220199544896,
      "grad_norm": 1.0523115059686823,
      "learning_rate": 5.800854719398764e-06,
      "loss": 0.1381,
      "step": 15951
    },
    {
      "epoch": 0.46537137522609257,
      "grad_norm": 1.0395106948844963,
      "learning_rate": 5.80038838112792e-06,
      "loss": 0.1508,
      "step": 15952
    },
    {
      "epoch": 0.4654005484567361,
      "grad_norm": 0.7198719787956999,
      "learning_rate": 5.799922035711401e-06,
      "loss": 0.1473,
      "step": 15953
    },
    {
      "epoch": 0.46542972168737967,
      "grad_norm": 0.7866665469816593,
      "learning_rate": 5.799455683153367e-06,
      "loss": 0.1229,
      "step": 15954
    },
    {
      "epoch": 0.4654588949180232,
      "grad_norm": 0.822350925080411,
      "learning_rate": 5.798989323457984e-06,
      "loss": 0.1405,
      "step": 15955
    },
    {
      "epoch": 0.4654880681486668,
      "grad_norm": 0.6539063365948917,
      "learning_rate": 5.798522956629418e-06,
      "loss": 0.1279,
      "step": 15956
    },
    {
      "epoch": 0.46551724137931033,
      "grad_norm": 0.9022568533322931,
      "learning_rate": 5.798056582671825e-06,
      "loss": 0.1308,
      "step": 15957
    },
    {
      "epoch": 0.4655464146099539,
      "grad_norm": 0.8149328748712105,
      "learning_rate": 5.797590201589376e-06,
      "loss": 0.1523,
      "step": 15958
    },
    {
      "epoch": 0.4655755878405975,
      "grad_norm": 0.8771860608393286,
      "learning_rate": 5.7971238133862324e-06,
      "loss": 0.1322,
      "step": 15959
    },
    {
      "epoch": 0.46560476107124105,
      "grad_norm": 0.9190749951306041,
      "learning_rate": 5.796657418066556e-06,
      "loss": 0.1511,
      "step": 15960
    },
    {
      "epoch": 0.4656339343018846,
      "grad_norm": 0.8041082549146434,
      "learning_rate": 5.796191015634515e-06,
      "loss": 0.1421,
      "step": 15961
    },
    {
      "epoch": 0.46566310753252815,
      "grad_norm": 0.8021631778731418,
      "learning_rate": 5.795724606094269e-06,
      "loss": 0.1412,
      "step": 15962
    },
    {
      "epoch": 0.4656922807631717,
      "grad_norm": 0.972627726391673,
      "learning_rate": 5.795258189449983e-06,
      "loss": 0.1599,
      "step": 15963
    },
    {
      "epoch": 0.46572145399381526,
      "grad_norm": 0.911331091011921,
      "learning_rate": 5.794791765705823e-06,
      "loss": 0.1253,
      "step": 15964
    },
    {
      "epoch": 0.4657506272244588,
      "grad_norm": 0.7905016224581348,
      "learning_rate": 5.79432533486595e-06,
      "loss": 0.1567,
      "step": 15965
    },
    {
      "epoch": 0.4657798004551024,
      "grad_norm": 1.025032660651501,
      "learning_rate": 5.793858896934532e-06,
      "loss": 0.1376,
      "step": 15966
    },
    {
      "epoch": 0.465808973685746,
      "grad_norm": 0.9245182905301442,
      "learning_rate": 5.79339245191573e-06,
      "loss": 0.1552,
      "step": 15967
    },
    {
      "epoch": 0.46583814691638953,
      "grad_norm": 1.1764974665898509,
      "learning_rate": 5.79292599981371e-06,
      "loss": 0.1534,
      "step": 15968
    },
    {
      "epoch": 0.4658673201470331,
      "grad_norm": 0.8612890491522468,
      "learning_rate": 5.792459540632636e-06,
      "loss": 0.1402,
      "step": 15969
    },
    {
      "epoch": 0.46589649337767663,
      "grad_norm": 0.7767768190759483,
      "learning_rate": 5.791993074376673e-06,
      "loss": 0.1549,
      "step": 15970
    },
    {
      "epoch": 0.4659256666083202,
      "grad_norm": 0.7311189609185839,
      "learning_rate": 5.791526601049985e-06,
      "loss": 0.1436,
      "step": 15971
    },
    {
      "epoch": 0.46595483983896374,
      "grad_norm": 0.7177933757195226,
      "learning_rate": 5.791060120656735e-06,
      "loss": 0.1271,
      "step": 15972
    },
    {
      "epoch": 0.46598401306960735,
      "grad_norm": 0.7610377232259685,
      "learning_rate": 5.790593633201089e-06,
      "loss": 0.1503,
      "step": 15973
    },
    {
      "epoch": 0.4660131863002509,
      "grad_norm": 0.8474659506844789,
      "learning_rate": 5.790127138687215e-06,
      "loss": 0.1269,
      "step": 15974
    },
    {
      "epoch": 0.46604235953089446,
      "grad_norm": 0.7254915854900958,
      "learning_rate": 5.789660637119271e-06,
      "loss": 0.122,
      "step": 15975
    },
    {
      "epoch": 0.466071532761538,
      "grad_norm": 0.7989884598295096,
      "learning_rate": 5.789194128501428e-06,
      "loss": 0.147,
      "step": 15976
    },
    {
      "epoch": 0.46610070599218156,
      "grad_norm": 0.8491314139759364,
      "learning_rate": 5.788727612837846e-06,
      "loss": 0.1355,
      "step": 15977
    },
    {
      "epoch": 0.4661298792228251,
      "grad_norm": 0.761607299632363,
      "learning_rate": 5.788261090132693e-06,
      "loss": 0.1218,
      "step": 15978
    },
    {
      "epoch": 0.46615905245346867,
      "grad_norm": 0.6986230239896588,
      "learning_rate": 5.787794560390133e-06,
      "loss": 0.1347,
      "step": 15979
    },
    {
      "epoch": 0.4661882256841123,
      "grad_norm": 1.054838554348645,
      "learning_rate": 5.787328023614331e-06,
      "loss": 0.144,
      "step": 15980
    },
    {
      "epoch": 0.46621739891475583,
      "grad_norm": 0.8017458873753452,
      "learning_rate": 5.786861479809453e-06,
      "loss": 0.1497,
      "step": 15981
    },
    {
      "epoch": 0.4662465721453994,
      "grad_norm": 0.7515159455250477,
      "learning_rate": 5.786394928979663e-06,
      "loss": 0.1215,
      "step": 15982
    },
    {
      "epoch": 0.46627574537604294,
      "grad_norm": 0.7126228538215411,
      "learning_rate": 5.785928371129127e-06,
      "loss": 0.131,
      "step": 15983
    },
    {
      "epoch": 0.4663049186066865,
      "grad_norm": 0.7843490446497038,
      "learning_rate": 5.785461806262011e-06,
      "loss": 0.1183,
      "step": 15984
    },
    {
      "epoch": 0.46633409183733004,
      "grad_norm": 0.6464048953221874,
      "learning_rate": 5.784995234382478e-06,
      "loss": 0.1194,
      "step": 15985
    },
    {
      "epoch": 0.46636326506797365,
      "grad_norm": 0.6520631001183158,
      "learning_rate": 5.784528655494697e-06,
      "loss": 0.1371,
      "step": 15986
    },
    {
      "epoch": 0.4663924382986172,
      "grad_norm": 0.9026076849639647,
      "learning_rate": 5.784062069602828e-06,
      "loss": 0.1751,
      "step": 15987
    },
    {
      "epoch": 0.46642161152926076,
      "grad_norm": 0.8416307734789469,
      "learning_rate": 5.783595476711043e-06,
      "loss": 0.135,
      "step": 15988
    },
    {
      "epoch": 0.4664507847599043,
      "grad_norm": 0.6961788531818385,
      "learning_rate": 5.783128876823504e-06,
      "loss": 0.118,
      "step": 15989
    },
    {
      "epoch": 0.46647995799054787,
      "grad_norm": 0.8461906207423968,
      "learning_rate": 5.782662269944376e-06,
      "loss": 0.1398,
      "step": 15990
    },
    {
      "epoch": 0.4665091312211914,
      "grad_norm": 0.8957956304247597,
      "learning_rate": 5.782195656077828e-06,
      "loss": 0.1219,
      "step": 15991
    },
    {
      "epoch": 0.466538304451835,
      "grad_norm": 0.8668098247405077,
      "learning_rate": 5.781729035228023e-06,
      "loss": 0.1477,
      "step": 15992
    },
    {
      "epoch": 0.4665674776824786,
      "grad_norm": 0.7105224761898491,
      "learning_rate": 5.7812624073991276e-06,
      "loss": 0.1321,
      "step": 15993
    },
    {
      "epoch": 0.46659665091312214,
      "grad_norm": 1.012134838403854,
      "learning_rate": 5.7807957725953076e-06,
      "loss": 0.1468,
      "step": 15994
    },
    {
      "epoch": 0.4666258241437657,
      "grad_norm": 0.8740211865235363,
      "learning_rate": 5.78032913082073e-06,
      "loss": 0.1372,
      "step": 15995
    },
    {
      "epoch": 0.46665499737440924,
      "grad_norm": 0.8097992390360006,
      "learning_rate": 5.7798624820795605e-06,
      "loss": 0.1398,
      "step": 15996
    },
    {
      "epoch": 0.4666841706050528,
      "grad_norm": 0.8794296185515897,
      "learning_rate": 5.779395826375964e-06,
      "loss": 0.148,
      "step": 15997
    },
    {
      "epoch": 0.46671334383569635,
      "grad_norm": 0.9570177270508721,
      "learning_rate": 5.778929163714109e-06,
      "loss": 0.1562,
      "step": 15998
    },
    {
      "epoch": 0.4667425170663399,
      "grad_norm": 0.8375361115858077,
      "learning_rate": 5.77846249409816e-06,
      "loss": 0.1143,
      "step": 15999
    },
    {
      "epoch": 0.4667716902969835,
      "grad_norm": 0.9187618810046481,
      "learning_rate": 5.777995817532282e-06,
      "loss": 0.1292,
      "step": 16000
    },
    {
      "epoch": 0.46680086352762706,
      "grad_norm": 0.6914638111304797,
      "learning_rate": 5.777529134020645e-06,
      "loss": 0.1346,
      "step": 16001
    },
    {
      "epoch": 0.4668300367582706,
      "grad_norm": 0.7145671042787071,
      "learning_rate": 5.777062443567412e-06,
      "loss": 0.1461,
      "step": 16002
    },
    {
      "epoch": 0.46685920998891417,
      "grad_norm": 0.8079655492792002,
      "learning_rate": 5.7765957461767515e-06,
      "loss": 0.1321,
      "step": 16003
    },
    {
      "epoch": 0.4668883832195577,
      "grad_norm": 0.8192383471776307,
      "learning_rate": 5.776129041852831e-06,
      "loss": 0.1348,
      "step": 16004
    },
    {
      "epoch": 0.4669175564502013,
      "grad_norm": 0.8083178474820759,
      "learning_rate": 5.775662330599814e-06,
      "loss": 0.1405,
      "step": 16005
    },
    {
      "epoch": 0.46694672968084483,
      "grad_norm": 0.8795955703771724,
      "learning_rate": 5.77519561242187e-06,
      "loss": 0.157,
      "step": 16006
    },
    {
      "epoch": 0.46697590291148844,
      "grad_norm": 0.7997922708958513,
      "learning_rate": 5.7747288873231645e-06,
      "loss": 0.1226,
      "step": 16007
    },
    {
      "epoch": 0.467005076142132,
      "grad_norm": 0.776315606649705,
      "learning_rate": 5.774262155307863e-06,
      "loss": 0.1233,
      "step": 16008
    },
    {
      "epoch": 0.46703424937277555,
      "grad_norm": 0.8044015518766559,
      "learning_rate": 5.773795416380135e-06,
      "loss": 0.1416,
      "step": 16009
    },
    {
      "epoch": 0.4670634226034191,
      "grad_norm": 0.8374210098741632,
      "learning_rate": 5.773328670544146e-06,
      "loss": 0.1358,
      "step": 16010
    },
    {
      "epoch": 0.46709259583406265,
      "grad_norm": 0.8489569119467483,
      "learning_rate": 5.772861917804064e-06,
      "loss": 0.1274,
      "step": 16011
    },
    {
      "epoch": 0.4671217690647062,
      "grad_norm": 0.6620216492087456,
      "learning_rate": 5.772395158164054e-06,
      "loss": 0.1254,
      "step": 16012
    },
    {
      "epoch": 0.4671509422953498,
      "grad_norm": 1.1379218990602393,
      "learning_rate": 5.771928391628284e-06,
      "loss": 0.1233,
      "step": 16013
    },
    {
      "epoch": 0.46718011552599337,
      "grad_norm": 0.9407417004271419,
      "learning_rate": 5.771461618200923e-06,
      "loss": 0.1327,
      "step": 16014
    },
    {
      "epoch": 0.4672092887566369,
      "grad_norm": 0.8260911501985518,
      "learning_rate": 5.770994837886137e-06,
      "loss": 0.1374,
      "step": 16015
    },
    {
      "epoch": 0.4672384619872805,
      "grad_norm": 0.8446010490366636,
      "learning_rate": 5.770528050688093e-06,
      "loss": 0.1131,
      "step": 16016
    },
    {
      "epoch": 0.467267635217924,
      "grad_norm": 0.7865372069291242,
      "learning_rate": 5.770061256610957e-06,
      "loss": 0.1366,
      "step": 16017
    },
    {
      "epoch": 0.4672968084485676,
      "grad_norm": 0.8856937467203092,
      "learning_rate": 5.769594455658899e-06,
      "loss": 0.0975,
      "step": 16018
    },
    {
      "epoch": 0.46732598167921113,
      "grad_norm": 1.0385318123528824,
      "learning_rate": 5.7691276478360854e-06,
      "loss": 0.1515,
      "step": 16019
    },
    {
      "epoch": 0.46735515490985474,
      "grad_norm": 0.8525272348093069,
      "learning_rate": 5.768660833146683e-06,
      "loss": 0.1296,
      "step": 16020
    },
    {
      "epoch": 0.4673843281404983,
      "grad_norm": 0.7564119644539686,
      "learning_rate": 5.7681940115948624e-06,
      "loss": 0.1005,
      "step": 16021
    },
    {
      "epoch": 0.46741350137114185,
      "grad_norm": 1.032340142288582,
      "learning_rate": 5.767727183184787e-06,
      "loss": 0.1272,
      "step": 16022
    },
    {
      "epoch": 0.4674426746017854,
      "grad_norm": 0.9099153002462543,
      "learning_rate": 5.767260347920627e-06,
      "loss": 0.1337,
      "step": 16023
    },
    {
      "epoch": 0.46747184783242896,
      "grad_norm": 0.846177825244513,
      "learning_rate": 5.766793505806551e-06,
      "loss": 0.1474,
      "step": 16024
    },
    {
      "epoch": 0.4675010210630725,
      "grad_norm": 0.9650979193854139,
      "learning_rate": 5.766326656846723e-06,
      "loss": 0.1163,
      "step": 16025
    },
    {
      "epoch": 0.46753019429371606,
      "grad_norm": 1.1737999601585896,
      "learning_rate": 5.765859801045316e-06,
      "loss": 0.1446,
      "step": 16026
    },
    {
      "epoch": 0.46755936752435967,
      "grad_norm": 0.7507372097227764,
      "learning_rate": 5.765392938406494e-06,
      "loss": 0.144,
      "step": 16027
    },
    {
      "epoch": 0.4675885407550032,
      "grad_norm": 1.0289732575570227,
      "learning_rate": 5.764926068934428e-06,
      "loss": 0.1553,
      "step": 16028
    },
    {
      "epoch": 0.4676177139856468,
      "grad_norm": 0.7891455298453132,
      "learning_rate": 5.764459192633282e-06,
      "loss": 0.1426,
      "step": 16029
    },
    {
      "epoch": 0.46764688721629033,
      "grad_norm": 0.8292319534656433,
      "learning_rate": 5.763992309507229e-06,
      "loss": 0.1417,
      "step": 16030
    },
    {
      "epoch": 0.4676760604469339,
      "grad_norm": 0.9021045559202837,
      "learning_rate": 5.763525419560436e-06,
      "loss": 0.1526,
      "step": 16031
    },
    {
      "epoch": 0.46770523367757744,
      "grad_norm": 0.8264634081061879,
      "learning_rate": 5.763058522797068e-06,
      "loss": 0.1202,
      "step": 16032
    },
    {
      "epoch": 0.467734406908221,
      "grad_norm": 0.652515074592792,
      "learning_rate": 5.762591619221297e-06,
      "loss": 0.1367,
      "step": 16033
    },
    {
      "epoch": 0.4677635801388646,
      "grad_norm": 0.7340814911279222,
      "learning_rate": 5.762124708837291e-06,
      "loss": 0.1712,
      "step": 16034
    },
    {
      "epoch": 0.46779275336950815,
      "grad_norm": 0.8506201084744504,
      "learning_rate": 5.7616577916492145e-06,
      "loss": 0.124,
      "step": 16035
    },
    {
      "epoch": 0.4678219266001517,
      "grad_norm": 0.7813046536314805,
      "learning_rate": 5.761190867661243e-06,
      "loss": 0.1523,
      "step": 16036
    },
    {
      "epoch": 0.46785109983079526,
      "grad_norm": 0.7246186837938973,
      "learning_rate": 5.760723936877538e-06,
      "loss": 0.1585,
      "step": 16037
    },
    {
      "epoch": 0.4678802730614388,
      "grad_norm": 0.8265533346747397,
      "learning_rate": 5.760256999302273e-06,
      "loss": 0.1685,
      "step": 16038
    },
    {
      "epoch": 0.46790944629208236,
      "grad_norm": 0.7512918950003025,
      "learning_rate": 5.759790054939614e-06,
      "loss": 0.132,
      "step": 16039
    },
    {
      "epoch": 0.467938619522726,
      "grad_norm": 0.6419272741135773,
      "learning_rate": 5.7593231037937306e-06,
      "loss": 0.1321,
      "step": 16040
    },
    {
      "epoch": 0.4679677927533695,
      "grad_norm": 1.0469292057265425,
      "learning_rate": 5.758856145868792e-06,
      "loss": 0.135,
      "step": 16041
    },
    {
      "epoch": 0.4679969659840131,
      "grad_norm": 0.844045201941579,
      "learning_rate": 5.758389181168967e-06,
      "loss": 0.1423,
      "step": 16042
    },
    {
      "epoch": 0.46802613921465663,
      "grad_norm": 0.7167638581889434,
      "learning_rate": 5.757922209698424e-06,
      "loss": 0.1267,
      "step": 16043
    },
    {
      "epoch": 0.4680553124453002,
      "grad_norm": 0.7103636203539796,
      "learning_rate": 5.757455231461334e-06,
      "loss": 0.146,
      "step": 16044
    },
    {
      "epoch": 0.46808448567594374,
      "grad_norm": 0.6696736563358334,
      "learning_rate": 5.756988246461863e-06,
      "loss": 0.1391,
      "step": 16045
    },
    {
      "epoch": 0.4681136589065873,
      "grad_norm": 0.7317317777456038,
      "learning_rate": 5.7565212547041835e-06,
      "loss": 0.1278,
      "step": 16046
    },
    {
      "epoch": 0.4681428321372309,
      "grad_norm": 0.637411729797091,
      "learning_rate": 5.75605425619246e-06,
      "loss": 0.1503,
      "step": 16047
    },
    {
      "epoch": 0.46817200536787446,
      "grad_norm": 1.0490510121574332,
      "learning_rate": 5.755587250930866e-06,
      "loss": 0.1418,
      "step": 16048
    },
    {
      "epoch": 0.468201178598518,
      "grad_norm": 0.8138231282346315,
      "learning_rate": 5.75512023892357e-06,
      "loss": 0.1409,
      "step": 16049
    },
    {
      "epoch": 0.46823035182916156,
      "grad_norm": 0.7919611992138462,
      "learning_rate": 5.75465322017474e-06,
      "loss": 0.124,
      "step": 16050
    },
    {
      "epoch": 0.4682595250598051,
      "grad_norm": 1.0982884992699131,
      "learning_rate": 5.754186194688547e-06,
      "loss": 0.1767,
      "step": 16051
    },
    {
      "epoch": 0.46828869829044867,
      "grad_norm": 1.0299722455438751,
      "learning_rate": 5.753719162469159e-06,
      "loss": 0.1439,
      "step": 16052
    },
    {
      "epoch": 0.4683178715210922,
      "grad_norm": 0.6987058633317436,
      "learning_rate": 5.753252123520746e-06,
      "loss": 0.1132,
      "step": 16053
    },
    {
      "epoch": 0.46834704475173583,
      "grad_norm": 1.0383339761552182,
      "learning_rate": 5.7527850778474795e-06,
      "loss": 0.143,
      "step": 16054
    },
    {
      "epoch": 0.4683762179823794,
      "grad_norm": 1.1578639748397883,
      "learning_rate": 5.752318025453525e-06,
      "loss": 0.1486,
      "step": 16055
    },
    {
      "epoch": 0.46840539121302294,
      "grad_norm": 0.7351432849730697,
      "learning_rate": 5.751850966343057e-06,
      "loss": 0.1488,
      "step": 16056
    },
    {
      "epoch": 0.4684345644436665,
      "grad_norm": 0.843477890384064,
      "learning_rate": 5.751383900520241e-06,
      "loss": 0.1113,
      "step": 16057
    },
    {
      "epoch": 0.46846373767431004,
      "grad_norm": 0.8823683745562853,
      "learning_rate": 5.75091682798925e-06,
      "loss": 0.1327,
      "step": 16058
    },
    {
      "epoch": 0.4684929109049536,
      "grad_norm": 0.8005412105676123,
      "learning_rate": 5.750449748754253e-06,
      "loss": 0.1338,
      "step": 16059
    },
    {
      "epoch": 0.46852208413559715,
      "grad_norm": 0.968143710825528,
      "learning_rate": 5.74998266281942e-06,
      "loss": 0.1686,
      "step": 16060
    },
    {
      "epoch": 0.46855125736624076,
      "grad_norm": 0.8627773213283976,
      "learning_rate": 5.7495155701889215e-06,
      "loss": 0.1174,
      "step": 16061
    },
    {
      "epoch": 0.4685804305968843,
      "grad_norm": 1.2187515398357416,
      "learning_rate": 5.749048470866925e-06,
      "loss": 0.1568,
      "step": 16062
    },
    {
      "epoch": 0.46860960382752787,
      "grad_norm": 1.0308792401904288,
      "learning_rate": 5.748581364857603e-06,
      "loss": 0.1558,
      "step": 16063
    },
    {
      "epoch": 0.4686387770581714,
      "grad_norm": 0.9357220981721602,
      "learning_rate": 5.748114252165127e-06,
      "loss": 0.1393,
      "step": 16064
    },
    {
      "epoch": 0.46866795028881497,
      "grad_norm": 0.8992948611466673,
      "learning_rate": 5.747647132793662e-06,
      "loss": 0.1467,
      "step": 16065
    },
    {
      "epoch": 0.4686971235194585,
      "grad_norm": 0.9082733345381605,
      "learning_rate": 5.747180006747386e-06,
      "loss": 0.1695,
      "step": 16066
    },
    {
      "epoch": 0.46872629675010213,
      "grad_norm": 0.8600373402962813,
      "learning_rate": 5.746712874030462e-06,
      "loss": 0.1391,
      "step": 16067
    },
    {
      "epoch": 0.4687554699807457,
      "grad_norm": 0.8737621041818534,
      "learning_rate": 5.746245734647066e-06,
      "loss": 0.1266,
      "step": 16068
    },
    {
      "epoch": 0.46878464321138924,
      "grad_norm": 1.2121580779759222,
      "learning_rate": 5.745778588601365e-06,
      "loss": 0.1628,
      "step": 16069
    },
    {
      "epoch": 0.4688138164420328,
      "grad_norm": 0.7783008039871504,
      "learning_rate": 5.745311435897531e-06,
      "loss": 0.1463,
      "step": 16070
    },
    {
      "epoch": 0.46884298967267635,
      "grad_norm": 0.8315272123788562,
      "learning_rate": 5.744844276539734e-06,
      "loss": 0.132,
      "step": 16071
    },
    {
      "epoch": 0.4688721629033199,
      "grad_norm": 0.8048650563939992,
      "learning_rate": 5.744377110532146e-06,
      "loss": 0.1237,
      "step": 16072
    },
    {
      "epoch": 0.46890133613396345,
      "grad_norm": 1.011428222566658,
      "learning_rate": 5.7439099378789366e-06,
      "loss": 0.1364,
      "step": 16073
    },
    {
      "epoch": 0.46893050936460706,
      "grad_norm": 0.8505536900192274,
      "learning_rate": 5.743442758584277e-06,
      "loss": 0.1274,
      "step": 16074
    },
    {
      "epoch": 0.4689596825952506,
      "grad_norm": 0.9729993148155971,
      "learning_rate": 5.742975572652337e-06,
      "loss": 0.123,
      "step": 16075
    },
    {
      "epoch": 0.46898885582589417,
      "grad_norm": 1.0459297712761293,
      "learning_rate": 5.74250838008729e-06,
      "loss": 0.1469,
      "step": 16076
    },
    {
      "epoch": 0.4690180290565377,
      "grad_norm": 0.8132158729836365,
      "learning_rate": 5.742041180893303e-06,
      "loss": 0.1392,
      "step": 16077
    },
    {
      "epoch": 0.4690472022871813,
      "grad_norm": 0.8584280553812293,
      "learning_rate": 5.741573975074551e-06,
      "loss": 0.1159,
      "step": 16078
    },
    {
      "epoch": 0.46907637551782483,
      "grad_norm": 0.6767765047043773,
      "learning_rate": 5.741106762635205e-06,
      "loss": 0.1219,
      "step": 16079
    },
    {
      "epoch": 0.4691055487484684,
      "grad_norm": 1.1232119840550205,
      "learning_rate": 5.740639543579433e-06,
      "loss": 0.1369,
      "step": 16080
    },
    {
      "epoch": 0.469134721979112,
      "grad_norm": 0.796951447742278,
      "learning_rate": 5.740172317911409e-06,
      "loss": 0.1359,
      "step": 16081
    },
    {
      "epoch": 0.46916389520975554,
      "grad_norm": 0.8761079062131024,
      "learning_rate": 5.739705085635302e-06,
      "loss": 0.1431,
      "step": 16082
    },
    {
      "epoch": 0.4691930684403991,
      "grad_norm": 0.9884386708917005,
      "learning_rate": 5.739237846755285e-06,
      "loss": 0.13,
      "step": 16083
    },
    {
      "epoch": 0.46922224167104265,
      "grad_norm": 1.1919464567864113,
      "learning_rate": 5.738770601275529e-06,
      "loss": 0.1246,
      "step": 16084
    },
    {
      "epoch": 0.4692514149016862,
      "grad_norm": 1.045991070029729,
      "learning_rate": 5.738303349200206e-06,
      "loss": 0.1257,
      "step": 16085
    },
    {
      "epoch": 0.46928058813232976,
      "grad_norm": 0.7959565312596193,
      "learning_rate": 5.7378360905334865e-06,
      "loss": 0.1396,
      "step": 16086
    },
    {
      "epoch": 0.4693097613629733,
      "grad_norm": 1.5820160593183699,
      "learning_rate": 5.737368825279542e-06,
      "loss": 0.1476,
      "step": 16087
    },
    {
      "epoch": 0.4693389345936169,
      "grad_norm": 1.1815826628493877,
      "learning_rate": 5.736901553442545e-06,
      "loss": 0.1402,
      "step": 16088
    },
    {
      "epoch": 0.46936810782426047,
      "grad_norm": 0.7513949019477715,
      "learning_rate": 5.736434275026667e-06,
      "loss": 0.1334,
      "step": 16089
    },
    {
      "epoch": 0.469397281054904,
      "grad_norm": 1.2522696477108841,
      "learning_rate": 5.735966990036079e-06,
      "loss": 0.1392,
      "step": 16090
    },
    {
      "epoch": 0.4694264542855476,
      "grad_norm": 1.0568337995409316,
      "learning_rate": 5.735499698474956e-06,
      "loss": 0.139,
      "step": 16091
    },
    {
      "epoch": 0.46945562751619113,
      "grad_norm": 1.0403577071745036,
      "learning_rate": 5.735032400347463e-06,
      "loss": 0.1388,
      "step": 16092
    },
    {
      "epoch": 0.4694848007468347,
      "grad_norm": 0.7900333409971576,
      "learning_rate": 5.734565095657779e-06,
      "loss": 0.1243,
      "step": 16093
    },
    {
      "epoch": 0.46951397397747824,
      "grad_norm": 0.9720347804398541,
      "learning_rate": 5.7340977844100735e-06,
      "loss": 0.1369,
      "step": 16094
    },
    {
      "epoch": 0.46954314720812185,
      "grad_norm": 0.9672105474943895,
      "learning_rate": 5.733630466608516e-06,
      "loss": 0.1358,
      "step": 16095
    },
    {
      "epoch": 0.4695723204387654,
      "grad_norm": 1.0027947901199838,
      "learning_rate": 5.733163142257283e-06,
      "loss": 0.1509,
      "step": 16096
    },
    {
      "epoch": 0.46960149366940895,
      "grad_norm": 0.6224211116672625,
      "learning_rate": 5.732695811360543e-06,
      "loss": 0.1257,
      "step": 16097
    },
    {
      "epoch": 0.4696306669000525,
      "grad_norm": 0.7699030096852254,
      "learning_rate": 5.732228473922471e-06,
      "loss": 0.176,
      "step": 16098
    },
    {
      "epoch": 0.46965984013069606,
      "grad_norm": 0.8856354430030329,
      "learning_rate": 5.731761129947238e-06,
      "loss": 0.1186,
      "step": 16099
    },
    {
      "epoch": 0.4696890133613396,
      "grad_norm": 0.8167555368557391,
      "learning_rate": 5.731293779439015e-06,
      "loss": 0.1285,
      "step": 16100
    },
    {
      "epoch": 0.4697181865919832,
      "grad_norm": 0.9193967476538044,
      "learning_rate": 5.730826422401976e-06,
      "loss": 0.1589,
      "step": 16101
    },
    {
      "epoch": 0.4697473598226268,
      "grad_norm": 0.6393872877340689,
      "learning_rate": 5.730359058840294e-06,
      "loss": 0.1417,
      "step": 16102
    },
    {
      "epoch": 0.46977653305327033,
      "grad_norm": 1.110856759356158,
      "learning_rate": 5.7298916887581405e-06,
      "loss": 0.1457,
      "step": 16103
    },
    {
      "epoch": 0.4698057062839139,
      "grad_norm": 0.9144741724024261,
      "learning_rate": 5.729424312159687e-06,
      "loss": 0.1497,
      "step": 16104
    },
    {
      "epoch": 0.46983487951455744,
      "grad_norm": 0.648704093586818,
      "learning_rate": 5.728956929049109e-06,
      "loss": 0.1298,
      "step": 16105
    },
    {
      "epoch": 0.469864052745201,
      "grad_norm": 0.8048886852999525,
      "learning_rate": 5.728489539430576e-06,
      "loss": 0.1331,
      "step": 16106
    },
    {
      "epoch": 0.46989322597584454,
      "grad_norm": 0.7897477806332242,
      "learning_rate": 5.728022143308264e-06,
      "loss": 0.1339,
      "step": 16107
    },
    {
      "epoch": 0.46992239920648815,
      "grad_norm": 0.8601492069847314,
      "learning_rate": 5.727554740686343e-06,
      "loss": 0.1367,
      "step": 16108
    },
    {
      "epoch": 0.4699515724371317,
      "grad_norm": 0.9207029387636473,
      "learning_rate": 5.727087331568986e-06,
      "loss": 0.1553,
      "step": 16109
    },
    {
      "epoch": 0.46998074566777526,
      "grad_norm": 0.7737759715592784,
      "learning_rate": 5.726619915960368e-06,
      "loss": 0.1069,
      "step": 16110
    },
    {
      "epoch": 0.4700099188984188,
      "grad_norm": 0.7515370629130431,
      "learning_rate": 5.726152493864663e-06,
      "loss": 0.1598,
      "step": 16111
    },
    {
      "epoch": 0.47003909212906236,
      "grad_norm": 0.8571022277462108,
      "learning_rate": 5.725685065286038e-06,
      "loss": 0.1471,
      "step": 16112
    },
    {
      "epoch": 0.4700682653597059,
      "grad_norm": 0.8753528201700052,
      "learning_rate": 5.725217630228673e-06,
      "loss": 0.1596,
      "step": 16113
    },
    {
      "epoch": 0.47009743859034947,
      "grad_norm": 4.859814509656574,
      "learning_rate": 5.724750188696737e-06,
      "loss": 0.1489,
      "step": 16114
    },
    {
      "epoch": 0.4701266118209931,
      "grad_norm": 1.4828493024288039,
      "learning_rate": 5.724282740694404e-06,
      "loss": 0.1154,
      "step": 16115
    },
    {
      "epoch": 0.47015578505163663,
      "grad_norm": 0.8638338229309651,
      "learning_rate": 5.723815286225848e-06,
      "loss": 0.1447,
      "step": 16116
    },
    {
      "epoch": 0.4701849582822802,
      "grad_norm": 0.7688649662024418,
      "learning_rate": 5.723347825295243e-06,
      "loss": 0.1237,
      "step": 16117
    },
    {
      "epoch": 0.47021413151292374,
      "grad_norm": 0.7522221028055127,
      "learning_rate": 5.7228803579067594e-06,
      "loss": 0.1152,
      "step": 16118
    },
    {
      "epoch": 0.4702433047435673,
      "grad_norm": 0.8944578795617836,
      "learning_rate": 5.722412884064572e-06,
      "loss": 0.1298,
      "step": 16119
    },
    {
      "epoch": 0.47027247797421085,
      "grad_norm": 1.084083705789618,
      "learning_rate": 5.7219454037728564e-06,
      "loss": 0.1508,
      "step": 16120
    },
    {
      "epoch": 0.4703016512048544,
      "grad_norm": 0.9507613558325935,
      "learning_rate": 5.721477917035785e-06,
      "loss": 0.1362,
      "step": 16121
    },
    {
      "epoch": 0.470330824435498,
      "grad_norm": 0.7930644797736451,
      "learning_rate": 5.7210104238575295e-06,
      "loss": 0.1201,
      "step": 16122
    },
    {
      "epoch": 0.47035999766614156,
      "grad_norm": 0.6314872642891975,
      "learning_rate": 5.720542924242265e-06,
      "loss": 0.1296,
      "step": 16123
    },
    {
      "epoch": 0.4703891708967851,
      "grad_norm": 0.8494723624941237,
      "learning_rate": 5.720075418194166e-06,
      "loss": 0.1368,
      "step": 16124
    },
    {
      "epoch": 0.47041834412742867,
      "grad_norm": 0.9204880438144835,
      "learning_rate": 5.719607905717406e-06,
      "loss": 0.1355,
      "step": 16125
    },
    {
      "epoch": 0.4704475173580722,
      "grad_norm": 0.7134989970552862,
      "learning_rate": 5.719140386816159e-06,
      "loss": 0.1282,
      "step": 16126
    },
    {
      "epoch": 0.4704766905887158,
      "grad_norm": 0.8339145112729058,
      "learning_rate": 5.718672861494597e-06,
      "loss": 0.1516,
      "step": 16127
    },
    {
      "epoch": 0.4705058638193594,
      "grad_norm": 0.9566071881084267,
      "learning_rate": 5.718205329756895e-06,
      "loss": 0.1565,
      "step": 16128
    },
    {
      "epoch": 0.47053503705000294,
      "grad_norm": 0.854543237254776,
      "learning_rate": 5.7177377916072285e-06,
      "loss": 0.1645,
      "step": 16129
    },
    {
      "epoch": 0.4705642102806465,
      "grad_norm": 0.9789235876882216,
      "learning_rate": 5.717270247049769e-06,
      "loss": 0.1407,
      "step": 16130
    },
    {
      "epoch": 0.47059338351129004,
      "grad_norm": 0.7974384768832588,
      "learning_rate": 5.7168026960886925e-06,
      "loss": 0.1357,
      "step": 16131
    },
    {
      "epoch": 0.4706225567419336,
      "grad_norm": 0.8275988235439948,
      "learning_rate": 5.716335138728173e-06,
      "loss": 0.1263,
      "step": 16132
    },
    {
      "epoch": 0.47065172997257715,
      "grad_norm": 0.9523676958991666,
      "learning_rate": 5.715867574972384e-06,
      "loss": 0.1621,
      "step": 16133
    },
    {
      "epoch": 0.4706809032032207,
      "grad_norm": 0.7635523007543593,
      "learning_rate": 5.7154000048255e-06,
      "loss": 0.1603,
      "step": 16134
    },
    {
      "epoch": 0.4707100764338643,
      "grad_norm": 0.8623497340330429,
      "learning_rate": 5.7149324282916966e-06,
      "loss": 0.1281,
      "step": 16135
    },
    {
      "epoch": 0.47073924966450786,
      "grad_norm": 0.8510871688185185,
      "learning_rate": 5.714464845375146e-06,
      "loss": 0.1225,
      "step": 16136
    },
    {
      "epoch": 0.4707684228951514,
      "grad_norm": 0.7639056998088636,
      "learning_rate": 5.7139972560800235e-06,
      "loss": 0.1646,
      "step": 16137
    },
    {
      "epoch": 0.47079759612579497,
      "grad_norm": 0.9565157521139924,
      "learning_rate": 5.713529660410505e-06,
      "loss": 0.1517,
      "step": 16138
    },
    {
      "epoch": 0.4708267693564385,
      "grad_norm": 0.7635071253582182,
      "learning_rate": 5.713062058370763e-06,
      "loss": 0.1245,
      "step": 16139
    },
    {
      "epoch": 0.4708559425870821,
      "grad_norm": 0.7291809115107964,
      "learning_rate": 5.7125944499649745e-06,
      "loss": 0.1233,
      "step": 16140
    },
    {
      "epoch": 0.47088511581772563,
      "grad_norm": 0.8774965193916223,
      "learning_rate": 5.712126835197313e-06,
      "loss": 0.1439,
      "step": 16141
    },
    {
      "epoch": 0.47091428904836924,
      "grad_norm": 0.8285968761142548,
      "learning_rate": 5.711659214071951e-06,
      "loss": 0.13,
      "step": 16142
    },
    {
      "epoch": 0.4709434622790128,
      "grad_norm": 0.6676578682951105,
      "learning_rate": 5.711191586593068e-06,
      "loss": 0.1202,
      "step": 16143
    },
    {
      "epoch": 0.47097263550965635,
      "grad_norm": 0.666544706641162,
      "learning_rate": 5.710723952764835e-06,
      "loss": 0.1268,
      "step": 16144
    },
    {
      "epoch": 0.4710018087402999,
      "grad_norm": 0.7635204443806743,
      "learning_rate": 5.7102563125914265e-06,
      "loss": 0.1416,
      "step": 16145
    },
    {
      "epoch": 0.47103098197094345,
      "grad_norm": 0.7802522015872385,
      "learning_rate": 5.709788666077022e-06,
      "loss": 0.1194,
      "step": 16146
    },
    {
      "epoch": 0.471060155201587,
      "grad_norm": 0.7881972123320081,
      "learning_rate": 5.709321013225792e-06,
      "loss": 0.1362,
      "step": 16147
    },
    {
      "epoch": 0.47108932843223056,
      "grad_norm": 0.9339135012352625,
      "learning_rate": 5.708853354041914e-06,
      "loss": 0.1373,
      "step": 16148
    },
    {
      "epoch": 0.47111850166287417,
      "grad_norm": 0.7428028039373517,
      "learning_rate": 5.708385688529563e-06,
      "loss": 0.1447,
      "step": 16149
    },
    {
      "epoch": 0.4711476748935177,
      "grad_norm": 0.7557091850447257,
      "learning_rate": 5.707918016692913e-06,
      "loss": 0.1136,
      "step": 16150
    },
    {
      "epoch": 0.4711768481241613,
      "grad_norm": 0.930051679838062,
      "learning_rate": 5.7074503385361406e-06,
      "loss": 0.1528,
      "step": 16151
    },
    {
      "epoch": 0.4712060213548048,
      "grad_norm": 0.914309014482323,
      "learning_rate": 5.70698265406342e-06,
      "loss": 0.1414,
      "step": 16152
    },
    {
      "epoch": 0.4712351945854484,
      "grad_norm": 1.0921308307739288,
      "learning_rate": 5.706514963278926e-06,
      "loss": 0.1458,
      "step": 16153
    },
    {
      "epoch": 0.47126436781609193,
      "grad_norm": 0.8584718839188415,
      "learning_rate": 5.706047266186836e-06,
      "loss": 0.1254,
      "step": 16154
    },
    {
      "epoch": 0.47129354104673554,
      "grad_norm": 0.7586975375020832,
      "learning_rate": 5.705579562791325e-06,
      "loss": 0.1506,
      "step": 16155
    },
    {
      "epoch": 0.4713227142773791,
      "grad_norm": 0.9601549045653952,
      "learning_rate": 5.705111853096569e-06,
      "loss": 0.1453,
      "step": 16156
    },
    {
      "epoch": 0.47135188750802265,
      "grad_norm": 0.9988405830758633,
      "learning_rate": 5.70464413710674e-06,
      "loss": 0.1028,
      "step": 16157
    },
    {
      "epoch": 0.4713810607386662,
      "grad_norm": 0.7686197023796981,
      "learning_rate": 5.704176414826018e-06,
      "loss": 0.1393,
      "step": 16158
    },
    {
      "epoch": 0.47141023396930976,
      "grad_norm": 0.9426717499516286,
      "learning_rate": 5.703708686258577e-06,
      "loss": 0.1318,
      "step": 16159
    },
    {
      "epoch": 0.4714394071999533,
      "grad_norm": 0.9483265331401411,
      "learning_rate": 5.703240951408592e-06,
      "loss": 0.1424,
      "step": 16160
    },
    {
      "epoch": 0.47146858043059686,
      "grad_norm": 0.9296762430677868,
      "learning_rate": 5.7027732102802416e-06,
      "loss": 0.118,
      "step": 16161
    },
    {
      "epoch": 0.47149775366124047,
      "grad_norm": 0.6927683269348481,
      "learning_rate": 5.702305462877697e-06,
      "loss": 0.1589,
      "step": 16162
    },
    {
      "epoch": 0.471526926891884,
      "grad_norm": 0.6899457308718389,
      "learning_rate": 5.701837709205139e-06,
      "loss": 0.1109,
      "step": 16163
    },
    {
      "epoch": 0.4715561001225276,
      "grad_norm": 0.9561555591782459,
      "learning_rate": 5.70136994926674e-06,
      "loss": 0.1595,
      "step": 16164
    },
    {
      "epoch": 0.47158527335317113,
      "grad_norm": 0.9913448171536647,
      "learning_rate": 5.700902183066679e-06,
      "loss": 0.1158,
      "step": 16165
    },
    {
      "epoch": 0.4716144465838147,
      "grad_norm": 0.7727073830246254,
      "learning_rate": 5.70043441060913e-06,
      "loss": 0.1354,
      "step": 16166
    },
    {
      "epoch": 0.47164361981445824,
      "grad_norm": 0.96473705052807,
      "learning_rate": 5.699966631898269e-06,
      "loss": 0.1127,
      "step": 16167
    },
    {
      "epoch": 0.4716727930451018,
      "grad_norm": 0.7495226406181917,
      "learning_rate": 5.699498846938274e-06,
      "loss": 0.1299,
      "step": 16168
    },
    {
      "epoch": 0.4717019662757454,
      "grad_norm": 0.8292004078232724,
      "learning_rate": 5.699031055733319e-06,
      "loss": 0.1492,
      "step": 16169
    },
    {
      "epoch": 0.47173113950638895,
      "grad_norm": 0.7441635103053541,
      "learning_rate": 5.698563258287584e-06,
      "loss": 0.165,
      "step": 16170
    },
    {
      "epoch": 0.4717603127370325,
      "grad_norm": 0.6816748903949795,
      "learning_rate": 5.698095454605243e-06,
      "loss": 0.1299,
      "step": 16171
    },
    {
      "epoch": 0.47178948596767606,
      "grad_norm": 0.7927442000531812,
      "learning_rate": 5.6976276446904684e-06,
      "loss": 0.1514,
      "step": 16172
    },
    {
      "epoch": 0.4718186591983196,
      "grad_norm": 0.8611246894664979,
      "learning_rate": 5.697159828547445e-06,
      "loss": 0.1612,
      "step": 16173
    },
    {
      "epoch": 0.47184783242896317,
      "grad_norm": 0.7813186770413587,
      "learning_rate": 5.6966920061803435e-06,
      "loss": 0.1287,
      "step": 16174
    },
    {
      "epoch": 0.4718770056596067,
      "grad_norm": 0.8555039145706819,
      "learning_rate": 5.696224177593341e-06,
      "loss": 0.1269,
      "step": 16175
    },
    {
      "epoch": 0.4719061788902503,
      "grad_norm": 0.8187749906027676,
      "learning_rate": 5.695756342790617e-06,
      "loss": 0.1456,
      "step": 16176
    },
    {
      "epoch": 0.4719353521208939,
      "grad_norm": 0.8189764933933033,
      "learning_rate": 5.6952885017763455e-06,
      "loss": 0.1397,
      "step": 16177
    },
    {
      "epoch": 0.47196452535153743,
      "grad_norm": 0.9037366018345423,
      "learning_rate": 5.694820654554705e-06,
      "loss": 0.1487,
      "step": 16178
    },
    {
      "epoch": 0.471993698582181,
      "grad_norm": 0.830932021339775,
      "learning_rate": 5.694352801129871e-06,
      "loss": 0.1202,
      "step": 16179
    },
    {
      "epoch": 0.47202287181282454,
      "grad_norm": 0.9435808114088704,
      "learning_rate": 5.69388494150602e-06,
      "loss": 0.1494,
      "step": 16180
    },
    {
      "epoch": 0.4720520450434681,
      "grad_norm": 0.7360990872898313,
      "learning_rate": 5.693417075687332e-06,
      "loss": 0.1384,
      "step": 16181
    },
    {
      "epoch": 0.4720812182741117,
      "grad_norm": 0.946534332693603,
      "learning_rate": 5.69294920367798e-06,
      "loss": 0.1291,
      "step": 16182
    },
    {
      "epoch": 0.47211039150475526,
      "grad_norm": 0.8751863756735684,
      "learning_rate": 5.692481325482144e-06,
      "loss": 0.1362,
      "step": 16183
    },
    {
      "epoch": 0.4721395647353988,
      "grad_norm": 0.8350416032285239,
      "learning_rate": 5.692013441103999e-06,
      "loss": 0.1364,
      "step": 16184
    },
    {
      "epoch": 0.47216873796604236,
      "grad_norm": 0.7840014775393542,
      "learning_rate": 5.6915455505477244e-06,
      "loss": 0.1396,
      "step": 16185
    },
    {
      "epoch": 0.4721979111966859,
      "grad_norm": 0.9949906733657168,
      "learning_rate": 5.691077653817496e-06,
      "loss": 0.143,
      "step": 16186
    },
    {
      "epoch": 0.47222708442732947,
      "grad_norm": 1.1018000811796227,
      "learning_rate": 5.690609750917491e-06,
      "loss": 0.1335,
      "step": 16187
    },
    {
      "epoch": 0.472256257657973,
      "grad_norm": 0.839745800730884,
      "learning_rate": 5.690141841851887e-06,
      "loss": 0.1289,
      "step": 16188
    },
    {
      "epoch": 0.47228543088861663,
      "grad_norm": 0.8672016742917711,
      "learning_rate": 5.689673926624862e-06,
      "loss": 0.1242,
      "step": 16189
    },
    {
      "epoch": 0.4723146041192602,
      "grad_norm": 0.9283706838298952,
      "learning_rate": 5.6892060052405906e-06,
      "loss": 0.1287,
      "step": 16190
    },
    {
      "epoch": 0.47234377734990374,
      "grad_norm": 0.8957466119878499,
      "learning_rate": 5.688738077703255e-06,
      "loss": 0.1532,
      "step": 16191
    },
    {
      "epoch": 0.4723729505805473,
      "grad_norm": 0.8065300531139225,
      "learning_rate": 5.68827014401703e-06,
      "loss": 0.1438,
      "step": 16192
    },
    {
      "epoch": 0.47240212381119084,
      "grad_norm": 0.7639327856073047,
      "learning_rate": 5.687802204186092e-06,
      "loss": 0.1209,
      "step": 16193
    },
    {
      "epoch": 0.4724312970418344,
      "grad_norm": 0.9828183122126705,
      "learning_rate": 5.687334258214622e-06,
      "loss": 0.1269,
      "step": 16194
    },
    {
      "epoch": 0.47246047027247795,
      "grad_norm": 0.8431529340952344,
      "learning_rate": 5.686866306106794e-06,
      "loss": 0.1412,
      "step": 16195
    },
    {
      "epoch": 0.47248964350312156,
      "grad_norm": 1.1173127652406816,
      "learning_rate": 5.686398347866789e-06,
      "loss": 0.1343,
      "step": 16196
    },
    {
      "epoch": 0.4725188167337651,
      "grad_norm": 0.7631756116195886,
      "learning_rate": 5.685930383498782e-06,
      "loss": 0.1304,
      "step": 16197
    },
    {
      "epoch": 0.47254798996440867,
      "grad_norm": 0.8798489410905271,
      "learning_rate": 5.685462413006953e-06,
      "loss": 0.1409,
      "step": 16198
    },
    {
      "epoch": 0.4725771631950522,
      "grad_norm": 0.9584388683992391,
      "learning_rate": 5.684994436395479e-06,
      "loss": 0.1207,
      "step": 16199
    },
    {
      "epoch": 0.47260633642569577,
      "grad_norm": 0.7511771133137782,
      "learning_rate": 5.684526453668538e-06,
      "loss": 0.1153,
      "step": 16200
    },
    {
      "epoch": 0.4726355096563393,
      "grad_norm": 1.0283700102265485,
      "learning_rate": 5.684058464830311e-06,
      "loss": 0.1309,
      "step": 16201
    },
    {
      "epoch": 0.4726646828869829,
      "grad_norm": 0.815990377068461,
      "learning_rate": 5.68359046988497e-06,
      "loss": 0.1206,
      "step": 16202
    },
    {
      "epoch": 0.4726938561176265,
      "grad_norm": 0.7414053418331662,
      "learning_rate": 5.683122468836698e-06,
      "loss": 0.1179,
      "step": 16203
    },
    {
      "epoch": 0.47272302934827004,
      "grad_norm": 0.7558004797743774,
      "learning_rate": 5.682654461689671e-06,
      "loss": 0.1435,
      "step": 16204
    },
    {
      "epoch": 0.4727522025789136,
      "grad_norm": 0.763002566802126,
      "learning_rate": 5.682186448448067e-06,
      "loss": 0.1437,
      "step": 16205
    },
    {
      "epoch": 0.47278137580955715,
      "grad_norm": 0.7492393525328191,
      "learning_rate": 5.681718429116067e-06,
      "loss": 0.1312,
      "step": 16206
    },
    {
      "epoch": 0.4728105490402007,
      "grad_norm": 0.7150913299742103,
      "learning_rate": 5.681250403697847e-06,
      "loss": 0.1036,
      "step": 16207
    },
    {
      "epoch": 0.47283972227084425,
      "grad_norm": 0.6375610874861878,
      "learning_rate": 5.680782372197586e-06,
      "loss": 0.1189,
      "step": 16208
    },
    {
      "epoch": 0.4728688955014878,
      "grad_norm": 0.9498688512170802,
      "learning_rate": 5.6803143346194625e-06,
      "loss": 0.1275,
      "step": 16209
    },
    {
      "epoch": 0.4728980687321314,
      "grad_norm": 0.782084929024932,
      "learning_rate": 5.679846290967654e-06,
      "loss": 0.1257,
      "step": 16210
    },
    {
      "epoch": 0.47292724196277497,
      "grad_norm": 0.8024384483136685,
      "learning_rate": 5.679378241246341e-06,
      "loss": 0.1326,
      "step": 16211
    },
    {
      "epoch": 0.4729564151934185,
      "grad_norm": 0.9090946722184887,
      "learning_rate": 5.678910185459702e-06,
      "loss": 0.1391,
      "step": 16212
    },
    {
      "epoch": 0.4729855884240621,
      "grad_norm": 0.8432305552418801,
      "learning_rate": 5.678442123611914e-06,
      "loss": 0.1356,
      "step": 16213
    },
    {
      "epoch": 0.47301476165470563,
      "grad_norm": 0.8891832226368575,
      "learning_rate": 5.6779740557071574e-06,
      "loss": 0.1394,
      "step": 16214
    },
    {
      "epoch": 0.4730439348853492,
      "grad_norm": 0.8660342683361745,
      "learning_rate": 5.67750598174961e-06,
      "loss": 0.1627,
      "step": 16215
    },
    {
      "epoch": 0.4730731081159928,
      "grad_norm": 0.9064999845036191,
      "learning_rate": 5.67703790174345e-06,
      "loss": 0.1432,
      "step": 16216
    },
    {
      "epoch": 0.47310228134663634,
      "grad_norm": 0.9992737392003487,
      "learning_rate": 5.676569815692858e-06,
      "loss": 0.1315,
      "step": 16217
    },
    {
      "epoch": 0.4731314545772799,
      "grad_norm": 0.841757401922245,
      "learning_rate": 5.676101723602014e-06,
      "loss": 0.1384,
      "step": 16218
    },
    {
      "epoch": 0.47316062780792345,
      "grad_norm": 0.677806080845052,
      "learning_rate": 5.675633625475092e-06,
      "loss": 0.1288,
      "step": 16219
    },
    {
      "epoch": 0.473189801038567,
      "grad_norm": 0.9660709917443289,
      "learning_rate": 5.6751655213162746e-06,
      "loss": 0.1473,
      "step": 16220
    },
    {
      "epoch": 0.47321897426921056,
      "grad_norm": 0.9806908705855765,
      "learning_rate": 5.674697411129743e-06,
      "loss": 0.1382,
      "step": 16221
    },
    {
      "epoch": 0.4732481474998541,
      "grad_norm": 0.8439538122796523,
      "learning_rate": 5.674229294919672e-06,
      "loss": 0.1435,
      "step": 16222
    },
    {
      "epoch": 0.4732773207304977,
      "grad_norm": 0.7437934665996728,
      "learning_rate": 5.6737611726902446e-06,
      "loss": 0.1531,
      "step": 16223
    },
    {
      "epoch": 0.4733064939611413,
      "grad_norm": 0.9858281049159205,
      "learning_rate": 5.673293044445636e-06,
      "loss": 0.1551,
      "step": 16224
    },
    {
      "epoch": 0.4733356671917848,
      "grad_norm": 0.7692244938518777,
      "learning_rate": 5.672824910190029e-06,
      "loss": 0.1487,
      "step": 16225
    },
    {
      "epoch": 0.4733648404224284,
      "grad_norm": 0.775186689285311,
      "learning_rate": 5.672356769927601e-06,
      "loss": 0.1394,
      "step": 16226
    },
    {
      "epoch": 0.47339401365307193,
      "grad_norm": 0.8328297749803291,
      "learning_rate": 5.671888623662534e-06,
      "loss": 0.16,
      "step": 16227
    },
    {
      "epoch": 0.4734231868837155,
      "grad_norm": 1.738440038387054,
      "learning_rate": 5.671420471399005e-06,
      "loss": 0.1497,
      "step": 16228
    },
    {
      "epoch": 0.47345236011435904,
      "grad_norm": 0.8687263397133216,
      "learning_rate": 5.670952313141193e-06,
      "loss": 0.1133,
      "step": 16229
    },
    {
      "epoch": 0.47348153334500265,
      "grad_norm": 0.9985943263605154,
      "learning_rate": 5.670484148893281e-06,
      "loss": 0.1306,
      "step": 16230
    },
    {
      "epoch": 0.4735107065756462,
      "grad_norm": 0.8357561037795771,
      "learning_rate": 5.6700159786594466e-06,
      "loss": 0.1267,
      "step": 16231
    },
    {
      "epoch": 0.47353987980628975,
      "grad_norm": 1.0401668740599634,
      "learning_rate": 5.6695478024438665e-06,
      "loss": 0.1395,
      "step": 16232
    },
    {
      "epoch": 0.4735690530369333,
      "grad_norm": 0.7872097597891584,
      "learning_rate": 5.669079620250727e-06,
      "loss": 0.1358,
      "step": 16233
    },
    {
      "epoch": 0.47359822626757686,
      "grad_norm": 0.6619763784436152,
      "learning_rate": 5.668611432084202e-06,
      "loss": 0.1398,
      "step": 16234
    },
    {
      "epoch": 0.4736273994982204,
      "grad_norm": 0.8538666418095292,
      "learning_rate": 5.668143237948474e-06,
      "loss": 0.1384,
      "step": 16235
    },
    {
      "epoch": 0.47365657272886397,
      "grad_norm": 0.7445876302715672,
      "learning_rate": 5.667675037847724e-06,
      "loss": 0.1286,
      "step": 16236
    },
    {
      "epoch": 0.4736857459595076,
      "grad_norm": 0.9563616285404325,
      "learning_rate": 5.667206831786131e-06,
      "loss": 0.1535,
      "step": 16237
    },
    {
      "epoch": 0.47371491919015113,
      "grad_norm": 0.7220433397475235,
      "learning_rate": 5.666738619767873e-06,
      "loss": 0.1305,
      "step": 16238
    },
    {
      "epoch": 0.4737440924207947,
      "grad_norm": 0.9418118334499467,
      "learning_rate": 5.666270401797132e-06,
      "loss": 0.1213,
      "step": 16239
    },
    {
      "epoch": 0.47377326565143824,
      "grad_norm": 0.7768616027863579,
      "learning_rate": 5.665802177878088e-06,
      "loss": 0.1428,
      "step": 16240
    },
    {
      "epoch": 0.4738024388820818,
      "grad_norm": 0.9101999476468361,
      "learning_rate": 5.665333948014922e-06,
      "loss": 0.1403,
      "step": 16241
    },
    {
      "epoch": 0.47383161211272534,
      "grad_norm": 0.8402877876782288,
      "learning_rate": 5.664865712211812e-06,
      "loss": 0.1304,
      "step": 16242
    },
    {
      "epoch": 0.47386078534336895,
      "grad_norm": 0.9650119347957364,
      "learning_rate": 5.66439747047294e-06,
      "loss": 0.1397,
      "step": 16243
    },
    {
      "epoch": 0.4738899585740125,
      "grad_norm": 0.7516018342228209,
      "learning_rate": 5.663929222802487e-06,
      "loss": 0.1174,
      "step": 16244
    },
    {
      "epoch": 0.47391913180465606,
      "grad_norm": 0.9969496985948444,
      "learning_rate": 5.663460969204631e-06,
      "loss": 0.1438,
      "step": 16245
    },
    {
      "epoch": 0.4739483050352996,
      "grad_norm": 0.9829370754519864,
      "learning_rate": 5.662992709683556e-06,
      "loss": 0.1189,
      "step": 16246
    },
    {
      "epoch": 0.47397747826594316,
      "grad_norm": 0.7936624946802271,
      "learning_rate": 5.662524444243437e-06,
      "loss": 0.1211,
      "step": 16247
    },
    {
      "epoch": 0.4740066514965867,
      "grad_norm": 1.014677559492022,
      "learning_rate": 5.6620561728884616e-06,
      "loss": 0.1323,
      "step": 16248
    },
    {
      "epoch": 0.47403582472723027,
      "grad_norm": 0.8610488994778456,
      "learning_rate": 5.661587895622805e-06,
      "loss": 0.1218,
      "step": 16249
    },
    {
      "epoch": 0.4740649979578739,
      "grad_norm": 0.8314360485172108,
      "learning_rate": 5.661119612450647e-06,
      "loss": 0.129,
      "step": 16250
    },
    {
      "epoch": 0.47409417118851743,
      "grad_norm": 0.9404321259942992,
      "learning_rate": 5.660651323376175e-06,
      "loss": 0.1336,
      "step": 16251
    },
    {
      "epoch": 0.474123344419161,
      "grad_norm": 0.7948735444190058,
      "learning_rate": 5.660183028403564e-06,
      "loss": 0.1381,
      "step": 16252
    },
    {
      "epoch": 0.47415251764980454,
      "grad_norm": 0.9186003866517694,
      "learning_rate": 5.659714727536997e-06,
      "loss": 0.1744,
      "step": 16253
    },
    {
      "epoch": 0.4741816908804481,
      "grad_norm": 0.6924003566379509,
      "learning_rate": 5.659246420780654e-06,
      "loss": 0.14,
      "step": 16254
    },
    {
      "epoch": 0.47421086411109165,
      "grad_norm": 0.8309259555606926,
      "learning_rate": 5.658778108138716e-06,
      "loss": 0.1474,
      "step": 16255
    },
    {
      "epoch": 0.4742400373417352,
      "grad_norm": 0.8182532797642579,
      "learning_rate": 5.658309789615365e-06,
      "loss": 0.1487,
      "step": 16256
    },
    {
      "epoch": 0.4742692105723788,
      "grad_norm": 0.8406096018410196,
      "learning_rate": 5.657841465214781e-06,
      "loss": 0.1357,
      "step": 16257
    },
    {
      "epoch": 0.47429838380302236,
      "grad_norm": 0.877345040652179,
      "learning_rate": 5.6573731349411455e-06,
      "loss": 0.1489,
      "step": 16258
    },
    {
      "epoch": 0.4743275570336659,
      "grad_norm": 1.108890956422376,
      "learning_rate": 5.656904798798639e-06,
      "loss": 0.1375,
      "step": 16259
    },
    {
      "epoch": 0.47435673026430947,
      "grad_norm": 1.018419444436362,
      "learning_rate": 5.6564364567914446e-06,
      "loss": 0.1337,
      "step": 16260
    },
    {
      "epoch": 0.474385903494953,
      "grad_norm": 0.8214578524218132,
      "learning_rate": 5.655968108923742e-06,
      "loss": 0.1333,
      "step": 16261
    },
    {
      "epoch": 0.4744150767255966,
      "grad_norm": 0.9240534830592845,
      "learning_rate": 5.655499755199711e-06,
      "loss": 0.1476,
      "step": 16262
    },
    {
      "epoch": 0.4744442499562401,
      "grad_norm": 0.8709571035293724,
      "learning_rate": 5.655031395623537e-06,
      "loss": 0.1314,
      "step": 16263
    },
    {
      "epoch": 0.47447342318688374,
      "grad_norm": 0.8551789118468752,
      "learning_rate": 5.654563030199398e-06,
      "loss": 0.1443,
      "step": 16264
    },
    {
      "epoch": 0.4745025964175273,
      "grad_norm": 0.9940196886603396,
      "learning_rate": 5.654094658931475e-06,
      "loss": 0.1162,
      "step": 16265
    },
    {
      "epoch": 0.47453176964817084,
      "grad_norm": 0.8371940155546695,
      "learning_rate": 5.653626281823954e-06,
      "loss": 0.1561,
      "step": 16266
    },
    {
      "epoch": 0.4745609428788144,
      "grad_norm": 0.8418990081313251,
      "learning_rate": 5.653157898881012e-06,
      "loss": 0.1439,
      "step": 16267
    },
    {
      "epoch": 0.47459011610945795,
      "grad_norm": 0.8449860784566638,
      "learning_rate": 5.652689510106832e-06,
      "loss": 0.149,
      "step": 16268
    },
    {
      "epoch": 0.4746192893401015,
      "grad_norm": 0.9916595714534104,
      "learning_rate": 5.652221115505596e-06,
      "loss": 0.1212,
      "step": 16269
    },
    {
      "epoch": 0.4746484625707451,
      "grad_norm": 1.1035250167472606,
      "learning_rate": 5.651752715081486e-06,
      "loss": 0.154,
      "step": 16270
    },
    {
      "epoch": 0.47467763580138866,
      "grad_norm": 0.9671174009877531,
      "learning_rate": 5.651284308838683e-06,
      "loss": 0.1522,
      "step": 16271
    },
    {
      "epoch": 0.4747068090320322,
      "grad_norm": 0.945547690334323,
      "learning_rate": 5.650815896781369e-06,
      "loss": 0.1532,
      "step": 16272
    },
    {
      "epoch": 0.47473598226267577,
      "grad_norm": 0.9820598763822408,
      "learning_rate": 5.650347478913726e-06,
      "loss": 0.1293,
      "step": 16273
    },
    {
      "epoch": 0.4747651554933193,
      "grad_norm": 0.9646350181918945,
      "learning_rate": 5.649879055239936e-06,
      "loss": 0.142,
      "step": 16274
    },
    {
      "epoch": 0.4747943287239629,
      "grad_norm": 1.0998332083928983,
      "learning_rate": 5.649410625764181e-06,
      "loss": 0.1274,
      "step": 16275
    },
    {
      "epoch": 0.47482350195460643,
      "grad_norm": 1.0358855667448654,
      "learning_rate": 5.648942190490645e-06,
      "loss": 0.1454,
      "step": 16276
    },
    {
      "epoch": 0.47485267518525004,
      "grad_norm": 0.73096872678173,
      "learning_rate": 5.648473749423504e-06,
      "loss": 0.1263,
      "step": 16277
    },
    {
      "epoch": 0.4748818484158936,
      "grad_norm": 0.7662822299619612,
      "learning_rate": 5.648005302566948e-06,
      "loss": 0.095,
      "step": 16278
    },
    {
      "epoch": 0.47491102164653715,
      "grad_norm": 0.8184027461602206,
      "learning_rate": 5.647536849925154e-06,
      "loss": 0.1174,
      "step": 16279
    },
    {
      "epoch": 0.4749401948771807,
      "grad_norm": 0.7924127382126381,
      "learning_rate": 5.647068391502304e-06,
      "loss": 0.1202,
      "step": 16280
    },
    {
      "epoch": 0.47496936810782425,
      "grad_norm": 1.10565237542086,
      "learning_rate": 5.646599927302584e-06,
      "loss": 0.1581,
      "step": 16281
    },
    {
      "epoch": 0.4749985413384678,
      "grad_norm": 0.7452630278413567,
      "learning_rate": 5.646131457330173e-06,
      "loss": 0.1151,
      "step": 16282
    },
    {
      "epoch": 0.47502771456911136,
      "grad_norm": 0.7729927488370573,
      "learning_rate": 5.645662981589255e-06,
      "loss": 0.1377,
      "step": 16283
    },
    {
      "epoch": 0.47505688779975497,
      "grad_norm": 0.7449926836030744,
      "learning_rate": 5.645194500084011e-06,
      "loss": 0.139,
      "step": 16284
    },
    {
      "epoch": 0.4750860610303985,
      "grad_norm": 0.7884663737594525,
      "learning_rate": 5.644726012818626e-06,
      "loss": 0.1303,
      "step": 16285
    },
    {
      "epoch": 0.4751152342610421,
      "grad_norm": 1.008820998919924,
      "learning_rate": 5.644257519797281e-06,
      "loss": 0.1309,
      "step": 16286
    },
    {
      "epoch": 0.4751444074916856,
      "grad_norm": 0.7658301592147836,
      "learning_rate": 5.643789021024157e-06,
      "loss": 0.1518,
      "step": 16287
    },
    {
      "epoch": 0.4751735807223292,
      "grad_norm": 0.880928281849524,
      "learning_rate": 5.64332051650344e-06,
      "loss": 0.1285,
      "step": 16288
    },
    {
      "epoch": 0.47520275395297273,
      "grad_norm": 0.7554296987147964,
      "learning_rate": 5.642852006239311e-06,
      "loss": 0.1259,
      "step": 16289
    },
    {
      "epoch": 0.4752319271836163,
      "grad_norm": 1.024612839630492,
      "learning_rate": 5.642383490235952e-06,
      "loss": 0.1228,
      "step": 16290
    },
    {
      "epoch": 0.4752611004142599,
      "grad_norm": 0.9496401102042054,
      "learning_rate": 5.641914968497547e-06,
      "loss": 0.1465,
      "step": 16291
    },
    {
      "epoch": 0.47529027364490345,
      "grad_norm": 0.7016204864134581,
      "learning_rate": 5.6414464410282775e-06,
      "loss": 0.1388,
      "step": 16292
    },
    {
      "epoch": 0.475319446875547,
      "grad_norm": 0.9001482427453192,
      "learning_rate": 5.640977907832329e-06,
      "loss": 0.127,
      "step": 16293
    },
    {
      "epoch": 0.47534862010619056,
      "grad_norm": 0.7739858159602111,
      "learning_rate": 5.640509368913881e-06,
      "loss": 0.1445,
      "step": 16294
    },
    {
      "epoch": 0.4753777933368341,
      "grad_norm": 0.8609336916889685,
      "learning_rate": 5.640040824277119e-06,
      "loss": 0.144,
      "step": 16295
    },
    {
      "epoch": 0.47540696656747766,
      "grad_norm": 0.7231928968906722,
      "learning_rate": 5.639572273926226e-06,
      "loss": 0.115,
      "step": 16296
    },
    {
      "epoch": 0.47543613979812127,
      "grad_norm": 0.8155024116357175,
      "learning_rate": 5.639103717865383e-06,
      "loss": 0.127,
      "step": 16297
    },
    {
      "epoch": 0.4754653130287648,
      "grad_norm": 0.8359432403720437,
      "learning_rate": 5.6386351560987765e-06,
      "loss": 0.1327,
      "step": 16298
    },
    {
      "epoch": 0.4754944862594084,
      "grad_norm": 0.9868203154406928,
      "learning_rate": 5.6381665886305855e-06,
      "loss": 0.1372,
      "step": 16299
    },
    {
      "epoch": 0.47552365949005193,
      "grad_norm": 0.7039813079919041,
      "learning_rate": 5.637698015464996e-06,
      "loss": 0.124,
      "step": 16300
    },
    {
      "epoch": 0.4755528327206955,
      "grad_norm": 0.9433427371998303,
      "learning_rate": 5.637229436606193e-06,
      "loss": 0.1366,
      "step": 16301
    },
    {
      "epoch": 0.47558200595133904,
      "grad_norm": 1.104607429078894,
      "learning_rate": 5.636760852058356e-06,
      "loss": 0.1304,
      "step": 16302
    },
    {
      "epoch": 0.4756111791819826,
      "grad_norm": 0.9894809272089414,
      "learning_rate": 5.63629226182567e-06,
      "loss": 0.1131,
      "step": 16303
    },
    {
      "epoch": 0.4756403524126262,
      "grad_norm": 0.8373168273815884,
      "learning_rate": 5.635823665912319e-06,
      "loss": 0.1276,
      "step": 16304
    },
    {
      "epoch": 0.47566952564326975,
      "grad_norm": 1.1356005883576916,
      "learning_rate": 5.635355064322485e-06,
      "loss": 0.1538,
      "step": 16305
    },
    {
      "epoch": 0.4756986988739133,
      "grad_norm": 0.9302560494816402,
      "learning_rate": 5.634886457060355e-06,
      "loss": 0.1354,
      "step": 16306
    },
    {
      "epoch": 0.47572787210455686,
      "grad_norm": 0.9249712503956626,
      "learning_rate": 5.634417844130108e-06,
      "loss": 0.1238,
      "step": 16307
    },
    {
      "epoch": 0.4757570453352004,
      "grad_norm": 0.9696940757980068,
      "learning_rate": 5.633949225535932e-06,
      "loss": 0.1312,
      "step": 16308
    },
    {
      "epoch": 0.47578621856584397,
      "grad_norm": 1.0416341297635099,
      "learning_rate": 5.633480601282007e-06,
      "loss": 0.1279,
      "step": 16309
    },
    {
      "epoch": 0.4758153917964875,
      "grad_norm": 1.023882818797613,
      "learning_rate": 5.633011971372519e-06,
      "loss": 0.1486,
      "step": 16310
    },
    {
      "epoch": 0.47584456502713113,
      "grad_norm": 0.8128353283753157,
      "learning_rate": 5.632543335811651e-06,
      "loss": 0.1116,
      "step": 16311
    },
    {
      "epoch": 0.4758737382577747,
      "grad_norm": 0.984506636394337,
      "learning_rate": 5.632074694603586e-06,
      "loss": 0.1379,
      "step": 16312
    },
    {
      "epoch": 0.47590291148841823,
      "grad_norm": 1.084500215608927,
      "learning_rate": 5.631606047752512e-06,
      "loss": 0.1328,
      "step": 16313
    },
    {
      "epoch": 0.4759320847190618,
      "grad_norm": 0.7600621811441662,
      "learning_rate": 5.631137395262608e-06,
      "loss": 0.1127,
      "step": 16314
    },
    {
      "epoch": 0.47596125794970534,
      "grad_norm": 0.9186156422568045,
      "learning_rate": 5.6306687371380585e-06,
      "loss": 0.1189,
      "step": 16315
    },
    {
      "epoch": 0.4759904311803489,
      "grad_norm": 0.7012573468766916,
      "learning_rate": 5.630200073383052e-06,
      "loss": 0.1336,
      "step": 16316
    },
    {
      "epoch": 0.47601960441099245,
      "grad_norm": 0.9912072267814284,
      "learning_rate": 5.629731404001769e-06,
      "loss": 0.1464,
      "step": 16317
    },
    {
      "epoch": 0.47604877764163606,
      "grad_norm": 0.766488550025548,
      "learning_rate": 5.6292627289983934e-06,
      "loss": 0.1154,
      "step": 16318
    },
    {
      "epoch": 0.4760779508722796,
      "grad_norm": 0.9167470478466365,
      "learning_rate": 5.628794048377111e-06,
      "loss": 0.123,
      "step": 16319
    },
    {
      "epoch": 0.47610712410292316,
      "grad_norm": 0.8191229535731257,
      "learning_rate": 5.628325362142105e-06,
      "loss": 0.1332,
      "step": 16320
    },
    {
      "epoch": 0.4761362973335667,
      "grad_norm": 0.7887257907050483,
      "learning_rate": 5.62785667029756e-06,
      "loss": 0.1343,
      "step": 16321
    },
    {
      "epoch": 0.47616547056421027,
      "grad_norm": 0.7854894873088004,
      "learning_rate": 5.627387972847661e-06,
      "loss": 0.1503,
      "step": 16322
    },
    {
      "epoch": 0.4761946437948538,
      "grad_norm": 0.9401852237844465,
      "learning_rate": 5.626919269796594e-06,
      "loss": 0.1497,
      "step": 16323
    },
    {
      "epoch": 0.47622381702549743,
      "grad_norm": 0.9153490341263396,
      "learning_rate": 5.626450561148537e-06,
      "loss": 0.1437,
      "step": 16324
    },
    {
      "epoch": 0.476252990256141,
      "grad_norm": 0.7760296164459083,
      "learning_rate": 5.625981846907682e-06,
      "loss": 0.1354,
      "step": 16325
    },
    {
      "epoch": 0.47628216348678454,
      "grad_norm": 0.9832838538956926,
      "learning_rate": 5.62551312707821e-06,
      "loss": 0.1492,
      "step": 16326
    },
    {
      "epoch": 0.4763113367174281,
      "grad_norm": 0.770392387891613,
      "learning_rate": 5.625044401664306e-06,
      "loss": 0.1447,
      "step": 16327
    },
    {
      "epoch": 0.47634050994807164,
      "grad_norm": 0.8861269753905067,
      "learning_rate": 5.624575670670155e-06,
      "loss": 0.1423,
      "step": 16328
    },
    {
      "epoch": 0.4763696831787152,
      "grad_norm": 0.772477885446521,
      "learning_rate": 5.624106934099941e-06,
      "loss": 0.1479,
      "step": 16329
    },
    {
      "epoch": 0.47639885640935875,
      "grad_norm": 0.8666486284004198,
      "learning_rate": 5.623638191957849e-06,
      "loss": 0.1366,
      "step": 16330
    },
    {
      "epoch": 0.47642802964000236,
      "grad_norm": 1.0405527675674633,
      "learning_rate": 5.623169444248064e-06,
      "loss": 0.1234,
      "step": 16331
    },
    {
      "epoch": 0.4764572028706459,
      "grad_norm": 0.9482855984339433,
      "learning_rate": 5.6227006909747724e-06,
      "loss": 0.1242,
      "step": 16332
    },
    {
      "epoch": 0.47648637610128947,
      "grad_norm": 0.8474810524686269,
      "learning_rate": 5.622231932142157e-06,
      "loss": 0.1278,
      "step": 16333
    },
    {
      "epoch": 0.476515549331933,
      "grad_norm": 0.9077890273505308,
      "learning_rate": 5.621763167754402e-06,
      "loss": 0.1239,
      "step": 16334
    },
    {
      "epoch": 0.4765447225625766,
      "grad_norm": 0.742815865182335,
      "learning_rate": 5.621294397815697e-06,
      "loss": 0.1233,
      "step": 16335
    },
    {
      "epoch": 0.4765738957932201,
      "grad_norm": 1.087039266341935,
      "learning_rate": 5.620825622330221e-06,
      "loss": 0.1294,
      "step": 16336
    },
    {
      "epoch": 0.4766030690238637,
      "grad_norm": 0.932618488392475,
      "learning_rate": 5.620356841302162e-06,
      "loss": 0.1392,
      "step": 16337
    },
    {
      "epoch": 0.4766322422545073,
      "grad_norm": 0.9678135850146654,
      "learning_rate": 5.6198880547357085e-06,
      "loss": 0.1507,
      "step": 16338
    },
    {
      "epoch": 0.47666141548515084,
      "grad_norm": 0.7887670456865438,
      "learning_rate": 5.619419262635039e-06,
      "loss": 0.1141,
      "step": 16339
    },
    {
      "epoch": 0.4766905887157944,
      "grad_norm": 0.9818752688943305,
      "learning_rate": 5.618950465004344e-06,
      "loss": 0.1223,
      "step": 16340
    },
    {
      "epoch": 0.47671976194643795,
      "grad_norm": 1.0586796820123847,
      "learning_rate": 5.618481661847806e-06,
      "loss": 0.1435,
      "step": 16341
    },
    {
      "epoch": 0.4767489351770815,
      "grad_norm": 1.1787417750038875,
      "learning_rate": 5.618012853169611e-06,
      "loss": 0.1323,
      "step": 16342
    },
    {
      "epoch": 0.47677810840772505,
      "grad_norm": 0.9320450187711383,
      "learning_rate": 5.617544038973946e-06,
      "loss": 0.129,
      "step": 16343
    },
    {
      "epoch": 0.4768072816383686,
      "grad_norm": 1.2869324861698728,
      "learning_rate": 5.617075219264996e-06,
      "loss": 0.1617,
      "step": 16344
    },
    {
      "epoch": 0.4768364548690122,
      "grad_norm": 1.0556070212776434,
      "learning_rate": 5.616606394046944e-06,
      "loss": 0.1311,
      "step": 16345
    },
    {
      "epoch": 0.47686562809965577,
      "grad_norm": 0.9284384602442743,
      "learning_rate": 5.616137563323978e-06,
      "loss": 0.1204,
      "step": 16346
    },
    {
      "epoch": 0.4768948013302993,
      "grad_norm": 0.8314594194575753,
      "learning_rate": 5.615668727100283e-06,
      "loss": 0.1267,
      "step": 16347
    },
    {
      "epoch": 0.4769239745609429,
      "grad_norm": 0.8195256567909605,
      "learning_rate": 5.615199885380044e-06,
      "loss": 0.1313,
      "step": 16348
    },
    {
      "epoch": 0.47695314779158643,
      "grad_norm": 0.9979888435768413,
      "learning_rate": 5.614731038167448e-06,
      "loss": 0.1578,
      "step": 16349
    },
    {
      "epoch": 0.47698232102223,
      "grad_norm": 0.9328922645018125,
      "learning_rate": 5.614262185466679e-06,
      "loss": 0.1347,
      "step": 16350
    },
    {
      "epoch": 0.47701149425287354,
      "grad_norm": 0.8920237393330316,
      "learning_rate": 5.613793327281924e-06,
      "loss": 0.136,
      "step": 16351
    },
    {
      "epoch": 0.47704066748351714,
      "grad_norm": 0.7439757715781523,
      "learning_rate": 5.61332446361737e-06,
      "loss": 0.1161,
      "step": 16352
    },
    {
      "epoch": 0.4770698407141607,
      "grad_norm": 0.9532320816988058,
      "learning_rate": 5.612855594477202e-06,
      "loss": 0.1422,
      "step": 16353
    },
    {
      "epoch": 0.47709901394480425,
      "grad_norm": 0.773370658411886,
      "learning_rate": 5.612386719865604e-06,
      "loss": 0.1233,
      "step": 16354
    },
    {
      "epoch": 0.4771281871754478,
      "grad_norm": 0.9073938318172595,
      "learning_rate": 5.611917839786763e-06,
      "loss": 0.1331,
      "step": 16355
    },
    {
      "epoch": 0.47715736040609136,
      "grad_norm": 0.8527106710249348,
      "learning_rate": 5.6114489542448684e-06,
      "loss": 0.1323,
      "step": 16356
    },
    {
      "epoch": 0.4771865336367349,
      "grad_norm": 0.787239349102488,
      "learning_rate": 5.610980063244099e-06,
      "loss": 0.1331,
      "step": 16357
    },
    {
      "epoch": 0.4772157068673785,
      "grad_norm": 0.750076972832993,
      "learning_rate": 5.61051116678865e-06,
      "loss": 0.1485,
      "step": 16358
    },
    {
      "epoch": 0.4772448800980221,
      "grad_norm": 0.9619912671955636,
      "learning_rate": 5.610042264882701e-06,
      "loss": 0.1588,
      "step": 16359
    },
    {
      "epoch": 0.4772740533286656,
      "grad_norm": 1.0436512459267488,
      "learning_rate": 5.60957335753044e-06,
      "loss": 0.1387,
      "step": 16360
    },
    {
      "epoch": 0.4773032265593092,
      "grad_norm": 0.9279762888706286,
      "learning_rate": 5.6091044447360545e-06,
      "loss": 0.1349,
      "step": 16361
    },
    {
      "epoch": 0.47733239978995273,
      "grad_norm": 0.840565962671998,
      "learning_rate": 5.60863552650373e-06,
      "loss": 0.1497,
      "step": 16362
    },
    {
      "epoch": 0.4773615730205963,
      "grad_norm": 1.2783894863802543,
      "learning_rate": 5.608166602837652e-06,
      "loss": 0.1352,
      "step": 16363
    },
    {
      "epoch": 0.47739074625123984,
      "grad_norm": 0.8850599410811496,
      "learning_rate": 5.607697673742008e-06,
      "loss": 0.1318,
      "step": 16364
    },
    {
      "epoch": 0.47741991948188345,
      "grad_norm": 0.8114331658584741,
      "learning_rate": 5.607228739220984e-06,
      "loss": 0.1255,
      "step": 16365
    },
    {
      "epoch": 0.477449092712527,
      "grad_norm": 1.5518561725449465,
      "learning_rate": 5.606759799278766e-06,
      "loss": 0.1662,
      "step": 16366
    },
    {
      "epoch": 0.47747826594317055,
      "grad_norm": 0.9667529991787196,
      "learning_rate": 5.606290853919543e-06,
      "loss": 0.1221,
      "step": 16367
    },
    {
      "epoch": 0.4775074391738141,
      "grad_norm": 0.8184996547673199,
      "learning_rate": 5.6058219031475e-06,
      "loss": 0.1235,
      "step": 16368
    },
    {
      "epoch": 0.47753661240445766,
      "grad_norm": 1.2228537771081796,
      "learning_rate": 5.605352946966822e-06,
      "loss": 0.1296,
      "step": 16369
    },
    {
      "epoch": 0.4775657856351012,
      "grad_norm": 1.0041666589820943,
      "learning_rate": 5.604883985381699e-06,
      "loss": 0.1564,
      "step": 16370
    },
    {
      "epoch": 0.47759495886574477,
      "grad_norm": 0.9556537717756693,
      "learning_rate": 5.604415018396315e-06,
      "loss": 0.1294,
      "step": 16371
    },
    {
      "epoch": 0.4776241320963884,
      "grad_norm": 0.9860609207112173,
      "learning_rate": 5.603946046014859e-06,
      "loss": 0.149,
      "step": 16372
    },
    {
      "epoch": 0.47765330532703193,
      "grad_norm": 0.9007862306763328,
      "learning_rate": 5.603477068241516e-06,
      "loss": 0.1252,
      "step": 16373
    },
    {
      "epoch": 0.4776824785576755,
      "grad_norm": 0.9248634586943631,
      "learning_rate": 5.603008085080475e-06,
      "loss": 0.1262,
      "step": 16374
    },
    {
      "epoch": 0.47771165178831904,
      "grad_norm": 0.8512401550436068,
      "learning_rate": 5.602539096535921e-06,
      "loss": 0.1188,
      "step": 16375
    },
    {
      "epoch": 0.4777408250189626,
      "grad_norm": 0.9262570479023378,
      "learning_rate": 5.602070102612042e-06,
      "loss": 0.1491,
      "step": 16376
    },
    {
      "epoch": 0.47776999824960614,
      "grad_norm": 0.8646970332991294,
      "learning_rate": 5.6016011033130246e-06,
      "loss": 0.1637,
      "step": 16377
    },
    {
      "epoch": 0.4777991714802497,
      "grad_norm": 1.1847361888051147,
      "learning_rate": 5.601132098643056e-06,
      "loss": 0.1435,
      "step": 16378
    },
    {
      "epoch": 0.4778283447108933,
      "grad_norm": 0.764824753947898,
      "learning_rate": 5.600663088606324e-06,
      "loss": 0.1164,
      "step": 16379
    },
    {
      "epoch": 0.47785751794153686,
      "grad_norm": 1.0289881956391431,
      "learning_rate": 5.600194073207015e-06,
      "loss": 0.1268,
      "step": 16380
    },
    {
      "epoch": 0.4778866911721804,
      "grad_norm": 1.0154543053608711,
      "learning_rate": 5.599725052449316e-06,
      "loss": 0.1469,
      "step": 16381
    },
    {
      "epoch": 0.47791586440282396,
      "grad_norm": 0.7685139442929392,
      "learning_rate": 5.599256026337417e-06,
      "loss": 0.1182,
      "step": 16382
    },
    {
      "epoch": 0.4779450376334675,
      "grad_norm": 0.8005495512040665,
      "learning_rate": 5.5987869948755014e-06,
      "loss": 0.1249,
      "step": 16383
    },
    {
      "epoch": 0.47797421086411107,
      "grad_norm": 0.7659027252764419,
      "learning_rate": 5.598317958067758e-06,
      "loss": 0.1413,
      "step": 16384
    },
    {
      "epoch": 0.4780033840947547,
      "grad_norm": 0.7497897555580734,
      "learning_rate": 5.597848915918376e-06,
      "loss": 0.1359,
      "step": 16385
    },
    {
      "epoch": 0.47803255732539823,
      "grad_norm": 0.9880357635674445,
      "learning_rate": 5.5973798684315415e-06,
      "loss": 0.135,
      "step": 16386
    },
    {
      "epoch": 0.4780617305560418,
      "grad_norm": 0.8929744030550361,
      "learning_rate": 5.5969108156114406e-06,
      "loss": 0.1312,
      "step": 16387
    },
    {
      "epoch": 0.47809090378668534,
      "grad_norm": 0.8720010906201271,
      "learning_rate": 5.596441757462266e-06,
      "loss": 0.1336,
      "step": 16388
    },
    {
      "epoch": 0.4781200770173289,
      "grad_norm": 0.7842769572080821,
      "learning_rate": 5.595972693988199e-06,
      "loss": 0.1095,
      "step": 16389
    },
    {
      "epoch": 0.47814925024797245,
      "grad_norm": 0.7877639372626694,
      "learning_rate": 5.595503625193429e-06,
      "loss": 0.1336,
      "step": 16390
    },
    {
      "epoch": 0.478178423478616,
      "grad_norm": 1.0346505626710718,
      "learning_rate": 5.595034551082147e-06,
      "loss": 0.1541,
      "step": 16391
    },
    {
      "epoch": 0.4782075967092596,
      "grad_norm": 1.1464245879999029,
      "learning_rate": 5.594565471658537e-06,
      "loss": 0.1532,
      "step": 16392
    },
    {
      "epoch": 0.47823676993990316,
      "grad_norm": 0.7323674847221384,
      "learning_rate": 5.594096386926789e-06,
      "loss": 0.1286,
      "step": 16393
    },
    {
      "epoch": 0.4782659431705467,
      "grad_norm": 0.7177221973266082,
      "learning_rate": 5.5936272968910905e-06,
      "loss": 0.149,
      "step": 16394
    },
    {
      "epoch": 0.47829511640119027,
      "grad_norm": 0.939104521542515,
      "learning_rate": 5.5931582015556294e-06,
      "loss": 0.1489,
      "step": 16395
    },
    {
      "epoch": 0.4783242896318338,
      "grad_norm": 0.7656754892557142,
      "learning_rate": 5.592689100924595e-06,
      "loss": 0.1266,
      "step": 16396
    },
    {
      "epoch": 0.4783534628624774,
      "grad_norm": 0.71370623999873,
      "learning_rate": 5.59221999500217e-06,
      "loss": 0.1538,
      "step": 16397
    },
    {
      "epoch": 0.4783826360931209,
      "grad_norm": 0.885135228127414,
      "learning_rate": 5.59175088379255e-06,
      "loss": 0.1246,
      "step": 16398
    },
    {
      "epoch": 0.47841180932376454,
      "grad_norm": 1.030477827237176,
      "learning_rate": 5.591281767299916e-06,
      "loss": 0.1483,
      "step": 16399
    },
    {
      "epoch": 0.4784409825544081,
      "grad_norm": 0.817095829340118,
      "learning_rate": 5.590812645528462e-06,
      "loss": 0.1169,
      "step": 16400
    },
    {
      "epoch": 0.47847015578505164,
      "grad_norm": 0.7114241373232695,
      "learning_rate": 5.590343518482374e-06,
      "loss": 0.1335,
      "step": 16401
    },
    {
      "epoch": 0.4784993290156952,
      "grad_norm": 0.8398076516534582,
      "learning_rate": 5.589874386165838e-06,
      "loss": 0.1445,
      "step": 16402
    },
    {
      "epoch": 0.47852850224633875,
      "grad_norm": 0.8405885559418892,
      "learning_rate": 5.5894052485830464e-06,
      "loss": 0.1207,
      "step": 16403
    },
    {
      "epoch": 0.4785576754769823,
      "grad_norm": 0.6021177127044054,
      "learning_rate": 5.588936105738184e-06,
      "loss": 0.1113,
      "step": 16404
    },
    {
      "epoch": 0.47858684870762586,
      "grad_norm": 1.0032348959711763,
      "learning_rate": 5.588466957635441e-06,
      "loss": 0.1423,
      "step": 16405
    },
    {
      "epoch": 0.47861602193826946,
      "grad_norm": 0.7924935714187326,
      "learning_rate": 5.587997804279005e-06,
      "loss": 0.1522,
      "step": 16406
    },
    {
      "epoch": 0.478645195168913,
      "grad_norm": 0.7680357104654308,
      "learning_rate": 5.587528645673066e-06,
      "loss": 0.1208,
      "step": 16407
    },
    {
      "epoch": 0.47867436839955657,
      "grad_norm": 0.8609661601631099,
      "learning_rate": 5.58705948182181e-06,
      "loss": 0.1406,
      "step": 16408
    },
    {
      "epoch": 0.4787035416302001,
      "grad_norm": 0.9511141456151238,
      "learning_rate": 5.586590312729429e-06,
      "loss": 0.1342,
      "step": 16409
    },
    {
      "epoch": 0.4787327148608437,
      "grad_norm": 0.7120753550924881,
      "learning_rate": 5.586121138400108e-06,
      "loss": 0.1095,
      "step": 16410
    },
    {
      "epoch": 0.47876188809148723,
      "grad_norm": 1.0006084562915092,
      "learning_rate": 5.5856519588380385e-06,
      "loss": 0.14,
      "step": 16411
    },
    {
      "epoch": 0.47879106132213084,
      "grad_norm": 0.9435542241082506,
      "learning_rate": 5.5851827740474075e-06,
      "loss": 0.1616,
      "step": 16412
    },
    {
      "epoch": 0.4788202345527744,
      "grad_norm": 0.930465655253894,
      "learning_rate": 5.584713584032406e-06,
      "loss": 0.121,
      "step": 16413
    },
    {
      "epoch": 0.47884940778341795,
      "grad_norm": 0.7637096447809273,
      "learning_rate": 5.5842443887972184e-06,
      "loss": 0.1268,
      "step": 16414
    },
    {
      "epoch": 0.4788785810140615,
      "grad_norm": 0.8205669053114473,
      "learning_rate": 5.5837751883460375e-06,
      "loss": 0.1387,
      "step": 16415
    },
    {
      "epoch": 0.47890775424470505,
      "grad_norm": 0.8744346633681963,
      "learning_rate": 5.583305982683053e-06,
      "loss": 0.1417,
      "step": 16416
    },
    {
      "epoch": 0.4789369274753486,
      "grad_norm": 0.7638189563637072,
      "learning_rate": 5.582836771812448e-06,
      "loss": 0.1191,
      "step": 16417
    },
    {
      "epoch": 0.47896610070599216,
      "grad_norm": 1.0753041099163074,
      "learning_rate": 5.582367555738419e-06,
      "loss": 0.1313,
      "step": 16418
    },
    {
      "epoch": 0.47899527393663577,
      "grad_norm": 0.8909782947913392,
      "learning_rate": 5.5818983344651515e-06,
      "loss": 0.1149,
      "step": 16419
    },
    {
      "epoch": 0.4790244471672793,
      "grad_norm": 0.8579148418636114,
      "learning_rate": 5.581429107996833e-06,
      "loss": 0.1323,
      "step": 16420
    },
    {
      "epoch": 0.4790536203979229,
      "grad_norm": 1.112360093645641,
      "learning_rate": 5.580959876337654e-06,
      "loss": 0.1384,
      "step": 16421
    },
    {
      "epoch": 0.47908279362856643,
      "grad_norm": 0.8913188629078489,
      "learning_rate": 5.580490639491805e-06,
      "loss": 0.1551,
      "step": 16422
    },
    {
      "epoch": 0.47911196685921,
      "grad_norm": 0.7864805654378544,
      "learning_rate": 5.580021397463473e-06,
      "loss": 0.1642,
      "step": 16423
    },
    {
      "epoch": 0.47914114008985353,
      "grad_norm": 1.0481762492684195,
      "learning_rate": 5.579552150256849e-06,
      "loss": 0.1035,
      "step": 16424
    },
    {
      "epoch": 0.4791703133204971,
      "grad_norm": 0.8525269377785037,
      "learning_rate": 5.5790828978761215e-06,
      "loss": 0.1503,
      "step": 16425
    },
    {
      "epoch": 0.4791994865511407,
      "grad_norm": 0.7712905240418593,
      "learning_rate": 5.578613640325481e-06,
      "loss": 0.1297,
      "step": 16426
    },
    {
      "epoch": 0.47922865978178425,
      "grad_norm": 1.1679634596940096,
      "learning_rate": 5.5781443776091145e-06,
      "loss": 0.1501,
      "step": 16427
    },
    {
      "epoch": 0.4792578330124278,
      "grad_norm": 1.0837233145526382,
      "learning_rate": 5.577675109731216e-06,
      "loss": 0.1392,
      "step": 16428
    },
    {
      "epoch": 0.47928700624307136,
      "grad_norm": 0.8112739922629897,
      "learning_rate": 5.577205836695968e-06,
      "loss": 0.1263,
      "step": 16429
    },
    {
      "epoch": 0.4793161794737149,
      "grad_norm": 1.4041024594045965,
      "learning_rate": 5.576736558507566e-06,
      "loss": 0.1345,
      "step": 16430
    },
    {
      "epoch": 0.47934535270435846,
      "grad_norm": 1.137613393891698,
      "learning_rate": 5.5762672751702e-06,
      "loss": 0.1325,
      "step": 16431
    },
    {
      "epoch": 0.479374525935002,
      "grad_norm": 0.8934552907259496,
      "learning_rate": 5.575797986688053e-06,
      "loss": 0.1214,
      "step": 16432
    },
    {
      "epoch": 0.4794036991656456,
      "grad_norm": 0.6215980394763082,
      "learning_rate": 5.575328693065322e-06,
      "loss": 0.1091,
      "step": 16433
    },
    {
      "epoch": 0.4794328723962892,
      "grad_norm": 1.1148917131769525,
      "learning_rate": 5.574859394306194e-06,
      "loss": 0.1246,
      "step": 16434
    },
    {
      "epoch": 0.47946204562693273,
      "grad_norm": 1.1038719635113177,
      "learning_rate": 5.574390090414856e-06,
      "loss": 0.1283,
      "step": 16435
    },
    {
      "epoch": 0.4794912188575763,
      "grad_norm": 0.8680988789897821,
      "learning_rate": 5.573920781395502e-06,
      "loss": 0.149,
      "step": 16436
    },
    {
      "epoch": 0.47952039208821984,
      "grad_norm": 1.0342378646567074,
      "learning_rate": 5.57345146725232e-06,
      "loss": 0.1458,
      "step": 16437
    },
    {
      "epoch": 0.4795495653188634,
      "grad_norm": 0.9369272256059544,
      "learning_rate": 5.572982147989501e-06,
      "loss": 0.1341,
      "step": 16438
    },
    {
      "epoch": 0.479578738549507,
      "grad_norm": 0.709060020956352,
      "learning_rate": 5.5725128236112326e-06,
      "loss": 0.1266,
      "step": 16439
    },
    {
      "epoch": 0.47960791178015055,
      "grad_norm": 0.7898214892689679,
      "learning_rate": 5.572043494121707e-06,
      "loss": 0.1579,
      "step": 16440
    },
    {
      "epoch": 0.4796370850107941,
      "grad_norm": 1.0129801073910862,
      "learning_rate": 5.571574159525114e-06,
      "loss": 0.1524,
      "step": 16441
    },
    {
      "epoch": 0.47966625824143766,
      "grad_norm": 1.0811600843143345,
      "learning_rate": 5.571104819825643e-06,
      "loss": 0.1195,
      "step": 16442
    },
    {
      "epoch": 0.4796954314720812,
      "grad_norm": 0.8784083018169584,
      "learning_rate": 5.570635475027486e-06,
      "loss": 0.1459,
      "step": 16443
    },
    {
      "epoch": 0.47972460470272477,
      "grad_norm": 0.8980149655159101,
      "learning_rate": 5.570166125134829e-06,
      "loss": 0.1357,
      "step": 16444
    },
    {
      "epoch": 0.4797537779333683,
      "grad_norm": 0.8902907706004111,
      "learning_rate": 5.569696770151866e-06,
      "loss": 0.1377,
      "step": 16445
    },
    {
      "epoch": 0.47978295116401193,
      "grad_norm": 0.8007189101995574,
      "learning_rate": 5.569227410082788e-06,
      "loss": 0.1116,
      "step": 16446
    },
    {
      "epoch": 0.4798121243946555,
      "grad_norm": 1.013187782512625,
      "learning_rate": 5.568758044931781e-06,
      "loss": 0.1442,
      "step": 16447
    },
    {
      "epoch": 0.47984129762529903,
      "grad_norm": 0.8005773321204657,
      "learning_rate": 5.568288674703041e-06,
      "loss": 0.1273,
      "step": 16448
    },
    {
      "epoch": 0.4798704708559426,
      "grad_norm": 0.7571208636228627,
      "learning_rate": 5.5678192994007526e-06,
      "loss": 0.1429,
      "step": 16449
    },
    {
      "epoch": 0.47989964408658614,
      "grad_norm": 0.6262548695044964,
      "learning_rate": 5.56734991902911e-06,
      "loss": 0.1236,
      "step": 16450
    },
    {
      "epoch": 0.4799288173172297,
      "grad_norm": 1.0030999671621827,
      "learning_rate": 5.566880533592303e-06,
      "loss": 0.1397,
      "step": 16451
    },
    {
      "epoch": 0.47995799054787325,
      "grad_norm": 0.825659076886547,
      "learning_rate": 5.566411143094521e-06,
      "loss": 0.1256,
      "step": 16452
    },
    {
      "epoch": 0.47998716377851686,
      "grad_norm": 0.7132176911079248,
      "learning_rate": 5.565941747539957e-06,
      "loss": 0.1444,
      "step": 16453
    },
    {
      "epoch": 0.4800163370091604,
      "grad_norm": 0.7043674861429906,
      "learning_rate": 5.565472346932799e-06,
      "loss": 0.1528,
      "step": 16454
    },
    {
      "epoch": 0.48004551023980396,
      "grad_norm": 0.9347895133101263,
      "learning_rate": 5.565002941277239e-06,
      "loss": 0.1267,
      "step": 16455
    },
    {
      "epoch": 0.4800746834704475,
      "grad_norm": 0.7726630271489435,
      "learning_rate": 5.564533530577467e-06,
      "loss": 0.1314,
      "step": 16456
    },
    {
      "epoch": 0.48010385670109107,
      "grad_norm": 0.6726280534384327,
      "learning_rate": 5.5640641148376765e-06,
      "loss": 0.1319,
      "step": 16457
    },
    {
      "epoch": 0.4801330299317346,
      "grad_norm": 0.8693280533083172,
      "learning_rate": 5.563594694062055e-06,
      "loss": 0.1297,
      "step": 16458
    },
    {
      "epoch": 0.4801622031623782,
      "grad_norm": 1.0234800984648962,
      "learning_rate": 5.563125268254794e-06,
      "loss": 0.1107,
      "step": 16459
    },
    {
      "epoch": 0.4801913763930218,
      "grad_norm": 0.9369029388200885,
      "learning_rate": 5.562655837420086e-06,
      "loss": 0.1648,
      "step": 16460
    },
    {
      "epoch": 0.48022054962366534,
      "grad_norm": 0.9688258507488033,
      "learning_rate": 5.562186401562121e-06,
      "loss": 0.1238,
      "step": 16461
    },
    {
      "epoch": 0.4802497228543089,
      "grad_norm": 0.9769512448249669,
      "learning_rate": 5.561716960685089e-06,
      "loss": 0.1447,
      "step": 16462
    },
    {
      "epoch": 0.48027889608495244,
      "grad_norm": 1.0054589122449409,
      "learning_rate": 5.561247514793183e-06,
      "loss": 0.1481,
      "step": 16463
    },
    {
      "epoch": 0.480308069315596,
      "grad_norm": 0.8938732851180062,
      "learning_rate": 5.560778063890593e-06,
      "loss": 0.1446,
      "step": 16464
    },
    {
      "epoch": 0.48033724254623955,
      "grad_norm": 0.9926766665942596,
      "learning_rate": 5.560308607981511e-06,
      "loss": 0.1388,
      "step": 16465
    },
    {
      "epoch": 0.4803664157768831,
      "grad_norm": 0.6755509937775684,
      "learning_rate": 5.559839147070125e-06,
      "loss": 0.1388,
      "step": 16466
    },
    {
      "epoch": 0.4803955890075267,
      "grad_norm": 1.1091210266959968,
      "learning_rate": 5.5593696811606314e-06,
      "loss": 0.126,
      "step": 16467
    },
    {
      "epoch": 0.48042476223817027,
      "grad_norm": 0.8662699026525464,
      "learning_rate": 5.558900210257218e-06,
      "loss": 0.1566,
      "step": 16468
    },
    {
      "epoch": 0.4804539354688138,
      "grad_norm": 0.7126832076605465,
      "learning_rate": 5.558430734364077e-06,
      "loss": 0.1342,
      "step": 16469
    },
    {
      "epoch": 0.4804831086994574,
      "grad_norm": 0.8529551201597447,
      "learning_rate": 5.557961253485399e-06,
      "loss": 0.1429,
      "step": 16470
    },
    {
      "epoch": 0.4805122819301009,
      "grad_norm": 0.7867620828402069,
      "learning_rate": 5.5574917676253755e-06,
      "loss": 0.1195,
      "step": 16471
    },
    {
      "epoch": 0.4805414551607445,
      "grad_norm": 0.7318710212935304,
      "learning_rate": 5.5570222767882e-06,
      "loss": 0.1182,
      "step": 16472
    },
    {
      "epoch": 0.4805706283913881,
      "grad_norm": 0.7632637272100438,
      "learning_rate": 5.5565527809780635e-06,
      "loss": 0.1299,
      "step": 16473
    },
    {
      "epoch": 0.48059980162203164,
      "grad_norm": 0.8563688559181248,
      "learning_rate": 5.556083280199154e-06,
      "loss": 0.1523,
      "step": 16474
    },
    {
      "epoch": 0.4806289748526752,
      "grad_norm": 0.8808119500229724,
      "learning_rate": 5.555613774455667e-06,
      "loss": 0.1288,
      "step": 16475
    },
    {
      "epoch": 0.48065814808331875,
      "grad_norm": 0.6808399437320495,
      "learning_rate": 5.555144263751795e-06,
      "loss": 0.1326,
      "step": 16476
    },
    {
      "epoch": 0.4806873213139623,
      "grad_norm": 0.808250933327027,
      "learning_rate": 5.554674748091724e-06,
      "loss": 0.1225,
      "step": 16477
    },
    {
      "epoch": 0.48071649454460585,
      "grad_norm": 0.9609093191262928,
      "learning_rate": 5.5542052274796524e-06,
      "loss": 0.1491,
      "step": 16478
    },
    {
      "epoch": 0.4807456677752494,
      "grad_norm": 0.8257760206838456,
      "learning_rate": 5.5537357019197665e-06,
      "loss": 0.1298,
      "step": 16479
    },
    {
      "epoch": 0.480774841005893,
      "grad_norm": 0.7090715152658524,
      "learning_rate": 5.553266171416261e-06,
      "loss": 0.1389,
      "step": 16480
    },
    {
      "epoch": 0.48080401423653657,
      "grad_norm": 1.0532035124340533,
      "learning_rate": 5.5527966359733274e-06,
      "loss": 0.1422,
      "step": 16481
    },
    {
      "epoch": 0.4808331874671801,
      "grad_norm": 1.0824765111729429,
      "learning_rate": 5.552327095595157e-06,
      "loss": 0.1246,
      "step": 16482
    },
    {
      "epoch": 0.4808623606978237,
      "grad_norm": 0.7181584806931197,
      "learning_rate": 5.551857550285943e-06,
      "loss": 0.1449,
      "step": 16483
    },
    {
      "epoch": 0.48089153392846723,
      "grad_norm": 0.7609579922837822,
      "learning_rate": 5.551388000049875e-06,
      "loss": 0.1285,
      "step": 16484
    },
    {
      "epoch": 0.4809207071591108,
      "grad_norm": 0.9222075582398839,
      "learning_rate": 5.550918444891148e-06,
      "loss": 0.1275,
      "step": 16485
    },
    {
      "epoch": 0.48094988038975434,
      "grad_norm": 0.9596579643201208,
      "learning_rate": 5.550448884813952e-06,
      "loss": 0.1431,
      "step": 16486
    },
    {
      "epoch": 0.48097905362039794,
      "grad_norm": 0.7015797071947066,
      "learning_rate": 5.54997931982248e-06,
      "loss": 0.1381,
      "step": 16487
    },
    {
      "epoch": 0.4810082268510415,
      "grad_norm": 0.925869743484793,
      "learning_rate": 5.5495097499209235e-06,
      "loss": 0.158,
      "step": 16488
    },
    {
      "epoch": 0.48103740008168505,
      "grad_norm": 0.8716894146482536,
      "learning_rate": 5.549040175113476e-06,
      "loss": 0.1067,
      "step": 16489
    },
    {
      "epoch": 0.4810665733123286,
      "grad_norm": 0.7537976244261037,
      "learning_rate": 5.548570595404328e-06,
      "loss": 0.1431,
      "step": 16490
    },
    {
      "epoch": 0.48109574654297216,
      "grad_norm": 0.9190646876419771,
      "learning_rate": 5.548101010797673e-06,
      "loss": 0.1516,
      "step": 16491
    },
    {
      "epoch": 0.4811249197736157,
      "grad_norm": 0.852624910553048,
      "learning_rate": 5.547631421297704e-06,
      "loss": 0.1157,
      "step": 16492
    },
    {
      "epoch": 0.48115409300425926,
      "grad_norm": 0.718147437544926,
      "learning_rate": 5.5471618269086125e-06,
      "loss": 0.1214,
      "step": 16493
    },
    {
      "epoch": 0.4811832662349029,
      "grad_norm": 0.8552282376593306,
      "learning_rate": 5.546692227634588e-06,
      "loss": 0.1487,
      "step": 16494
    },
    {
      "epoch": 0.4812124394655464,
      "grad_norm": 0.9300653222118747,
      "learning_rate": 5.546222623479829e-06,
      "loss": 0.144,
      "step": 16495
    },
    {
      "epoch": 0.48124161269619,
      "grad_norm": 0.856634662299573,
      "learning_rate": 5.545753014448523e-06,
      "loss": 0.1705,
      "step": 16496
    },
    {
      "epoch": 0.48127078592683353,
      "grad_norm": 0.653131023475819,
      "learning_rate": 5.545283400544864e-06,
      "loss": 0.1387,
      "step": 16497
    },
    {
      "epoch": 0.4812999591574771,
      "grad_norm": 0.7570686133259324,
      "learning_rate": 5.544813781773046e-06,
      "loss": 0.1388,
      "step": 16498
    },
    {
      "epoch": 0.48132913238812064,
      "grad_norm": 0.9521446827960226,
      "learning_rate": 5.544344158137262e-06,
      "loss": 0.1181,
      "step": 16499
    },
    {
      "epoch": 0.48135830561876425,
      "grad_norm": 1.8571987751703076,
      "learning_rate": 5.543874529641701e-06,
      "loss": 0.1173,
      "step": 16500
    },
    {
      "epoch": 0.4813874788494078,
      "grad_norm": 0.6692509290438796,
      "learning_rate": 5.543404896290559e-06,
      "loss": 0.1118,
      "step": 16501
    },
    {
      "epoch": 0.48141665208005135,
      "grad_norm": 0.8703717468671059,
      "learning_rate": 5.542935258088027e-06,
      "loss": 0.1481,
      "step": 16502
    },
    {
      "epoch": 0.4814458253106949,
      "grad_norm": 0.95446062042861,
      "learning_rate": 5.5424656150383e-06,
      "loss": 0.1519,
      "step": 16503
    },
    {
      "epoch": 0.48147499854133846,
      "grad_norm": 0.599736865610296,
      "learning_rate": 5.5419959671455685e-06,
      "loss": 0.1345,
      "step": 16504
    },
    {
      "epoch": 0.481504171771982,
      "grad_norm": 0.9137106562508852,
      "learning_rate": 5.541526314414025e-06,
      "loss": 0.1264,
      "step": 16505
    },
    {
      "epoch": 0.48153334500262557,
      "grad_norm": 0.8354033497039266,
      "learning_rate": 5.541056656847866e-06,
      "loss": 0.118,
      "step": 16506
    },
    {
      "epoch": 0.4815625182332692,
      "grad_norm": 0.9765969251457027,
      "learning_rate": 5.540586994451281e-06,
      "loss": 0.1303,
      "step": 16507
    },
    {
      "epoch": 0.48159169146391273,
      "grad_norm": 0.8333721286224998,
      "learning_rate": 5.540117327228467e-06,
      "loss": 0.1842,
      "step": 16508
    },
    {
      "epoch": 0.4816208646945563,
      "grad_norm": 0.8190719756353236,
      "learning_rate": 5.5396476551836105e-06,
      "loss": 0.1287,
      "step": 16509
    },
    {
      "epoch": 0.48165003792519984,
      "grad_norm": 0.8662930610716588,
      "learning_rate": 5.539177978320912e-06,
      "loss": 0.131,
      "step": 16510
    },
    {
      "epoch": 0.4816792111558434,
      "grad_norm": 0.778730400033978,
      "learning_rate": 5.53870829664456e-06,
      "loss": 0.122,
      "step": 16511
    },
    {
      "epoch": 0.48170838438648694,
      "grad_norm": 0.7945330640920626,
      "learning_rate": 5.538238610158747e-06,
      "loss": 0.1513,
      "step": 16512
    },
    {
      "epoch": 0.4817375576171305,
      "grad_norm": 0.8547319196600227,
      "learning_rate": 5.537768918867672e-06,
      "loss": 0.125,
      "step": 16513
    },
    {
      "epoch": 0.4817667308477741,
      "grad_norm": 0.9733174618029062,
      "learning_rate": 5.537299222775522e-06,
      "loss": 0.1372,
      "step": 16514
    },
    {
      "epoch": 0.48179590407841766,
      "grad_norm": 0.5822618893424835,
      "learning_rate": 5.536829521886493e-06,
      "loss": 0.1038,
      "step": 16515
    },
    {
      "epoch": 0.4818250773090612,
      "grad_norm": 0.8427107813694704,
      "learning_rate": 5.536359816204779e-06,
      "loss": 0.1315,
      "step": 16516
    },
    {
      "epoch": 0.48185425053970476,
      "grad_norm": 0.995674395919126,
      "learning_rate": 5.535890105734571e-06,
      "loss": 0.1547,
      "step": 16517
    },
    {
      "epoch": 0.4818834237703483,
      "grad_norm": 0.8359358884480976,
      "learning_rate": 5.535420390480065e-06,
      "loss": 0.1496,
      "step": 16518
    },
    {
      "epoch": 0.48191259700099187,
      "grad_norm": 0.794006514004653,
      "learning_rate": 5.534950670445453e-06,
      "loss": 0.14,
      "step": 16519
    },
    {
      "epoch": 0.4819417702316354,
      "grad_norm": 0.9871956366899922,
      "learning_rate": 5.53448094563493e-06,
      "loss": 0.13,
      "step": 16520
    },
    {
      "epoch": 0.48197094346227903,
      "grad_norm": 0.9647479521573002,
      "learning_rate": 5.534011216052688e-06,
      "loss": 0.1238,
      "step": 16521
    },
    {
      "epoch": 0.4820001166929226,
      "grad_norm": 0.7474789922402135,
      "learning_rate": 5.533541481702922e-06,
      "loss": 0.1419,
      "step": 16522
    },
    {
      "epoch": 0.48202928992356614,
      "grad_norm": 0.890849777723087,
      "learning_rate": 5.533071742589826e-06,
      "loss": 0.1217,
      "step": 16523
    },
    {
      "epoch": 0.4820584631542097,
      "grad_norm": 0.9112210921801089,
      "learning_rate": 5.53260199871759e-06,
      "loss": 0.1435,
      "step": 16524
    },
    {
      "epoch": 0.48208763638485325,
      "grad_norm": 0.8242540047625232,
      "learning_rate": 5.532132250090414e-06,
      "loss": 0.1357,
      "step": 16525
    },
    {
      "epoch": 0.4821168096154968,
      "grad_norm": 1.0408837781474771,
      "learning_rate": 5.531662496712485e-06,
      "loss": 0.148,
      "step": 16526
    },
    {
      "epoch": 0.4821459828461404,
      "grad_norm": 0.808382138381165,
      "learning_rate": 5.531192738588e-06,
      "loss": 0.142,
      "step": 16527
    },
    {
      "epoch": 0.48217515607678396,
      "grad_norm": 0.985210178693923,
      "learning_rate": 5.5307229757211565e-06,
      "loss": 0.1392,
      "step": 16528
    },
    {
      "epoch": 0.4822043293074275,
      "grad_norm": 0.733025222941078,
      "learning_rate": 5.530253208116143e-06,
      "loss": 0.125,
      "step": 16529
    },
    {
      "epoch": 0.48223350253807107,
      "grad_norm": 1.0001245540675354,
      "learning_rate": 5.529783435777155e-06,
      "loss": 0.1289,
      "step": 16530
    },
    {
      "epoch": 0.4822626757687146,
      "grad_norm": 0.7763369947500759,
      "learning_rate": 5.529313658708387e-06,
      "loss": 0.1572,
      "step": 16531
    },
    {
      "epoch": 0.4822918489993582,
      "grad_norm": 0.7732278672191945,
      "learning_rate": 5.528843876914034e-06,
      "loss": 0.1258,
      "step": 16532
    },
    {
      "epoch": 0.48232102223000173,
      "grad_norm": 0.7179032294173053,
      "learning_rate": 5.5283740903982886e-06,
      "loss": 0.121,
      "step": 16533
    },
    {
      "epoch": 0.48235019546064534,
      "grad_norm": 0.8505302722884381,
      "learning_rate": 5.5279042991653456e-06,
      "loss": 0.141,
      "step": 16534
    },
    {
      "epoch": 0.4823793686912889,
      "grad_norm": 0.8750178937222148,
      "learning_rate": 5.527434503219398e-06,
      "loss": 0.1379,
      "step": 16535
    },
    {
      "epoch": 0.48240854192193244,
      "grad_norm": 0.7382532797087719,
      "learning_rate": 5.526964702564642e-06,
      "loss": 0.1319,
      "step": 16536
    },
    {
      "epoch": 0.482437715152576,
      "grad_norm": 0.8259270765480796,
      "learning_rate": 5.52649489720527e-06,
      "loss": 0.1567,
      "step": 16537
    },
    {
      "epoch": 0.48246688838321955,
      "grad_norm": 0.7733304721672678,
      "learning_rate": 5.526025087145479e-06,
      "loss": 0.1617,
      "step": 16538
    },
    {
      "epoch": 0.4824960616138631,
      "grad_norm": 0.7787072372182898,
      "learning_rate": 5.52555527238946e-06,
      "loss": 0.1351,
      "step": 16539
    },
    {
      "epoch": 0.48252523484450666,
      "grad_norm": 0.7745084992915291,
      "learning_rate": 5.525085452941411e-06,
      "loss": 0.1699,
      "step": 16540
    },
    {
      "epoch": 0.48255440807515027,
      "grad_norm": 0.8384407509541995,
      "learning_rate": 5.524615628805523e-06,
      "loss": 0.1418,
      "step": 16541
    },
    {
      "epoch": 0.4825835813057938,
      "grad_norm": 0.8909645378113125,
      "learning_rate": 5.52414579998599e-06,
      "loss": 0.1348,
      "step": 16542
    },
    {
      "epoch": 0.48261275453643737,
      "grad_norm": 0.7095590406783882,
      "learning_rate": 5.523675966487012e-06,
      "loss": 0.1429,
      "step": 16543
    },
    {
      "epoch": 0.4826419277670809,
      "grad_norm": 0.9085360330182193,
      "learning_rate": 5.523206128312778e-06,
      "loss": 0.1515,
      "step": 16544
    },
    {
      "epoch": 0.4826711009977245,
      "grad_norm": 1.1421048007952443,
      "learning_rate": 5.522736285467485e-06,
      "loss": 0.1489,
      "step": 16545
    },
    {
      "epoch": 0.48270027422836803,
      "grad_norm": 1.1052497560282095,
      "learning_rate": 5.522266437955327e-06,
      "loss": 0.1619,
      "step": 16546
    },
    {
      "epoch": 0.4827294474590116,
      "grad_norm": 0.9155191091801037,
      "learning_rate": 5.5217965857804985e-06,
      "loss": 0.1381,
      "step": 16547
    },
    {
      "epoch": 0.4827586206896552,
      "grad_norm": 0.8365915686320591,
      "learning_rate": 5.521326728947195e-06,
      "loss": 0.1277,
      "step": 16548
    },
    {
      "epoch": 0.48278779392029875,
      "grad_norm": 0.7858846985914822,
      "learning_rate": 5.520856867459612e-06,
      "loss": 0.1422,
      "step": 16549
    },
    {
      "epoch": 0.4828169671509423,
      "grad_norm": 1.3055734495893565,
      "learning_rate": 5.520387001321941e-06,
      "loss": 0.1592,
      "step": 16550
    },
    {
      "epoch": 0.48284614038158585,
      "grad_norm": 1.008156375466698,
      "learning_rate": 5.519917130538381e-06,
      "loss": 0.1275,
      "step": 16551
    },
    {
      "epoch": 0.4828753136122294,
      "grad_norm": 0.77627103000323,
      "learning_rate": 5.519447255113124e-06,
      "loss": 0.1171,
      "step": 16552
    },
    {
      "epoch": 0.48290448684287296,
      "grad_norm": 1.060123575100618,
      "learning_rate": 5.518977375050369e-06,
      "loss": 0.143,
      "step": 16553
    },
    {
      "epoch": 0.48293366007351657,
      "grad_norm": 1.021153263148013,
      "learning_rate": 5.518507490354303e-06,
      "loss": 0.125,
      "step": 16554
    },
    {
      "epoch": 0.4829628333041601,
      "grad_norm": 0.882098373933168,
      "learning_rate": 5.518037601029129e-06,
      "loss": 0.1284,
      "step": 16555
    },
    {
      "epoch": 0.4829920065348037,
      "grad_norm": 0.7901226421111385,
      "learning_rate": 5.517567707079038e-06,
      "loss": 0.119,
      "step": 16556
    },
    {
      "epoch": 0.48302117976544723,
      "grad_norm": 0.920247587865245,
      "learning_rate": 5.517097808508225e-06,
      "loss": 0.1302,
      "step": 16557
    },
    {
      "epoch": 0.4830503529960908,
      "grad_norm": 1.1793749720336413,
      "learning_rate": 5.516627905320888e-06,
      "loss": 0.1553,
      "step": 16558
    },
    {
      "epoch": 0.48307952622673433,
      "grad_norm": 1.0796224648889627,
      "learning_rate": 5.51615799752122e-06,
      "loss": 0.1401,
      "step": 16559
    },
    {
      "epoch": 0.4831086994573779,
      "grad_norm": 1.5646483378237794,
      "learning_rate": 5.515688085113416e-06,
      "loss": 0.1337,
      "step": 16560
    },
    {
      "epoch": 0.4831378726880215,
      "grad_norm": 0.9313085361047224,
      "learning_rate": 5.515218168101673e-06,
      "loss": 0.1381,
      "step": 16561
    },
    {
      "epoch": 0.48316704591866505,
      "grad_norm": 0.9453004475932669,
      "learning_rate": 5.514748246490184e-06,
      "loss": 0.134,
      "step": 16562
    },
    {
      "epoch": 0.4831962191493086,
      "grad_norm": 0.9656647349989894,
      "learning_rate": 5.514278320283145e-06,
      "loss": 0.1097,
      "step": 16563
    },
    {
      "epoch": 0.48322539237995216,
      "grad_norm": 0.8275094301706116,
      "learning_rate": 5.513808389484754e-06,
      "loss": 0.1214,
      "step": 16564
    },
    {
      "epoch": 0.4832545656105957,
      "grad_norm": 0.9536694338675364,
      "learning_rate": 5.513338454099203e-06,
      "loss": 0.1673,
      "step": 16565
    },
    {
      "epoch": 0.48328373884123926,
      "grad_norm": 1.42692864189217,
      "learning_rate": 5.512868514130688e-06,
      "loss": 0.1681,
      "step": 16566
    },
    {
      "epoch": 0.4833129120718828,
      "grad_norm": 0.9921845063117846,
      "learning_rate": 5.512398569583407e-06,
      "loss": 0.1436,
      "step": 16567
    },
    {
      "epoch": 0.4833420853025264,
      "grad_norm": 1.0185852627053067,
      "learning_rate": 5.511928620461554e-06,
      "loss": 0.1104,
      "step": 16568
    },
    {
      "epoch": 0.48337125853317,
      "grad_norm": 0.5818103833703272,
      "learning_rate": 5.511458666769323e-06,
      "loss": 0.1224,
      "step": 16569
    },
    {
      "epoch": 0.48340043176381353,
      "grad_norm": 0.9644499244791919,
      "learning_rate": 5.510988708510913e-06,
      "loss": 0.1225,
      "step": 16570
    },
    {
      "epoch": 0.4834296049944571,
      "grad_norm": 1.0213132183185323,
      "learning_rate": 5.510518745690516e-06,
      "loss": 0.1303,
      "step": 16571
    },
    {
      "epoch": 0.48345877822510064,
      "grad_norm": 0.9379995454353172,
      "learning_rate": 5.510048778312329e-06,
      "loss": 0.1279,
      "step": 16572
    },
    {
      "epoch": 0.4834879514557442,
      "grad_norm": 0.790624273438235,
      "learning_rate": 5.509578806380551e-06,
      "loss": 0.1366,
      "step": 16573
    },
    {
      "epoch": 0.48351712468638774,
      "grad_norm": 1.0198715632730508,
      "learning_rate": 5.509108829899374e-06,
      "loss": 0.149,
      "step": 16574
    },
    {
      "epoch": 0.48354629791703135,
      "grad_norm": 0.8444673125265063,
      "learning_rate": 5.508638848872993e-06,
      "loss": 0.1399,
      "step": 16575
    },
    {
      "epoch": 0.4835754711476749,
      "grad_norm": 0.9265335583361651,
      "learning_rate": 5.508168863305607e-06,
      "loss": 0.157,
      "step": 16576
    },
    {
      "epoch": 0.48360464437831846,
      "grad_norm": 0.8774750551063015,
      "learning_rate": 5.507698873201411e-06,
      "loss": 0.143,
      "step": 16577
    },
    {
      "epoch": 0.483633817608962,
      "grad_norm": 0.756139460175104,
      "learning_rate": 5.507228878564601e-06,
      "loss": 0.1346,
      "step": 16578
    },
    {
      "epoch": 0.48366299083960557,
      "grad_norm": 0.7341509530257865,
      "learning_rate": 5.506758879399372e-06,
      "loss": 0.1398,
      "step": 16579
    },
    {
      "epoch": 0.4836921640702491,
      "grad_norm": 0.7510997357780191,
      "learning_rate": 5.506288875709921e-06,
      "loss": 0.1311,
      "step": 16580
    },
    {
      "epoch": 0.4837213373008927,
      "grad_norm": 0.888161077874037,
      "learning_rate": 5.505818867500443e-06,
      "loss": 0.1388,
      "step": 16581
    },
    {
      "epoch": 0.4837505105315363,
      "grad_norm": 0.8020689619674765,
      "learning_rate": 5.505348854775135e-06,
      "loss": 0.1351,
      "step": 16582
    },
    {
      "epoch": 0.48377968376217984,
      "grad_norm": 0.7977449131759667,
      "learning_rate": 5.504878837538195e-06,
      "loss": 0.1375,
      "step": 16583
    },
    {
      "epoch": 0.4838088569928234,
      "grad_norm": 0.9988269198867377,
      "learning_rate": 5.504408815793816e-06,
      "loss": 0.1439,
      "step": 16584
    },
    {
      "epoch": 0.48383803022346694,
      "grad_norm": 1.086336562718429,
      "learning_rate": 5.5039387895461956e-06,
      "loss": 0.1632,
      "step": 16585
    },
    {
      "epoch": 0.4838672034541105,
      "grad_norm": 0.8145764289106557,
      "learning_rate": 5.503468758799529e-06,
      "loss": 0.1324,
      "step": 16586
    },
    {
      "epoch": 0.48389637668475405,
      "grad_norm": 0.7863050471615618,
      "learning_rate": 5.502998723558014e-06,
      "loss": 0.1448,
      "step": 16587
    },
    {
      "epoch": 0.48392554991539766,
      "grad_norm": 0.7478906050284334,
      "learning_rate": 5.502528683825847e-06,
      "loss": 0.1493,
      "step": 16588
    },
    {
      "epoch": 0.4839547231460412,
      "grad_norm": 0.7873330482565611,
      "learning_rate": 5.502058639607224e-06,
      "loss": 0.1205,
      "step": 16589
    },
    {
      "epoch": 0.48398389637668476,
      "grad_norm": 0.8620669116617288,
      "learning_rate": 5.501588590906342e-06,
      "loss": 0.1182,
      "step": 16590
    },
    {
      "epoch": 0.4840130696073283,
      "grad_norm": 0.6864588399225512,
      "learning_rate": 5.501118537727394e-06,
      "loss": 0.1367,
      "step": 16591
    },
    {
      "epoch": 0.48404224283797187,
      "grad_norm": 0.8999941015068548,
      "learning_rate": 5.500648480074582e-06,
      "loss": 0.135,
      "step": 16592
    },
    {
      "epoch": 0.4840714160686154,
      "grad_norm": 0.7647912021712191,
      "learning_rate": 5.500178417952099e-06,
      "loss": 0.1207,
      "step": 16593
    },
    {
      "epoch": 0.484100589299259,
      "grad_norm": 0.7569057693438344,
      "learning_rate": 5.499708351364142e-06,
      "loss": 0.1476,
      "step": 16594
    },
    {
      "epoch": 0.4841297625299026,
      "grad_norm": 0.7492787375397334,
      "learning_rate": 5.499238280314909e-06,
      "loss": 0.1325,
      "step": 16595
    },
    {
      "epoch": 0.48415893576054614,
      "grad_norm": 0.8361052466336646,
      "learning_rate": 5.4987682048085955e-06,
      "loss": 0.1384,
      "step": 16596
    },
    {
      "epoch": 0.4841881089911897,
      "grad_norm": 0.8739257125006146,
      "learning_rate": 5.498298124849399e-06,
      "loss": 0.1612,
      "step": 16597
    },
    {
      "epoch": 0.48421728222183325,
      "grad_norm": 0.7515913562564204,
      "learning_rate": 5.497828040441515e-06,
      "loss": 0.1257,
      "step": 16598
    },
    {
      "epoch": 0.4842464554524768,
      "grad_norm": 0.7851826210252971,
      "learning_rate": 5.497357951589141e-06,
      "loss": 0.1335,
      "step": 16599
    },
    {
      "epoch": 0.48427562868312035,
      "grad_norm": 0.7789635299586463,
      "learning_rate": 5.496887858296475e-06,
      "loss": 0.1279,
      "step": 16600
    },
    {
      "epoch": 0.4843048019137639,
      "grad_norm": 0.9577817455507989,
      "learning_rate": 5.496417760567712e-06,
      "loss": 0.135,
      "step": 16601
    },
    {
      "epoch": 0.4843339751444075,
      "grad_norm": 0.8623397308921672,
      "learning_rate": 5.4959476584070485e-06,
      "loss": 0.1557,
      "step": 16602
    },
    {
      "epoch": 0.48436314837505107,
      "grad_norm": 0.7733125935298913,
      "learning_rate": 5.495477551818685e-06,
      "loss": 0.155,
      "step": 16603
    },
    {
      "epoch": 0.4843923216056946,
      "grad_norm": 0.8175778594508675,
      "learning_rate": 5.495007440806816e-06,
      "loss": 0.1325,
      "step": 16604
    },
    {
      "epoch": 0.4844214948363382,
      "grad_norm": 0.6646761772274842,
      "learning_rate": 5.494537325375637e-06,
      "loss": 0.1307,
      "step": 16605
    },
    {
      "epoch": 0.4844506680669817,
      "grad_norm": 1.02001488818844,
      "learning_rate": 5.494067205529347e-06,
      "loss": 0.1322,
      "step": 16606
    },
    {
      "epoch": 0.4844798412976253,
      "grad_norm": 0.7459854556711544,
      "learning_rate": 5.493597081272144e-06,
      "loss": 0.1298,
      "step": 16607
    },
    {
      "epoch": 0.48450901452826883,
      "grad_norm": 0.8182527768869449,
      "learning_rate": 5.493126952608224e-06,
      "loss": 0.1271,
      "step": 16608
    },
    {
      "epoch": 0.48453818775891244,
      "grad_norm": 0.8523103671767203,
      "learning_rate": 5.4926568195417836e-06,
      "loss": 0.1205,
      "step": 16609
    },
    {
      "epoch": 0.484567360989556,
      "grad_norm": 0.9129672467024895,
      "learning_rate": 5.492186682077021e-06,
      "loss": 0.1747,
      "step": 16610
    },
    {
      "epoch": 0.48459653422019955,
      "grad_norm": 1.0324779786710794,
      "learning_rate": 5.491716540218134e-06,
      "loss": 0.1465,
      "step": 16611
    },
    {
      "epoch": 0.4846257074508431,
      "grad_norm": 0.8173512900407819,
      "learning_rate": 5.491246393969318e-06,
      "loss": 0.1309,
      "step": 16612
    },
    {
      "epoch": 0.48465488068148666,
      "grad_norm": 0.9675424431363238,
      "learning_rate": 5.490776243334773e-06,
      "loss": 0.1291,
      "step": 16613
    },
    {
      "epoch": 0.4846840539121302,
      "grad_norm": 0.7566896229893554,
      "learning_rate": 5.4903060883186934e-06,
      "loss": 0.1301,
      "step": 16614
    },
    {
      "epoch": 0.4847132271427738,
      "grad_norm": 0.7624964269774012,
      "learning_rate": 5.489835928925279e-06,
      "loss": 0.1369,
      "step": 16615
    },
    {
      "epoch": 0.48474240037341737,
      "grad_norm": 0.7919410215877583,
      "learning_rate": 5.489365765158726e-06,
      "loss": 0.1303,
      "step": 16616
    },
    {
      "epoch": 0.4847715736040609,
      "grad_norm": 0.8996018718186514,
      "learning_rate": 5.488895597023231e-06,
      "loss": 0.142,
      "step": 16617
    },
    {
      "epoch": 0.4848007468347045,
      "grad_norm": 0.9470661118490609,
      "learning_rate": 5.488425424522995e-06,
      "loss": 0.1381,
      "step": 16618
    },
    {
      "epoch": 0.48482992006534803,
      "grad_norm": 0.8278474374546028,
      "learning_rate": 5.487955247662212e-06,
      "loss": 0.1495,
      "step": 16619
    },
    {
      "epoch": 0.4848590932959916,
      "grad_norm": 0.8078013253587139,
      "learning_rate": 5.487485066445082e-06,
      "loss": 0.1394,
      "step": 16620
    },
    {
      "epoch": 0.48488826652663514,
      "grad_norm": 0.8290149226693893,
      "learning_rate": 5.487014880875801e-06,
      "loss": 0.1268,
      "step": 16621
    },
    {
      "epoch": 0.48491743975727875,
      "grad_norm": 0.8751408919109652,
      "learning_rate": 5.486544690958566e-06,
      "loss": 0.1264,
      "step": 16622
    },
    {
      "epoch": 0.4849466129879223,
      "grad_norm": 0.6911520271026074,
      "learning_rate": 5.486074496697579e-06,
      "loss": 0.1343,
      "step": 16623
    },
    {
      "epoch": 0.48497578621856585,
      "grad_norm": 1.1207690834165749,
      "learning_rate": 5.4856042980970325e-06,
      "loss": 0.1537,
      "step": 16624
    },
    {
      "epoch": 0.4850049594492094,
      "grad_norm": 0.7882957005018483,
      "learning_rate": 5.485134095161128e-06,
      "loss": 0.1228,
      "step": 16625
    },
    {
      "epoch": 0.48503413267985296,
      "grad_norm": 0.7985098025453309,
      "learning_rate": 5.484663887894062e-06,
      "loss": 0.1512,
      "step": 16626
    },
    {
      "epoch": 0.4850633059104965,
      "grad_norm": 0.9279442523753277,
      "learning_rate": 5.484193676300033e-06,
      "loss": 0.1443,
      "step": 16627
    },
    {
      "epoch": 0.48509247914114006,
      "grad_norm": 0.9967141948785709,
      "learning_rate": 5.483723460383238e-06,
      "loss": 0.1182,
      "step": 16628
    },
    {
      "epoch": 0.4851216523717837,
      "grad_norm": 0.882580770098697,
      "learning_rate": 5.4832532401478745e-06,
      "loss": 0.1435,
      "step": 16629
    },
    {
      "epoch": 0.4851508256024272,
      "grad_norm": 1.3703099992895844,
      "learning_rate": 5.4827830155981435e-06,
      "loss": 0.1263,
      "step": 16630
    },
    {
      "epoch": 0.4851799988330708,
      "grad_norm": 0.8743530719152944,
      "learning_rate": 5.48231278673824e-06,
      "loss": 0.1361,
      "step": 16631
    },
    {
      "epoch": 0.48520917206371433,
      "grad_norm": 1.0134325947634437,
      "learning_rate": 5.481842553572361e-06,
      "loss": 0.1203,
      "step": 16632
    },
    {
      "epoch": 0.4852383452943579,
      "grad_norm": 1.3346784215743988,
      "learning_rate": 5.481372316104709e-06,
      "loss": 0.135,
      "step": 16633
    },
    {
      "epoch": 0.48526751852500144,
      "grad_norm": 0.8632620273812637,
      "learning_rate": 5.480902074339481e-06,
      "loss": 0.1536,
      "step": 16634
    },
    {
      "epoch": 0.485296691755645,
      "grad_norm": 0.7689613898443438,
      "learning_rate": 5.480431828280871e-06,
      "loss": 0.1513,
      "step": 16635
    },
    {
      "epoch": 0.4853258649862886,
      "grad_norm": 1.019997277179937,
      "learning_rate": 5.479961577933082e-06,
      "loss": 0.1447,
      "step": 16636
    },
    {
      "epoch": 0.48535503821693216,
      "grad_norm": 0.7371436515523148,
      "learning_rate": 5.47949132330031e-06,
      "loss": 0.1328,
      "step": 16637
    },
    {
      "epoch": 0.4853842114475757,
      "grad_norm": 0.7720598248913605,
      "learning_rate": 5.479021064386755e-06,
      "loss": 0.1351,
      "step": 16638
    },
    {
      "epoch": 0.48541338467821926,
      "grad_norm": 0.9798615052228705,
      "learning_rate": 5.4785508011966125e-06,
      "loss": 0.1451,
      "step": 16639
    },
    {
      "epoch": 0.4854425579088628,
      "grad_norm": 0.9712722685659123,
      "learning_rate": 5.478080533734085e-06,
      "loss": 0.1551,
      "step": 16640
    },
    {
      "epoch": 0.48547173113950637,
      "grad_norm": 0.8415761999773763,
      "learning_rate": 5.477610262003367e-06,
      "loss": 0.1502,
      "step": 16641
    },
    {
      "epoch": 0.48550090437015,
      "grad_norm": 1.0311450776940383,
      "learning_rate": 5.477139986008658e-06,
      "loss": 0.1296,
      "step": 16642
    },
    {
      "epoch": 0.48553007760079353,
      "grad_norm": 0.8779969680054039,
      "learning_rate": 5.476669705754159e-06,
      "loss": 0.1248,
      "step": 16643
    },
    {
      "epoch": 0.4855592508314371,
      "grad_norm": 0.7249655699170686,
      "learning_rate": 5.476199421244065e-06,
      "loss": 0.1212,
      "step": 16644
    },
    {
      "epoch": 0.48558842406208064,
      "grad_norm": 0.9359321465945644,
      "learning_rate": 5.475729132482578e-06,
      "loss": 0.1283,
      "step": 16645
    },
    {
      "epoch": 0.4856175972927242,
      "grad_norm": 0.8862819070652191,
      "learning_rate": 5.475258839473894e-06,
      "loss": 0.1279,
      "step": 16646
    },
    {
      "epoch": 0.48564677052336774,
      "grad_norm": 0.7651430301032168,
      "learning_rate": 5.474788542222211e-06,
      "loss": 0.1379,
      "step": 16647
    },
    {
      "epoch": 0.4856759437540113,
      "grad_norm": 0.7489622617004734,
      "learning_rate": 5.474318240731732e-06,
      "loss": 0.1548,
      "step": 16648
    },
    {
      "epoch": 0.4857051169846549,
      "grad_norm": 0.7813106362728675,
      "learning_rate": 5.473847935006652e-06,
      "loss": 0.1384,
      "step": 16649
    },
    {
      "epoch": 0.48573429021529846,
      "grad_norm": 0.6767682363045044,
      "learning_rate": 5.4733776250511706e-06,
      "loss": 0.1235,
      "step": 16650
    },
    {
      "epoch": 0.485763463445942,
      "grad_norm": 0.7700170536576478,
      "learning_rate": 5.472907310869486e-06,
      "loss": 0.1212,
      "step": 16651
    },
    {
      "epoch": 0.48579263667658557,
      "grad_norm": 0.9489483314365391,
      "learning_rate": 5.4724369924657985e-06,
      "loss": 0.1288,
      "step": 16652
    },
    {
      "epoch": 0.4858218099072291,
      "grad_norm": 0.7578751245786504,
      "learning_rate": 5.471966669844307e-06,
      "loss": 0.1196,
      "step": 16653
    },
    {
      "epoch": 0.48585098313787267,
      "grad_norm": 0.974441354637069,
      "learning_rate": 5.471496343009208e-06,
      "loss": 0.1391,
      "step": 16654
    },
    {
      "epoch": 0.4858801563685162,
      "grad_norm": 0.7060072836914836,
      "learning_rate": 5.471026011964703e-06,
      "loss": 0.1233,
      "step": 16655
    },
    {
      "epoch": 0.48590932959915983,
      "grad_norm": 0.8272008883339874,
      "learning_rate": 5.47055567671499e-06,
      "loss": 0.1241,
      "step": 16656
    },
    {
      "epoch": 0.4859385028298034,
      "grad_norm": 0.8842349354146034,
      "learning_rate": 5.470085337264268e-06,
      "loss": 0.145,
      "step": 16657
    },
    {
      "epoch": 0.48596767606044694,
      "grad_norm": 0.8066742307684761,
      "learning_rate": 5.469614993616739e-06,
      "loss": 0.1552,
      "step": 16658
    },
    {
      "epoch": 0.4859968492910905,
      "grad_norm": 0.886208217483384,
      "learning_rate": 5.469144645776596e-06,
      "loss": 0.1519,
      "step": 16659
    },
    {
      "epoch": 0.48602602252173405,
      "grad_norm": 0.8917265733195617,
      "learning_rate": 5.468674293748044e-06,
      "loss": 0.1398,
      "step": 16660
    },
    {
      "epoch": 0.4860551957523776,
      "grad_norm": 0.741515680860913,
      "learning_rate": 5.468203937535278e-06,
      "loss": 0.1188,
      "step": 16661
    },
    {
      "epoch": 0.48608436898302115,
      "grad_norm": 0.8224294073227981,
      "learning_rate": 5.467733577142499e-06,
      "loss": 0.1556,
      "step": 16662
    },
    {
      "epoch": 0.48611354221366476,
      "grad_norm": 0.7176844646669913,
      "learning_rate": 5.467263212573908e-06,
      "loss": 0.1336,
      "step": 16663
    },
    {
      "epoch": 0.4861427154443083,
      "grad_norm": 0.70235432623764,
      "learning_rate": 5.466792843833702e-06,
      "loss": 0.1443,
      "step": 16664
    },
    {
      "epoch": 0.48617188867495187,
      "grad_norm": 0.8076954041882298,
      "learning_rate": 5.46632247092608e-06,
      "loss": 0.1226,
      "step": 16665
    },
    {
      "epoch": 0.4862010619055954,
      "grad_norm": 1.0319426461567602,
      "learning_rate": 5.465852093855243e-06,
      "loss": 0.1519,
      "step": 16666
    },
    {
      "epoch": 0.486230235136239,
      "grad_norm": 0.8531808357671388,
      "learning_rate": 5.46538171262539e-06,
      "loss": 0.1281,
      "step": 16667
    },
    {
      "epoch": 0.48625940836688253,
      "grad_norm": 1.0352049857879229,
      "learning_rate": 5.464911327240719e-06,
      "loss": 0.173,
      "step": 16668
    },
    {
      "epoch": 0.48628858159752614,
      "grad_norm": 0.8800928140830611,
      "learning_rate": 5.4644409377054305e-06,
      "loss": 0.1315,
      "step": 16669
    },
    {
      "epoch": 0.4863177548281697,
      "grad_norm": 1.0911421983003886,
      "learning_rate": 5.463970544023726e-06,
      "loss": 0.1626,
      "step": 16670
    },
    {
      "epoch": 0.48634692805881324,
      "grad_norm": 0.8169729470278007,
      "learning_rate": 5.463500146199801e-06,
      "loss": 0.1445,
      "step": 16671
    },
    {
      "epoch": 0.4863761012894568,
      "grad_norm": 0.8859138574134487,
      "learning_rate": 5.46302974423786e-06,
      "loss": 0.1237,
      "step": 16672
    },
    {
      "epoch": 0.48640527452010035,
      "grad_norm": 1.0588198479976483,
      "learning_rate": 5.4625593381421e-06,
      "loss": 0.1349,
      "step": 16673
    },
    {
      "epoch": 0.4864344477507439,
      "grad_norm": 0.9419584188868422,
      "learning_rate": 5.4620889279167174e-06,
      "loss": 0.1444,
      "step": 16674
    },
    {
      "epoch": 0.48646362098138746,
      "grad_norm": 0.8217783306314604,
      "learning_rate": 5.461618513565918e-06,
      "loss": 0.116,
      "step": 16675
    },
    {
      "epoch": 0.48649279421203107,
      "grad_norm": 1.488590067859592,
      "learning_rate": 5.461148095093898e-06,
      "loss": 0.167,
      "step": 16676
    },
    {
      "epoch": 0.4865219674426746,
      "grad_norm": 0.9474291803655491,
      "learning_rate": 5.460677672504856e-06,
      "loss": 0.1475,
      "step": 16677
    },
    {
      "epoch": 0.48655114067331817,
      "grad_norm": 0.860261484254281,
      "learning_rate": 5.460207245802996e-06,
      "loss": 0.1379,
      "step": 16678
    },
    {
      "epoch": 0.4865803139039617,
      "grad_norm": 0.8841663562973775,
      "learning_rate": 5.4597368149925154e-06,
      "loss": 0.1154,
      "step": 16679
    },
    {
      "epoch": 0.4866094871346053,
      "grad_norm": 0.8119690881667214,
      "learning_rate": 5.459266380077614e-06,
      "loss": 0.1674,
      "step": 16680
    },
    {
      "epoch": 0.48663866036524883,
      "grad_norm": 0.8433755193533623,
      "learning_rate": 5.458795941062491e-06,
      "loss": 0.1271,
      "step": 16681
    },
    {
      "epoch": 0.4866678335958924,
      "grad_norm": 0.8857321157738206,
      "learning_rate": 5.458325497951348e-06,
      "loss": 0.111,
      "step": 16682
    },
    {
      "epoch": 0.486697006826536,
      "grad_norm": 0.7623113028603865,
      "learning_rate": 5.457855050748385e-06,
      "loss": 0.1291,
      "step": 16683
    },
    {
      "epoch": 0.48672618005717955,
      "grad_norm": 0.8593488336640536,
      "learning_rate": 5.457384599457801e-06,
      "loss": 0.1273,
      "step": 16684
    },
    {
      "epoch": 0.4867553532878231,
      "grad_norm": 0.7462675008781164,
      "learning_rate": 5.456914144083796e-06,
      "loss": 0.1312,
      "step": 16685
    },
    {
      "epoch": 0.48678452651846665,
      "grad_norm": 0.8076048988420254,
      "learning_rate": 5.456443684630572e-06,
      "loss": 0.1053,
      "step": 16686
    },
    {
      "epoch": 0.4868136997491102,
      "grad_norm": 0.7176795308410698,
      "learning_rate": 5.455973221102325e-06,
      "loss": 0.1236,
      "step": 16687
    },
    {
      "epoch": 0.48684287297975376,
      "grad_norm": 0.7454224420745489,
      "learning_rate": 5.45550275350326e-06,
      "loss": 0.1235,
      "step": 16688
    },
    {
      "epoch": 0.4868720462103973,
      "grad_norm": 0.8958860901901691,
      "learning_rate": 5.455032281837576e-06,
      "loss": 0.1262,
      "step": 16689
    },
    {
      "epoch": 0.4869012194410409,
      "grad_norm": 0.8392124770640628,
      "learning_rate": 5.454561806109472e-06,
      "loss": 0.1413,
      "step": 16690
    },
    {
      "epoch": 0.4869303926716845,
      "grad_norm": 0.9805932324169192,
      "learning_rate": 5.4540913263231466e-06,
      "loss": 0.1362,
      "step": 16691
    },
    {
      "epoch": 0.48695956590232803,
      "grad_norm": 0.8005952637989759,
      "learning_rate": 5.453620842482803e-06,
      "loss": 0.1059,
      "step": 16692
    },
    {
      "epoch": 0.4869887391329716,
      "grad_norm": 1.0117586096771631,
      "learning_rate": 5.4531503545926425e-06,
      "loss": 0.1655,
      "step": 16693
    },
    {
      "epoch": 0.48701791236361514,
      "grad_norm": 0.7899426762886181,
      "learning_rate": 5.452679862656861e-06,
      "loss": 0.1377,
      "step": 16694
    },
    {
      "epoch": 0.4870470855942587,
      "grad_norm": 0.8782222208997967,
      "learning_rate": 5.452209366679665e-06,
      "loss": 0.1287,
      "step": 16695
    },
    {
      "epoch": 0.4870762588249023,
      "grad_norm": 0.798744209565529,
      "learning_rate": 5.45173886666525e-06,
      "loss": 0.1163,
      "step": 16696
    },
    {
      "epoch": 0.48710543205554585,
      "grad_norm": 0.9060525066292064,
      "learning_rate": 5.451268362617819e-06,
      "loss": 0.1184,
      "step": 16697
    },
    {
      "epoch": 0.4871346052861894,
      "grad_norm": 1.039120813857294,
      "learning_rate": 5.4507978545415704e-06,
      "loss": 0.1369,
      "step": 16698
    },
    {
      "epoch": 0.48716377851683296,
      "grad_norm": 0.8210188290349758,
      "learning_rate": 5.450327342440707e-06,
      "loss": 0.1311,
      "step": 16699
    },
    {
      "epoch": 0.4871929517474765,
      "grad_norm": 0.7169642763400298,
      "learning_rate": 5.449856826319429e-06,
      "loss": 0.1185,
      "step": 16700
    },
    {
      "epoch": 0.48722212497812006,
      "grad_norm": 1.2238526355218988,
      "learning_rate": 5.449386306181935e-06,
      "loss": 0.1417,
      "step": 16701
    },
    {
      "epoch": 0.4872512982087636,
      "grad_norm": 1.0169156857525088,
      "learning_rate": 5.448915782032429e-06,
      "loss": 0.144,
      "step": 16702
    },
    {
      "epoch": 0.4872804714394072,
      "grad_norm": 0.8190136502687897,
      "learning_rate": 5.4484452538751095e-06,
      "loss": 0.1022,
      "step": 16703
    },
    {
      "epoch": 0.4873096446700508,
      "grad_norm": 0.9112090248792706,
      "learning_rate": 5.447974721714178e-06,
      "loss": 0.1177,
      "step": 16704
    },
    {
      "epoch": 0.48733881790069433,
      "grad_norm": 0.8967721578079546,
      "learning_rate": 5.447504185553836e-06,
      "loss": 0.1348,
      "step": 16705
    },
    {
      "epoch": 0.4873679911313379,
      "grad_norm": 0.6705471140235855,
      "learning_rate": 5.4470336453982805e-06,
      "loss": 0.1351,
      "step": 16706
    },
    {
      "epoch": 0.48739716436198144,
      "grad_norm": 0.7990269514479517,
      "learning_rate": 5.446563101251718e-06,
      "loss": 0.1338,
      "step": 16707
    },
    {
      "epoch": 0.487426337592625,
      "grad_norm": 0.8389563162533513,
      "learning_rate": 5.446092553118347e-06,
      "loss": 0.1428,
      "step": 16708
    },
    {
      "epoch": 0.48745551082326855,
      "grad_norm": 0.8288790745157593,
      "learning_rate": 5.445622001002366e-06,
      "loss": 0.1309,
      "step": 16709
    },
    {
      "epoch": 0.48748468405391215,
      "grad_norm": 0.8000472215523416,
      "learning_rate": 5.445151444907981e-06,
      "loss": 0.1758,
      "step": 16710
    },
    {
      "epoch": 0.4875138572845557,
      "grad_norm": 0.6687827671769992,
      "learning_rate": 5.444680884839389e-06,
      "loss": 0.1213,
      "step": 16711
    },
    {
      "epoch": 0.48754303051519926,
      "grad_norm": 0.8007923645474778,
      "learning_rate": 5.444210320800791e-06,
      "loss": 0.144,
      "step": 16712
    },
    {
      "epoch": 0.4875722037458428,
      "grad_norm": 1.0185180313992426,
      "learning_rate": 5.44373975279639e-06,
      "loss": 0.1232,
      "step": 16713
    },
    {
      "epoch": 0.48760137697648637,
      "grad_norm": 0.7897839234618698,
      "learning_rate": 5.443269180830386e-06,
      "loss": 0.1238,
      "step": 16714
    },
    {
      "epoch": 0.4876305502071299,
      "grad_norm": 0.7037191563042358,
      "learning_rate": 5.442798604906981e-06,
      "loss": 0.1367,
      "step": 16715
    },
    {
      "epoch": 0.4876597234377735,
      "grad_norm": 0.7153632602948264,
      "learning_rate": 5.442328025030375e-06,
      "loss": 0.1373,
      "step": 16716
    },
    {
      "epoch": 0.4876888966684171,
      "grad_norm": 0.7414456386811991,
      "learning_rate": 5.441857441204772e-06,
      "loss": 0.1297,
      "step": 16717
    },
    {
      "epoch": 0.48771806989906064,
      "grad_norm": 0.9137008098492148,
      "learning_rate": 5.441386853434369e-06,
      "loss": 0.1238,
      "step": 16718
    },
    {
      "epoch": 0.4877472431297042,
      "grad_norm": 0.8122164406109992,
      "learning_rate": 5.4409162617233715e-06,
      "loss": 0.1621,
      "step": 16719
    },
    {
      "epoch": 0.48777641636034774,
      "grad_norm": 0.8667435227430735,
      "learning_rate": 5.440445666075979e-06,
      "loss": 0.1396,
      "step": 16720
    },
    {
      "epoch": 0.4878055895909913,
      "grad_norm": 0.9612628334759031,
      "learning_rate": 5.4399750664963905e-06,
      "loss": 0.143,
      "step": 16721
    },
    {
      "epoch": 0.48783476282163485,
      "grad_norm": 0.7942635529285477,
      "learning_rate": 5.439504462988811e-06,
      "loss": 0.1193,
      "step": 16722
    },
    {
      "epoch": 0.4878639360522784,
      "grad_norm": 0.7533546031416218,
      "learning_rate": 5.4390338555574405e-06,
      "loss": 0.148,
      "step": 16723
    },
    {
      "epoch": 0.487893109282922,
      "grad_norm": 0.8896298213154132,
      "learning_rate": 5.4385632442064795e-06,
      "loss": 0.1386,
      "step": 16724
    },
    {
      "epoch": 0.48792228251356556,
      "grad_norm": 1.0280146935703676,
      "learning_rate": 5.4380926289401325e-06,
      "loss": 0.1152,
      "step": 16725
    },
    {
      "epoch": 0.4879514557442091,
      "grad_norm": 0.7729068892464108,
      "learning_rate": 5.437622009762599e-06,
      "loss": 0.1202,
      "step": 16726
    },
    {
      "epoch": 0.48798062897485267,
      "grad_norm": 0.7668135338946674,
      "learning_rate": 5.437151386678079e-06,
      "loss": 0.1416,
      "step": 16727
    },
    {
      "epoch": 0.4880098022054962,
      "grad_norm": 0.8540119289562168,
      "learning_rate": 5.436680759690777e-06,
      "loss": 0.1401,
      "step": 16728
    },
    {
      "epoch": 0.4880389754361398,
      "grad_norm": 0.7166846145096721,
      "learning_rate": 5.436210128804893e-06,
      "loss": 0.1412,
      "step": 16729
    },
    {
      "epoch": 0.4880681486667834,
      "grad_norm": 1.276487160810915,
      "learning_rate": 5.435739494024629e-06,
      "loss": 0.1233,
      "step": 16730
    },
    {
      "epoch": 0.48809732189742694,
      "grad_norm": 0.9385052299502991,
      "learning_rate": 5.4352688553541865e-06,
      "loss": 0.129,
      "step": 16731
    },
    {
      "epoch": 0.4881264951280705,
      "grad_norm": 0.763211669110271,
      "learning_rate": 5.434798212797767e-06,
      "loss": 0.1092,
      "step": 16732
    },
    {
      "epoch": 0.48815566835871405,
      "grad_norm": 0.799938314499092,
      "learning_rate": 5.434327566359574e-06,
      "loss": 0.1212,
      "step": 16733
    },
    {
      "epoch": 0.4881848415893576,
      "grad_norm": 0.7336706928372488,
      "learning_rate": 5.433856916043808e-06,
      "loss": 0.1716,
      "step": 16734
    },
    {
      "epoch": 0.48821401482000115,
      "grad_norm": 1.068392530537651,
      "learning_rate": 5.433386261854672e-06,
      "loss": 0.1375,
      "step": 16735
    },
    {
      "epoch": 0.4882431880506447,
      "grad_norm": 0.8966242026079092,
      "learning_rate": 5.432915603796365e-06,
      "loss": 0.1372,
      "step": 16736
    },
    {
      "epoch": 0.4882723612812883,
      "grad_norm": 0.7255530041341725,
      "learning_rate": 5.432444941873092e-06,
      "loss": 0.136,
      "step": 16737
    },
    {
      "epoch": 0.48830153451193187,
      "grad_norm": 0.9633534175498903,
      "learning_rate": 5.431974276089054e-06,
      "loss": 0.16,
      "step": 16738
    },
    {
      "epoch": 0.4883307077425754,
      "grad_norm": 0.7816706975664015,
      "learning_rate": 5.431503606448452e-06,
      "loss": 0.1302,
      "step": 16739
    },
    {
      "epoch": 0.488359880973219,
      "grad_norm": 0.7555487024819179,
      "learning_rate": 5.4310329329554885e-06,
      "loss": 0.1185,
      "step": 16740
    },
    {
      "epoch": 0.4883890542038625,
      "grad_norm": 1.1433135594249912,
      "learning_rate": 5.4305622556143675e-06,
      "loss": 0.1382,
      "step": 16741
    },
    {
      "epoch": 0.4884182274345061,
      "grad_norm": 0.7882269824023626,
      "learning_rate": 5.430091574429288e-06,
      "loss": 0.1307,
      "step": 16742
    },
    {
      "epoch": 0.48844740066514963,
      "grad_norm": 0.7279517395647666,
      "learning_rate": 5.429620889404454e-06,
      "loss": 0.1144,
      "step": 16743
    },
    {
      "epoch": 0.48847657389579324,
      "grad_norm": 0.7461429640054047,
      "learning_rate": 5.429150200544068e-06,
      "loss": 0.141,
      "step": 16744
    },
    {
      "epoch": 0.4885057471264368,
      "grad_norm": 0.8471060884395589,
      "learning_rate": 5.42867950785233e-06,
      "loss": 0.1369,
      "step": 16745
    },
    {
      "epoch": 0.48853492035708035,
      "grad_norm": 0.7419462183963568,
      "learning_rate": 5.4282088113334445e-06,
      "loss": 0.1459,
      "step": 16746
    },
    {
      "epoch": 0.4885640935877239,
      "grad_norm": 0.7109712546871579,
      "learning_rate": 5.427738110991613e-06,
      "loss": 0.1427,
      "step": 16747
    },
    {
      "epoch": 0.48859326681836746,
      "grad_norm": 0.7921504567432183,
      "learning_rate": 5.427267406831037e-06,
      "loss": 0.1205,
      "step": 16748
    },
    {
      "epoch": 0.488622440049011,
      "grad_norm": 0.6369093230324466,
      "learning_rate": 5.426796698855921e-06,
      "loss": 0.1268,
      "step": 16749
    },
    {
      "epoch": 0.48865161327965456,
      "grad_norm": 0.7619024885991046,
      "learning_rate": 5.426325987070465e-06,
      "loss": 0.1249,
      "step": 16750
    },
    {
      "epoch": 0.48868078651029817,
      "grad_norm": 1.1428791818286819,
      "learning_rate": 5.425855271478873e-06,
      "loss": 0.1642,
      "step": 16751
    },
    {
      "epoch": 0.4887099597409417,
      "grad_norm": 0.7777704283770847,
      "learning_rate": 5.425384552085346e-06,
      "loss": 0.1353,
      "step": 16752
    },
    {
      "epoch": 0.4887391329715853,
      "grad_norm": 1.350268335225953,
      "learning_rate": 5.424913828894088e-06,
      "loss": 0.1344,
      "step": 16753
    },
    {
      "epoch": 0.48876830620222883,
      "grad_norm": 0.7177870442582034,
      "learning_rate": 5.424443101909299e-06,
      "loss": 0.1344,
      "step": 16754
    },
    {
      "epoch": 0.4887974794328724,
      "grad_norm": 0.902093696664178,
      "learning_rate": 5.423972371135186e-06,
      "loss": 0.1412,
      "step": 16755
    },
    {
      "epoch": 0.48882665266351594,
      "grad_norm": 0.8169954794117326,
      "learning_rate": 5.423501636575947e-06,
      "loss": 0.1297,
      "step": 16756
    },
    {
      "epoch": 0.48885582589415955,
      "grad_norm": 1.1969661614115779,
      "learning_rate": 5.423030898235788e-06,
      "loss": 0.1183,
      "step": 16757
    },
    {
      "epoch": 0.4888849991248031,
      "grad_norm": 0.9097145304324763,
      "learning_rate": 5.422560156118909e-06,
      "loss": 0.1331,
      "step": 16758
    },
    {
      "epoch": 0.48891417235544665,
      "grad_norm": 0.8576091171382718,
      "learning_rate": 5.422089410229514e-06,
      "loss": 0.1325,
      "step": 16759
    },
    {
      "epoch": 0.4889433455860902,
      "grad_norm": 1.542136337139382,
      "learning_rate": 5.421618660571804e-06,
      "loss": 0.132,
      "step": 16760
    },
    {
      "epoch": 0.48897251881673376,
      "grad_norm": 0.9677542293305892,
      "learning_rate": 5.4211479071499866e-06,
      "loss": 0.1554,
      "step": 16761
    },
    {
      "epoch": 0.4890016920473773,
      "grad_norm": 0.802046556672027,
      "learning_rate": 5.420677149968259e-06,
      "loss": 0.1335,
      "step": 16762
    },
    {
      "epoch": 0.48903086527802087,
      "grad_norm": 1.016011147504178,
      "learning_rate": 5.4202063890308265e-06,
      "loss": 0.1268,
      "step": 16763
    },
    {
      "epoch": 0.4890600385086645,
      "grad_norm": 0.6628011873979082,
      "learning_rate": 5.419735624341891e-06,
      "loss": 0.1018,
      "step": 16764
    },
    {
      "epoch": 0.489089211739308,
      "grad_norm": 0.8839723900097081,
      "learning_rate": 5.419264855905658e-06,
      "loss": 0.1494,
      "step": 16765
    },
    {
      "epoch": 0.4891183849699516,
      "grad_norm": 0.7841528464484294,
      "learning_rate": 5.418794083726326e-06,
      "loss": 0.1227,
      "step": 16766
    },
    {
      "epoch": 0.48914755820059513,
      "grad_norm": 0.8447786992580212,
      "learning_rate": 5.418323307808102e-06,
      "loss": 0.1175,
      "step": 16767
    },
    {
      "epoch": 0.4891767314312387,
      "grad_norm": 0.8742525640783552,
      "learning_rate": 5.4178525281551874e-06,
      "loss": 0.1429,
      "step": 16768
    },
    {
      "epoch": 0.48920590466188224,
      "grad_norm": 0.7398236346392426,
      "learning_rate": 5.417381744771783e-06,
      "loss": 0.1237,
      "step": 16769
    },
    {
      "epoch": 0.4892350778925258,
      "grad_norm": 0.8276263046738359,
      "learning_rate": 5.416910957662098e-06,
      "loss": 0.1156,
      "step": 16770
    },
    {
      "epoch": 0.4892642511231694,
      "grad_norm": 0.7211954023857353,
      "learning_rate": 5.416440166830329e-06,
      "loss": 0.1169,
      "step": 16771
    },
    {
      "epoch": 0.48929342435381296,
      "grad_norm": 0.7849124847552079,
      "learning_rate": 5.415969372280682e-06,
      "loss": 0.1609,
      "step": 16772
    },
    {
      "epoch": 0.4893225975844565,
      "grad_norm": 0.8787052180196727,
      "learning_rate": 5.415498574017359e-06,
      "loss": 0.1323,
      "step": 16773
    },
    {
      "epoch": 0.48935177081510006,
      "grad_norm": 0.907574241717095,
      "learning_rate": 5.415027772044565e-06,
      "loss": 0.1255,
      "step": 16774
    },
    {
      "epoch": 0.4893809440457436,
      "grad_norm": 0.7477803076224181,
      "learning_rate": 5.4145569663665024e-06,
      "loss": 0.1382,
      "step": 16775
    },
    {
      "epoch": 0.48941011727638717,
      "grad_norm": 1.0226656916821553,
      "learning_rate": 5.4140861569873725e-06,
      "loss": 0.1402,
      "step": 16776
    },
    {
      "epoch": 0.4894392905070307,
      "grad_norm": 0.7094494368558831,
      "learning_rate": 5.413615343911382e-06,
      "loss": 0.1155,
      "step": 16777
    },
    {
      "epoch": 0.48946846373767433,
      "grad_norm": 0.8783132299893559,
      "learning_rate": 5.413144527142731e-06,
      "loss": 0.1295,
      "step": 16778
    },
    {
      "epoch": 0.4894976369683179,
      "grad_norm": 0.8450164614100708,
      "learning_rate": 5.412673706685625e-06,
      "loss": 0.1384,
      "step": 16779
    },
    {
      "epoch": 0.48952681019896144,
      "grad_norm": 0.6510655045156076,
      "learning_rate": 5.4122028825442675e-06,
      "loss": 0.1248,
      "step": 16780
    },
    {
      "epoch": 0.489555983429605,
      "grad_norm": 0.8934417540642914,
      "learning_rate": 5.411732054722859e-06,
      "loss": 0.1369,
      "step": 16781
    },
    {
      "epoch": 0.48958515666024854,
      "grad_norm": 0.7863068068754623,
      "learning_rate": 5.411261223225605e-06,
      "loss": 0.1254,
      "step": 16782
    },
    {
      "epoch": 0.4896143298908921,
      "grad_norm": 0.8649214694677202,
      "learning_rate": 5.4107903880567125e-06,
      "loss": 0.1287,
      "step": 16783
    },
    {
      "epoch": 0.4896435031215357,
      "grad_norm": 0.8852695558120606,
      "learning_rate": 5.410319549220378e-06,
      "loss": 0.137,
      "step": 16784
    },
    {
      "epoch": 0.48967267635217926,
      "grad_norm": 0.9302665968237415,
      "learning_rate": 5.40984870672081e-06,
      "loss": 0.1249,
      "step": 16785
    },
    {
      "epoch": 0.4897018495828228,
      "grad_norm": 0.8130711936782737,
      "learning_rate": 5.4093778605622105e-06,
      "loss": 0.1267,
      "step": 16786
    },
    {
      "epoch": 0.48973102281346637,
      "grad_norm": 0.8807032602975655,
      "learning_rate": 5.408907010748783e-06,
      "loss": 0.1199,
      "step": 16787
    },
    {
      "epoch": 0.4897601960441099,
      "grad_norm": 1.013407550964662,
      "learning_rate": 5.408436157284731e-06,
      "loss": 0.15,
      "step": 16788
    },
    {
      "epoch": 0.48978936927475347,
      "grad_norm": 0.6923897675745911,
      "learning_rate": 5.40796530017426e-06,
      "loss": 0.1332,
      "step": 16789
    },
    {
      "epoch": 0.489818542505397,
      "grad_norm": 0.858974260629212,
      "learning_rate": 5.40749443942157e-06,
      "loss": 0.1068,
      "step": 16790
    },
    {
      "epoch": 0.48984771573604063,
      "grad_norm": 0.9930197684295528,
      "learning_rate": 5.407023575030867e-06,
      "loss": 0.1548,
      "step": 16791
    },
    {
      "epoch": 0.4898768889666842,
      "grad_norm": 0.8859854839653981,
      "learning_rate": 5.406552707006356e-06,
      "loss": 0.1384,
      "step": 16792
    },
    {
      "epoch": 0.48990606219732774,
      "grad_norm": 0.793575497542596,
      "learning_rate": 5.4060818353522396e-06,
      "loss": 0.1491,
      "step": 16793
    },
    {
      "epoch": 0.4899352354279713,
      "grad_norm": 0.888016965330607,
      "learning_rate": 5.405610960072721e-06,
      "loss": 0.1375,
      "step": 16794
    },
    {
      "epoch": 0.48996440865861485,
      "grad_norm": 0.8028485926525644,
      "learning_rate": 5.405140081172005e-06,
      "loss": 0.132,
      "step": 16795
    },
    {
      "epoch": 0.4899935818892584,
      "grad_norm": 1.093747280561161,
      "learning_rate": 5.4046691986542935e-06,
      "loss": 0.1294,
      "step": 16796
    },
    {
      "epoch": 0.49002275511990195,
      "grad_norm": 0.6918535963370159,
      "learning_rate": 5.404198312523793e-06,
      "loss": 0.1267,
      "step": 16797
    },
    {
      "epoch": 0.49005192835054556,
      "grad_norm": 0.855602394232518,
      "learning_rate": 5.403727422784707e-06,
      "loss": 0.1267,
      "step": 16798
    },
    {
      "epoch": 0.4900811015811891,
      "grad_norm": 0.9963409012648484,
      "learning_rate": 5.403256529441238e-06,
      "loss": 0.1568,
      "step": 16799
    },
    {
      "epoch": 0.49011027481183267,
      "grad_norm": 0.7977064881755879,
      "learning_rate": 5.402785632497593e-06,
      "loss": 0.1421,
      "step": 16800
    },
    {
      "epoch": 0.4901394480424762,
      "grad_norm": 0.829545169257994,
      "learning_rate": 5.4023147319579715e-06,
      "loss": 0.1345,
      "step": 16801
    },
    {
      "epoch": 0.4901686212731198,
      "grad_norm": 0.74539276370791,
      "learning_rate": 5.401843827826581e-06,
      "loss": 0.1177,
      "step": 16802
    },
    {
      "epoch": 0.49019779450376333,
      "grad_norm": 0.8563638907714458,
      "learning_rate": 5.4013729201076245e-06,
      "loss": 0.1329,
      "step": 16803
    },
    {
      "epoch": 0.4902269677344069,
      "grad_norm": 0.7020997028155187,
      "learning_rate": 5.400902008805306e-06,
      "loss": 0.1201,
      "step": 16804
    },
    {
      "epoch": 0.4902561409650505,
      "grad_norm": 0.891919959854982,
      "learning_rate": 5.400431093923832e-06,
      "loss": 0.1277,
      "step": 16805
    },
    {
      "epoch": 0.49028531419569404,
      "grad_norm": 0.9311534081126035,
      "learning_rate": 5.399960175467404e-06,
      "loss": 0.14,
      "step": 16806
    },
    {
      "epoch": 0.4903144874263376,
      "grad_norm": 0.708145217147487,
      "learning_rate": 5.3994892534402255e-06,
      "loss": 0.1306,
      "step": 16807
    },
    {
      "epoch": 0.49034366065698115,
      "grad_norm": 0.6924660526146618,
      "learning_rate": 5.399018327846504e-06,
      "loss": 0.1312,
      "step": 16808
    },
    {
      "epoch": 0.4903728338876247,
      "grad_norm": 0.7908199978659743,
      "learning_rate": 5.398547398690441e-06,
      "loss": 0.1447,
      "step": 16809
    },
    {
      "epoch": 0.49040200711826826,
      "grad_norm": 0.835693832828301,
      "learning_rate": 5.398076465976243e-06,
      "loss": 0.1392,
      "step": 16810
    },
    {
      "epoch": 0.49043118034891187,
      "grad_norm": 1.3199714941969403,
      "learning_rate": 5.397605529708112e-06,
      "loss": 0.1497,
      "step": 16811
    },
    {
      "epoch": 0.4904603535795554,
      "grad_norm": 0.7411579942711041,
      "learning_rate": 5.397134589890255e-06,
      "loss": 0.1098,
      "step": 16812
    },
    {
      "epoch": 0.490489526810199,
      "grad_norm": 0.7547862470630657,
      "learning_rate": 5.396663646526875e-06,
      "loss": 0.1402,
      "step": 16813
    },
    {
      "epoch": 0.4905187000408425,
      "grad_norm": 1.0065925346132234,
      "learning_rate": 5.396192699622176e-06,
      "loss": 0.1241,
      "step": 16814
    },
    {
      "epoch": 0.4905478732714861,
      "grad_norm": 0.9593389319689464,
      "learning_rate": 5.3957217491803645e-06,
      "loss": 0.1293,
      "step": 16815
    },
    {
      "epoch": 0.49057704650212963,
      "grad_norm": 0.7320499446501436,
      "learning_rate": 5.395250795205642e-06,
      "loss": 0.1391,
      "step": 16816
    },
    {
      "epoch": 0.4906062197327732,
      "grad_norm": 0.7895198341381162,
      "learning_rate": 5.394779837702216e-06,
      "loss": 0.1267,
      "step": 16817
    },
    {
      "epoch": 0.4906353929634168,
      "grad_norm": 0.974056337435937,
      "learning_rate": 5.394308876674289e-06,
      "loss": 0.147,
      "step": 16818
    },
    {
      "epoch": 0.49066456619406035,
      "grad_norm": 1.1127042826556375,
      "learning_rate": 5.3938379121260675e-06,
      "loss": 0.1582,
      "step": 16819
    },
    {
      "epoch": 0.4906937394247039,
      "grad_norm": 1.1167645802770252,
      "learning_rate": 5.393366944061754e-06,
      "loss": 0.1413,
      "step": 16820
    },
    {
      "epoch": 0.49072291265534745,
      "grad_norm": 0.7937400312126629,
      "learning_rate": 5.392895972485555e-06,
      "loss": 0.1402,
      "step": 16821
    },
    {
      "epoch": 0.490752085885991,
      "grad_norm": 0.9375515907959356,
      "learning_rate": 5.392424997401674e-06,
      "loss": 0.1468,
      "step": 16822
    },
    {
      "epoch": 0.49078125911663456,
      "grad_norm": 1.277586920616488,
      "learning_rate": 5.391954018814316e-06,
      "loss": 0.1282,
      "step": 16823
    },
    {
      "epoch": 0.4908104323472781,
      "grad_norm": 1.0791530362056487,
      "learning_rate": 5.3914830367276875e-06,
      "loss": 0.1292,
      "step": 16824
    },
    {
      "epoch": 0.4908396055779217,
      "grad_norm": 0.7994404509745789,
      "learning_rate": 5.3910120511459915e-06,
      "loss": 0.1317,
      "step": 16825
    },
    {
      "epoch": 0.4908687788085653,
      "grad_norm": 1.107509311312047,
      "learning_rate": 5.390541062073432e-06,
      "loss": 0.1355,
      "step": 16826
    },
    {
      "epoch": 0.49089795203920883,
      "grad_norm": 0.8922484935540654,
      "learning_rate": 5.390070069514216e-06,
      "loss": 0.1478,
      "step": 16827
    },
    {
      "epoch": 0.4909271252698524,
      "grad_norm": 1.2708040797349531,
      "learning_rate": 5.389599073472549e-06,
      "loss": 0.1216,
      "step": 16828
    },
    {
      "epoch": 0.49095629850049594,
      "grad_norm": 1.0097909657546378,
      "learning_rate": 5.389128073952632e-06,
      "loss": 0.1372,
      "step": 16829
    },
    {
      "epoch": 0.4909854717311395,
      "grad_norm": 0.7479600103341646,
      "learning_rate": 5.388657070958674e-06,
      "loss": 0.1562,
      "step": 16830
    },
    {
      "epoch": 0.49101464496178304,
      "grad_norm": 0.7886261775704383,
      "learning_rate": 5.388186064494878e-06,
      "loss": 0.1152,
      "step": 16831
    },
    {
      "epoch": 0.49104381819242665,
      "grad_norm": 1.0528727344581106,
      "learning_rate": 5.3877150545654486e-06,
      "loss": 0.1627,
      "step": 16832
    },
    {
      "epoch": 0.4910729914230702,
      "grad_norm": 0.7441770880074093,
      "learning_rate": 5.387244041174593e-06,
      "loss": 0.1424,
      "step": 16833
    },
    {
      "epoch": 0.49110216465371376,
      "grad_norm": 0.8021533562511127,
      "learning_rate": 5.3867730243265145e-06,
      "loss": 0.1319,
      "step": 16834
    },
    {
      "epoch": 0.4911313378843573,
      "grad_norm": 0.8206389697842621,
      "learning_rate": 5.386302004025419e-06,
      "loss": 0.1234,
      "step": 16835
    },
    {
      "epoch": 0.49116051111500086,
      "grad_norm": 0.9698046102401209,
      "learning_rate": 5.385830980275511e-06,
      "loss": 0.1413,
      "step": 16836
    },
    {
      "epoch": 0.4911896843456444,
      "grad_norm": 0.7334987243422961,
      "learning_rate": 5.385359953080997e-06,
      "loss": 0.1467,
      "step": 16837
    },
    {
      "epoch": 0.49121885757628797,
      "grad_norm": 0.7536300243009635,
      "learning_rate": 5.384888922446081e-06,
      "loss": 0.1278,
      "step": 16838
    },
    {
      "epoch": 0.4912480308069316,
      "grad_norm": 0.7082229165762484,
      "learning_rate": 5.384417888374967e-06,
      "loss": 0.1054,
      "step": 16839
    },
    {
      "epoch": 0.49127720403757513,
      "grad_norm": 0.756425998044131,
      "learning_rate": 5.383946850871865e-06,
      "loss": 0.1342,
      "step": 16840
    },
    {
      "epoch": 0.4913063772682187,
      "grad_norm": 0.8357922972489411,
      "learning_rate": 5.383475809940975e-06,
      "loss": 0.1389,
      "step": 16841
    },
    {
      "epoch": 0.49133555049886224,
      "grad_norm": 0.7504759075158891,
      "learning_rate": 5.383004765586504e-06,
      "loss": 0.1217,
      "step": 16842
    },
    {
      "epoch": 0.4913647237295058,
      "grad_norm": 0.6340031231950363,
      "learning_rate": 5.38253371781266e-06,
      "loss": 0.1251,
      "step": 16843
    },
    {
      "epoch": 0.49139389696014935,
      "grad_norm": 0.8961351187184075,
      "learning_rate": 5.3820626666236445e-06,
      "loss": 0.1392,
      "step": 16844
    },
    {
      "epoch": 0.49142307019079295,
      "grad_norm": 1.1382162980969783,
      "learning_rate": 5.381591612023665e-06,
      "loss": 0.1491,
      "step": 16845
    },
    {
      "epoch": 0.4914522434214365,
      "grad_norm": 1.0942466131232769,
      "learning_rate": 5.381120554016928e-06,
      "loss": 0.1151,
      "step": 16846
    },
    {
      "epoch": 0.49148141665208006,
      "grad_norm": 1.127863183941797,
      "learning_rate": 5.380649492607636e-06,
      "loss": 0.1285,
      "step": 16847
    },
    {
      "epoch": 0.4915105898827236,
      "grad_norm": 0.9259252089644728,
      "learning_rate": 5.380178427799997e-06,
      "loss": 0.1481,
      "step": 16848
    },
    {
      "epoch": 0.49153976311336717,
      "grad_norm": 0.8838430115790725,
      "learning_rate": 5.379707359598215e-06,
      "loss": 0.1654,
      "step": 16849
    },
    {
      "epoch": 0.4915689363440107,
      "grad_norm": 0.9487736874910573,
      "learning_rate": 5.379236288006497e-06,
      "loss": 0.1268,
      "step": 16850
    },
    {
      "epoch": 0.4915981095746543,
      "grad_norm": 0.983283064243009,
      "learning_rate": 5.378765213029048e-06,
      "loss": 0.1246,
      "step": 16851
    },
    {
      "epoch": 0.4916272828052979,
      "grad_norm": 0.7664008709616098,
      "learning_rate": 5.378294134670073e-06,
      "loss": 0.1375,
      "step": 16852
    },
    {
      "epoch": 0.49165645603594144,
      "grad_norm": 0.716134218745774,
      "learning_rate": 5.377823052933779e-06,
      "loss": 0.1057,
      "step": 16853
    },
    {
      "epoch": 0.491685629266585,
      "grad_norm": 0.7962058399740612,
      "learning_rate": 5.37735196782437e-06,
      "loss": 0.1333,
      "step": 16854
    },
    {
      "epoch": 0.49171480249722854,
      "grad_norm": 0.6437605306220976,
      "learning_rate": 5.376880879346054e-06,
      "loss": 0.1196,
      "step": 16855
    },
    {
      "epoch": 0.4917439757278721,
      "grad_norm": 1.111434136573803,
      "learning_rate": 5.376409787503034e-06,
      "loss": 0.1416,
      "step": 16856
    },
    {
      "epoch": 0.49177314895851565,
      "grad_norm": 0.7347356291363897,
      "learning_rate": 5.375938692299518e-06,
      "loss": 0.1116,
      "step": 16857
    },
    {
      "epoch": 0.4918023221891592,
      "grad_norm": 0.7997691791500734,
      "learning_rate": 5.375467593739713e-06,
      "loss": 0.1431,
      "step": 16858
    },
    {
      "epoch": 0.4918314954198028,
      "grad_norm": 0.6407723668348148,
      "learning_rate": 5.37499649182782e-06,
      "loss": 0.1247,
      "step": 16859
    },
    {
      "epoch": 0.49186066865044636,
      "grad_norm": 0.9153042964347438,
      "learning_rate": 5.37452538656805e-06,
      "loss": 0.1565,
      "step": 16860
    },
    {
      "epoch": 0.4918898418810899,
      "grad_norm": 0.7070742348713276,
      "learning_rate": 5.374054277964605e-06,
      "loss": 0.1345,
      "step": 16861
    },
    {
      "epoch": 0.49191901511173347,
      "grad_norm": 0.7474405370120811,
      "learning_rate": 5.373583166021694e-06,
      "loss": 0.1345,
      "step": 16862
    },
    {
      "epoch": 0.491948188342377,
      "grad_norm": 0.975596061924209,
      "learning_rate": 5.373112050743522e-06,
      "loss": 0.1201,
      "step": 16863
    },
    {
      "epoch": 0.4919773615730206,
      "grad_norm": 0.8097861084213097,
      "learning_rate": 5.3726409321342935e-06,
      "loss": 0.1245,
      "step": 16864
    },
    {
      "epoch": 0.49200653480366413,
      "grad_norm": 0.792038897417954,
      "learning_rate": 5.372169810198215e-06,
      "loss": 0.1503,
      "step": 16865
    },
    {
      "epoch": 0.49203570803430774,
      "grad_norm": 0.9405911575915272,
      "learning_rate": 5.371698684939495e-06,
      "loss": 0.1412,
      "step": 16866
    },
    {
      "epoch": 0.4920648812649513,
      "grad_norm": 0.7992514729447099,
      "learning_rate": 5.371227556362337e-06,
      "loss": 0.129,
      "step": 16867
    },
    {
      "epoch": 0.49209405449559485,
      "grad_norm": 0.6542417261430244,
      "learning_rate": 5.370756424470948e-06,
      "loss": 0.114,
      "step": 16868
    },
    {
      "epoch": 0.4921232277262384,
      "grad_norm": 1.163943728663297,
      "learning_rate": 5.370285289269535e-06,
      "loss": 0.1287,
      "step": 16869
    },
    {
      "epoch": 0.49215240095688195,
      "grad_norm": 0.9083702708069268,
      "learning_rate": 5.369814150762304e-06,
      "loss": 0.1268,
      "step": 16870
    },
    {
      "epoch": 0.4921815741875255,
      "grad_norm": 0.6994081948116999,
      "learning_rate": 5.369343008953458e-06,
      "loss": 0.1042,
      "step": 16871
    },
    {
      "epoch": 0.4922107474181691,
      "grad_norm": 0.8741433725163471,
      "learning_rate": 5.368871863847207e-06,
      "loss": 0.1171,
      "step": 16872
    },
    {
      "epoch": 0.49223992064881267,
      "grad_norm": 1.2603996912826851,
      "learning_rate": 5.368400715447757e-06,
      "loss": 0.127,
      "step": 16873
    },
    {
      "epoch": 0.4922690938794562,
      "grad_norm": 0.7652508192028512,
      "learning_rate": 5.367929563759311e-06,
      "loss": 0.1147,
      "step": 16874
    },
    {
      "epoch": 0.4922982671100998,
      "grad_norm": 1.1648667864675992,
      "learning_rate": 5.36745840878608e-06,
      "loss": 0.131,
      "step": 16875
    },
    {
      "epoch": 0.4923274403407433,
      "grad_norm": 1.0416485002240208,
      "learning_rate": 5.366987250532266e-06,
      "loss": 0.1326,
      "step": 16876
    },
    {
      "epoch": 0.4923566135713869,
      "grad_norm": 0.8748062798784032,
      "learning_rate": 5.36651608900208e-06,
      "loss": 0.1522,
      "step": 16877
    },
    {
      "epoch": 0.49238578680203043,
      "grad_norm": 0.8007945767833401,
      "learning_rate": 5.366044924199725e-06,
      "loss": 0.1286,
      "step": 16878
    },
    {
      "epoch": 0.49241496003267404,
      "grad_norm": 0.8482147388626303,
      "learning_rate": 5.365573756129406e-06,
      "loss": 0.1408,
      "step": 16879
    },
    {
      "epoch": 0.4924441332633176,
      "grad_norm": 0.967644841804688,
      "learning_rate": 5.365102584795334e-06,
      "loss": 0.1285,
      "step": 16880
    },
    {
      "epoch": 0.49247330649396115,
      "grad_norm": 0.8995162241486194,
      "learning_rate": 5.364631410201713e-06,
      "loss": 0.1251,
      "step": 16881
    },
    {
      "epoch": 0.4925024797246047,
      "grad_norm": 0.8415421441677097,
      "learning_rate": 5.364160232352749e-06,
      "loss": 0.1156,
      "step": 16882
    },
    {
      "epoch": 0.49253165295524826,
      "grad_norm": 1.323591753648415,
      "learning_rate": 5.363689051252651e-06,
      "loss": 0.1447,
      "step": 16883
    },
    {
      "epoch": 0.4925608261858918,
      "grad_norm": 0.9582912145627567,
      "learning_rate": 5.363217866905622e-06,
      "loss": 0.15,
      "step": 16884
    },
    {
      "epoch": 0.49258999941653536,
      "grad_norm": 1.0225435535678218,
      "learning_rate": 5.362746679315872e-06,
      "loss": 0.1415,
      "step": 16885
    },
    {
      "epoch": 0.49261917264717897,
      "grad_norm": 0.9494187747788593,
      "learning_rate": 5.362275488487606e-06,
      "loss": 0.1691,
      "step": 16886
    },
    {
      "epoch": 0.4926483458778225,
      "grad_norm": 0.8646619701138887,
      "learning_rate": 5.361804294425031e-06,
      "loss": 0.1336,
      "step": 16887
    },
    {
      "epoch": 0.4926775191084661,
      "grad_norm": 1.014770621302196,
      "learning_rate": 5.361333097132353e-06,
      "loss": 0.1152,
      "step": 16888
    },
    {
      "epoch": 0.49270669233910963,
      "grad_norm": 0.8472959034349283,
      "learning_rate": 5.360861896613779e-06,
      "loss": 0.1257,
      "step": 16889
    },
    {
      "epoch": 0.4927358655697532,
      "grad_norm": 0.9647487125299989,
      "learning_rate": 5.360390692873518e-06,
      "loss": 0.1325,
      "step": 16890
    },
    {
      "epoch": 0.49276503880039674,
      "grad_norm": 0.9419671715760097,
      "learning_rate": 5.3599194859157735e-06,
      "loss": 0.1395,
      "step": 16891
    },
    {
      "epoch": 0.4927942120310403,
      "grad_norm": 0.7140778222154549,
      "learning_rate": 5.359448275744755e-06,
      "loss": 0.1413,
      "step": 16892
    },
    {
      "epoch": 0.4928233852616839,
      "grad_norm": 0.7519770736931981,
      "learning_rate": 5.358977062364666e-06,
      "loss": 0.1369,
      "step": 16893
    },
    {
      "epoch": 0.49285255849232745,
      "grad_norm": 0.8270270530223526,
      "learning_rate": 5.358505845779717e-06,
      "loss": 0.1402,
      "step": 16894
    },
    {
      "epoch": 0.492881731722971,
      "grad_norm": 0.7663369407857998,
      "learning_rate": 5.358034625994113e-06,
      "loss": 0.1514,
      "step": 16895
    },
    {
      "epoch": 0.49291090495361456,
      "grad_norm": 0.5966561564754324,
      "learning_rate": 5.357563403012061e-06,
      "loss": 0.1077,
      "step": 16896
    },
    {
      "epoch": 0.4929400781842581,
      "grad_norm": 0.7748324464328683,
      "learning_rate": 5.357092176837769e-06,
      "loss": 0.1258,
      "step": 16897
    },
    {
      "epoch": 0.49296925141490167,
      "grad_norm": 0.8450404382189073,
      "learning_rate": 5.3566209474754425e-06,
      "loss": 0.1422,
      "step": 16898
    },
    {
      "epoch": 0.4929984246455453,
      "grad_norm": 0.8087937305077653,
      "learning_rate": 5.356149714929291e-06,
      "loss": 0.1176,
      "step": 16899
    },
    {
      "epoch": 0.49302759787618883,
      "grad_norm": 0.7299956387668687,
      "learning_rate": 5.355678479203518e-06,
      "loss": 0.1003,
      "step": 16900
    },
    {
      "epoch": 0.4930567711068324,
      "grad_norm": 0.817078900020243,
      "learning_rate": 5.355207240302332e-06,
      "loss": 0.1476,
      "step": 16901
    },
    {
      "epoch": 0.49308594433747593,
      "grad_norm": 0.7672277827493014,
      "learning_rate": 5.354735998229943e-06,
      "loss": 0.1209,
      "step": 16902
    },
    {
      "epoch": 0.4931151175681195,
      "grad_norm": 0.7747143025480175,
      "learning_rate": 5.354264752990553e-06,
      "loss": 0.1388,
      "step": 16903
    },
    {
      "epoch": 0.49314429079876304,
      "grad_norm": 0.8244094445952639,
      "learning_rate": 5.353793504588374e-06,
      "loss": 0.1287,
      "step": 16904
    },
    {
      "epoch": 0.4931734640294066,
      "grad_norm": 0.8267489229302534,
      "learning_rate": 5.353322253027611e-06,
      "loss": 0.1216,
      "step": 16905
    },
    {
      "epoch": 0.4932026372600502,
      "grad_norm": 1.0800355112043782,
      "learning_rate": 5.352850998312469e-06,
      "loss": 0.1241,
      "step": 16906
    },
    {
      "epoch": 0.49323181049069376,
      "grad_norm": 0.8004539660085748,
      "learning_rate": 5.35237974044716e-06,
      "loss": 0.1577,
      "step": 16907
    },
    {
      "epoch": 0.4932609837213373,
      "grad_norm": 0.9291822275348421,
      "learning_rate": 5.351908479435888e-06,
      "loss": 0.1247,
      "step": 16908
    },
    {
      "epoch": 0.49329015695198086,
      "grad_norm": 0.5909709718039137,
      "learning_rate": 5.35143721528286e-06,
      "loss": 0.1318,
      "step": 16909
    },
    {
      "epoch": 0.4933193301826244,
      "grad_norm": 0.8182605410853571,
      "learning_rate": 5.350965947992286e-06,
      "loss": 0.1248,
      "step": 16910
    },
    {
      "epoch": 0.49334850341326797,
      "grad_norm": 1.060945591380358,
      "learning_rate": 5.350494677568371e-06,
      "loss": 0.1479,
      "step": 16911
    },
    {
      "epoch": 0.4933776766439115,
      "grad_norm": 0.7781852485598358,
      "learning_rate": 5.350023404015323e-06,
      "loss": 0.1208,
      "step": 16912
    },
    {
      "epoch": 0.49340684987455513,
      "grad_norm": 0.897173862130397,
      "learning_rate": 5.3495521273373504e-06,
      "loss": 0.1429,
      "step": 16913
    },
    {
      "epoch": 0.4934360231051987,
      "grad_norm": 0.902893401242971,
      "learning_rate": 5.349080847538659e-06,
      "loss": 0.1225,
      "step": 16914
    },
    {
      "epoch": 0.49346519633584224,
      "grad_norm": 1.0301174153869221,
      "learning_rate": 5.348609564623458e-06,
      "loss": 0.136,
      "step": 16915
    },
    {
      "epoch": 0.4934943695664858,
      "grad_norm": 0.8247224773588024,
      "learning_rate": 5.3481382785959536e-06,
      "loss": 0.1451,
      "step": 16916
    },
    {
      "epoch": 0.49352354279712934,
      "grad_norm": 0.7293546646236825,
      "learning_rate": 5.347666989460353e-06,
      "loss": 0.1282,
      "step": 16917
    },
    {
      "epoch": 0.4935527160277729,
      "grad_norm": 0.8207541196798567,
      "learning_rate": 5.347195697220865e-06,
      "loss": 0.13,
      "step": 16918
    },
    {
      "epoch": 0.49358188925841645,
      "grad_norm": 0.8288687812887132,
      "learning_rate": 5.346724401881697e-06,
      "loss": 0.1341,
      "step": 16919
    },
    {
      "epoch": 0.49361106248906006,
      "grad_norm": 0.8174263177651826,
      "learning_rate": 5.346253103447058e-06,
      "loss": 0.124,
      "step": 16920
    },
    {
      "epoch": 0.4936402357197036,
      "grad_norm": 0.7612552679145954,
      "learning_rate": 5.34578180192115e-06,
      "loss": 0.1357,
      "step": 16921
    },
    {
      "epoch": 0.49366940895034717,
      "grad_norm": 0.7953009668694182,
      "learning_rate": 5.3453104973081884e-06,
      "loss": 0.1326,
      "step": 16922
    },
    {
      "epoch": 0.4936985821809907,
      "grad_norm": 0.6520145509816958,
      "learning_rate": 5.344839189612375e-06,
      "loss": 0.1283,
      "step": 16923
    },
    {
      "epoch": 0.4937277554116343,
      "grad_norm": 0.7295993419099278,
      "learning_rate": 5.3443678788379195e-06,
      "loss": 0.1388,
      "step": 16924
    },
    {
      "epoch": 0.4937569286422778,
      "grad_norm": 0.7165709618403117,
      "learning_rate": 5.343896564989031e-06,
      "loss": 0.1231,
      "step": 16925
    },
    {
      "epoch": 0.49378610187292143,
      "grad_norm": 0.7751106265930863,
      "learning_rate": 5.3434252480699154e-06,
      "loss": 0.1229,
      "step": 16926
    },
    {
      "epoch": 0.493815275103565,
      "grad_norm": 0.872227451919471,
      "learning_rate": 5.3429539280847805e-06,
      "loss": 0.1312,
      "step": 16927
    },
    {
      "epoch": 0.49384444833420854,
      "grad_norm": 0.8025408400035391,
      "learning_rate": 5.3424826050378365e-06,
      "loss": 0.1433,
      "step": 16928
    },
    {
      "epoch": 0.4938736215648521,
      "grad_norm": 0.7289180481653799,
      "learning_rate": 5.3420112789332875e-06,
      "loss": 0.1432,
      "step": 16929
    },
    {
      "epoch": 0.49390279479549565,
      "grad_norm": 0.9134284659715837,
      "learning_rate": 5.341539949775345e-06,
      "loss": 0.1296,
      "step": 16930
    },
    {
      "epoch": 0.4939319680261392,
      "grad_norm": 0.8583498510757264,
      "learning_rate": 5.341068617568215e-06,
      "loss": 0.1486,
      "step": 16931
    },
    {
      "epoch": 0.49396114125678275,
      "grad_norm": 0.8446957679303193,
      "learning_rate": 5.340597282316105e-06,
      "loss": 0.1292,
      "step": 16932
    },
    {
      "epoch": 0.49399031448742636,
      "grad_norm": 0.9423209681327167,
      "learning_rate": 5.340125944023226e-06,
      "loss": 0.1138,
      "step": 16933
    },
    {
      "epoch": 0.4940194877180699,
      "grad_norm": 1.06153794678825,
      "learning_rate": 5.339654602693781e-06,
      "loss": 0.1379,
      "step": 16934
    },
    {
      "epoch": 0.49404866094871347,
      "grad_norm": 1.039660574620419,
      "learning_rate": 5.339183258331983e-06,
      "loss": 0.118,
      "step": 16935
    },
    {
      "epoch": 0.494077834179357,
      "grad_norm": 0.9709315235538598,
      "learning_rate": 5.338711910942036e-06,
      "loss": 0.1238,
      "step": 16936
    },
    {
      "epoch": 0.4941070074100006,
      "grad_norm": 1.068794208306307,
      "learning_rate": 5.338240560528152e-06,
      "loss": 0.1545,
      "step": 16937
    },
    {
      "epoch": 0.49413618064064413,
      "grad_norm": 0.9457659930370551,
      "learning_rate": 5.337769207094535e-06,
      "loss": 0.1354,
      "step": 16938
    },
    {
      "epoch": 0.4941653538712877,
      "grad_norm": 1.0797531203555015,
      "learning_rate": 5.337297850645395e-06,
      "loss": 0.1494,
      "step": 16939
    },
    {
      "epoch": 0.4941945271019313,
      "grad_norm": 0.8296869908372474,
      "learning_rate": 5.336826491184943e-06,
      "loss": 0.1304,
      "step": 16940
    },
    {
      "epoch": 0.49422370033257484,
      "grad_norm": 0.9105037236438693,
      "learning_rate": 5.336355128717382e-06,
      "loss": 0.1407,
      "step": 16941
    },
    {
      "epoch": 0.4942528735632184,
      "grad_norm": 1.0522028001869357,
      "learning_rate": 5.335883763246924e-06,
      "loss": 0.1433,
      "step": 16942
    },
    {
      "epoch": 0.49428204679386195,
      "grad_norm": 0.9423373371445346,
      "learning_rate": 5.335412394777775e-06,
      "loss": 0.158,
      "step": 16943
    },
    {
      "epoch": 0.4943112200245055,
      "grad_norm": 0.9413246687873199,
      "learning_rate": 5.334941023314145e-06,
      "loss": 0.1376,
      "step": 16944
    },
    {
      "epoch": 0.49434039325514906,
      "grad_norm": 0.8574886498954905,
      "learning_rate": 5.334469648860241e-06,
      "loss": 0.1611,
      "step": 16945
    },
    {
      "epoch": 0.4943695664857926,
      "grad_norm": 1.4212075923177745,
      "learning_rate": 5.333998271420272e-06,
      "loss": 0.1194,
      "step": 16946
    },
    {
      "epoch": 0.4943987397164362,
      "grad_norm": 0.9849675768205657,
      "learning_rate": 5.333526890998446e-06,
      "loss": 0.1482,
      "step": 16947
    },
    {
      "epoch": 0.4944279129470798,
      "grad_norm": 1.124274564036261,
      "learning_rate": 5.333055507598971e-06,
      "loss": 0.1518,
      "step": 16948
    },
    {
      "epoch": 0.4944570861777233,
      "grad_norm": 0.8075400744977255,
      "learning_rate": 5.332584121226057e-06,
      "loss": 0.1295,
      "step": 16949
    },
    {
      "epoch": 0.4944862594083669,
      "grad_norm": 1.019394901701943,
      "learning_rate": 5.332112731883912e-06,
      "loss": 0.148,
      "step": 16950
    },
    {
      "epoch": 0.49451543263901043,
      "grad_norm": 0.9657542465799582,
      "learning_rate": 5.3316413395767405e-06,
      "loss": 0.135,
      "step": 16951
    },
    {
      "epoch": 0.494544605869654,
      "grad_norm": 0.7827734704996198,
      "learning_rate": 5.331169944308758e-06,
      "loss": 0.1252,
      "step": 16952
    },
    {
      "epoch": 0.49457377910029754,
      "grad_norm": 0.8204546421277574,
      "learning_rate": 5.330698546084167e-06,
      "loss": 0.1169,
      "step": 16953
    },
    {
      "epoch": 0.49460295233094115,
      "grad_norm": 0.7606126873012299,
      "learning_rate": 5.330227144907179e-06,
      "loss": 0.1158,
      "step": 16954
    },
    {
      "epoch": 0.4946321255615847,
      "grad_norm": 0.8045375654519857,
      "learning_rate": 5.329755740782003e-06,
      "loss": 0.1444,
      "step": 16955
    },
    {
      "epoch": 0.49466129879222825,
      "grad_norm": 1.0550458230531574,
      "learning_rate": 5.329284333712845e-06,
      "loss": 0.1402,
      "step": 16956
    },
    {
      "epoch": 0.4946904720228718,
      "grad_norm": 1.0603512795656371,
      "learning_rate": 5.328812923703917e-06,
      "loss": 0.1318,
      "step": 16957
    },
    {
      "epoch": 0.49471964525351536,
      "grad_norm": 0.5955430122008735,
      "learning_rate": 5.328341510759423e-06,
      "loss": 0.1174,
      "step": 16958
    },
    {
      "epoch": 0.4947488184841589,
      "grad_norm": 0.8398390339422245,
      "learning_rate": 5.327870094883576e-06,
      "loss": 0.1311,
      "step": 16959
    },
    {
      "epoch": 0.4947779917148025,
      "grad_norm": 1.1291956634852869,
      "learning_rate": 5.327398676080583e-06,
      "loss": 0.1309,
      "step": 16960
    },
    {
      "epoch": 0.4948071649454461,
      "grad_norm": 1.0516302051600936,
      "learning_rate": 5.3269272543546524e-06,
      "loss": 0.162,
      "step": 16961
    },
    {
      "epoch": 0.49483633817608963,
      "grad_norm": 0.7976172681207964,
      "learning_rate": 5.3264558297099935e-06,
      "loss": 0.1362,
      "step": 16962
    },
    {
      "epoch": 0.4948655114067332,
      "grad_norm": 0.8877281464621941,
      "learning_rate": 5.3259844021508145e-06,
      "loss": 0.1407,
      "step": 16963
    },
    {
      "epoch": 0.49489468463737674,
      "grad_norm": 0.7885792964523457,
      "learning_rate": 5.325512971681325e-06,
      "loss": 0.1184,
      "step": 16964
    },
    {
      "epoch": 0.4949238578680203,
      "grad_norm": 0.8746196809306633,
      "learning_rate": 5.325041538305734e-06,
      "loss": 0.1411,
      "step": 16965
    },
    {
      "epoch": 0.49495303109866384,
      "grad_norm": 0.8485752718426207,
      "learning_rate": 5.324570102028248e-06,
      "loss": 0.161,
      "step": 16966
    },
    {
      "epoch": 0.49498220432930745,
      "grad_norm": 0.9043789611546547,
      "learning_rate": 5.324098662853079e-06,
      "loss": 0.1362,
      "step": 16967
    },
    {
      "epoch": 0.495011377559951,
      "grad_norm": 0.8806146882536067,
      "learning_rate": 5.323627220784434e-06,
      "loss": 0.1237,
      "step": 16968
    },
    {
      "epoch": 0.49504055079059456,
      "grad_norm": 0.8910099585858573,
      "learning_rate": 5.3231557758265215e-06,
      "loss": 0.1273,
      "step": 16969
    },
    {
      "epoch": 0.4950697240212381,
      "grad_norm": 0.691561728123018,
      "learning_rate": 5.322684327983554e-06,
      "loss": 0.1314,
      "step": 16970
    },
    {
      "epoch": 0.49509889725188166,
      "grad_norm": 0.7960444633372479,
      "learning_rate": 5.3222128772597355e-06,
      "loss": 0.1522,
      "step": 16971
    },
    {
      "epoch": 0.4951280704825252,
      "grad_norm": 0.9629556327673812,
      "learning_rate": 5.321741423659279e-06,
      "loss": 0.1416,
      "step": 16972
    },
    {
      "epoch": 0.49515724371316877,
      "grad_norm": 0.7045362984911328,
      "learning_rate": 5.321269967186391e-06,
      "loss": 0.1217,
      "step": 16973
    },
    {
      "epoch": 0.4951864169438124,
      "grad_norm": 0.742526438025149,
      "learning_rate": 5.320798507845281e-06,
      "loss": 0.1239,
      "step": 16974
    },
    {
      "epoch": 0.49521559017445593,
      "grad_norm": 0.7586203050264122,
      "learning_rate": 5.320327045640159e-06,
      "loss": 0.1191,
      "step": 16975
    },
    {
      "epoch": 0.4952447634050995,
      "grad_norm": 1.1978140322051605,
      "learning_rate": 5.319855580575233e-06,
      "loss": 0.1335,
      "step": 16976
    },
    {
      "epoch": 0.49527393663574304,
      "grad_norm": 0.8545638705041876,
      "learning_rate": 5.319384112654713e-06,
      "loss": 0.1341,
      "step": 16977
    },
    {
      "epoch": 0.4953031098663866,
      "grad_norm": 0.8353652565520191,
      "learning_rate": 5.318912641882809e-06,
      "loss": 0.1414,
      "step": 16978
    },
    {
      "epoch": 0.49533228309703015,
      "grad_norm": 0.7460690829875166,
      "learning_rate": 5.318441168263727e-06,
      "loss": 0.1399,
      "step": 16979
    },
    {
      "epoch": 0.4953614563276737,
      "grad_norm": 0.8707347033873954,
      "learning_rate": 5.317969691801681e-06,
      "loss": 0.1239,
      "step": 16980
    },
    {
      "epoch": 0.4953906295583173,
      "grad_norm": 0.8130012177841824,
      "learning_rate": 5.3174982125008745e-06,
      "loss": 0.1288,
      "step": 16981
    },
    {
      "epoch": 0.49541980278896086,
      "grad_norm": 0.8792166690507628,
      "learning_rate": 5.317026730365523e-06,
      "loss": 0.1311,
      "step": 16982
    },
    {
      "epoch": 0.4954489760196044,
      "grad_norm": 0.8265517968595797,
      "learning_rate": 5.31655524539983e-06,
      "loss": 0.1121,
      "step": 16983
    },
    {
      "epoch": 0.49547814925024797,
      "grad_norm": 0.6604723081097706,
      "learning_rate": 5.316083757608007e-06,
      "loss": 0.1495,
      "step": 16984
    },
    {
      "epoch": 0.4955073224808915,
      "grad_norm": 0.947929565131271,
      "learning_rate": 5.3156122669942665e-06,
      "loss": 0.1392,
      "step": 16985
    },
    {
      "epoch": 0.4955364957115351,
      "grad_norm": 0.8239737182678122,
      "learning_rate": 5.3151407735628125e-06,
      "loss": 0.1251,
      "step": 16986
    },
    {
      "epoch": 0.4955656689421787,
      "grad_norm": 1.0205966496438035,
      "learning_rate": 5.314669277317858e-06,
      "loss": 0.1379,
      "step": 16987
    },
    {
      "epoch": 0.49559484217282224,
      "grad_norm": 0.7194479213053977,
      "learning_rate": 5.314197778263611e-06,
      "loss": 0.1248,
      "step": 16988
    },
    {
      "epoch": 0.4956240154034658,
      "grad_norm": 1.1724484690623163,
      "learning_rate": 5.313726276404281e-06,
      "loss": 0.1354,
      "step": 16989
    },
    {
      "epoch": 0.49565318863410934,
      "grad_norm": 1.2049310165373772,
      "learning_rate": 5.313254771744079e-06,
      "loss": 0.1389,
      "step": 16990
    },
    {
      "epoch": 0.4956823618647529,
      "grad_norm": 0.700208096745436,
      "learning_rate": 5.3127832642872116e-06,
      "loss": 0.1218,
      "step": 16991
    },
    {
      "epoch": 0.49571153509539645,
      "grad_norm": 0.8755550331867326,
      "learning_rate": 5.3123117540378895e-06,
      "loss": 0.1475,
      "step": 16992
    },
    {
      "epoch": 0.49574070832604,
      "grad_norm": 0.9273784916004748,
      "learning_rate": 5.311840241000323e-06,
      "loss": 0.1163,
      "step": 16993
    },
    {
      "epoch": 0.4957698815566836,
      "grad_norm": 0.9852128356665886,
      "learning_rate": 5.311368725178723e-06,
      "loss": 0.1165,
      "step": 16994
    },
    {
      "epoch": 0.49579905478732716,
      "grad_norm": 0.8172719672545941,
      "learning_rate": 5.310897206577297e-06,
      "loss": 0.1326,
      "step": 16995
    },
    {
      "epoch": 0.4958282280179707,
      "grad_norm": 0.8514774838543385,
      "learning_rate": 5.310425685200252e-06,
      "loss": 0.1381,
      "step": 16996
    },
    {
      "epoch": 0.49585740124861427,
      "grad_norm": 0.8554652494690688,
      "learning_rate": 5.3099541610518046e-06,
      "loss": 0.1313,
      "step": 16997
    },
    {
      "epoch": 0.4958865744792578,
      "grad_norm": 0.9276791529462564,
      "learning_rate": 5.309482634136158e-06,
      "loss": 0.1306,
      "step": 16998
    },
    {
      "epoch": 0.4959157477099014,
      "grad_norm": 1.096328000628962,
      "learning_rate": 5.309011104457524e-06,
      "loss": 0.1463,
      "step": 16999
    },
    {
      "epoch": 0.49594492094054493,
      "grad_norm": 0.8096521912923862,
      "learning_rate": 5.3085395720201145e-06,
      "loss": 0.1124,
      "step": 17000
    },
    {
      "epoch": 0.49597409417118854,
      "grad_norm": 0.8665191497573511,
      "learning_rate": 5.308068036828137e-06,
      "loss": 0.1177,
      "step": 17001
    },
    {
      "epoch": 0.4960032674018321,
      "grad_norm": 1.0612210196688785,
      "learning_rate": 5.3075964988857995e-06,
      "loss": 0.1255,
      "step": 17002
    },
    {
      "epoch": 0.49603244063247565,
      "grad_norm": 1.0490245799527906,
      "learning_rate": 5.307124958197316e-06,
      "loss": 0.1372,
      "step": 17003
    },
    {
      "epoch": 0.4960616138631192,
      "grad_norm": 0.7495009661428474,
      "learning_rate": 5.306653414766894e-06,
      "loss": 0.138,
      "step": 17004
    },
    {
      "epoch": 0.49609078709376275,
      "grad_norm": 0.7803730397205744,
      "learning_rate": 5.306181868598742e-06,
      "loss": 0.1332,
      "step": 17005
    },
    {
      "epoch": 0.4961199603244063,
      "grad_norm": 1.0449426882750648,
      "learning_rate": 5.305710319697073e-06,
      "loss": 0.1249,
      "step": 17006
    },
    {
      "epoch": 0.49614913355504986,
      "grad_norm": 0.8090543934474392,
      "learning_rate": 5.3052387680660945e-06,
      "loss": 0.1164,
      "step": 17007
    },
    {
      "epoch": 0.49617830678569347,
      "grad_norm": 0.8323158832759161,
      "learning_rate": 5.304767213710017e-06,
      "loss": 0.1237,
      "step": 17008
    },
    {
      "epoch": 0.496207480016337,
      "grad_norm": 1.0577348037359635,
      "learning_rate": 5.304295656633051e-06,
      "loss": 0.1461,
      "step": 17009
    },
    {
      "epoch": 0.4962366532469806,
      "grad_norm": 0.9646279194872153,
      "learning_rate": 5.303824096839407e-06,
      "loss": 0.1394,
      "step": 17010
    },
    {
      "epoch": 0.49626582647762413,
      "grad_norm": 1.0613858884412792,
      "learning_rate": 5.303352534333291e-06,
      "loss": 0.1401,
      "step": 17011
    },
    {
      "epoch": 0.4962949997082677,
      "grad_norm": 1.0609249588291445,
      "learning_rate": 5.30288096911892e-06,
      "loss": 0.1209,
      "step": 17012
    },
    {
      "epoch": 0.49632417293891123,
      "grad_norm": 6.964887388903303,
      "learning_rate": 5.302409401200497e-06,
      "loss": 0.1429,
      "step": 17013
    },
    {
      "epoch": 0.49635334616955484,
      "grad_norm": 0.9194167504080002,
      "learning_rate": 5.301937830582235e-06,
      "loss": 0.1268,
      "step": 17014
    },
    {
      "epoch": 0.4963825194001984,
      "grad_norm": 0.8043827704686469,
      "learning_rate": 5.301466257268346e-06,
      "loss": 0.1389,
      "step": 17015
    },
    {
      "epoch": 0.49641169263084195,
      "grad_norm": 0.9819150658530034,
      "learning_rate": 5.300994681263038e-06,
      "loss": 0.1403,
      "step": 17016
    },
    {
      "epoch": 0.4964408658614855,
      "grad_norm": 0.7903323502090318,
      "learning_rate": 5.3005231025705195e-06,
      "loss": 0.1651,
      "step": 17017
    },
    {
      "epoch": 0.49647003909212906,
      "grad_norm": 0.8828242039006373,
      "learning_rate": 5.300051521195004e-06,
      "loss": 0.1167,
      "step": 17018
    },
    {
      "epoch": 0.4964992123227726,
      "grad_norm": 0.9808924137735425,
      "learning_rate": 5.299579937140699e-06,
      "loss": 0.1254,
      "step": 17019
    },
    {
      "epoch": 0.49652838555341616,
      "grad_norm": 0.8858867618245164,
      "learning_rate": 5.299108350411817e-06,
      "loss": 0.1466,
      "step": 17020
    },
    {
      "epoch": 0.49655755878405977,
      "grad_norm": 0.7861078381162786,
      "learning_rate": 5.298636761012567e-06,
      "loss": 0.1251,
      "step": 17021
    },
    {
      "epoch": 0.4965867320147033,
      "grad_norm": 0.9829584641559423,
      "learning_rate": 5.298165168947158e-06,
      "loss": 0.1346,
      "step": 17022
    },
    {
      "epoch": 0.4966159052453469,
      "grad_norm": 1.0145337046355396,
      "learning_rate": 5.297693574219803e-06,
      "loss": 0.1492,
      "step": 17023
    },
    {
      "epoch": 0.49664507847599043,
      "grad_norm": 0.9425629232566466,
      "learning_rate": 5.29722197683471e-06,
      "loss": 0.1304,
      "step": 17024
    },
    {
      "epoch": 0.496674251706634,
      "grad_norm": 0.9778512536435043,
      "learning_rate": 5.296750376796092e-06,
      "loss": 0.1197,
      "step": 17025
    },
    {
      "epoch": 0.49670342493727754,
      "grad_norm": 0.8531332874595413,
      "learning_rate": 5.296278774108154e-06,
      "loss": 0.142,
      "step": 17026
    },
    {
      "epoch": 0.4967325981679211,
      "grad_norm": 0.9996692545825431,
      "learning_rate": 5.295807168775113e-06,
      "loss": 0.1341,
      "step": 17027
    },
    {
      "epoch": 0.4967617713985647,
      "grad_norm": 0.8120564563782053,
      "learning_rate": 5.295335560801175e-06,
      "loss": 0.1363,
      "step": 17028
    },
    {
      "epoch": 0.49679094462920825,
      "grad_norm": 0.7709973211616562,
      "learning_rate": 5.294863950190551e-06,
      "loss": 0.1373,
      "step": 17029
    },
    {
      "epoch": 0.4968201178598518,
      "grad_norm": 0.9389352398385432,
      "learning_rate": 5.294392336947454e-06,
      "loss": 0.1236,
      "step": 17030
    },
    {
      "epoch": 0.49684929109049536,
      "grad_norm": 0.8049604165370342,
      "learning_rate": 5.29392072107609e-06,
      "loss": 0.1339,
      "step": 17031
    },
    {
      "epoch": 0.4968784643211389,
      "grad_norm": 0.885934958894433,
      "learning_rate": 5.293449102580674e-06,
      "loss": 0.1257,
      "step": 17032
    },
    {
      "epoch": 0.49690763755178247,
      "grad_norm": 0.7998677185861385,
      "learning_rate": 5.292977481465413e-06,
      "loss": 0.1383,
      "step": 17033
    },
    {
      "epoch": 0.496936810782426,
      "grad_norm": 0.9177388784485104,
      "learning_rate": 5.292505857734519e-06,
      "loss": 0.1202,
      "step": 17034
    },
    {
      "epoch": 0.49696598401306963,
      "grad_norm": 0.900434529486035,
      "learning_rate": 5.292034231392204e-06,
      "loss": 0.1467,
      "step": 17035
    },
    {
      "epoch": 0.4969951572437132,
      "grad_norm": 0.9666660790595324,
      "learning_rate": 5.2915626024426755e-06,
      "loss": 0.1487,
      "step": 17036
    },
    {
      "epoch": 0.49702433047435673,
      "grad_norm": 0.9791511838331073,
      "learning_rate": 5.291090970890146e-06,
      "loss": 0.1446,
      "step": 17037
    },
    {
      "epoch": 0.4970535037050003,
      "grad_norm": 0.852126728562316,
      "learning_rate": 5.290619336738826e-06,
      "loss": 0.1504,
      "step": 17038
    },
    {
      "epoch": 0.49708267693564384,
      "grad_norm": 0.9246946420062684,
      "learning_rate": 5.290147699992926e-06,
      "loss": 0.1421,
      "step": 17039
    },
    {
      "epoch": 0.4971118501662874,
      "grad_norm": 0.8825312853080106,
      "learning_rate": 5.2896760606566576e-06,
      "loss": 0.1361,
      "step": 17040
    },
    {
      "epoch": 0.497141023396931,
      "grad_norm": 0.7683017994188074,
      "learning_rate": 5.289204418734228e-06,
      "loss": 0.1192,
      "step": 17041
    },
    {
      "epoch": 0.49717019662757456,
      "grad_norm": 0.745517946160222,
      "learning_rate": 5.288732774229853e-06,
      "loss": 0.1235,
      "step": 17042
    },
    {
      "epoch": 0.4971993698582181,
      "grad_norm": 0.7803483486077735,
      "learning_rate": 5.28826112714774e-06,
      "loss": 0.1141,
      "step": 17043
    },
    {
      "epoch": 0.49722854308886166,
      "grad_norm": 0.9824857045201814,
      "learning_rate": 5.287789477492099e-06,
      "loss": 0.1528,
      "step": 17044
    },
    {
      "epoch": 0.4972577163195052,
      "grad_norm": 0.7936125583236037,
      "learning_rate": 5.287317825267146e-06,
      "loss": 0.1353,
      "step": 17045
    },
    {
      "epoch": 0.49728688955014877,
      "grad_norm": 0.8966151860414091,
      "learning_rate": 5.286846170477085e-06,
      "loss": 0.115,
      "step": 17046
    },
    {
      "epoch": 0.4973160627807923,
      "grad_norm": 0.8581127729591428,
      "learning_rate": 5.286374513126129e-06,
      "loss": 0.1411,
      "step": 17047
    },
    {
      "epoch": 0.49734523601143593,
      "grad_norm": 0.6876296940895243,
      "learning_rate": 5.285902853218492e-06,
      "loss": 0.1385,
      "step": 17048
    },
    {
      "epoch": 0.4973744092420795,
      "grad_norm": 0.9251394566503488,
      "learning_rate": 5.285431190758381e-06,
      "loss": 0.1414,
      "step": 17049
    },
    {
      "epoch": 0.49740358247272304,
      "grad_norm": 0.7486646186866653,
      "learning_rate": 5.2849595257500085e-06,
      "loss": 0.12,
      "step": 17050
    },
    {
      "epoch": 0.4974327557033666,
      "grad_norm": 0.7712107945516644,
      "learning_rate": 5.284487858197586e-06,
      "loss": 0.1267,
      "step": 17051
    },
    {
      "epoch": 0.49746192893401014,
      "grad_norm": 0.8771844825510239,
      "learning_rate": 5.284016188105324e-06,
      "loss": 0.1144,
      "step": 17052
    },
    {
      "epoch": 0.4974911021646537,
      "grad_norm": 0.8092455419223078,
      "learning_rate": 5.283544515477434e-06,
      "loss": 0.1428,
      "step": 17053
    },
    {
      "epoch": 0.49752027539529725,
      "grad_norm": 0.8932150374580071,
      "learning_rate": 5.283072840318124e-06,
      "loss": 0.155,
      "step": 17054
    },
    {
      "epoch": 0.49754944862594086,
      "grad_norm": 0.6802444985614569,
      "learning_rate": 5.282601162631609e-06,
      "loss": 0.1164,
      "step": 17055
    },
    {
      "epoch": 0.4975786218565844,
      "grad_norm": 0.6382936620776493,
      "learning_rate": 5.282129482422097e-06,
      "loss": 0.129,
      "step": 17056
    },
    {
      "epoch": 0.49760779508722797,
      "grad_norm": 0.9247116257299203,
      "learning_rate": 5.281657799693803e-06,
      "loss": 0.1318,
      "step": 17057
    },
    {
      "epoch": 0.4976369683178715,
      "grad_norm": 0.728335394824729,
      "learning_rate": 5.281186114450934e-06,
      "loss": 0.1292,
      "step": 17058
    },
    {
      "epoch": 0.4976661415485151,
      "grad_norm": 0.713713454618646,
      "learning_rate": 5.2807144266977e-06,
      "loss": 0.1404,
      "step": 17059
    },
    {
      "epoch": 0.4976953147791586,
      "grad_norm": 0.8888424569177863,
      "learning_rate": 5.280242736438318e-06,
      "loss": 0.1315,
      "step": 17060
    },
    {
      "epoch": 0.4977244880098022,
      "grad_norm": 0.7007810331234429,
      "learning_rate": 5.279771043676994e-06,
      "loss": 0.1312,
      "step": 17061
    },
    {
      "epoch": 0.4977536612404458,
      "grad_norm": 0.7150189101992875,
      "learning_rate": 5.2792993484179415e-06,
      "loss": 0.1473,
      "step": 17062
    },
    {
      "epoch": 0.49778283447108934,
      "grad_norm": 0.8462435442992592,
      "learning_rate": 5.27882765066537e-06,
      "loss": 0.1469,
      "step": 17063
    },
    {
      "epoch": 0.4978120077017329,
      "grad_norm": 0.6706241051842962,
      "learning_rate": 5.2783559504234926e-06,
      "loss": 0.1433,
      "step": 17064
    },
    {
      "epoch": 0.49784118093237645,
      "grad_norm": 0.8418848267602387,
      "learning_rate": 5.277884247696521e-06,
      "loss": 0.1388,
      "step": 17065
    },
    {
      "epoch": 0.49787035416302,
      "grad_norm": 0.9550010976560557,
      "learning_rate": 5.277412542488664e-06,
      "loss": 0.1389,
      "step": 17066
    },
    {
      "epoch": 0.49789952739366355,
      "grad_norm": 0.8098212287485531,
      "learning_rate": 5.276940834804133e-06,
      "loss": 0.1343,
      "step": 17067
    },
    {
      "epoch": 0.49792870062430716,
      "grad_norm": 0.9511043289148973,
      "learning_rate": 5.276469124647141e-06,
      "loss": 0.1363,
      "step": 17068
    },
    {
      "epoch": 0.4979578738549507,
      "grad_norm": 0.9632947494313432,
      "learning_rate": 5.2759974120218995e-06,
      "loss": 0.1337,
      "step": 17069
    },
    {
      "epoch": 0.49798704708559427,
      "grad_norm": 0.7655909900395728,
      "learning_rate": 5.2755256969326195e-06,
      "loss": 0.1213,
      "step": 17070
    },
    {
      "epoch": 0.4980162203162378,
      "grad_norm": 0.6625999224547507,
      "learning_rate": 5.27505397938351e-06,
      "loss": 0.1251,
      "step": 17071
    },
    {
      "epoch": 0.4980453935468814,
      "grad_norm": 0.9600395652719803,
      "learning_rate": 5.274582259378785e-06,
      "loss": 0.1443,
      "step": 17072
    },
    {
      "epoch": 0.49807456677752493,
      "grad_norm": 0.7291946285816626,
      "learning_rate": 5.274110536922655e-06,
      "loss": 0.1217,
      "step": 17073
    },
    {
      "epoch": 0.4981037400081685,
      "grad_norm": 0.7580019253967736,
      "learning_rate": 5.273638812019331e-06,
      "loss": 0.1324,
      "step": 17074
    },
    {
      "epoch": 0.4981329132388121,
      "grad_norm": 1.0467376295916913,
      "learning_rate": 5.273167084673028e-06,
      "loss": 0.1233,
      "step": 17075
    },
    {
      "epoch": 0.49816208646945564,
      "grad_norm": 0.7543399720664358,
      "learning_rate": 5.272695354887951e-06,
      "loss": 0.1601,
      "step": 17076
    },
    {
      "epoch": 0.4981912597000992,
      "grad_norm": 0.8197716089735835,
      "learning_rate": 5.272223622668316e-06,
      "loss": 0.1113,
      "step": 17077
    },
    {
      "epoch": 0.49822043293074275,
      "grad_norm": 0.7635338222701459,
      "learning_rate": 5.271751888018335e-06,
      "loss": 0.1312,
      "step": 17078
    },
    {
      "epoch": 0.4982496061613863,
      "grad_norm": 0.8843088300732025,
      "learning_rate": 5.271280150942217e-06,
      "loss": 0.1398,
      "step": 17079
    },
    {
      "epoch": 0.49827877939202986,
      "grad_norm": 0.8341103549713862,
      "learning_rate": 5.270808411444174e-06,
      "loss": 0.1568,
      "step": 17080
    },
    {
      "epoch": 0.4983079526226734,
      "grad_norm": 0.8561214965639796,
      "learning_rate": 5.270336669528417e-06,
      "loss": 0.1426,
      "step": 17081
    },
    {
      "epoch": 0.498337125853317,
      "grad_norm": 0.8894287839903012,
      "learning_rate": 5.269864925199161e-06,
      "loss": 0.1328,
      "step": 17082
    },
    {
      "epoch": 0.4983662990839606,
      "grad_norm": 0.8532956943212627,
      "learning_rate": 5.269393178460614e-06,
      "loss": 0.1541,
      "step": 17083
    },
    {
      "epoch": 0.4983954723146041,
      "grad_norm": 0.6599364125191604,
      "learning_rate": 5.2689214293169896e-06,
      "loss": 0.1393,
      "step": 17084
    },
    {
      "epoch": 0.4984246455452477,
      "grad_norm": 0.8397851924690483,
      "learning_rate": 5.268449677772499e-06,
      "loss": 0.1133,
      "step": 17085
    },
    {
      "epoch": 0.49845381877589123,
      "grad_norm": 0.9352705543099153,
      "learning_rate": 5.267977923831354e-06,
      "loss": 0.1353,
      "step": 17086
    },
    {
      "epoch": 0.4984829920065348,
      "grad_norm": 0.8282603629619283,
      "learning_rate": 5.2675061674977665e-06,
      "loss": 0.1202,
      "step": 17087
    },
    {
      "epoch": 0.49851216523717834,
      "grad_norm": 0.9538341436559521,
      "learning_rate": 5.2670344087759466e-06,
      "loss": 0.1569,
      "step": 17088
    },
    {
      "epoch": 0.49854133846782195,
      "grad_norm": 0.9729144870371288,
      "learning_rate": 5.266562647670107e-06,
      "loss": 0.1534,
      "step": 17089
    },
    {
      "epoch": 0.4985705116984655,
      "grad_norm": 0.798530296346233,
      "learning_rate": 5.266090884184462e-06,
      "loss": 0.137,
      "step": 17090
    },
    {
      "epoch": 0.49859968492910905,
      "grad_norm": 0.6972318621149772,
      "learning_rate": 5.265619118323218e-06,
      "loss": 0.1275,
      "step": 17091
    },
    {
      "epoch": 0.4986288581597526,
      "grad_norm": 0.9491328908344574,
      "learning_rate": 5.2651473500905925e-06,
      "loss": 0.1149,
      "step": 17092
    },
    {
      "epoch": 0.49865803139039616,
      "grad_norm": 0.9503833956624851,
      "learning_rate": 5.264675579490793e-06,
      "loss": 0.1386,
      "step": 17093
    },
    {
      "epoch": 0.4986872046210397,
      "grad_norm": 0.5601770519055654,
      "learning_rate": 5.264203806528034e-06,
      "loss": 0.1171,
      "step": 17094
    },
    {
      "epoch": 0.49871637785168327,
      "grad_norm": 1.0063804905034068,
      "learning_rate": 5.263732031206527e-06,
      "loss": 0.1451,
      "step": 17095
    },
    {
      "epoch": 0.4987455510823269,
      "grad_norm": 0.9061470755468986,
      "learning_rate": 5.263260253530482e-06,
      "loss": 0.1304,
      "step": 17096
    },
    {
      "epoch": 0.49877472431297043,
      "grad_norm": 0.7608954569541991,
      "learning_rate": 5.262788473504112e-06,
      "loss": 0.1301,
      "step": 17097
    },
    {
      "epoch": 0.498803897543614,
      "grad_norm": 0.6842391012230838,
      "learning_rate": 5.262316691131631e-06,
      "loss": 0.1284,
      "step": 17098
    },
    {
      "epoch": 0.49883307077425754,
      "grad_norm": 0.8127145270545167,
      "learning_rate": 5.261844906417249e-06,
      "loss": 0.124,
      "step": 17099
    },
    {
      "epoch": 0.4988622440049011,
      "grad_norm": 0.9134184916856534,
      "learning_rate": 5.261373119365176e-06,
      "loss": 0.1242,
      "step": 17100
    },
    {
      "epoch": 0.49889141723554464,
      "grad_norm": 0.809696198233572,
      "learning_rate": 5.260901329979628e-06,
      "loss": 0.1367,
      "step": 17101
    },
    {
      "epoch": 0.49892059046618825,
      "grad_norm": 0.7523160407219962,
      "learning_rate": 5.260429538264816e-06,
      "loss": 0.1291,
      "step": 17102
    },
    {
      "epoch": 0.4989497636968318,
      "grad_norm": 0.7269267040884179,
      "learning_rate": 5.2599577442249496e-06,
      "loss": 0.1219,
      "step": 17103
    },
    {
      "epoch": 0.49897893692747536,
      "grad_norm": 0.7285468407026217,
      "learning_rate": 5.259485947864242e-06,
      "loss": 0.1026,
      "step": 17104
    },
    {
      "epoch": 0.4990081101581189,
      "grad_norm": 0.9600386351774044,
      "learning_rate": 5.259014149186908e-06,
      "loss": 0.1585,
      "step": 17105
    },
    {
      "epoch": 0.49903728338876246,
      "grad_norm": 0.7941039515535414,
      "learning_rate": 5.258542348197157e-06,
      "loss": 0.1416,
      "step": 17106
    },
    {
      "epoch": 0.499066456619406,
      "grad_norm": 0.8670383025657609,
      "learning_rate": 5.258070544899201e-06,
      "loss": 0.1206,
      "step": 17107
    },
    {
      "epoch": 0.49909562985004957,
      "grad_norm": 0.8872511624243985,
      "learning_rate": 5.257598739297253e-06,
      "loss": 0.1621,
      "step": 17108
    },
    {
      "epoch": 0.4991248030806932,
      "grad_norm": 1.6453368524010874,
      "learning_rate": 5.257126931395524e-06,
      "loss": 0.1462,
      "step": 17109
    },
    {
      "epoch": 0.49915397631133673,
      "grad_norm": 0.9544847583473068,
      "learning_rate": 5.256655121198229e-06,
      "loss": 0.1182,
      "step": 17110
    },
    {
      "epoch": 0.4991831495419803,
      "grad_norm": 0.9484962867566957,
      "learning_rate": 5.256183308709577e-06,
      "loss": 0.1488,
      "step": 17111
    },
    {
      "epoch": 0.49921232277262384,
      "grad_norm": 0.8161254533832683,
      "learning_rate": 5.255711493933781e-06,
      "loss": 0.1282,
      "step": 17112
    },
    {
      "epoch": 0.4992414960032674,
      "grad_norm": 1.0173781251841534,
      "learning_rate": 5.255239676875055e-06,
      "loss": 0.1635,
      "step": 17113
    },
    {
      "epoch": 0.49927066923391095,
      "grad_norm": 0.9580842003699206,
      "learning_rate": 5.254767857537611e-06,
      "loss": 0.1376,
      "step": 17114
    },
    {
      "epoch": 0.4992998424645545,
      "grad_norm": 0.7094881150961431,
      "learning_rate": 5.254296035925658e-06,
      "loss": 0.1143,
      "step": 17115
    },
    {
      "epoch": 0.4993290156951981,
      "grad_norm": 1.0444373873103063,
      "learning_rate": 5.253824212043411e-06,
      "loss": 0.1381,
      "step": 17116
    },
    {
      "epoch": 0.49935818892584166,
      "grad_norm": 1.245752921310101,
      "learning_rate": 5.253352385895085e-06,
      "loss": 0.1466,
      "step": 17117
    },
    {
      "epoch": 0.4993873621564852,
      "grad_norm": 0.8277694308556317,
      "learning_rate": 5.252880557484886e-06,
      "loss": 0.1134,
      "step": 17118
    },
    {
      "epoch": 0.49941653538712877,
      "grad_norm": 0.6658102339743692,
      "learning_rate": 5.252408726817031e-06,
      "loss": 0.1406,
      "step": 17119
    },
    {
      "epoch": 0.4994457086177723,
      "grad_norm": 2.0027386387552615,
      "learning_rate": 5.251936893895732e-06,
      "loss": 0.1202,
      "step": 17120
    },
    {
      "epoch": 0.4994748818484159,
      "grad_norm": 1.0980822142946722,
      "learning_rate": 5.251465058725198e-06,
      "loss": 0.1448,
      "step": 17121
    },
    {
      "epoch": 0.49950405507905943,
      "grad_norm": 1.2828284325790056,
      "learning_rate": 5.250993221309647e-06,
      "loss": 0.1389,
      "step": 17122
    },
    {
      "epoch": 0.49953322830970304,
      "grad_norm": 0.7465328188197644,
      "learning_rate": 5.250521381653287e-06,
      "loss": 0.1369,
      "step": 17123
    },
    {
      "epoch": 0.4995624015403466,
      "grad_norm": 0.6626564926755844,
      "learning_rate": 5.250049539760332e-06,
      "loss": 0.1179,
      "step": 17124
    },
    {
      "epoch": 0.49959157477099014,
      "grad_norm": 0.7576224810787092,
      "learning_rate": 5.249577695634994e-06,
      "loss": 0.143,
      "step": 17125
    },
    {
      "epoch": 0.4996207480016337,
      "grad_norm": 0.7086279516859726,
      "learning_rate": 5.2491058492814875e-06,
      "loss": 0.119,
      "step": 17126
    },
    {
      "epoch": 0.49964992123227725,
      "grad_norm": 0.9251884333562526,
      "learning_rate": 5.248634000704021e-06,
      "loss": 0.1602,
      "step": 17127
    },
    {
      "epoch": 0.4996790944629208,
      "grad_norm": 0.7102153188699789,
      "learning_rate": 5.248162149906811e-06,
      "loss": 0.1381,
      "step": 17128
    },
    {
      "epoch": 0.4997082676935644,
      "grad_norm": 0.6747754949169598,
      "learning_rate": 5.247690296894069e-06,
      "loss": 0.1293,
      "step": 17129
    },
    {
      "epoch": 0.49973744092420797,
      "grad_norm": 0.8786216402254168,
      "learning_rate": 5.247218441670005e-06,
      "loss": 0.1292,
      "step": 17130
    },
    {
      "epoch": 0.4997666141548515,
      "grad_norm": 1.1623128843874437,
      "learning_rate": 5.246746584238837e-06,
      "loss": 0.1353,
      "step": 17131
    },
    {
      "epoch": 0.49979578738549507,
      "grad_norm": 0.7744472521828143,
      "learning_rate": 5.246274724604773e-06,
      "loss": 0.1321,
      "step": 17132
    },
    {
      "epoch": 0.4998249606161386,
      "grad_norm": 0.7848661400042279,
      "learning_rate": 5.245802862772026e-06,
      "loss": 0.1452,
      "step": 17133
    },
    {
      "epoch": 0.4998541338467822,
      "grad_norm": 1.0082561941495158,
      "learning_rate": 5.24533099874481e-06,
      "loss": 0.1612,
      "step": 17134
    },
    {
      "epoch": 0.49988330707742573,
      "grad_norm": 0.9243029079990752,
      "learning_rate": 5.244859132527339e-06,
      "loss": 0.1464,
      "step": 17135
    },
    {
      "epoch": 0.49991248030806934,
      "grad_norm": 0.8644881692321109,
      "learning_rate": 5.2443872641238215e-06,
      "loss": 0.1561,
      "step": 17136
    },
    {
      "epoch": 0.4999416535387129,
      "grad_norm": 1.1887181921751124,
      "learning_rate": 5.243915393538476e-06,
      "loss": 0.1301,
      "step": 17137
    },
    {
      "epoch": 0.49997082676935645,
      "grad_norm": 1.118070939766289,
      "learning_rate": 5.2434435207755094e-06,
      "loss": 0.1285,
      "step": 17138
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.9731561634333157,
      "learning_rate": 5.242971645839139e-06,
      "loss": 0.1201,
      "step": 17139
    },
    {
      "epoch": 0.5000291732306436,
      "grad_norm": 1.3684619331189036,
      "learning_rate": 5.242499768733574e-06,
      "loss": 0.1311,
      "step": 17140
    },
    {
      "epoch": 0.5000583464612871,
      "grad_norm": 1.0099767079129824,
      "learning_rate": 5.24202788946303e-06,
      "loss": 0.1416,
      "step": 17141
    },
    {
      "epoch": 0.5000875196919307,
      "grad_norm": 1.035754821415772,
      "learning_rate": 5.2415560080317184e-06,
      "loss": 0.1365,
      "step": 17142
    },
    {
      "epoch": 0.5001166929225742,
      "grad_norm": 0.7112097320370867,
      "learning_rate": 5.241084124443854e-06,
      "loss": 0.1017,
      "step": 17143
    },
    {
      "epoch": 0.5001458661532178,
      "grad_norm": 0.8004353611466325,
      "learning_rate": 5.240612238703646e-06,
      "loss": 0.1241,
      "step": 17144
    },
    {
      "epoch": 0.5001750393838613,
      "grad_norm": 1.120319803724126,
      "learning_rate": 5.24014035081531e-06,
      "loss": 0.1161,
      "step": 17145
    },
    {
      "epoch": 0.500204212614505,
      "grad_norm": 0.8980134409796721,
      "learning_rate": 5.239668460783059e-06,
      "loss": 0.1249,
      "step": 17146
    },
    {
      "epoch": 0.5002333858451485,
      "grad_norm": 0.7903064197778377,
      "learning_rate": 5.239196568611105e-06,
      "loss": 0.1249,
      "step": 17147
    },
    {
      "epoch": 0.5002625590757921,
      "grad_norm": 0.8356499527549497,
      "learning_rate": 5.2387246743036595e-06,
      "loss": 0.1364,
      "step": 17148
    },
    {
      "epoch": 0.5002917323064356,
      "grad_norm": 0.886421625560439,
      "learning_rate": 5.238252777864938e-06,
      "loss": 0.1397,
      "step": 17149
    },
    {
      "epoch": 0.5003209055370792,
      "grad_norm": 0.6963085457001312,
      "learning_rate": 5.237780879299155e-06,
      "loss": 0.1148,
      "step": 17150
    },
    {
      "epoch": 0.5003500787677228,
      "grad_norm": 1.025842471472968,
      "learning_rate": 5.237308978610517e-06,
      "loss": 0.139,
      "step": 17151
    },
    {
      "epoch": 0.5003792519983663,
      "grad_norm": 0.794770099589364,
      "learning_rate": 5.236837075803244e-06,
      "loss": 0.1493,
      "step": 17152
    },
    {
      "epoch": 0.5004084252290099,
      "grad_norm": 0.780344725681366,
      "learning_rate": 5.236365170881545e-06,
      "loss": 0.1188,
      "step": 17153
    },
    {
      "epoch": 0.5004375984596534,
      "grad_norm": 0.8627918229814081,
      "learning_rate": 5.235893263849635e-06,
      "loss": 0.127,
      "step": 17154
    },
    {
      "epoch": 0.500466771690297,
      "grad_norm": 0.8956561634544297,
      "learning_rate": 5.2354213547117246e-06,
      "loss": 0.1147,
      "step": 17155
    },
    {
      "epoch": 0.5004959449209405,
      "grad_norm": 0.8951160083460781,
      "learning_rate": 5.234949443472031e-06,
      "loss": 0.1633,
      "step": 17156
    },
    {
      "epoch": 0.5005251181515841,
      "grad_norm": 0.6589209539871892,
      "learning_rate": 5.234477530134763e-06,
      "loss": 0.1147,
      "step": 17157
    },
    {
      "epoch": 0.5005542913822276,
      "grad_norm": 0.8054137191463541,
      "learning_rate": 5.2340056147041356e-06,
      "loss": 0.1418,
      "step": 17158
    },
    {
      "epoch": 0.5005834646128712,
      "grad_norm": 0.9100534464272424,
      "learning_rate": 5.233533697184362e-06,
      "loss": 0.113,
      "step": 17159
    },
    {
      "epoch": 0.5006126378435148,
      "grad_norm": 0.8610173686244597,
      "learning_rate": 5.233061777579656e-06,
      "loss": 0.1306,
      "step": 17160
    },
    {
      "epoch": 0.5006418110741584,
      "grad_norm": 0.6738213603824865,
      "learning_rate": 5.23258985589423e-06,
      "loss": 0.1144,
      "step": 17161
    },
    {
      "epoch": 0.500670984304802,
      "grad_norm": 0.81525623459888,
      "learning_rate": 5.232117932132298e-06,
      "loss": 0.1318,
      "step": 17162
    },
    {
      "epoch": 0.5007001575354455,
      "grad_norm": 0.7610579385189001,
      "learning_rate": 5.23164600629807e-06,
      "loss": 0.1357,
      "step": 17163
    },
    {
      "epoch": 0.500729330766089,
      "grad_norm": 0.7452739226198208,
      "learning_rate": 5.231174078395763e-06,
      "loss": 0.1253,
      "step": 17164
    },
    {
      "epoch": 0.5007585039967326,
      "grad_norm": 0.7060718897728941,
      "learning_rate": 5.230702148429591e-06,
      "loss": 0.1226,
      "step": 17165
    },
    {
      "epoch": 0.5007876772273762,
      "grad_norm": 0.622535066895069,
      "learning_rate": 5.230230216403762e-06,
      "loss": 0.1507,
      "step": 17166
    },
    {
      "epoch": 0.5008168504580197,
      "grad_norm": 0.8489303336404533,
      "learning_rate": 5.2297582823224955e-06,
      "loss": 0.1125,
      "step": 17167
    },
    {
      "epoch": 0.5008460236886633,
      "grad_norm": 0.8558647579546741,
      "learning_rate": 5.22928634619e-06,
      "loss": 0.1231,
      "step": 17168
    },
    {
      "epoch": 0.5008751969193068,
      "grad_norm": 0.6552461441578921,
      "learning_rate": 5.228814408010492e-06,
      "loss": 0.1494,
      "step": 17169
    },
    {
      "epoch": 0.5009043701499504,
      "grad_norm": 0.7765841525541597,
      "learning_rate": 5.228342467788182e-06,
      "loss": 0.1229,
      "step": 17170
    },
    {
      "epoch": 0.5009335433805939,
      "grad_norm": 0.9703386582263274,
      "learning_rate": 5.2278705255272866e-06,
      "loss": 0.1409,
      "step": 17171
    },
    {
      "epoch": 0.5009627166112375,
      "grad_norm": 0.782969804611631,
      "learning_rate": 5.227398581232016e-06,
      "loss": 0.1373,
      "step": 17172
    },
    {
      "epoch": 0.5009918898418811,
      "grad_norm": 0.7574228508574635,
      "learning_rate": 5.226926634906586e-06,
      "loss": 0.1392,
      "step": 17173
    },
    {
      "epoch": 0.5010210630725247,
      "grad_norm": 0.7353146706328247,
      "learning_rate": 5.226454686555209e-06,
      "loss": 0.1391,
      "step": 17174
    },
    {
      "epoch": 0.5010502363031683,
      "grad_norm": 0.8493344005529188,
      "learning_rate": 5.225982736182099e-06,
      "loss": 0.1193,
      "step": 17175
    },
    {
      "epoch": 0.5010794095338118,
      "grad_norm": 0.7026209480147556,
      "learning_rate": 5.2255107837914685e-06,
      "loss": 0.1357,
      "step": 17176
    },
    {
      "epoch": 0.5011085827644554,
      "grad_norm": 2.137751931106153,
      "learning_rate": 5.225038829387533e-06,
      "loss": 0.1389,
      "step": 17177
    },
    {
      "epoch": 0.5011377559950989,
      "grad_norm": 0.7991677006618545,
      "learning_rate": 5.224566872974502e-06,
      "loss": 0.1473,
      "step": 17178
    },
    {
      "epoch": 0.5011669292257425,
      "grad_norm": 0.674882459748988,
      "learning_rate": 5.2240949145565935e-06,
      "loss": 0.1323,
      "step": 17179
    },
    {
      "epoch": 0.501196102456386,
      "grad_norm": 0.8200767887070802,
      "learning_rate": 5.22362295413802e-06,
      "loss": 0.1507,
      "step": 17180
    },
    {
      "epoch": 0.5012252756870296,
      "grad_norm": 0.9659816451627661,
      "learning_rate": 5.223150991722992e-06,
      "loss": 0.1223,
      "step": 17181
    },
    {
      "epoch": 0.5012544489176731,
      "grad_norm": 0.9508972043286074,
      "learning_rate": 5.222679027315727e-06,
      "loss": 0.156,
      "step": 17182
    },
    {
      "epoch": 0.5012836221483167,
      "grad_norm": 1.0595972555282738,
      "learning_rate": 5.2222070609204355e-06,
      "loss": 0.1317,
      "step": 17183
    },
    {
      "epoch": 0.5013127953789602,
      "grad_norm": 0.8614280346225054,
      "learning_rate": 5.221735092541332e-06,
      "loss": 0.1532,
      "step": 17184
    },
    {
      "epoch": 0.5013419686096038,
      "grad_norm": 0.884064970889249,
      "learning_rate": 5.2212631221826315e-06,
      "loss": 0.117,
      "step": 17185
    },
    {
      "epoch": 0.5013711418402473,
      "grad_norm": 0.7387140232644068,
      "learning_rate": 5.220791149848547e-06,
      "loss": 0.128,
      "step": 17186
    },
    {
      "epoch": 0.501400315070891,
      "grad_norm": 0.883379906728551,
      "learning_rate": 5.22031917554329e-06,
      "loss": 0.1153,
      "step": 17187
    },
    {
      "epoch": 0.5014294883015346,
      "grad_norm": 0.8607105100624723,
      "learning_rate": 5.219847199271078e-06,
      "loss": 0.1654,
      "step": 17188
    },
    {
      "epoch": 0.5014586615321781,
      "grad_norm": 0.8471029065213588,
      "learning_rate": 5.219375221036122e-06,
      "loss": 0.1217,
      "step": 17189
    },
    {
      "epoch": 0.5014878347628217,
      "grad_norm": 0.9251528919789282,
      "learning_rate": 5.218903240842635e-06,
      "loss": 0.1559,
      "step": 17190
    },
    {
      "epoch": 0.5015170079934652,
      "grad_norm": 0.7348551840984193,
      "learning_rate": 5.218431258694833e-06,
      "loss": 0.1461,
      "step": 17191
    },
    {
      "epoch": 0.5015461812241088,
      "grad_norm": 0.8236715980687773,
      "learning_rate": 5.217959274596931e-06,
      "loss": 0.1364,
      "step": 17192
    },
    {
      "epoch": 0.5015753544547523,
      "grad_norm": 1.0239782261428787,
      "learning_rate": 5.217487288553138e-06,
      "loss": 0.1323,
      "step": 17193
    },
    {
      "epoch": 0.5016045276853959,
      "grad_norm": 1.0428771227201818,
      "learning_rate": 5.2170153005676715e-06,
      "loss": 0.1445,
      "step": 17194
    },
    {
      "epoch": 0.5016337009160394,
      "grad_norm": 0.8244842854108386,
      "learning_rate": 5.216543310644745e-06,
      "loss": 0.1448,
      "step": 17195
    },
    {
      "epoch": 0.501662874146683,
      "grad_norm": 1.141564332369589,
      "learning_rate": 5.216071318788569e-06,
      "loss": 0.1586,
      "step": 17196
    },
    {
      "epoch": 0.5016920473773265,
      "grad_norm": 1.0340795327004673,
      "learning_rate": 5.215599325003362e-06,
      "loss": 0.1437,
      "step": 17197
    },
    {
      "epoch": 0.5017212206079701,
      "grad_norm": 0.7727040580340079,
      "learning_rate": 5.215127329293336e-06,
      "loss": 0.1219,
      "step": 17198
    },
    {
      "epoch": 0.5017503938386136,
      "grad_norm": 0.8878697654869425,
      "learning_rate": 5.214655331662703e-06,
      "loss": 0.1405,
      "step": 17199
    },
    {
      "epoch": 0.5017795670692573,
      "grad_norm": 0.8413827455853727,
      "learning_rate": 5.2141833321156785e-06,
      "loss": 0.1363,
      "step": 17200
    },
    {
      "epoch": 0.5018087402999009,
      "grad_norm": 0.8404351792994844,
      "learning_rate": 5.213711330656478e-06,
      "loss": 0.133,
      "step": 17201
    },
    {
      "epoch": 0.5018379135305444,
      "grad_norm": 1.5971743469107753,
      "learning_rate": 5.213239327289312e-06,
      "loss": 0.1466,
      "step": 17202
    },
    {
      "epoch": 0.501867086761188,
      "grad_norm": 0.8603876780371584,
      "learning_rate": 5.212767322018397e-06,
      "loss": 0.151,
      "step": 17203
    },
    {
      "epoch": 0.5018962599918315,
      "grad_norm": 0.9672789957744976,
      "learning_rate": 5.212295314847946e-06,
      "loss": 0.1923,
      "step": 17204
    },
    {
      "epoch": 0.5019254332224751,
      "grad_norm": 0.7749333811009574,
      "learning_rate": 5.211823305782173e-06,
      "loss": 0.1469,
      "step": 17205
    },
    {
      "epoch": 0.5019546064531186,
      "grad_norm": 0.7871876121019177,
      "learning_rate": 5.211351294825292e-06,
      "loss": 0.1061,
      "step": 17206
    },
    {
      "epoch": 0.5019837796837622,
      "grad_norm": 0.7937711212665466,
      "learning_rate": 5.210879281981518e-06,
      "loss": 0.1079,
      "step": 17207
    },
    {
      "epoch": 0.5020129529144057,
      "grad_norm": 0.7134331140405762,
      "learning_rate": 5.210407267255062e-06,
      "loss": 0.1367,
      "step": 17208
    },
    {
      "epoch": 0.5020421261450493,
      "grad_norm": 0.7591675522412211,
      "learning_rate": 5.209935250650142e-06,
      "loss": 0.1272,
      "step": 17209
    },
    {
      "epoch": 0.5020712993756928,
      "grad_norm": 0.8322558863772237,
      "learning_rate": 5.2094632321709705e-06,
      "loss": 0.1512,
      "step": 17210
    },
    {
      "epoch": 0.5021004726063364,
      "grad_norm": 0.754612848043372,
      "learning_rate": 5.20899121182176e-06,
      "loss": 0.1352,
      "step": 17211
    },
    {
      "epoch": 0.5021296458369799,
      "grad_norm": 0.8021606134855779,
      "learning_rate": 5.2085191896067265e-06,
      "loss": 0.1252,
      "step": 17212
    },
    {
      "epoch": 0.5021588190676235,
      "grad_norm": 0.950624928019119,
      "learning_rate": 5.208047165530083e-06,
      "loss": 0.1273,
      "step": 17213
    },
    {
      "epoch": 0.5021879922982672,
      "grad_norm": 0.768628499878225,
      "learning_rate": 5.207575139596045e-06,
      "loss": 0.1417,
      "step": 17214
    },
    {
      "epoch": 0.5022171655289107,
      "grad_norm": 0.8059146562851579,
      "learning_rate": 5.2071031118088255e-06,
      "loss": 0.1371,
      "step": 17215
    },
    {
      "epoch": 0.5022463387595543,
      "grad_norm": 0.8437210934009688,
      "learning_rate": 5.206631082172638e-06,
      "loss": 0.143,
      "step": 17216
    },
    {
      "epoch": 0.5022755119901978,
      "grad_norm": 0.7986468902053767,
      "learning_rate": 5.206159050691698e-06,
      "loss": 0.1444,
      "step": 17217
    },
    {
      "epoch": 0.5023046852208414,
      "grad_norm": 0.8032047586295259,
      "learning_rate": 5.205687017370219e-06,
      "loss": 0.1155,
      "step": 17218
    },
    {
      "epoch": 0.5023338584514849,
      "grad_norm": 0.7235984459577574,
      "learning_rate": 5.205214982212416e-06,
      "loss": 0.1363,
      "step": 17219
    },
    {
      "epoch": 0.5023630316821285,
      "grad_norm": 0.82779791602683,
      "learning_rate": 5.204742945222502e-06,
      "loss": 0.1315,
      "step": 17220
    },
    {
      "epoch": 0.502392204912772,
      "grad_norm": 0.9646787226862298,
      "learning_rate": 5.204270906404692e-06,
      "loss": 0.1261,
      "step": 17221
    },
    {
      "epoch": 0.5024213781434156,
      "grad_norm": 0.7659089965180165,
      "learning_rate": 5.203798865763201e-06,
      "loss": 0.1122,
      "step": 17222
    },
    {
      "epoch": 0.5024505513740591,
      "grad_norm": 0.9048215158293285,
      "learning_rate": 5.20332682330224e-06,
      "loss": 0.1357,
      "step": 17223
    },
    {
      "epoch": 0.5024797246047027,
      "grad_norm": 1.0139816087897788,
      "learning_rate": 5.202854779026028e-06,
      "loss": 0.128,
      "step": 17224
    },
    {
      "epoch": 0.5025088978353462,
      "grad_norm": 0.8499543411608957,
      "learning_rate": 5.202382732938777e-06,
      "loss": 0.1157,
      "step": 17225
    },
    {
      "epoch": 0.5025380710659898,
      "grad_norm": 0.8338528415467421,
      "learning_rate": 5.201910685044699e-06,
      "loss": 0.1214,
      "step": 17226
    },
    {
      "epoch": 0.5025672442966335,
      "grad_norm": 0.8279495772859653,
      "learning_rate": 5.201438635348013e-06,
      "loss": 0.1572,
      "step": 17227
    },
    {
      "epoch": 0.502596417527277,
      "grad_norm": 1.0496619765438127,
      "learning_rate": 5.20096658385293e-06,
      "loss": 0.1218,
      "step": 17228
    },
    {
      "epoch": 0.5026255907579206,
      "grad_norm": 0.7130666128350417,
      "learning_rate": 5.2004945305636656e-06,
      "loss": 0.1158,
      "step": 17229
    },
    {
      "epoch": 0.5026547639885641,
      "grad_norm": 0.7951027918329903,
      "learning_rate": 5.200022475484433e-06,
      "loss": 0.1284,
      "step": 17230
    },
    {
      "epoch": 0.5026839372192077,
      "grad_norm": 1.0339598951541478,
      "learning_rate": 5.1995504186194476e-06,
      "loss": 0.1598,
      "step": 17231
    },
    {
      "epoch": 0.5027131104498512,
      "grad_norm": 0.8752915020425636,
      "learning_rate": 5.199078359972925e-06,
      "loss": 0.1205,
      "step": 17232
    },
    {
      "epoch": 0.5027422836804948,
      "grad_norm": 1.0506697388721178,
      "learning_rate": 5.198606299549077e-06,
      "loss": 0.1383,
      "step": 17233
    },
    {
      "epoch": 0.5027714569111383,
      "grad_norm": 1.0142581175250243,
      "learning_rate": 5.198134237352121e-06,
      "loss": 0.148,
      "step": 17234
    },
    {
      "epoch": 0.5028006301417819,
      "grad_norm": 0.8689653063950162,
      "learning_rate": 5.1976621733862675e-06,
      "loss": 0.1228,
      "step": 17235
    },
    {
      "epoch": 0.5028298033724254,
      "grad_norm": 0.9116054998973984,
      "learning_rate": 5.197190107655735e-06,
      "loss": 0.1437,
      "step": 17236
    },
    {
      "epoch": 0.502858976603069,
      "grad_norm": 1.1149824829170243,
      "learning_rate": 5.196718040164737e-06,
      "loss": 0.134,
      "step": 17237
    },
    {
      "epoch": 0.5028881498337125,
      "grad_norm": 1.1332287799316638,
      "learning_rate": 5.196245970917485e-06,
      "loss": 0.1562,
      "step": 17238
    },
    {
      "epoch": 0.5029173230643561,
      "grad_norm": 0.8639495800020006,
      "learning_rate": 5.195773899918196e-06,
      "loss": 0.1352,
      "step": 17239
    },
    {
      "epoch": 0.5029464962949997,
      "grad_norm": 0.8801817825528243,
      "learning_rate": 5.195301827171086e-06,
      "loss": 0.1466,
      "step": 17240
    },
    {
      "epoch": 0.5029756695256433,
      "grad_norm": 0.9851435143146585,
      "learning_rate": 5.194829752680367e-06,
      "loss": 0.1381,
      "step": 17241
    },
    {
      "epoch": 0.5030048427562869,
      "grad_norm": 0.9734840545233086,
      "learning_rate": 5.194357676450256e-06,
      "loss": 0.1477,
      "step": 17242
    },
    {
      "epoch": 0.5030340159869304,
      "grad_norm": 0.7232070671923675,
      "learning_rate": 5.1938855984849645e-06,
      "loss": 0.1174,
      "step": 17243
    },
    {
      "epoch": 0.503063189217574,
      "grad_norm": 0.9599050197452415,
      "learning_rate": 5.193413518788709e-06,
      "loss": 0.1448,
      "step": 17244
    },
    {
      "epoch": 0.5030923624482175,
      "grad_norm": 0.8497767215530986,
      "learning_rate": 5.192941437365704e-06,
      "loss": 0.1379,
      "step": 17245
    },
    {
      "epoch": 0.5031215356788611,
      "grad_norm": 0.9261743928033254,
      "learning_rate": 5.192469354220163e-06,
      "loss": 0.1252,
      "step": 17246
    },
    {
      "epoch": 0.5031507089095046,
      "grad_norm": 0.7747379280891233,
      "learning_rate": 5.191997269356302e-06,
      "loss": 0.1194,
      "step": 17247
    },
    {
      "epoch": 0.5031798821401482,
      "grad_norm": 0.8353798486093982,
      "learning_rate": 5.1915251827783355e-06,
      "loss": 0.1383,
      "step": 17248
    },
    {
      "epoch": 0.5032090553707917,
      "grad_norm": 1.078824034442684,
      "learning_rate": 5.191053094490477e-06,
      "loss": 0.1344,
      "step": 17249
    },
    {
      "epoch": 0.5032382286014353,
      "grad_norm": 0.7618045597560886,
      "learning_rate": 5.190581004496943e-06,
      "loss": 0.1263,
      "step": 17250
    },
    {
      "epoch": 0.5032674018320789,
      "grad_norm": 0.6335554826591071,
      "learning_rate": 5.190108912801948e-06,
      "loss": 0.1226,
      "step": 17251
    },
    {
      "epoch": 0.5032965750627224,
      "grad_norm": 0.8226690093523454,
      "learning_rate": 5.189636819409706e-06,
      "loss": 0.1391,
      "step": 17252
    },
    {
      "epoch": 0.503325748293366,
      "grad_norm": 0.8102035247872031,
      "learning_rate": 5.1891647243244295e-06,
      "loss": 0.1238,
      "step": 17253
    },
    {
      "epoch": 0.5033549215240096,
      "grad_norm": 0.8244272403590951,
      "learning_rate": 5.188692627550337e-06,
      "loss": 0.1511,
      "step": 17254
    },
    {
      "epoch": 0.5033840947546532,
      "grad_norm": 1.0458971247754452,
      "learning_rate": 5.188220529091642e-06,
      "loss": 0.1224,
      "step": 17255
    },
    {
      "epoch": 0.5034132679852967,
      "grad_norm": 0.753577592633354,
      "learning_rate": 5.187748428952557e-06,
      "loss": 0.1357,
      "step": 17256
    },
    {
      "epoch": 0.5034424412159403,
      "grad_norm": 0.853675461914812,
      "learning_rate": 5.187276327137302e-06,
      "loss": 0.1338,
      "step": 17257
    },
    {
      "epoch": 0.5034716144465838,
      "grad_norm": 1.1959383984510523,
      "learning_rate": 5.186804223650087e-06,
      "loss": 0.1496,
      "step": 17258
    },
    {
      "epoch": 0.5035007876772274,
      "grad_norm": 0.708652813153985,
      "learning_rate": 5.1863321184951285e-06,
      "loss": 0.1222,
      "step": 17259
    },
    {
      "epoch": 0.5035299609078709,
      "grad_norm": 1.0634244139453963,
      "learning_rate": 5.185860011676643e-06,
      "loss": 0.1452,
      "step": 17260
    },
    {
      "epoch": 0.5035591341385145,
      "grad_norm": 0.8887788334081251,
      "learning_rate": 5.185387903198841e-06,
      "loss": 0.1542,
      "step": 17261
    },
    {
      "epoch": 0.503588307369158,
      "grad_norm": 0.8628642665490859,
      "learning_rate": 5.184915793065941e-06,
      "loss": 0.1375,
      "step": 17262
    },
    {
      "epoch": 0.5036174805998016,
      "grad_norm": 0.8509809838676219,
      "learning_rate": 5.184443681282157e-06,
      "loss": 0.1186,
      "step": 17263
    },
    {
      "epoch": 0.5036466538304452,
      "grad_norm": 1.1490226197470912,
      "learning_rate": 5.183971567851704e-06,
      "loss": 0.1483,
      "step": 17264
    },
    {
      "epoch": 0.5036758270610887,
      "grad_norm": 0.8110213708126766,
      "learning_rate": 5.183499452778797e-06,
      "loss": 0.1401,
      "step": 17265
    },
    {
      "epoch": 0.5037050002917323,
      "grad_norm": 1.1587106322731249,
      "learning_rate": 5.183027336067649e-06,
      "loss": 0.1303,
      "step": 17266
    },
    {
      "epoch": 0.5037341735223758,
      "grad_norm": 0.8795036241011459,
      "learning_rate": 5.182555217722479e-06,
      "loss": 0.1396,
      "step": 17267
    },
    {
      "epoch": 0.5037633467530195,
      "grad_norm": 0.777523482896419,
      "learning_rate": 5.182083097747499e-06,
      "loss": 0.158,
      "step": 17268
    },
    {
      "epoch": 0.503792519983663,
      "grad_norm": 1.1357611384784294,
      "learning_rate": 5.181610976146924e-06,
      "loss": 0.1344,
      "step": 17269
    },
    {
      "epoch": 0.5038216932143066,
      "grad_norm": 1.185696125110025,
      "learning_rate": 5.1811388529249695e-06,
      "loss": 0.126,
      "step": 17270
    },
    {
      "epoch": 0.5038508664449501,
      "grad_norm": 0.9539166554928546,
      "learning_rate": 5.180666728085852e-06,
      "loss": 0.1149,
      "step": 17271
    },
    {
      "epoch": 0.5038800396755937,
      "grad_norm": 0.9670610747291339,
      "learning_rate": 5.180194601633784e-06,
      "loss": 0.129,
      "step": 17272
    },
    {
      "epoch": 0.5039092129062372,
      "grad_norm": 0.7969008634462226,
      "learning_rate": 5.179722473572982e-06,
      "loss": 0.1302,
      "step": 17273
    },
    {
      "epoch": 0.5039383861368808,
      "grad_norm": 0.7885981007998627,
      "learning_rate": 5.17925034390766e-06,
      "loss": 0.1476,
      "step": 17274
    },
    {
      "epoch": 0.5039675593675244,
      "grad_norm": 1.2347809487692973,
      "learning_rate": 5.178778212642034e-06,
      "loss": 0.1478,
      "step": 17275
    },
    {
      "epoch": 0.5039967325981679,
      "grad_norm": 0.7202792160277384,
      "learning_rate": 5.178306079780318e-06,
      "loss": 0.1397,
      "step": 17276
    },
    {
      "epoch": 0.5040259058288115,
      "grad_norm": 0.7922208300396085,
      "learning_rate": 5.177833945326729e-06,
      "loss": 0.1454,
      "step": 17277
    },
    {
      "epoch": 0.504055079059455,
      "grad_norm": 1.0010676963995142,
      "learning_rate": 5.17736180928548e-06,
      "loss": 0.1166,
      "step": 17278
    },
    {
      "epoch": 0.5040842522900986,
      "grad_norm": 0.7995820822926964,
      "learning_rate": 5.176889671660789e-06,
      "loss": 0.1111,
      "step": 17279
    },
    {
      "epoch": 0.5041134255207421,
      "grad_norm": 0.6889976012705192,
      "learning_rate": 5.176417532456868e-06,
      "loss": 0.1149,
      "step": 17280
    },
    {
      "epoch": 0.5041425987513857,
      "grad_norm": 1.2054995080238184,
      "learning_rate": 5.175945391677932e-06,
      "loss": 0.1591,
      "step": 17281
    },
    {
      "epoch": 0.5041717719820293,
      "grad_norm": 0.7467556408446708,
      "learning_rate": 5.175473249328199e-06,
      "loss": 0.0966,
      "step": 17282
    },
    {
      "epoch": 0.5042009452126729,
      "grad_norm": 0.7376648464058364,
      "learning_rate": 5.175001105411883e-06,
      "loss": 0.1376,
      "step": 17283
    },
    {
      "epoch": 0.5042301184433164,
      "grad_norm": 0.7796029587698763,
      "learning_rate": 5.174528959933198e-06,
      "loss": 0.1686,
      "step": 17284
    },
    {
      "epoch": 0.50425929167396,
      "grad_norm": 0.937693107754778,
      "learning_rate": 5.1740568128963605e-06,
      "loss": 0.1515,
      "step": 17285
    },
    {
      "epoch": 0.5042884649046036,
      "grad_norm": 0.761824456179311,
      "learning_rate": 5.173584664305587e-06,
      "loss": 0.1281,
      "step": 17286
    },
    {
      "epoch": 0.5043176381352471,
      "grad_norm": 0.7372483380958332,
      "learning_rate": 5.173112514165089e-06,
      "loss": 0.1194,
      "step": 17287
    },
    {
      "epoch": 0.5043468113658907,
      "grad_norm": 0.8701508919391344,
      "learning_rate": 5.1726403624790834e-06,
      "loss": 0.1265,
      "step": 17288
    },
    {
      "epoch": 0.5043759845965342,
      "grad_norm": 0.6910382203783673,
      "learning_rate": 5.172168209251788e-06,
      "loss": 0.124,
      "step": 17289
    },
    {
      "epoch": 0.5044051578271778,
      "grad_norm": 1.2675084606568687,
      "learning_rate": 5.171696054487415e-06,
      "loss": 0.1611,
      "step": 17290
    },
    {
      "epoch": 0.5044343310578213,
      "grad_norm": 0.8944759267988633,
      "learning_rate": 5.171223898190178e-06,
      "loss": 0.1395,
      "step": 17291
    },
    {
      "epoch": 0.5044635042884649,
      "grad_norm": 0.9528528080657033,
      "learning_rate": 5.170751740364299e-06,
      "loss": 0.1302,
      "step": 17292
    },
    {
      "epoch": 0.5044926775191084,
      "grad_norm": 0.9272631413814407,
      "learning_rate": 5.170279581013987e-06,
      "loss": 0.1146,
      "step": 17293
    },
    {
      "epoch": 0.504521850749752,
      "grad_norm": 0.6632904208785081,
      "learning_rate": 5.16980742014346e-06,
      "loss": 0.1361,
      "step": 17294
    },
    {
      "epoch": 0.5045510239803956,
      "grad_norm": 0.7432749761201312,
      "learning_rate": 5.169335257756933e-06,
      "loss": 0.1313,
      "step": 17295
    },
    {
      "epoch": 0.5045801972110392,
      "grad_norm": 0.8658986817112828,
      "learning_rate": 5.168863093858622e-06,
      "loss": 0.1258,
      "step": 17296
    },
    {
      "epoch": 0.5046093704416827,
      "grad_norm": 0.9471182737787873,
      "learning_rate": 5.1683909284527404e-06,
      "loss": 0.1364,
      "step": 17297
    },
    {
      "epoch": 0.5046385436723263,
      "grad_norm": 0.7554185249892994,
      "learning_rate": 5.1679187615435045e-06,
      "loss": 0.1465,
      "step": 17298
    },
    {
      "epoch": 0.5046677169029699,
      "grad_norm": 0.8796981762674053,
      "learning_rate": 5.16744659313513e-06,
      "loss": 0.143,
      "step": 17299
    },
    {
      "epoch": 0.5046968901336134,
      "grad_norm": 0.778147412229105,
      "learning_rate": 5.1669744232318345e-06,
      "loss": 0.1454,
      "step": 17300
    },
    {
      "epoch": 0.504726063364257,
      "grad_norm": 0.6812293522365617,
      "learning_rate": 5.1665022518378285e-06,
      "loss": 0.131,
      "step": 17301
    },
    {
      "epoch": 0.5047552365949005,
      "grad_norm": 1.0503963227237636,
      "learning_rate": 5.166030078957333e-06,
      "loss": 0.1555,
      "step": 17302
    },
    {
      "epoch": 0.5047844098255441,
      "grad_norm": 0.7414429945332138,
      "learning_rate": 5.165557904594557e-06,
      "loss": 0.1221,
      "step": 17303
    },
    {
      "epoch": 0.5048135830561876,
      "grad_norm": 0.9916526761838073,
      "learning_rate": 5.165085728753723e-06,
      "loss": 0.1478,
      "step": 17304
    },
    {
      "epoch": 0.5048427562868312,
      "grad_norm": 0.9172534816491521,
      "learning_rate": 5.16461355143904e-06,
      "loss": 0.1365,
      "step": 17305
    },
    {
      "epoch": 0.5048719295174747,
      "grad_norm": 0.8760311130636964,
      "learning_rate": 5.164141372654728e-06,
      "loss": 0.163,
      "step": 17306
    },
    {
      "epoch": 0.5049011027481183,
      "grad_norm": 0.7033881953878998,
      "learning_rate": 5.163669192405002e-06,
      "loss": 0.1272,
      "step": 17307
    },
    {
      "epoch": 0.5049302759787618,
      "grad_norm": 0.7781406001882464,
      "learning_rate": 5.163197010694076e-06,
      "loss": 0.1351,
      "step": 17308
    },
    {
      "epoch": 0.5049594492094055,
      "grad_norm": 1.0139965614663418,
      "learning_rate": 5.162724827526164e-06,
      "loss": 0.1586,
      "step": 17309
    },
    {
      "epoch": 0.504988622440049,
      "grad_norm": 0.770099026281592,
      "learning_rate": 5.1622526429054855e-06,
      "loss": 0.1156,
      "step": 17310
    },
    {
      "epoch": 0.5050177956706926,
      "grad_norm": 0.7775863334155969,
      "learning_rate": 5.161780456836254e-06,
      "loss": 0.1316,
      "step": 17311
    },
    {
      "epoch": 0.5050469689013362,
      "grad_norm": 0.9193186397371704,
      "learning_rate": 5.161308269322684e-06,
      "loss": 0.1335,
      "step": 17312
    },
    {
      "epoch": 0.5050761421319797,
      "grad_norm": 0.961528245068585,
      "learning_rate": 5.160836080368994e-06,
      "loss": 0.1624,
      "step": 17313
    },
    {
      "epoch": 0.5051053153626233,
      "grad_norm": 0.8024931120018204,
      "learning_rate": 5.160363889979396e-06,
      "loss": 0.1126,
      "step": 17314
    },
    {
      "epoch": 0.5051344885932668,
      "grad_norm": 0.792742673458629,
      "learning_rate": 5.159891698158109e-06,
      "loss": 0.1508,
      "step": 17315
    },
    {
      "epoch": 0.5051636618239104,
      "grad_norm": 0.8145078846492846,
      "learning_rate": 5.159419504909346e-06,
      "loss": 0.1252,
      "step": 17316
    },
    {
      "epoch": 0.5051928350545539,
      "grad_norm": 1.1263377609224792,
      "learning_rate": 5.1589473102373265e-06,
      "loss": 0.1498,
      "step": 17317
    },
    {
      "epoch": 0.5052220082851975,
      "grad_norm": 0.7968997022564035,
      "learning_rate": 5.15847511414626e-06,
      "loss": 0.118,
      "step": 17318
    },
    {
      "epoch": 0.505251181515841,
      "grad_norm": 0.8437469819237442,
      "learning_rate": 5.1580029166403675e-06,
      "loss": 0.1354,
      "step": 17319
    },
    {
      "epoch": 0.5052803547464846,
      "grad_norm": 0.8186802097733634,
      "learning_rate": 5.157530717723862e-06,
      "loss": 0.1367,
      "step": 17320
    },
    {
      "epoch": 0.5053095279771281,
      "grad_norm": 0.7429109514425973,
      "learning_rate": 5.157058517400958e-06,
      "loss": 0.1568,
      "step": 17321
    },
    {
      "epoch": 0.5053387012077718,
      "grad_norm": 0.7847091485413135,
      "learning_rate": 5.156586315675877e-06,
      "loss": 0.1374,
      "step": 17322
    },
    {
      "epoch": 0.5053678744384154,
      "grad_norm": 1.0191237270795168,
      "learning_rate": 5.156114112552828e-06,
      "loss": 0.1348,
      "step": 17323
    },
    {
      "epoch": 0.5053970476690589,
      "grad_norm": 0.739147094279075,
      "learning_rate": 5.15564190803603e-06,
      "loss": 0.1297,
      "step": 17324
    },
    {
      "epoch": 0.5054262208997025,
      "grad_norm": 0.8209983422623021,
      "learning_rate": 5.1551697021296975e-06,
      "loss": 0.1397,
      "step": 17325
    },
    {
      "epoch": 0.505455394130346,
      "grad_norm": 0.9436519694130765,
      "learning_rate": 5.154697494838048e-06,
      "loss": 0.1406,
      "step": 17326
    },
    {
      "epoch": 0.5054845673609896,
      "grad_norm": 0.7957110386933195,
      "learning_rate": 5.154225286165296e-06,
      "loss": 0.1353,
      "step": 17327
    },
    {
      "epoch": 0.5055137405916331,
      "grad_norm": 1.012930471614181,
      "learning_rate": 5.153753076115657e-06,
      "loss": 0.1512,
      "step": 17328
    },
    {
      "epoch": 0.5055429138222767,
      "grad_norm": 0.5983101306414064,
      "learning_rate": 5.153280864693348e-06,
      "loss": 0.1356,
      "step": 17329
    },
    {
      "epoch": 0.5055720870529202,
      "grad_norm": 0.8152696892143733,
      "learning_rate": 5.152808651902583e-06,
      "loss": 0.1427,
      "step": 17330
    },
    {
      "epoch": 0.5056012602835638,
      "grad_norm": 1.0830767427558556,
      "learning_rate": 5.152336437747579e-06,
      "loss": 0.1408,
      "step": 17331
    },
    {
      "epoch": 0.5056304335142073,
      "grad_norm": 0.7452343454132013,
      "learning_rate": 5.1518642222325535e-06,
      "loss": 0.1188,
      "step": 17332
    },
    {
      "epoch": 0.5056596067448509,
      "grad_norm": 0.7369772632010159,
      "learning_rate": 5.151392005361719e-06,
      "loss": 0.1191,
      "step": 17333
    },
    {
      "epoch": 0.5056887799754944,
      "grad_norm": 0.9904101752353428,
      "learning_rate": 5.150919787139294e-06,
      "loss": 0.1746,
      "step": 17334
    },
    {
      "epoch": 0.505717953206138,
      "grad_norm": 1.0406323486541111,
      "learning_rate": 5.150447567569491e-06,
      "loss": 0.1254,
      "step": 17335
    },
    {
      "epoch": 0.5057471264367817,
      "grad_norm": 0.7075305093172395,
      "learning_rate": 5.149975346656528e-06,
      "loss": 0.1096,
      "step": 17336
    },
    {
      "epoch": 0.5057762996674252,
      "grad_norm": 0.702184811195534,
      "learning_rate": 5.149503124404624e-06,
      "loss": 0.1179,
      "step": 17337
    },
    {
      "epoch": 0.5058054728980688,
      "grad_norm": 1.0631778209550506,
      "learning_rate": 5.149030900817988e-06,
      "loss": 0.1211,
      "step": 17338
    },
    {
      "epoch": 0.5058346461287123,
      "grad_norm": 0.7921009141751055,
      "learning_rate": 5.148558675900842e-06,
      "loss": 0.1131,
      "step": 17339
    },
    {
      "epoch": 0.5058638193593559,
      "grad_norm": 0.8507068470545315,
      "learning_rate": 5.148086449657399e-06,
      "loss": 0.1414,
      "step": 17340
    },
    {
      "epoch": 0.5058929925899994,
      "grad_norm": 0.910500527934626,
      "learning_rate": 5.147614222091876e-06,
      "loss": 0.1339,
      "step": 17341
    },
    {
      "epoch": 0.505922165820643,
      "grad_norm": 1.0105572210231657,
      "learning_rate": 5.147141993208487e-06,
      "loss": 0.1495,
      "step": 17342
    },
    {
      "epoch": 0.5059513390512865,
      "grad_norm": 0.8055268268118744,
      "learning_rate": 5.146669763011452e-06,
      "loss": 0.1345,
      "step": 17343
    },
    {
      "epoch": 0.5059805122819301,
      "grad_norm": 0.8120024890477584,
      "learning_rate": 5.146197531504982e-06,
      "loss": 0.1315,
      "step": 17344
    },
    {
      "epoch": 0.5060096855125736,
      "grad_norm": 0.83127325265799,
      "learning_rate": 5.145725298693296e-06,
      "loss": 0.1173,
      "step": 17345
    },
    {
      "epoch": 0.5060388587432172,
      "grad_norm": 0.8872147149249974,
      "learning_rate": 5.145253064580609e-06,
      "loss": 0.1066,
      "step": 17346
    },
    {
      "epoch": 0.5060680319738607,
      "grad_norm": 0.8112256411173704,
      "learning_rate": 5.144780829171139e-06,
      "loss": 0.1231,
      "step": 17347
    },
    {
      "epoch": 0.5060972052045043,
      "grad_norm": 0.9873245820093021,
      "learning_rate": 5.1443085924690986e-06,
      "loss": 0.1734,
      "step": 17348
    },
    {
      "epoch": 0.506126378435148,
      "grad_norm": 1.1000236816155275,
      "learning_rate": 5.143836354478706e-06,
      "loss": 0.1439,
      "step": 17349
    },
    {
      "epoch": 0.5061555516657915,
      "grad_norm": 0.8240164434531867,
      "learning_rate": 5.143364115204178e-06,
      "loss": 0.1482,
      "step": 17350
    },
    {
      "epoch": 0.5061847248964351,
      "grad_norm": 0.80167303383792,
      "learning_rate": 5.142891874649727e-06,
      "loss": 0.1475,
      "step": 17351
    },
    {
      "epoch": 0.5062138981270786,
      "grad_norm": 0.7506224208283753,
      "learning_rate": 5.142419632819573e-06,
      "loss": 0.1247,
      "step": 17352
    },
    {
      "epoch": 0.5062430713577222,
      "grad_norm": 0.8844447321758908,
      "learning_rate": 5.14194738971793e-06,
      "loss": 0.1449,
      "step": 17353
    },
    {
      "epoch": 0.5062722445883657,
      "grad_norm": 0.7550763334019078,
      "learning_rate": 5.1414751453490154e-06,
      "loss": 0.1478,
      "step": 17354
    },
    {
      "epoch": 0.5063014178190093,
      "grad_norm": 0.7393954077232628,
      "learning_rate": 5.141002899717044e-06,
      "loss": 0.1353,
      "step": 17355
    },
    {
      "epoch": 0.5063305910496528,
      "grad_norm": 0.6858918738623919,
      "learning_rate": 5.140530652826232e-06,
      "loss": 0.1338,
      "step": 17356
    },
    {
      "epoch": 0.5063597642802964,
      "grad_norm": 0.7929363077110662,
      "learning_rate": 5.1400584046807955e-06,
      "loss": 0.1545,
      "step": 17357
    },
    {
      "epoch": 0.5063889375109399,
      "grad_norm": 0.8007111275954959,
      "learning_rate": 5.139586155284953e-06,
      "loss": 0.1383,
      "step": 17358
    },
    {
      "epoch": 0.5064181107415835,
      "grad_norm": 0.6879773730704335,
      "learning_rate": 5.139113904642916e-06,
      "loss": 0.1092,
      "step": 17359
    },
    {
      "epoch": 0.506447283972227,
      "grad_norm": 0.7878276610794269,
      "learning_rate": 5.138641652758904e-06,
      "loss": 0.1178,
      "step": 17360
    },
    {
      "epoch": 0.5064764572028706,
      "grad_norm": 0.756600483158757,
      "learning_rate": 5.138169399637134e-06,
      "loss": 0.1171,
      "step": 17361
    },
    {
      "epoch": 0.5065056304335142,
      "grad_norm": 0.6037140498171352,
      "learning_rate": 5.137697145281821e-06,
      "loss": 0.1156,
      "step": 17362
    },
    {
      "epoch": 0.5065348036641578,
      "grad_norm": 0.8472983398237784,
      "learning_rate": 5.137224889697178e-06,
      "loss": 0.1358,
      "step": 17363
    },
    {
      "epoch": 0.5065639768948014,
      "grad_norm": 0.7878403651131111,
      "learning_rate": 5.136752632887425e-06,
      "loss": 0.1225,
      "step": 17364
    },
    {
      "epoch": 0.5065931501254449,
      "grad_norm": 0.8571396047247545,
      "learning_rate": 5.136280374856778e-06,
      "loss": 0.1395,
      "step": 17365
    },
    {
      "epoch": 0.5066223233560885,
      "grad_norm": 0.8080667401260934,
      "learning_rate": 5.135808115609451e-06,
      "loss": 0.1165,
      "step": 17366
    },
    {
      "epoch": 0.506651496586732,
      "grad_norm": 0.8838439608089907,
      "learning_rate": 5.135335855149662e-06,
      "loss": 0.1404,
      "step": 17367
    },
    {
      "epoch": 0.5066806698173756,
      "grad_norm": 0.7789429548029285,
      "learning_rate": 5.134863593481628e-06,
      "loss": 0.1242,
      "step": 17368
    },
    {
      "epoch": 0.5067098430480191,
      "grad_norm": 0.7882215555047755,
      "learning_rate": 5.134391330609563e-06,
      "loss": 0.155,
      "step": 17369
    },
    {
      "epoch": 0.5067390162786627,
      "grad_norm": 0.8961196088251444,
      "learning_rate": 5.133919066537683e-06,
      "loss": 0.1711,
      "step": 17370
    },
    {
      "epoch": 0.5067681895093062,
      "grad_norm": 0.9771907644859715,
      "learning_rate": 5.133446801270207e-06,
      "loss": 0.1462,
      "step": 17371
    },
    {
      "epoch": 0.5067973627399498,
      "grad_norm": 0.7900261333947414,
      "learning_rate": 5.13297453481135e-06,
      "loss": 0.1387,
      "step": 17372
    },
    {
      "epoch": 0.5068265359705934,
      "grad_norm": 0.9798097602595854,
      "learning_rate": 5.1325022671653275e-06,
      "loss": 0.1571,
      "step": 17373
    },
    {
      "epoch": 0.5068557092012369,
      "grad_norm": 0.9566408462411358,
      "learning_rate": 5.1320299983363576e-06,
      "loss": 0.1191,
      "step": 17374
    },
    {
      "epoch": 0.5068848824318805,
      "grad_norm": 0.8072634113664551,
      "learning_rate": 5.131557728328655e-06,
      "loss": 0.1379,
      "step": 17375
    },
    {
      "epoch": 0.5069140556625241,
      "grad_norm": 0.773098212033721,
      "learning_rate": 5.131085457146435e-06,
      "loss": 0.1303,
      "step": 17376
    },
    {
      "epoch": 0.5069432288931677,
      "grad_norm": 0.8632777823305222,
      "learning_rate": 5.130613184793918e-06,
      "loss": 0.1422,
      "step": 17377
    },
    {
      "epoch": 0.5069724021238112,
      "grad_norm": 0.7959921007310494,
      "learning_rate": 5.130140911275315e-06,
      "loss": 0.1416,
      "step": 17378
    },
    {
      "epoch": 0.5070015753544548,
      "grad_norm": 0.6881613802772266,
      "learning_rate": 5.129668636594847e-06,
      "loss": 0.1276,
      "step": 17379
    },
    {
      "epoch": 0.5070307485850983,
      "grad_norm": 0.7637479052797145,
      "learning_rate": 5.129196360756726e-06,
      "loss": 0.118,
      "step": 17380
    },
    {
      "epoch": 0.5070599218157419,
      "grad_norm": 1.0347095831555904,
      "learning_rate": 5.128724083765172e-06,
      "loss": 0.1183,
      "step": 17381
    },
    {
      "epoch": 0.5070890950463854,
      "grad_norm": 0.8213310815684511,
      "learning_rate": 5.1282518056244006e-06,
      "loss": 0.1245,
      "step": 17382
    },
    {
      "epoch": 0.507118268277029,
      "grad_norm": 0.9759906612344345,
      "learning_rate": 5.127779526338628e-06,
      "loss": 0.1478,
      "step": 17383
    },
    {
      "epoch": 0.5071474415076725,
      "grad_norm": 1.2924699679050022,
      "learning_rate": 5.127307245912069e-06,
      "loss": 0.1506,
      "step": 17384
    },
    {
      "epoch": 0.5071766147383161,
      "grad_norm": 0.9077479603573876,
      "learning_rate": 5.126834964348941e-06,
      "loss": 0.1405,
      "step": 17385
    },
    {
      "epoch": 0.5072057879689597,
      "grad_norm": 0.882082982669461,
      "learning_rate": 5.1263626816534616e-06,
      "loss": 0.1445,
      "step": 17386
    },
    {
      "epoch": 0.5072349611996032,
      "grad_norm": 0.939918307589359,
      "learning_rate": 5.125890397829847e-06,
      "loss": 0.1472,
      "step": 17387
    },
    {
      "epoch": 0.5072641344302468,
      "grad_norm": 0.8187095096245093,
      "learning_rate": 5.1254181128823124e-06,
      "loss": 0.1252,
      "step": 17388
    },
    {
      "epoch": 0.5072933076608903,
      "grad_norm": 0.8300657431033446,
      "learning_rate": 5.124945826815074e-06,
      "loss": 0.1153,
      "step": 17389
    },
    {
      "epoch": 0.507322480891534,
      "grad_norm": 0.8988592144257003,
      "learning_rate": 5.1244735396323495e-06,
      "loss": 0.1483,
      "step": 17390
    },
    {
      "epoch": 0.5073516541221775,
      "grad_norm": 0.9213824058900065,
      "learning_rate": 5.124001251338355e-06,
      "loss": 0.1379,
      "step": 17391
    },
    {
      "epoch": 0.5073808273528211,
      "grad_norm": 0.7782039516202991,
      "learning_rate": 5.1235289619373085e-06,
      "loss": 0.1461,
      "step": 17392
    },
    {
      "epoch": 0.5074100005834646,
      "grad_norm": 0.7872022005111858,
      "learning_rate": 5.123056671433423e-06,
      "loss": 0.1508,
      "step": 17393
    },
    {
      "epoch": 0.5074391738141082,
      "grad_norm": 0.8579017110269386,
      "learning_rate": 5.122584379830918e-06,
      "loss": 0.1369,
      "step": 17394
    },
    {
      "epoch": 0.5074683470447517,
      "grad_norm": 0.8714802581837974,
      "learning_rate": 5.122112087134008e-06,
      "loss": 0.1285,
      "step": 17395
    },
    {
      "epoch": 0.5074975202753953,
      "grad_norm": 0.7290086984449238,
      "learning_rate": 5.12163979334691e-06,
      "loss": 0.1221,
      "step": 17396
    },
    {
      "epoch": 0.5075266935060389,
      "grad_norm": 0.8952570803272655,
      "learning_rate": 5.121167498473844e-06,
      "loss": 0.1322,
      "step": 17397
    },
    {
      "epoch": 0.5075558667366824,
      "grad_norm": 0.7462410291138011,
      "learning_rate": 5.12069520251902e-06,
      "loss": 0.1215,
      "step": 17398
    },
    {
      "epoch": 0.507585039967326,
      "grad_norm": 0.7841741056002378,
      "learning_rate": 5.1202229054866595e-06,
      "loss": 0.1241,
      "step": 17399
    },
    {
      "epoch": 0.5076142131979695,
      "grad_norm": 0.9448811646218165,
      "learning_rate": 5.119750607380977e-06,
      "loss": 0.1421,
      "step": 17400
    },
    {
      "epoch": 0.5076433864286131,
      "grad_norm": 0.7560075039981516,
      "learning_rate": 5.119278308206191e-06,
      "loss": 0.113,
      "step": 17401
    },
    {
      "epoch": 0.5076725596592566,
      "grad_norm": 1.1668001301099393,
      "learning_rate": 5.118806007966516e-06,
      "loss": 0.1544,
      "step": 17402
    },
    {
      "epoch": 0.5077017328899003,
      "grad_norm": 1.2219707925183994,
      "learning_rate": 5.118333706666168e-06,
      "loss": 0.1326,
      "step": 17403
    },
    {
      "epoch": 0.5077309061205438,
      "grad_norm": 0.9900389618322261,
      "learning_rate": 5.117861404309367e-06,
      "loss": 0.1083,
      "step": 17404
    },
    {
      "epoch": 0.5077600793511874,
      "grad_norm": 0.7042060620325492,
      "learning_rate": 5.117389100900326e-06,
      "loss": 0.1499,
      "step": 17405
    },
    {
      "epoch": 0.5077892525818309,
      "grad_norm": 1.0610797808611254,
      "learning_rate": 5.116916796443264e-06,
      "loss": 0.1354,
      "step": 17406
    },
    {
      "epoch": 0.5078184258124745,
      "grad_norm": 1.2112837320831007,
      "learning_rate": 5.116444490942397e-06,
      "loss": 0.1188,
      "step": 17407
    },
    {
      "epoch": 0.507847599043118,
      "grad_norm": 0.757975343444948,
      "learning_rate": 5.1159721844019406e-06,
      "loss": 0.1446,
      "step": 17408
    },
    {
      "epoch": 0.5078767722737616,
      "grad_norm": 0.8757713595024581,
      "learning_rate": 5.115499876826113e-06,
      "loss": 0.1363,
      "step": 17409
    },
    {
      "epoch": 0.5079059455044052,
      "grad_norm": 1.0200454868819515,
      "learning_rate": 5.115027568219129e-06,
      "loss": 0.1445,
      "step": 17410
    },
    {
      "epoch": 0.5079351187350487,
      "grad_norm": 0.7946393618360597,
      "learning_rate": 5.114555258585207e-06,
      "loss": 0.1259,
      "step": 17411
    },
    {
      "epoch": 0.5079642919656923,
      "grad_norm": 0.755484192261936,
      "learning_rate": 5.114082947928563e-06,
      "loss": 0.1579,
      "step": 17412
    },
    {
      "epoch": 0.5079934651963358,
      "grad_norm": 0.805338398118202,
      "learning_rate": 5.113610636253413e-06,
      "loss": 0.1415,
      "step": 17413
    },
    {
      "epoch": 0.5080226384269794,
      "grad_norm": 0.7999682988143448,
      "learning_rate": 5.113138323563975e-06,
      "loss": 0.1414,
      "step": 17414
    },
    {
      "epoch": 0.5080518116576229,
      "grad_norm": 0.798125551415898,
      "learning_rate": 5.112666009864466e-06,
      "loss": 0.126,
      "step": 17415
    },
    {
      "epoch": 0.5080809848882665,
      "grad_norm": 0.6759290380349545,
      "learning_rate": 5.1121936951591e-06,
      "loss": 0.1105,
      "step": 17416
    },
    {
      "epoch": 0.5081101581189101,
      "grad_norm": 0.6684177179268687,
      "learning_rate": 5.111721379452096e-06,
      "loss": 0.1297,
      "step": 17417
    },
    {
      "epoch": 0.5081393313495537,
      "grad_norm": 0.8306387513930461,
      "learning_rate": 5.111249062747671e-06,
      "loss": 0.1288,
      "step": 17418
    },
    {
      "epoch": 0.5081685045801972,
      "grad_norm": 1.0230107081557667,
      "learning_rate": 5.11077674505004e-06,
      "loss": 0.1212,
      "step": 17419
    },
    {
      "epoch": 0.5081976778108408,
      "grad_norm": 0.9203159726182012,
      "learning_rate": 5.11030442636342e-06,
      "loss": 0.1463,
      "step": 17420
    },
    {
      "epoch": 0.5082268510414844,
      "grad_norm": 0.8596779820305135,
      "learning_rate": 5.10983210669203e-06,
      "loss": 0.1225,
      "step": 17421
    },
    {
      "epoch": 0.5082560242721279,
      "grad_norm": 0.8261003187516668,
      "learning_rate": 5.109359786040086e-06,
      "loss": 0.1305,
      "step": 17422
    },
    {
      "epoch": 0.5082851975027715,
      "grad_norm": 1.0041683647406259,
      "learning_rate": 5.108887464411802e-06,
      "loss": 0.1547,
      "step": 17423
    },
    {
      "epoch": 0.508314370733415,
      "grad_norm": 1.0042134265473222,
      "learning_rate": 5.108415141811398e-06,
      "loss": 0.117,
      "step": 17424
    },
    {
      "epoch": 0.5083435439640586,
      "grad_norm": 0.6871673440834162,
      "learning_rate": 5.107942818243088e-06,
      "loss": 0.1151,
      "step": 17425
    },
    {
      "epoch": 0.5083727171947021,
      "grad_norm": 0.8310470443975275,
      "learning_rate": 5.1074704937110895e-06,
      "loss": 0.1337,
      "step": 17426
    },
    {
      "epoch": 0.5084018904253457,
      "grad_norm": 0.9398306613967943,
      "learning_rate": 5.1069981682196235e-06,
      "loss": 0.123,
      "step": 17427
    },
    {
      "epoch": 0.5084310636559892,
      "grad_norm": 0.8089940837278848,
      "learning_rate": 5.106525841772902e-06,
      "loss": 0.1067,
      "step": 17428
    },
    {
      "epoch": 0.5084602368866328,
      "grad_norm": 0.6566858907538361,
      "learning_rate": 5.106053514375142e-06,
      "loss": 0.1653,
      "step": 17429
    },
    {
      "epoch": 0.5084894101172764,
      "grad_norm": 0.8595110387011408,
      "learning_rate": 5.105581186030563e-06,
      "loss": 0.1445,
      "step": 17430
    },
    {
      "epoch": 0.50851858334792,
      "grad_norm": 0.7523261269920449,
      "learning_rate": 5.1051088567433785e-06,
      "loss": 0.1308,
      "step": 17431
    },
    {
      "epoch": 0.5085477565785635,
      "grad_norm": 0.829148605483477,
      "learning_rate": 5.104636526517809e-06,
      "loss": 0.1104,
      "step": 17432
    },
    {
      "epoch": 0.5085769298092071,
      "grad_norm": 0.7246840243450478,
      "learning_rate": 5.104164195358068e-06,
      "loss": 0.1569,
      "step": 17433
    },
    {
      "epoch": 0.5086061030398507,
      "grad_norm": 0.9536137671145061,
      "learning_rate": 5.103691863268375e-06,
      "loss": 0.137,
      "step": 17434
    },
    {
      "epoch": 0.5086352762704942,
      "grad_norm": 1.0648240016753865,
      "learning_rate": 5.103219530252945e-06,
      "loss": 0.1312,
      "step": 17435
    },
    {
      "epoch": 0.5086644495011378,
      "grad_norm": 0.7450880858814997,
      "learning_rate": 5.102747196315997e-06,
      "loss": 0.1155,
      "step": 17436
    },
    {
      "epoch": 0.5086936227317813,
      "grad_norm": 0.8871366210252537,
      "learning_rate": 5.102274861461747e-06,
      "loss": 0.1493,
      "step": 17437
    },
    {
      "epoch": 0.5087227959624249,
      "grad_norm": 0.8154720472206844,
      "learning_rate": 5.101802525694409e-06,
      "loss": 0.1353,
      "step": 17438
    },
    {
      "epoch": 0.5087519691930684,
      "grad_norm": 0.9148185585487233,
      "learning_rate": 5.101330189018205e-06,
      "loss": 0.1093,
      "step": 17439
    },
    {
      "epoch": 0.508781142423712,
      "grad_norm": 0.8445701927656427,
      "learning_rate": 5.100857851437347e-06,
      "loss": 0.1264,
      "step": 17440
    },
    {
      "epoch": 0.5088103156543555,
      "grad_norm": 0.7670467790226362,
      "learning_rate": 5.100385512956054e-06,
      "loss": 0.1374,
      "step": 17441
    },
    {
      "epoch": 0.5088394888849991,
      "grad_norm": 0.8350280756894248,
      "learning_rate": 5.099913173578546e-06,
      "loss": 0.1626,
      "step": 17442
    },
    {
      "epoch": 0.5088686621156426,
      "grad_norm": 1.124872795905888,
      "learning_rate": 5.099440833309035e-06,
      "loss": 0.1231,
      "step": 17443
    },
    {
      "epoch": 0.5088978353462863,
      "grad_norm": 1.0196721305331584,
      "learning_rate": 5.09896849215174e-06,
      "loss": 0.1748,
      "step": 17444
    },
    {
      "epoch": 0.5089270085769299,
      "grad_norm": 0.7511860367937689,
      "learning_rate": 5.0984961501108785e-06,
      "loss": 0.1374,
      "step": 17445
    },
    {
      "epoch": 0.5089561818075734,
      "grad_norm": 0.829116893736274,
      "learning_rate": 5.098023807190666e-06,
      "loss": 0.1036,
      "step": 17446
    },
    {
      "epoch": 0.508985355038217,
      "grad_norm": 0.8072556863341455,
      "learning_rate": 5.097551463395321e-06,
      "loss": 0.1075,
      "step": 17447
    },
    {
      "epoch": 0.5090145282688605,
      "grad_norm": 0.9118758133732958,
      "learning_rate": 5.0970791187290605e-06,
      "loss": 0.1241,
      "step": 17448
    },
    {
      "epoch": 0.5090437014995041,
      "grad_norm": 0.699691390826038,
      "learning_rate": 5.0966067731961e-06,
      "loss": 0.1224,
      "step": 17449
    },
    {
      "epoch": 0.5090728747301476,
      "grad_norm": 0.8384194162642024,
      "learning_rate": 5.096134426800657e-06,
      "loss": 0.1512,
      "step": 17450
    },
    {
      "epoch": 0.5091020479607912,
      "grad_norm": 0.786729089874759,
      "learning_rate": 5.095662079546949e-06,
      "loss": 0.1318,
      "step": 17451
    },
    {
      "epoch": 0.5091312211914347,
      "grad_norm": 0.8723570546988967,
      "learning_rate": 5.095189731439194e-06,
      "loss": 0.1526,
      "step": 17452
    },
    {
      "epoch": 0.5091603944220783,
      "grad_norm": 0.8062033254186469,
      "learning_rate": 5.094717382481605e-06,
      "loss": 0.1367,
      "step": 17453
    },
    {
      "epoch": 0.5091895676527218,
      "grad_norm": 0.7979303462656121,
      "learning_rate": 5.094245032678406e-06,
      "loss": 0.1245,
      "step": 17454
    },
    {
      "epoch": 0.5092187408833654,
      "grad_norm": 0.7828414313310849,
      "learning_rate": 5.093772682033806e-06,
      "loss": 0.1407,
      "step": 17455
    },
    {
      "epoch": 0.5092479141140089,
      "grad_norm": 0.7756363833803713,
      "learning_rate": 5.093300330552027e-06,
      "loss": 0.1241,
      "step": 17456
    },
    {
      "epoch": 0.5092770873446526,
      "grad_norm": 0.793736937840087,
      "learning_rate": 5.0928279782372855e-06,
      "loss": 0.1347,
      "step": 17457
    },
    {
      "epoch": 0.5093062605752962,
      "grad_norm": 0.7788558158774311,
      "learning_rate": 5.092355625093798e-06,
      "loss": 0.1446,
      "step": 17458
    },
    {
      "epoch": 0.5093354338059397,
      "grad_norm": 0.673358783316202,
      "learning_rate": 5.0918832711257805e-06,
      "loss": 0.1237,
      "step": 17459
    },
    {
      "epoch": 0.5093646070365833,
      "grad_norm": 0.8420016865602312,
      "learning_rate": 5.091410916337452e-06,
      "loss": 0.1274,
      "step": 17460
    },
    {
      "epoch": 0.5093937802672268,
      "grad_norm": 0.7502259822245504,
      "learning_rate": 5.090938560733029e-06,
      "loss": 0.1307,
      "step": 17461
    },
    {
      "epoch": 0.5094229534978704,
      "grad_norm": 0.7980311105265848,
      "learning_rate": 5.090466204316727e-06,
      "loss": 0.1256,
      "step": 17462
    },
    {
      "epoch": 0.5094521267285139,
      "grad_norm": 0.7784150212040712,
      "learning_rate": 5.089993847092764e-06,
      "loss": 0.1449,
      "step": 17463
    },
    {
      "epoch": 0.5094812999591575,
      "grad_norm": 0.7988999313518343,
      "learning_rate": 5.089521489065358e-06,
      "loss": 0.1404,
      "step": 17464
    },
    {
      "epoch": 0.509510473189801,
      "grad_norm": 0.8574086188145461,
      "learning_rate": 5.089049130238727e-06,
      "loss": 0.1422,
      "step": 17465
    },
    {
      "epoch": 0.5095396464204446,
      "grad_norm": 0.713583071387903,
      "learning_rate": 5.088576770617086e-06,
      "loss": 0.134,
      "step": 17466
    },
    {
      "epoch": 0.5095688196510881,
      "grad_norm": 0.7387580018763268,
      "learning_rate": 5.088104410204652e-06,
      "loss": 0.1192,
      "step": 17467
    },
    {
      "epoch": 0.5095979928817317,
      "grad_norm": 0.8620868978061649,
      "learning_rate": 5.087632049005643e-06,
      "loss": 0.122,
      "step": 17468
    },
    {
      "epoch": 0.5096271661123752,
      "grad_norm": 0.7908676300118478,
      "learning_rate": 5.087159687024277e-06,
      "loss": 0.1358,
      "step": 17469
    },
    {
      "epoch": 0.5096563393430188,
      "grad_norm": 0.8784872923271049,
      "learning_rate": 5.086687324264768e-06,
      "loss": 0.1409,
      "step": 17470
    },
    {
      "epoch": 0.5096855125736625,
      "grad_norm": 1.0483879927732656,
      "learning_rate": 5.086214960731337e-06,
      "loss": 0.1493,
      "step": 17471
    },
    {
      "epoch": 0.509714685804306,
      "grad_norm": 0.7200407098885422,
      "learning_rate": 5.085742596428199e-06,
      "loss": 0.1311,
      "step": 17472
    },
    {
      "epoch": 0.5097438590349496,
      "grad_norm": 0.6268472174358896,
      "learning_rate": 5.085270231359572e-06,
      "loss": 0.1261,
      "step": 17473
    },
    {
      "epoch": 0.5097730322655931,
      "grad_norm": 0.9703086724801832,
      "learning_rate": 5.084797865529673e-06,
      "loss": 0.1338,
      "step": 17474
    },
    {
      "epoch": 0.5098022054962367,
      "grad_norm": 0.8499811925044137,
      "learning_rate": 5.084325498942717e-06,
      "loss": 0.1165,
      "step": 17475
    },
    {
      "epoch": 0.5098313787268802,
      "grad_norm": 0.8616719357043495,
      "learning_rate": 5.083853131602924e-06,
      "loss": 0.1412,
      "step": 17476
    },
    {
      "epoch": 0.5098605519575238,
      "grad_norm": 0.8336430000759242,
      "learning_rate": 5.083380763514511e-06,
      "loss": 0.1298,
      "step": 17477
    },
    {
      "epoch": 0.5098897251881673,
      "grad_norm": 0.7461456777905648,
      "learning_rate": 5.082908394681694e-06,
      "loss": 0.1189,
      "step": 17478
    },
    {
      "epoch": 0.5099188984188109,
      "grad_norm": 0.9183240458443164,
      "learning_rate": 5.08243602510869e-06,
      "loss": 0.1506,
      "step": 17479
    },
    {
      "epoch": 0.5099480716494544,
      "grad_norm": 0.8203233671850735,
      "learning_rate": 5.081963654799717e-06,
      "loss": 0.1306,
      "step": 17480
    },
    {
      "epoch": 0.509977244880098,
      "grad_norm": 1.0360600413094372,
      "learning_rate": 5.0814912837589926e-06,
      "loss": 0.141,
      "step": 17481
    },
    {
      "epoch": 0.5100064181107415,
      "grad_norm": 1.1149290780890595,
      "learning_rate": 5.081018911990734e-06,
      "loss": 0.1279,
      "step": 17482
    },
    {
      "epoch": 0.5100355913413851,
      "grad_norm": 1.0399195394392293,
      "learning_rate": 5.080546539499156e-06,
      "loss": 0.1554,
      "step": 17483
    },
    {
      "epoch": 0.5100647645720288,
      "grad_norm": 0.7729377457812602,
      "learning_rate": 5.08007416628848e-06,
      "loss": 0.1164,
      "step": 17484
    },
    {
      "epoch": 0.5100939378026723,
      "grad_norm": 0.6820680789318545,
      "learning_rate": 5.079601792362919e-06,
      "loss": 0.1182,
      "step": 17485
    },
    {
      "epoch": 0.5101231110333159,
      "grad_norm": 0.9411728310634727,
      "learning_rate": 5.079129417726694e-06,
      "loss": 0.1135,
      "step": 17486
    },
    {
      "epoch": 0.5101522842639594,
      "grad_norm": 0.954943445540097,
      "learning_rate": 5.07865704238402e-06,
      "loss": 0.12,
      "step": 17487
    },
    {
      "epoch": 0.510181457494603,
      "grad_norm": 0.7679639014582577,
      "learning_rate": 5.078184666339113e-06,
      "loss": 0.1333,
      "step": 17488
    },
    {
      "epoch": 0.5102106307252465,
      "grad_norm": 0.8363182413744034,
      "learning_rate": 5.077712289596194e-06,
      "loss": 0.1335,
      "step": 17489
    },
    {
      "epoch": 0.5102398039558901,
      "grad_norm": 0.7267867294204925,
      "learning_rate": 5.077239912159477e-06,
      "loss": 0.1288,
      "step": 17490
    },
    {
      "epoch": 0.5102689771865336,
      "grad_norm": 1.1942479269315884,
      "learning_rate": 5.076767534033181e-06,
      "loss": 0.1359,
      "step": 17491
    },
    {
      "epoch": 0.5102981504171772,
      "grad_norm": 0.9139303107840787,
      "learning_rate": 5.076295155221523e-06,
      "loss": 0.1269,
      "step": 17492
    },
    {
      "epoch": 0.5103273236478207,
      "grad_norm": 0.811733228885649,
      "learning_rate": 5.07582277572872e-06,
      "loss": 0.1416,
      "step": 17493
    },
    {
      "epoch": 0.5103564968784643,
      "grad_norm": 0.7993082769531046,
      "learning_rate": 5.075350395558989e-06,
      "loss": 0.1239,
      "step": 17494
    },
    {
      "epoch": 0.5103856701091078,
      "grad_norm": 1.0356845721725232,
      "learning_rate": 5.074878014716548e-06,
      "loss": 0.1319,
      "step": 17495
    },
    {
      "epoch": 0.5104148433397514,
      "grad_norm": 0.9208334162372714,
      "learning_rate": 5.0744056332056135e-06,
      "loss": 0.1312,
      "step": 17496
    },
    {
      "epoch": 0.510444016570395,
      "grad_norm": 1.1003553933247492,
      "learning_rate": 5.073933251030403e-06,
      "loss": 0.1519,
      "step": 17497
    },
    {
      "epoch": 0.5104731898010386,
      "grad_norm": 0.7936843075235583,
      "learning_rate": 5.073460868195135e-06,
      "loss": 0.1394,
      "step": 17498
    },
    {
      "epoch": 0.5105023630316822,
      "grad_norm": 1.0421305551238624,
      "learning_rate": 5.072988484704026e-06,
      "loss": 0.1207,
      "step": 17499
    },
    {
      "epoch": 0.5105315362623257,
      "grad_norm": 1.2136703774253128,
      "learning_rate": 5.072516100561292e-06,
      "loss": 0.1125,
      "step": 17500
    },
    {
      "epoch": 0.5105607094929693,
      "grad_norm": 1.0813212636620535,
      "learning_rate": 5.0720437157711525e-06,
      "loss": 0.148,
      "step": 17501
    },
    {
      "epoch": 0.5105898827236128,
      "grad_norm": 1.0195924965369898,
      "learning_rate": 5.0715713303378245e-06,
      "loss": 0.1378,
      "step": 17502
    },
    {
      "epoch": 0.5106190559542564,
      "grad_norm": 0.9360398833281904,
      "learning_rate": 5.071098944265524e-06,
      "loss": 0.135,
      "step": 17503
    },
    {
      "epoch": 0.5106482291848999,
      "grad_norm": 1.0452041218987642,
      "learning_rate": 5.070626557558469e-06,
      "loss": 0.1481,
      "step": 17504
    },
    {
      "epoch": 0.5106774024155435,
      "grad_norm": 1.0237875216021775,
      "learning_rate": 5.070154170220877e-06,
      "loss": 0.1709,
      "step": 17505
    },
    {
      "epoch": 0.510706575646187,
      "grad_norm": 0.856728350090205,
      "learning_rate": 5.069681782256965e-06,
      "loss": 0.1202,
      "step": 17506
    },
    {
      "epoch": 0.5107357488768306,
      "grad_norm": 0.9616693050458184,
      "learning_rate": 5.069209393670951e-06,
      "loss": 0.1446,
      "step": 17507
    },
    {
      "epoch": 0.5107649221074742,
      "grad_norm": 0.8628997542542997,
      "learning_rate": 5.0687370044670525e-06,
      "loss": 0.1379,
      "step": 17508
    },
    {
      "epoch": 0.5107940953381177,
      "grad_norm": 0.8683347621934591,
      "learning_rate": 5.068264614649485e-06,
      "loss": 0.1336,
      "step": 17509
    },
    {
      "epoch": 0.5108232685687613,
      "grad_norm": 0.7882985045782255,
      "learning_rate": 5.067792224222469e-06,
      "loss": 0.1357,
      "step": 17510
    },
    {
      "epoch": 0.5108524417994049,
      "grad_norm": 0.802689756041416,
      "learning_rate": 5.06731983319022e-06,
      "loss": 0.1137,
      "step": 17511
    },
    {
      "epoch": 0.5108816150300485,
      "grad_norm": 0.8961327508057595,
      "learning_rate": 5.066847441556955e-06,
      "loss": 0.1501,
      "step": 17512
    },
    {
      "epoch": 0.510910788260692,
      "grad_norm": 1.072911160374994,
      "learning_rate": 5.066375049326891e-06,
      "loss": 0.1558,
      "step": 17513
    },
    {
      "epoch": 0.5109399614913356,
      "grad_norm": 0.7563739992307312,
      "learning_rate": 5.065902656504249e-06,
      "loss": 0.1367,
      "step": 17514
    },
    {
      "epoch": 0.5109691347219791,
      "grad_norm": 0.9185384509595925,
      "learning_rate": 5.065430263093241e-06,
      "loss": 0.1302,
      "step": 17515
    },
    {
      "epoch": 0.5109983079526227,
      "grad_norm": 0.7980854257951039,
      "learning_rate": 5.064957869098089e-06,
      "loss": 0.1258,
      "step": 17516
    },
    {
      "epoch": 0.5110274811832662,
      "grad_norm": 0.8508350465342617,
      "learning_rate": 5.064485474523009e-06,
      "loss": 0.1474,
      "step": 17517
    },
    {
      "epoch": 0.5110566544139098,
      "grad_norm": 0.8594741215635663,
      "learning_rate": 5.064013079372217e-06,
      "loss": 0.1448,
      "step": 17518
    },
    {
      "epoch": 0.5110858276445533,
      "grad_norm": 0.9353567233323511,
      "learning_rate": 5.063540683649932e-06,
      "loss": 0.1599,
      "step": 17519
    },
    {
      "epoch": 0.5111150008751969,
      "grad_norm": 0.7570092298781683,
      "learning_rate": 5.063068287360371e-06,
      "loss": 0.1218,
      "step": 17520
    },
    {
      "epoch": 0.5111441741058405,
      "grad_norm": 0.9619765117042295,
      "learning_rate": 5.062595890507751e-06,
      "loss": 0.1459,
      "step": 17521
    },
    {
      "epoch": 0.511173347336484,
      "grad_norm": 0.9031932490312972,
      "learning_rate": 5.0621234930962905e-06,
      "loss": 0.1289,
      "step": 17522
    },
    {
      "epoch": 0.5112025205671276,
      "grad_norm": 0.913318391035932,
      "learning_rate": 5.061651095130205e-06,
      "loss": 0.1411,
      "step": 17523
    },
    {
      "epoch": 0.5112316937977711,
      "grad_norm": 0.7774639919480909,
      "learning_rate": 5.061178696613714e-06,
      "loss": 0.131,
      "step": 17524
    },
    {
      "epoch": 0.5112608670284148,
      "grad_norm": 0.9805357211513239,
      "learning_rate": 5.060706297551035e-06,
      "loss": 0.1281,
      "step": 17525
    },
    {
      "epoch": 0.5112900402590583,
      "grad_norm": 0.7394969456489723,
      "learning_rate": 5.060233897946383e-06,
      "loss": 0.1324,
      "step": 17526
    },
    {
      "epoch": 0.5113192134897019,
      "grad_norm": 0.9975029201407476,
      "learning_rate": 5.059761497803978e-06,
      "loss": 0.1324,
      "step": 17527
    },
    {
      "epoch": 0.5113483867203454,
      "grad_norm": 0.7696731450396544,
      "learning_rate": 5.059289097128036e-06,
      "loss": 0.1304,
      "step": 17528
    },
    {
      "epoch": 0.511377559950989,
      "grad_norm": 0.8056673509710328,
      "learning_rate": 5.058816695922777e-06,
      "loss": 0.1347,
      "step": 17529
    },
    {
      "epoch": 0.5114067331816325,
      "grad_norm": 0.9691332752521078,
      "learning_rate": 5.058344294192414e-06,
      "loss": 0.1321,
      "step": 17530
    },
    {
      "epoch": 0.5114359064122761,
      "grad_norm": 0.7686200758317691,
      "learning_rate": 5.057871891941168e-06,
      "loss": 0.1207,
      "step": 17531
    },
    {
      "epoch": 0.5114650796429197,
      "grad_norm": 0.8759365359133702,
      "learning_rate": 5.057399489173258e-06,
      "loss": 0.1299,
      "step": 17532
    },
    {
      "epoch": 0.5114942528735632,
      "grad_norm": 0.8709034331203749,
      "learning_rate": 5.056927085892895e-06,
      "loss": 0.1235,
      "step": 17533
    },
    {
      "epoch": 0.5115234261042068,
      "grad_norm": 0.9124801522464158,
      "learning_rate": 5.056454682104304e-06,
      "loss": 0.1473,
      "step": 17534
    },
    {
      "epoch": 0.5115525993348503,
      "grad_norm": 1.1281914280370087,
      "learning_rate": 5.055982277811698e-06,
      "loss": 0.1331,
      "step": 17535
    },
    {
      "epoch": 0.5115817725654939,
      "grad_norm": 0.797872254490622,
      "learning_rate": 5.055509873019295e-06,
      "loss": 0.1517,
      "step": 17536
    },
    {
      "epoch": 0.5116109457961374,
      "grad_norm": 0.7042960811354347,
      "learning_rate": 5.055037467731313e-06,
      "loss": 0.1542,
      "step": 17537
    },
    {
      "epoch": 0.511640119026781,
      "grad_norm": 0.8982350288571852,
      "learning_rate": 5.05456506195197e-06,
      "loss": 0.1301,
      "step": 17538
    },
    {
      "epoch": 0.5116692922574246,
      "grad_norm": 0.9645314326560633,
      "learning_rate": 5.054092655685483e-06,
      "loss": 0.1189,
      "step": 17539
    },
    {
      "epoch": 0.5116984654880682,
      "grad_norm": 0.6831060075712297,
      "learning_rate": 5.05362024893607e-06,
      "loss": 0.1207,
      "step": 17540
    },
    {
      "epoch": 0.5117276387187117,
      "grad_norm": 0.8136685565395272,
      "learning_rate": 5.053147841707949e-06,
      "loss": 0.1289,
      "step": 17541
    },
    {
      "epoch": 0.5117568119493553,
      "grad_norm": 0.9101201701656183,
      "learning_rate": 5.052675434005334e-06,
      "loss": 0.148,
      "step": 17542
    },
    {
      "epoch": 0.5117859851799988,
      "grad_norm": 0.9390603386217481,
      "learning_rate": 5.052203025832447e-06,
      "loss": 0.1241,
      "step": 17543
    },
    {
      "epoch": 0.5118151584106424,
      "grad_norm": 0.7610631210478856,
      "learning_rate": 5.051730617193505e-06,
      "loss": 0.118,
      "step": 17544
    },
    {
      "epoch": 0.511844331641286,
      "grad_norm": 0.7814541424739787,
      "learning_rate": 5.051258208092723e-06,
      "loss": 0.1219,
      "step": 17545
    },
    {
      "epoch": 0.5118735048719295,
      "grad_norm": 0.7967455639705543,
      "learning_rate": 5.05078579853432e-06,
      "loss": 0.1385,
      "step": 17546
    },
    {
      "epoch": 0.5119026781025731,
      "grad_norm": 0.6755519418654807,
      "learning_rate": 5.050313388522514e-06,
      "loss": 0.1315,
      "step": 17547
    },
    {
      "epoch": 0.5119318513332166,
      "grad_norm": 0.8998580896578239,
      "learning_rate": 5.0498409780615205e-06,
      "loss": 0.1306,
      "step": 17548
    },
    {
      "epoch": 0.5119610245638602,
      "grad_norm": 0.92405078889,
      "learning_rate": 5.049368567155561e-06,
      "loss": 0.1323,
      "step": 17549
    },
    {
      "epoch": 0.5119901977945037,
      "grad_norm": 0.785876774642788,
      "learning_rate": 5.04889615580885e-06,
      "loss": 0.1383,
      "step": 17550
    },
    {
      "epoch": 0.5120193710251473,
      "grad_norm": 0.7165323386571989,
      "learning_rate": 5.048423744025605e-06,
      "loss": 0.1329,
      "step": 17551
    },
    {
      "epoch": 0.5120485442557909,
      "grad_norm": 0.7620758413797071,
      "learning_rate": 5.047951331810046e-06,
      "loss": 0.1339,
      "step": 17552
    },
    {
      "epoch": 0.5120777174864345,
      "grad_norm": 0.8656110648443649,
      "learning_rate": 5.047478919166388e-06,
      "loss": 0.1296,
      "step": 17553
    },
    {
      "epoch": 0.512106890717078,
      "grad_norm": 0.9855420724478027,
      "learning_rate": 5.047006506098849e-06,
      "loss": 0.1347,
      "step": 17554
    },
    {
      "epoch": 0.5121360639477216,
      "grad_norm": 0.8203545053568296,
      "learning_rate": 5.046534092611648e-06,
      "loss": 0.1383,
      "step": 17555
    },
    {
      "epoch": 0.5121652371783652,
      "grad_norm": 0.8336024513130412,
      "learning_rate": 5.046061678709001e-06,
      "loss": 0.1253,
      "step": 17556
    },
    {
      "epoch": 0.5121944104090087,
      "grad_norm": 0.8251741447645392,
      "learning_rate": 5.045589264395127e-06,
      "loss": 0.1279,
      "step": 17557
    },
    {
      "epoch": 0.5122235836396523,
      "grad_norm": 0.7249019702836738,
      "learning_rate": 5.045116849674242e-06,
      "loss": 0.1158,
      "step": 17558
    },
    {
      "epoch": 0.5122527568702958,
      "grad_norm": 0.8212257179288065,
      "learning_rate": 5.0446444345505655e-06,
      "loss": 0.1363,
      "step": 17559
    },
    {
      "epoch": 0.5122819301009394,
      "grad_norm": 0.7925447040494781,
      "learning_rate": 5.044172019028313e-06,
      "loss": 0.127,
      "step": 17560
    },
    {
      "epoch": 0.5123111033315829,
      "grad_norm": 0.8123743961452655,
      "learning_rate": 5.043699603111703e-06,
      "loss": 0.12,
      "step": 17561
    },
    {
      "epoch": 0.5123402765622265,
      "grad_norm": 0.8630978133301008,
      "learning_rate": 5.043227186804956e-06,
      "loss": 0.1229,
      "step": 17562
    },
    {
      "epoch": 0.51236944979287,
      "grad_norm": 0.8541533524559927,
      "learning_rate": 5.042754770112284e-06,
      "loss": 0.1292,
      "step": 17563
    },
    {
      "epoch": 0.5123986230235136,
      "grad_norm": 0.842949923027598,
      "learning_rate": 5.0422823530379105e-06,
      "loss": 0.1472,
      "step": 17564
    },
    {
      "epoch": 0.5124277962541571,
      "grad_norm": 0.8409518885481113,
      "learning_rate": 5.0418099355860484e-06,
      "loss": 0.1346,
      "step": 17565
    },
    {
      "epoch": 0.5124569694848008,
      "grad_norm": 0.7374106530409561,
      "learning_rate": 5.041337517760917e-06,
      "loss": 0.1303,
      "step": 17566
    },
    {
      "epoch": 0.5124861427154443,
      "grad_norm": 0.8742229032087201,
      "learning_rate": 5.040865099566735e-06,
      "loss": 0.1367,
      "step": 17567
    },
    {
      "epoch": 0.5125153159460879,
      "grad_norm": 1.016779321728823,
      "learning_rate": 5.040392681007718e-06,
      "loss": 0.1245,
      "step": 17568
    },
    {
      "epoch": 0.5125444891767315,
      "grad_norm": 0.7807388059490147,
      "learning_rate": 5.039920262088086e-06,
      "loss": 0.1505,
      "step": 17569
    },
    {
      "epoch": 0.512573662407375,
      "grad_norm": 0.7747413133406592,
      "learning_rate": 5.039447842812055e-06,
      "loss": 0.1195,
      "step": 17570
    },
    {
      "epoch": 0.5126028356380186,
      "grad_norm": 0.8271880281283326,
      "learning_rate": 5.038975423183842e-06,
      "loss": 0.1298,
      "step": 17571
    },
    {
      "epoch": 0.5126320088686621,
      "grad_norm": 0.7830936799746516,
      "learning_rate": 5.038503003207668e-06,
      "loss": 0.1273,
      "step": 17572
    },
    {
      "epoch": 0.5126611820993057,
      "grad_norm": 0.8771580466338059,
      "learning_rate": 5.0380305828877465e-06,
      "loss": 0.1221,
      "step": 17573
    },
    {
      "epoch": 0.5126903553299492,
      "grad_norm": 0.8718309850456868,
      "learning_rate": 5.037558162228299e-06,
      "loss": 0.1324,
      "step": 17574
    },
    {
      "epoch": 0.5127195285605928,
      "grad_norm": 0.9590825845942171,
      "learning_rate": 5.037085741233538e-06,
      "loss": 0.1425,
      "step": 17575
    },
    {
      "epoch": 0.5127487017912363,
      "grad_norm": 0.8733374508286359,
      "learning_rate": 5.036613319907686e-06,
      "loss": 0.1509,
      "step": 17576
    },
    {
      "epoch": 0.5127778750218799,
      "grad_norm": 0.8010297410646896,
      "learning_rate": 5.036140898254961e-06,
      "loss": 0.1281,
      "step": 17577
    },
    {
      "epoch": 0.5128070482525234,
      "grad_norm": 0.818046871337447,
      "learning_rate": 5.035668476279576e-06,
      "loss": 0.1186,
      "step": 17578
    },
    {
      "epoch": 0.5128362214831671,
      "grad_norm": 1.2083012101166253,
      "learning_rate": 5.035196053985753e-06,
      "loss": 0.1373,
      "step": 17579
    },
    {
      "epoch": 0.5128653947138107,
      "grad_norm": 0.9142451664612667,
      "learning_rate": 5.034723631377707e-06,
      "loss": 0.1431,
      "step": 17580
    },
    {
      "epoch": 0.5128945679444542,
      "grad_norm": 0.8908800856985881,
      "learning_rate": 5.034251208459657e-06,
      "loss": 0.1472,
      "step": 17581
    },
    {
      "epoch": 0.5129237411750978,
      "grad_norm": 1.0317160445217477,
      "learning_rate": 5.03377878523582e-06,
      "loss": 0.1572,
      "step": 17582
    },
    {
      "epoch": 0.5129529144057413,
      "grad_norm": 0.8756008051929964,
      "learning_rate": 5.033306361710415e-06,
      "loss": 0.1299,
      "step": 17583
    },
    {
      "epoch": 0.5129820876363849,
      "grad_norm": 0.7728398782043059,
      "learning_rate": 5.032833937887658e-06,
      "loss": 0.1269,
      "step": 17584
    },
    {
      "epoch": 0.5130112608670284,
      "grad_norm": 0.7538051765640672,
      "learning_rate": 5.032361513771767e-06,
      "loss": 0.1318,
      "step": 17585
    },
    {
      "epoch": 0.513040434097672,
      "grad_norm": 0.8545320646510951,
      "learning_rate": 5.0318890893669615e-06,
      "loss": 0.1359,
      "step": 17586
    },
    {
      "epoch": 0.5130696073283155,
      "grad_norm": 0.9585410008967832,
      "learning_rate": 5.031416664677456e-06,
      "loss": 0.1292,
      "step": 17587
    },
    {
      "epoch": 0.5130987805589591,
      "grad_norm": 1.0299142078365062,
      "learning_rate": 5.030944239707471e-06,
      "loss": 0.1143,
      "step": 17588
    },
    {
      "epoch": 0.5131279537896026,
      "grad_norm": 0.9619283975036986,
      "learning_rate": 5.0304718144612255e-06,
      "loss": 0.1362,
      "step": 17589
    },
    {
      "epoch": 0.5131571270202462,
      "grad_norm": 0.7429583931989896,
      "learning_rate": 5.029999388942931e-06,
      "loss": 0.1368,
      "step": 17590
    },
    {
      "epoch": 0.5131863002508897,
      "grad_norm": 0.8229938502388782,
      "learning_rate": 5.029526963156811e-06,
      "loss": 0.1452,
      "step": 17591
    },
    {
      "epoch": 0.5132154734815333,
      "grad_norm": 1.0138785203681089,
      "learning_rate": 5.029054537107082e-06,
      "loss": 0.1206,
      "step": 17592
    },
    {
      "epoch": 0.513244646712177,
      "grad_norm": 0.8362051566001064,
      "learning_rate": 5.028582110797959e-06,
      "loss": 0.1138,
      "step": 17593
    },
    {
      "epoch": 0.5132738199428205,
      "grad_norm": 1.0142553667413128,
      "learning_rate": 5.028109684233664e-06,
      "loss": 0.1448,
      "step": 17594
    },
    {
      "epoch": 0.5133029931734641,
      "grad_norm": 0.9349747283301558,
      "learning_rate": 5.027637257418412e-06,
      "loss": 0.1438,
      "step": 17595
    },
    {
      "epoch": 0.5133321664041076,
      "grad_norm": 0.8878888874083781,
      "learning_rate": 5.02716483035642e-06,
      "loss": 0.1513,
      "step": 17596
    },
    {
      "epoch": 0.5133613396347512,
      "grad_norm": 0.7865816982391449,
      "learning_rate": 5.026692403051908e-06,
      "loss": 0.1119,
      "step": 17597
    },
    {
      "epoch": 0.5133905128653947,
      "grad_norm": 0.7100548874746431,
      "learning_rate": 5.026219975509091e-06,
      "loss": 0.1223,
      "step": 17598
    },
    {
      "epoch": 0.5134196860960383,
      "grad_norm": 0.9445340145825436,
      "learning_rate": 5.02574754773219e-06,
      "loss": 0.1398,
      "step": 17599
    },
    {
      "epoch": 0.5134488593266818,
      "grad_norm": 1.002108731341329,
      "learning_rate": 5.02527511972542e-06,
      "loss": 0.1184,
      "step": 17600
    },
    {
      "epoch": 0.5134780325573254,
      "grad_norm": 0.6824222856757094,
      "learning_rate": 5.0248026914930006e-06,
      "loss": 0.1557,
      "step": 17601
    },
    {
      "epoch": 0.5135072057879689,
      "grad_norm": 0.9177669362364731,
      "learning_rate": 5.024330263039148e-06,
      "loss": 0.1191,
      "step": 17602
    },
    {
      "epoch": 0.5135363790186125,
      "grad_norm": 0.9441676301186944,
      "learning_rate": 5.023857834368081e-06,
      "loss": 0.1572,
      "step": 17603
    },
    {
      "epoch": 0.513565552249256,
      "grad_norm": 0.7064923808484347,
      "learning_rate": 5.023385405484018e-06,
      "loss": 0.1245,
      "step": 17604
    },
    {
      "epoch": 0.5135947254798996,
      "grad_norm": 0.7447612330377841,
      "learning_rate": 5.022912976391174e-06,
      "loss": 0.1275,
      "step": 17605
    },
    {
      "epoch": 0.5136238987105433,
      "grad_norm": 0.9186222233356722,
      "learning_rate": 5.022440547093768e-06,
      "loss": 0.1269,
      "step": 17606
    },
    {
      "epoch": 0.5136530719411868,
      "grad_norm": 0.674380939403684,
      "learning_rate": 5.02196811759602e-06,
      "loss": 0.1119,
      "step": 17607
    },
    {
      "epoch": 0.5136822451718304,
      "grad_norm": 0.6018218128908944,
      "learning_rate": 5.021495687902144e-06,
      "loss": 0.122,
      "step": 17608
    },
    {
      "epoch": 0.5137114184024739,
      "grad_norm": 0.8038044934493266,
      "learning_rate": 5.021023258016362e-06,
      "loss": 0.137,
      "step": 17609
    },
    {
      "epoch": 0.5137405916331175,
      "grad_norm": 0.7717034856489727,
      "learning_rate": 5.020550827942887e-06,
      "loss": 0.1238,
      "step": 17610
    },
    {
      "epoch": 0.513769764863761,
      "grad_norm": 0.7571141400189079,
      "learning_rate": 5.02007839768594e-06,
      "loss": 0.1161,
      "step": 17611
    },
    {
      "epoch": 0.5137989380944046,
      "grad_norm": 0.7604141439047912,
      "learning_rate": 5.019605967249739e-06,
      "loss": 0.1792,
      "step": 17612
    },
    {
      "epoch": 0.5138281113250481,
      "grad_norm": 0.9124473784167703,
      "learning_rate": 5.019133536638499e-06,
      "loss": 0.1323,
      "step": 17613
    },
    {
      "epoch": 0.5138572845556917,
      "grad_norm": 0.9377508153489198,
      "learning_rate": 5.018661105856439e-06,
      "loss": 0.1159,
      "step": 17614
    },
    {
      "epoch": 0.5138864577863352,
      "grad_norm": 0.9459563426893842,
      "learning_rate": 5.0181886749077795e-06,
      "loss": 0.1254,
      "step": 17615
    },
    {
      "epoch": 0.5139156310169788,
      "grad_norm": 0.8730453821464376,
      "learning_rate": 5.017716243796733e-06,
      "loss": 0.1417,
      "step": 17616
    },
    {
      "epoch": 0.5139448042476223,
      "grad_norm": 0.9597291800094656,
      "learning_rate": 5.017243812527522e-06,
      "loss": 0.1297,
      "step": 17617
    },
    {
      "epoch": 0.5139739774782659,
      "grad_norm": 1.116313804130204,
      "learning_rate": 5.0167713811043615e-06,
      "loss": 0.1524,
      "step": 17618
    },
    {
      "epoch": 0.5140031507089095,
      "grad_norm": 0.6809247080188088,
      "learning_rate": 5.016298949531472e-06,
      "loss": 0.1129,
      "step": 17619
    },
    {
      "epoch": 0.5140323239395531,
      "grad_norm": 0.9446785784678333,
      "learning_rate": 5.015826517813066e-06,
      "loss": 0.1142,
      "step": 17620
    },
    {
      "epoch": 0.5140614971701967,
      "grad_norm": 0.8700839566767229,
      "learning_rate": 5.0153540859533666e-06,
      "loss": 0.1257,
      "step": 17621
    },
    {
      "epoch": 0.5140906704008402,
      "grad_norm": 0.6687352248983223,
      "learning_rate": 5.01488165395659e-06,
      "loss": 0.1361,
      "step": 17622
    },
    {
      "epoch": 0.5141198436314838,
      "grad_norm": 0.8536488013155896,
      "learning_rate": 5.0144092218269524e-06,
      "loss": 0.1298,
      "step": 17623
    },
    {
      "epoch": 0.5141490168621273,
      "grad_norm": 0.8045445463211859,
      "learning_rate": 5.013936789568674e-06,
      "loss": 0.1075,
      "step": 17624
    },
    {
      "epoch": 0.5141781900927709,
      "grad_norm": 0.886544578939302,
      "learning_rate": 5.013464357185971e-06,
      "loss": 0.1283,
      "step": 17625
    },
    {
      "epoch": 0.5142073633234144,
      "grad_norm": 0.8834498824643948,
      "learning_rate": 5.01299192468306e-06,
      "loss": 0.1465,
      "step": 17626
    },
    {
      "epoch": 0.514236536554058,
      "grad_norm": 0.8235137054250127,
      "learning_rate": 5.012519492064162e-06,
      "loss": 0.1357,
      "step": 17627
    },
    {
      "epoch": 0.5142657097847015,
      "grad_norm": 0.7779755421483731,
      "learning_rate": 5.012047059333492e-06,
      "loss": 0.1136,
      "step": 17628
    },
    {
      "epoch": 0.5142948830153451,
      "grad_norm": 0.7521641273170295,
      "learning_rate": 5.011574626495269e-06,
      "loss": 0.1601,
      "step": 17629
    },
    {
      "epoch": 0.5143240562459886,
      "grad_norm": 0.7202731880658887,
      "learning_rate": 5.01110219355371e-06,
      "loss": 0.133,
      "step": 17630
    },
    {
      "epoch": 0.5143532294766322,
      "grad_norm": 0.771261148817068,
      "learning_rate": 5.010629760513034e-06,
      "loss": 0.1204,
      "step": 17631
    },
    {
      "epoch": 0.5143824027072758,
      "grad_norm": 0.7949068248008649,
      "learning_rate": 5.010157327377457e-06,
      "loss": 0.1112,
      "step": 17632
    },
    {
      "epoch": 0.5144115759379194,
      "grad_norm": 0.7917686820114463,
      "learning_rate": 5.009684894151199e-06,
      "loss": 0.1575,
      "step": 17633
    },
    {
      "epoch": 0.514440749168563,
      "grad_norm": 0.7125942396034246,
      "learning_rate": 5.009212460838477e-06,
      "loss": 0.123,
      "step": 17634
    },
    {
      "epoch": 0.5144699223992065,
      "grad_norm": 0.8952945642164067,
      "learning_rate": 5.008740027443506e-06,
      "loss": 0.1482,
      "step": 17635
    },
    {
      "epoch": 0.5144990956298501,
      "grad_norm": 0.8271549553230554,
      "learning_rate": 5.008267593970507e-06,
      "loss": 0.1398,
      "step": 17636
    },
    {
      "epoch": 0.5145282688604936,
      "grad_norm": 0.7547581475037284,
      "learning_rate": 5.0077951604236985e-06,
      "loss": 0.144,
      "step": 17637
    },
    {
      "epoch": 0.5145574420911372,
      "grad_norm": 0.8611805200564947,
      "learning_rate": 5.007322726807294e-06,
      "loss": 0.1212,
      "step": 17638
    },
    {
      "epoch": 0.5145866153217807,
      "grad_norm": 0.8804799122613016,
      "learning_rate": 5.006850293125517e-06,
      "loss": 0.1372,
      "step": 17639
    },
    {
      "epoch": 0.5146157885524243,
      "grad_norm": 0.7698369940580078,
      "learning_rate": 5.0063778593825805e-06,
      "loss": 0.123,
      "step": 17640
    },
    {
      "epoch": 0.5146449617830678,
      "grad_norm": 0.916682018196249,
      "learning_rate": 5.005905425582705e-06,
      "loss": 0.1588,
      "step": 17641
    },
    {
      "epoch": 0.5146741350137114,
      "grad_norm": 0.7255126727410842,
      "learning_rate": 5.005432991730106e-06,
      "loss": 0.1291,
      "step": 17642
    },
    {
      "epoch": 0.514703308244355,
      "grad_norm": 0.8011659620481545,
      "learning_rate": 5.0049605578290025e-06,
      "loss": 0.1393,
      "step": 17643
    },
    {
      "epoch": 0.5147324814749985,
      "grad_norm": 0.770103932765385,
      "learning_rate": 5.004488123883614e-06,
      "loss": 0.1165,
      "step": 17644
    },
    {
      "epoch": 0.5147616547056421,
      "grad_norm": 0.6142928258533513,
      "learning_rate": 5.004015689898155e-06,
      "loss": 0.1191,
      "step": 17645
    },
    {
      "epoch": 0.5147908279362856,
      "grad_norm": 1.0560506694150225,
      "learning_rate": 5.003543255876845e-06,
      "loss": 0.157,
      "step": 17646
    },
    {
      "epoch": 0.5148200011669293,
      "grad_norm": 0.8755699419388113,
      "learning_rate": 5.0030708218239025e-06,
      "loss": 0.1436,
      "step": 17647
    },
    {
      "epoch": 0.5148491743975728,
      "grad_norm": 0.794533891524701,
      "learning_rate": 5.002598387743544e-06,
      "loss": 0.1349,
      "step": 17648
    },
    {
      "epoch": 0.5148783476282164,
      "grad_norm": 0.8392470518062469,
      "learning_rate": 5.002125953639988e-06,
      "loss": 0.1253,
      "step": 17649
    },
    {
      "epoch": 0.5149075208588599,
      "grad_norm": 0.9559536658700135,
      "learning_rate": 5.001653519517451e-06,
      "loss": 0.1304,
      "step": 17650
    },
    {
      "epoch": 0.5149366940895035,
      "grad_norm": 0.7364703795814111,
      "learning_rate": 5.001181085380152e-06,
      "loss": 0.1282,
      "step": 17651
    },
    {
      "epoch": 0.514965867320147,
      "grad_norm": 0.8225490988635491,
      "learning_rate": 5.00070865123231e-06,
      "loss": 0.1213,
      "step": 17652
    },
    {
      "epoch": 0.5149950405507906,
      "grad_norm": 1.0874898380750317,
      "learning_rate": 5.000236217078139e-06,
      "loss": 0.1303,
      "step": 17653
    },
    {
      "epoch": 0.5150242137814341,
      "grad_norm": 0.6688242311374385,
      "learning_rate": 4.999763782921862e-06,
      "loss": 0.1224,
      "step": 17654
    },
    {
      "epoch": 0.5150533870120777,
      "grad_norm": 0.8607544504149954,
      "learning_rate": 4.999291348767692e-06,
      "loss": 0.1468,
      "step": 17655
    },
    {
      "epoch": 0.5150825602427213,
      "grad_norm": 1.0217087991651117,
      "learning_rate": 4.998818914619849e-06,
      "loss": 0.1319,
      "step": 17656
    },
    {
      "epoch": 0.5151117334733648,
      "grad_norm": 0.8437567190693104,
      "learning_rate": 4.99834648048255e-06,
      "loss": 0.1451,
      "step": 17657
    },
    {
      "epoch": 0.5151409067040084,
      "grad_norm": 0.7201711567972063,
      "learning_rate": 4.997874046360013e-06,
      "loss": 0.1269,
      "step": 17658
    },
    {
      "epoch": 0.5151700799346519,
      "grad_norm": 0.8352761030653358,
      "learning_rate": 4.997401612256458e-06,
      "loss": 0.1332,
      "step": 17659
    },
    {
      "epoch": 0.5151992531652956,
      "grad_norm": 1.004129729343521,
      "learning_rate": 4.996929178176099e-06,
      "loss": 0.1416,
      "step": 17660
    },
    {
      "epoch": 0.5152284263959391,
      "grad_norm": 1.2824937203244915,
      "learning_rate": 4.996456744123156e-06,
      "loss": 0.117,
      "step": 17661
    },
    {
      "epoch": 0.5152575996265827,
      "grad_norm": 1.1857359085225379,
      "learning_rate": 4.995984310101847e-06,
      "loss": 0.1129,
      "step": 17662
    },
    {
      "epoch": 0.5152867728572262,
      "grad_norm": 1.1371941299943034,
      "learning_rate": 4.995511876116387e-06,
      "loss": 0.121,
      "step": 17663
    },
    {
      "epoch": 0.5153159460878698,
      "grad_norm": 1.196261158483275,
      "learning_rate": 4.995039442170998e-06,
      "loss": 0.1331,
      "step": 17664
    },
    {
      "epoch": 0.5153451193185133,
      "grad_norm": 1.1620631468864877,
      "learning_rate": 4.9945670082698945e-06,
      "loss": 0.1541,
      "step": 17665
    },
    {
      "epoch": 0.5153742925491569,
      "grad_norm": 1.0097065937778051,
      "learning_rate": 4.994094574417296e-06,
      "loss": 0.1258,
      "step": 17666
    },
    {
      "epoch": 0.5154034657798005,
      "grad_norm": 0.821252905649037,
      "learning_rate": 4.993622140617421e-06,
      "loss": 0.144,
      "step": 17667
    },
    {
      "epoch": 0.515432639010444,
      "grad_norm": 0.9205304381811602,
      "learning_rate": 4.993149706874485e-06,
      "loss": 0.1354,
      "step": 17668
    },
    {
      "epoch": 0.5154618122410876,
      "grad_norm": 1.2005603618970164,
      "learning_rate": 4.992677273192706e-06,
      "loss": 0.1268,
      "step": 17669
    },
    {
      "epoch": 0.5154909854717311,
      "grad_norm": 0.9359330023349163,
      "learning_rate": 4.992204839576302e-06,
      "loss": 0.138,
      "step": 17670
    },
    {
      "epoch": 0.5155201587023747,
      "grad_norm": 0.8701714764215726,
      "learning_rate": 4.9917324060294946e-06,
      "loss": 0.1119,
      "step": 17671
    },
    {
      "epoch": 0.5155493319330182,
      "grad_norm": 0.8238086979703602,
      "learning_rate": 4.991259972556496e-06,
      "loss": 0.1162,
      "step": 17672
    },
    {
      "epoch": 0.5155785051636618,
      "grad_norm": 0.770604622471963,
      "learning_rate": 4.990787539161525e-06,
      "loss": 0.1222,
      "step": 17673
    },
    {
      "epoch": 0.5156076783943054,
      "grad_norm": 0.6771629181307338,
      "learning_rate": 4.990315105848804e-06,
      "loss": 0.1263,
      "step": 17674
    },
    {
      "epoch": 0.515636851624949,
      "grad_norm": 0.8926126858578739,
      "learning_rate": 4.989842672622543e-06,
      "loss": 0.1166,
      "step": 17675
    },
    {
      "epoch": 0.5156660248555925,
      "grad_norm": 0.8297622269993699,
      "learning_rate": 4.989370239486968e-06,
      "loss": 0.1322,
      "step": 17676
    },
    {
      "epoch": 0.5156951980862361,
      "grad_norm": 0.7446893830893277,
      "learning_rate": 4.988897806446291e-06,
      "loss": 0.1313,
      "step": 17677
    },
    {
      "epoch": 0.5157243713168796,
      "grad_norm": 0.9243250876529159,
      "learning_rate": 4.9884253735047325e-06,
      "loss": 0.1541,
      "step": 17678
    },
    {
      "epoch": 0.5157535445475232,
      "grad_norm": 0.8860627931972566,
      "learning_rate": 4.98795294066651e-06,
      "loss": 0.1197,
      "step": 17679
    },
    {
      "epoch": 0.5157827177781668,
      "grad_norm": 1.0050192417510047,
      "learning_rate": 4.987480507935841e-06,
      "loss": 0.1245,
      "step": 17680
    },
    {
      "epoch": 0.5158118910088103,
      "grad_norm": 0.7079725561051315,
      "learning_rate": 4.987008075316941e-06,
      "loss": 0.1387,
      "step": 17681
    },
    {
      "epoch": 0.5158410642394539,
      "grad_norm": 0.7984669677892621,
      "learning_rate": 4.986535642814031e-06,
      "loss": 0.1236,
      "step": 17682
    },
    {
      "epoch": 0.5158702374700974,
      "grad_norm": 0.9139776999032819,
      "learning_rate": 4.9860632104313276e-06,
      "loss": 0.1452,
      "step": 17683
    },
    {
      "epoch": 0.515899410700741,
      "grad_norm": 0.7610917318336733,
      "learning_rate": 4.985590778173049e-06,
      "loss": 0.1086,
      "step": 17684
    },
    {
      "epoch": 0.5159285839313845,
      "grad_norm": 0.7632751121205954,
      "learning_rate": 4.9851183460434115e-06,
      "loss": 0.1461,
      "step": 17685
    },
    {
      "epoch": 0.5159577571620281,
      "grad_norm": 0.7501627366479159,
      "learning_rate": 4.984645914046635e-06,
      "loss": 0.1279,
      "step": 17686
    },
    {
      "epoch": 0.5159869303926717,
      "grad_norm": 1.019683836730631,
      "learning_rate": 4.984173482186934e-06,
      "loss": 0.1554,
      "step": 17687
    },
    {
      "epoch": 0.5160161036233153,
      "grad_norm": 0.8740614422971671,
      "learning_rate": 4.98370105046853e-06,
      "loss": 0.168,
      "step": 17688
    },
    {
      "epoch": 0.5160452768539588,
      "grad_norm": 0.7718973254598634,
      "learning_rate": 4.983228618895639e-06,
      "loss": 0.1221,
      "step": 17689
    },
    {
      "epoch": 0.5160744500846024,
      "grad_norm": 0.7935483578242476,
      "learning_rate": 4.98275618747248e-06,
      "loss": 0.1459,
      "step": 17690
    },
    {
      "epoch": 0.516103623315246,
      "grad_norm": 1.0596746784375382,
      "learning_rate": 4.982283756203268e-06,
      "loss": 0.1521,
      "step": 17691
    },
    {
      "epoch": 0.5161327965458895,
      "grad_norm": 0.7472360856697575,
      "learning_rate": 4.981811325092224e-06,
      "loss": 0.1212,
      "step": 17692
    },
    {
      "epoch": 0.5161619697765331,
      "grad_norm": 0.7540684429918678,
      "learning_rate": 4.98133889414356e-06,
      "loss": 0.1408,
      "step": 17693
    },
    {
      "epoch": 0.5161911430071766,
      "grad_norm": 0.8577320979432324,
      "learning_rate": 4.980866463361502e-06,
      "loss": 0.1413,
      "step": 17694
    },
    {
      "epoch": 0.5162203162378202,
      "grad_norm": 0.9033068190112308,
      "learning_rate": 4.980394032750263e-06,
      "loss": 0.1542,
      "step": 17695
    },
    {
      "epoch": 0.5162494894684637,
      "grad_norm": 0.7692487755193975,
      "learning_rate": 4.979921602314061e-06,
      "loss": 0.1314,
      "step": 17696
    },
    {
      "epoch": 0.5162786626991073,
      "grad_norm": 0.9400376900876605,
      "learning_rate": 4.979449172057115e-06,
      "loss": 0.1336,
      "step": 17697
    },
    {
      "epoch": 0.5163078359297508,
      "grad_norm": 0.7299198992071241,
      "learning_rate": 4.978976741983641e-06,
      "loss": 0.1241,
      "step": 17698
    },
    {
      "epoch": 0.5163370091603944,
      "grad_norm": 0.7482717083200953,
      "learning_rate": 4.978504312097856e-06,
      "loss": 0.1343,
      "step": 17699
    },
    {
      "epoch": 0.5163661823910379,
      "grad_norm": 0.8087436392121131,
      "learning_rate": 4.978031882403981e-06,
      "loss": 0.1507,
      "step": 17700
    },
    {
      "epoch": 0.5163953556216816,
      "grad_norm": 0.868521306445695,
      "learning_rate": 4.977559452906233e-06,
      "loss": 0.1319,
      "step": 17701
    },
    {
      "epoch": 0.5164245288523251,
      "grad_norm": 0.936283371969216,
      "learning_rate": 4.977087023608828e-06,
      "loss": 0.1178,
      "step": 17702
    },
    {
      "epoch": 0.5164537020829687,
      "grad_norm": 1.175195682655177,
      "learning_rate": 4.976614594515985e-06,
      "loss": 0.1491,
      "step": 17703
    },
    {
      "epoch": 0.5164828753136123,
      "grad_norm": 0.9890558252635985,
      "learning_rate": 4.976142165631921e-06,
      "loss": 0.1439,
      "step": 17704
    },
    {
      "epoch": 0.5165120485442558,
      "grad_norm": 0.9898106675366942,
      "learning_rate": 4.975669736960852e-06,
      "loss": 0.1493,
      "step": 17705
    },
    {
      "epoch": 0.5165412217748994,
      "grad_norm": 1.0099130958080584,
      "learning_rate": 4.975197308507001e-06,
      "loss": 0.1283,
      "step": 17706
    },
    {
      "epoch": 0.5165703950055429,
      "grad_norm": 0.6482686432632625,
      "learning_rate": 4.9747248802745814e-06,
      "loss": 0.114,
      "step": 17707
    },
    {
      "epoch": 0.5165995682361865,
      "grad_norm": 0.9074951647529433,
      "learning_rate": 4.974252452267811e-06,
      "loss": 0.1499,
      "step": 17708
    },
    {
      "epoch": 0.51662874146683,
      "grad_norm": 0.8029354281442845,
      "learning_rate": 4.973780024490911e-06,
      "loss": 0.1286,
      "step": 17709
    },
    {
      "epoch": 0.5166579146974736,
      "grad_norm": 0.9127033778546586,
      "learning_rate": 4.9733075969480945e-06,
      "loss": 0.1491,
      "step": 17710
    },
    {
      "epoch": 0.5166870879281171,
      "grad_norm": 0.7613957159556328,
      "learning_rate": 4.972835169643581e-06,
      "loss": 0.125,
      "step": 17711
    },
    {
      "epoch": 0.5167162611587607,
      "grad_norm": 0.6994314010900585,
      "learning_rate": 4.9723627425815895e-06,
      "loss": 0.1548,
      "step": 17712
    },
    {
      "epoch": 0.5167454343894042,
      "grad_norm": 0.7249464277301007,
      "learning_rate": 4.9718903157663364e-06,
      "loss": 0.1374,
      "step": 17713
    },
    {
      "epoch": 0.5167746076200479,
      "grad_norm": 0.8740554154516622,
      "learning_rate": 4.971417889202042e-06,
      "loss": 0.1414,
      "step": 17714
    },
    {
      "epoch": 0.5168037808506915,
      "grad_norm": 0.7367444142907535,
      "learning_rate": 4.97094546289292e-06,
      "loss": 0.1279,
      "step": 17715
    },
    {
      "epoch": 0.516832954081335,
      "grad_norm": 0.8791510432754738,
      "learning_rate": 4.97047303684319e-06,
      "loss": 0.1249,
      "step": 17716
    },
    {
      "epoch": 0.5168621273119786,
      "grad_norm": 0.8161126947784589,
      "learning_rate": 4.970000611057069e-06,
      "loss": 0.1386,
      "step": 17717
    },
    {
      "epoch": 0.5168913005426221,
      "grad_norm": 0.8634750377528411,
      "learning_rate": 4.969528185538776e-06,
      "loss": 0.1372,
      "step": 17718
    },
    {
      "epoch": 0.5169204737732657,
      "grad_norm": 0.8407895590347889,
      "learning_rate": 4.96905576029253e-06,
      "loss": 0.1436,
      "step": 17719
    },
    {
      "epoch": 0.5169496470039092,
      "grad_norm": 1.0012309494361669,
      "learning_rate": 4.968583335322545e-06,
      "loss": 0.1389,
      "step": 17720
    },
    {
      "epoch": 0.5169788202345528,
      "grad_norm": 1.0487690076129972,
      "learning_rate": 4.96811091063304e-06,
      "loss": 0.1409,
      "step": 17721
    },
    {
      "epoch": 0.5170079934651963,
      "grad_norm": 0.6886249733579866,
      "learning_rate": 4.967638486228235e-06,
      "loss": 0.1461,
      "step": 17722
    },
    {
      "epoch": 0.5170371666958399,
      "grad_norm": 1.7070397978566778,
      "learning_rate": 4.967166062112342e-06,
      "loss": 0.1489,
      "step": 17723
    },
    {
      "epoch": 0.5170663399264834,
      "grad_norm": 0.8597711687918821,
      "learning_rate": 4.966693638289587e-06,
      "loss": 0.1172,
      "step": 17724
    },
    {
      "epoch": 0.517095513157127,
      "grad_norm": 0.853591338688919,
      "learning_rate": 4.9662212147641805e-06,
      "loss": 0.1234,
      "step": 17725
    },
    {
      "epoch": 0.5171246863877705,
      "grad_norm": 0.719466915487513,
      "learning_rate": 4.9657487915403446e-06,
      "loss": 0.1324,
      "step": 17726
    },
    {
      "epoch": 0.5171538596184141,
      "grad_norm": 0.8899182333294233,
      "learning_rate": 4.965276368622295e-06,
      "loss": 0.1291,
      "step": 17727
    },
    {
      "epoch": 0.5171830328490578,
      "grad_norm": 0.9061730129709415,
      "learning_rate": 4.96480394601425e-06,
      "loss": 0.1344,
      "step": 17728
    },
    {
      "epoch": 0.5172122060797013,
      "grad_norm": 0.9292343509728695,
      "learning_rate": 4.9643315237204246e-06,
      "loss": 0.155,
      "step": 17729
    },
    {
      "epoch": 0.5172413793103449,
      "grad_norm": 0.9195084573904982,
      "learning_rate": 4.963859101745041e-06,
      "loss": 0.1327,
      "step": 17730
    },
    {
      "epoch": 0.5172705525409884,
      "grad_norm": 1.9916532290169138,
      "learning_rate": 4.9633866800923145e-06,
      "loss": 0.1512,
      "step": 17731
    },
    {
      "epoch": 0.517299725771632,
      "grad_norm": 0.8279354201987689,
      "learning_rate": 4.962914258766463e-06,
      "loss": 0.1268,
      "step": 17732
    },
    {
      "epoch": 0.5173288990022755,
      "grad_norm": 0.7465001089712736,
      "learning_rate": 4.962441837771704e-06,
      "loss": 0.126,
      "step": 17733
    },
    {
      "epoch": 0.5173580722329191,
      "grad_norm": 1.2480594259412,
      "learning_rate": 4.961969417112256e-06,
      "loss": 0.1317,
      "step": 17734
    },
    {
      "epoch": 0.5173872454635626,
      "grad_norm": 1.1180437220550352,
      "learning_rate": 4.961496996792333e-06,
      "loss": 0.1374,
      "step": 17735
    },
    {
      "epoch": 0.5174164186942062,
      "grad_norm": 0.7644397191168941,
      "learning_rate": 4.961024576816158e-06,
      "loss": 0.1264,
      "step": 17736
    },
    {
      "epoch": 0.5174455919248497,
      "grad_norm": 1.125579767318,
      "learning_rate": 4.960552157187947e-06,
      "loss": 0.1287,
      "step": 17737
    },
    {
      "epoch": 0.5174747651554933,
      "grad_norm": 1.7882388009657824,
      "learning_rate": 4.9600797379119155e-06,
      "loss": 0.154,
      "step": 17738
    },
    {
      "epoch": 0.5175039383861368,
      "grad_norm": 0.800660711242164,
      "learning_rate": 4.959607318992284e-06,
      "loss": 0.1181,
      "step": 17739
    },
    {
      "epoch": 0.5175331116167804,
      "grad_norm": 1.1732319497813264,
      "learning_rate": 4.959134900433268e-06,
      "loss": 0.1502,
      "step": 17740
    },
    {
      "epoch": 0.5175622848474241,
      "grad_norm": 1.1002167995407082,
      "learning_rate": 4.958662482239084e-06,
      "loss": 0.1526,
      "step": 17741
    },
    {
      "epoch": 0.5175914580780676,
      "grad_norm": 0.839185950225196,
      "learning_rate": 4.958190064413953e-06,
      "loss": 0.1357,
      "step": 17742
    },
    {
      "epoch": 0.5176206313087112,
      "grad_norm": 0.8207595591182254,
      "learning_rate": 4.957717646962091e-06,
      "loss": 0.147,
      "step": 17743
    },
    {
      "epoch": 0.5176498045393547,
      "grad_norm": 1.0114933892310523,
      "learning_rate": 4.957245229887717e-06,
      "loss": 0.1193,
      "step": 17744
    },
    {
      "epoch": 0.5176789777699983,
      "grad_norm": 1.0847668761992957,
      "learning_rate": 4.956772813195046e-06,
      "loss": 0.135,
      "step": 17745
    },
    {
      "epoch": 0.5177081510006418,
      "grad_norm": 0.7678586858077632,
      "learning_rate": 4.9563003968882975e-06,
      "loss": 0.1245,
      "step": 17746
    },
    {
      "epoch": 0.5177373242312854,
      "grad_norm": 1.1090211114566713,
      "learning_rate": 4.955827980971688e-06,
      "loss": 0.147,
      "step": 17747
    },
    {
      "epoch": 0.5177664974619289,
      "grad_norm": 0.8378878431401968,
      "learning_rate": 4.955355565449435e-06,
      "loss": 0.1256,
      "step": 17748
    },
    {
      "epoch": 0.5177956706925725,
      "grad_norm": 0.748962072543815,
      "learning_rate": 4.95488315032576e-06,
      "loss": 0.1365,
      "step": 17749
    },
    {
      "epoch": 0.517824843923216,
      "grad_norm": 0.8700736053153962,
      "learning_rate": 4.9544107356048756e-06,
      "loss": 0.1205,
      "step": 17750
    },
    {
      "epoch": 0.5178540171538596,
      "grad_norm": 0.9766888630828362,
      "learning_rate": 4.953938321291001e-06,
      "loss": 0.1291,
      "step": 17751
    },
    {
      "epoch": 0.5178831903845031,
      "grad_norm": 0.8786663876319774,
      "learning_rate": 4.953465907388353e-06,
      "loss": 0.1241,
      "step": 17752
    },
    {
      "epoch": 0.5179123636151467,
      "grad_norm": 0.7814683983961322,
      "learning_rate": 4.9529934939011514e-06,
      "loss": 0.1288,
      "step": 17753
    },
    {
      "epoch": 0.5179415368457903,
      "grad_norm": 0.7162963342767883,
      "learning_rate": 4.952521080833614e-06,
      "loss": 0.1172,
      "step": 17754
    },
    {
      "epoch": 0.5179707100764339,
      "grad_norm": 0.7422130006335729,
      "learning_rate": 4.952048668189956e-06,
      "loss": 0.1167,
      "step": 17755
    },
    {
      "epoch": 0.5179998833070775,
      "grad_norm": 0.9636738077209109,
      "learning_rate": 4.9515762559743955e-06,
      "loss": 0.1192,
      "step": 17756
    },
    {
      "epoch": 0.518029056537721,
      "grad_norm": 0.8079579914363468,
      "learning_rate": 4.9511038441911515e-06,
      "loss": 0.1201,
      "step": 17757
    },
    {
      "epoch": 0.5180582297683646,
      "grad_norm": 0.7498702957223226,
      "learning_rate": 4.9506314328444395e-06,
      "loss": 0.1288,
      "step": 17758
    },
    {
      "epoch": 0.5180874029990081,
      "grad_norm": 0.754107044012069,
      "learning_rate": 4.950159021938479e-06,
      "loss": 0.137,
      "step": 17759
    },
    {
      "epoch": 0.5181165762296517,
      "grad_norm": 0.7427974198618872,
      "learning_rate": 4.949686611477487e-06,
      "loss": 0.1127,
      "step": 17760
    },
    {
      "epoch": 0.5181457494602952,
      "grad_norm": 0.7629154342469427,
      "learning_rate": 4.949214201465682e-06,
      "loss": 0.1163,
      "step": 17761
    },
    {
      "epoch": 0.5181749226909388,
      "grad_norm": 0.7295565415995703,
      "learning_rate": 4.948741791907279e-06,
      "loss": 0.111,
      "step": 17762
    },
    {
      "epoch": 0.5182040959215823,
      "grad_norm": 0.9266575335231287,
      "learning_rate": 4.948269382806497e-06,
      "loss": 0.1416,
      "step": 17763
    },
    {
      "epoch": 0.5182332691522259,
      "grad_norm": 0.8798685542925329,
      "learning_rate": 4.947796974167553e-06,
      "loss": 0.1222,
      "step": 17764
    },
    {
      "epoch": 0.5182624423828694,
      "grad_norm": 1.2083189150380842,
      "learning_rate": 4.947324565994666e-06,
      "loss": 0.1371,
      "step": 17765
    },
    {
      "epoch": 0.518291615613513,
      "grad_norm": 1.0981785801723138,
      "learning_rate": 4.946852158292054e-06,
      "loss": 0.1448,
      "step": 17766
    },
    {
      "epoch": 0.5183207888441566,
      "grad_norm": 0.8165686750111091,
      "learning_rate": 4.946379751063932e-06,
      "loss": 0.1293,
      "step": 17767
    },
    {
      "epoch": 0.5183499620748001,
      "grad_norm": 1.1957180571188588,
      "learning_rate": 4.9459073443145185e-06,
      "loss": 0.1216,
      "step": 17768
    },
    {
      "epoch": 0.5183791353054438,
      "grad_norm": 0.7525766178725002,
      "learning_rate": 4.945434938048032e-06,
      "loss": 0.139,
      "step": 17769
    },
    {
      "epoch": 0.5184083085360873,
      "grad_norm": 0.8103711165343084,
      "learning_rate": 4.9449625322686874e-06,
      "loss": 0.1322,
      "step": 17770
    },
    {
      "epoch": 0.5184374817667309,
      "grad_norm": 1.1193703453006678,
      "learning_rate": 4.944490126980706e-06,
      "loss": 0.1148,
      "step": 17771
    },
    {
      "epoch": 0.5184666549973744,
      "grad_norm": 0.8859469023702427,
      "learning_rate": 4.944017722188303e-06,
      "loss": 0.1308,
      "step": 17772
    },
    {
      "epoch": 0.518495828228018,
      "grad_norm": 0.7090720779509818,
      "learning_rate": 4.943545317895697e-06,
      "loss": 0.1268,
      "step": 17773
    },
    {
      "epoch": 0.5185250014586615,
      "grad_norm": 0.9905126126943034,
      "learning_rate": 4.9430729141071056e-06,
      "loss": 0.1225,
      "step": 17774
    },
    {
      "epoch": 0.5185541746893051,
      "grad_norm": 0.9087945970173723,
      "learning_rate": 4.942600510826745e-06,
      "loss": 0.1123,
      "step": 17775
    },
    {
      "epoch": 0.5185833479199486,
      "grad_norm": 0.753379908493967,
      "learning_rate": 4.942128108058832e-06,
      "loss": 0.1122,
      "step": 17776
    },
    {
      "epoch": 0.5186125211505922,
      "grad_norm": 0.9766768732241771,
      "learning_rate": 4.941655705807586e-06,
      "loss": 0.1476,
      "step": 17777
    },
    {
      "epoch": 0.5186416943812358,
      "grad_norm": 0.8597888733321941,
      "learning_rate": 4.941183304077224e-06,
      "loss": 0.1202,
      "step": 17778
    },
    {
      "epoch": 0.5186708676118793,
      "grad_norm": 0.7241541105926349,
      "learning_rate": 4.9407109028719644e-06,
      "loss": 0.1273,
      "step": 17779
    },
    {
      "epoch": 0.5187000408425229,
      "grad_norm": 0.9018762007631137,
      "learning_rate": 4.940238502196024e-06,
      "loss": 0.1425,
      "step": 17780
    },
    {
      "epoch": 0.5187292140731664,
      "grad_norm": 1.113150947027481,
      "learning_rate": 4.939766102053619e-06,
      "loss": 0.1359,
      "step": 17781
    },
    {
      "epoch": 0.5187583873038101,
      "grad_norm": 0.9067067536712301,
      "learning_rate": 4.939293702448966e-06,
      "loss": 0.1366,
      "step": 17782
    },
    {
      "epoch": 0.5187875605344536,
      "grad_norm": 0.869308613776223,
      "learning_rate": 4.938821303386287e-06,
      "loss": 0.1348,
      "step": 17783
    },
    {
      "epoch": 0.5188167337650972,
      "grad_norm": 1.030779845618955,
      "learning_rate": 4.938348904869796e-06,
      "loss": 0.1516,
      "step": 17784
    },
    {
      "epoch": 0.5188459069957407,
      "grad_norm": 0.7756506386233125,
      "learning_rate": 4.937876506903711e-06,
      "loss": 0.1155,
      "step": 17785
    },
    {
      "epoch": 0.5188750802263843,
      "grad_norm": 0.8730181617833321,
      "learning_rate": 4.9374041094922506e-06,
      "loss": 0.1316,
      "step": 17786
    },
    {
      "epoch": 0.5189042534570278,
      "grad_norm": 1.0480714878501245,
      "learning_rate": 4.936931712639632e-06,
      "loss": 0.1389,
      "step": 17787
    },
    {
      "epoch": 0.5189334266876714,
      "grad_norm": 0.918854444863424,
      "learning_rate": 4.936459316350069e-06,
      "loss": 0.1568,
      "step": 17788
    },
    {
      "epoch": 0.518962599918315,
      "grad_norm": 0.7993345800592149,
      "learning_rate": 4.935986920627784e-06,
      "loss": 0.121,
      "step": 17789
    },
    {
      "epoch": 0.5189917731489585,
      "grad_norm": 1.1084824142156962,
      "learning_rate": 4.935514525476992e-06,
      "loss": 0.1463,
      "step": 17790
    },
    {
      "epoch": 0.519020946379602,
      "grad_norm": 0.8425773712132383,
      "learning_rate": 4.9350421309019125e-06,
      "loss": 0.1402,
      "step": 17791
    },
    {
      "epoch": 0.5190501196102456,
      "grad_norm": 0.8271009729135738,
      "learning_rate": 4.93456973690676e-06,
      "loss": 0.1229,
      "step": 17792
    },
    {
      "epoch": 0.5190792928408892,
      "grad_norm": 0.7775744912527479,
      "learning_rate": 4.934097343495753e-06,
      "loss": 0.112,
      "step": 17793
    },
    {
      "epoch": 0.5191084660715327,
      "grad_norm": 0.8043375251488273,
      "learning_rate": 4.933624950673109e-06,
      "loss": 0.1205,
      "step": 17794
    },
    {
      "epoch": 0.5191376393021763,
      "grad_norm": 0.7250849815657042,
      "learning_rate": 4.933152558443045e-06,
      "loss": 0.1102,
      "step": 17795
    },
    {
      "epoch": 0.5191668125328199,
      "grad_norm": 0.6975336060490384,
      "learning_rate": 4.932680166809782e-06,
      "loss": 0.1209,
      "step": 17796
    },
    {
      "epoch": 0.5191959857634635,
      "grad_norm": 0.744123554043262,
      "learning_rate": 4.932207775777532e-06,
      "loss": 0.1128,
      "step": 17797
    },
    {
      "epoch": 0.519225158994107,
      "grad_norm": 0.7732292829867687,
      "learning_rate": 4.9317353853505154e-06,
      "loss": 0.1554,
      "step": 17798
    },
    {
      "epoch": 0.5192543322247506,
      "grad_norm": 1.0536207532554802,
      "learning_rate": 4.931262995532951e-06,
      "loss": 0.1365,
      "step": 17799
    },
    {
      "epoch": 0.5192835054553941,
      "grad_norm": 0.7685445534392636,
      "learning_rate": 4.930790606329049e-06,
      "loss": 0.1342,
      "step": 17800
    },
    {
      "epoch": 0.5193126786860377,
      "grad_norm": 1.2868047870908867,
      "learning_rate": 4.9303182177430355e-06,
      "loss": 0.1524,
      "step": 17801
    },
    {
      "epoch": 0.5193418519166813,
      "grad_norm": 0.9592999503782187,
      "learning_rate": 4.9298458297791245e-06,
      "loss": 0.1396,
      "step": 17802
    },
    {
      "epoch": 0.5193710251473248,
      "grad_norm": 0.8482988127290358,
      "learning_rate": 4.929373442441533e-06,
      "loss": 0.1252,
      "step": 17803
    },
    {
      "epoch": 0.5194001983779684,
      "grad_norm": 0.8686835971258066,
      "learning_rate": 4.928901055734479e-06,
      "loss": 0.1088,
      "step": 17804
    },
    {
      "epoch": 0.5194293716086119,
      "grad_norm": 1.549980148624875,
      "learning_rate": 4.928428669662178e-06,
      "loss": 0.1414,
      "step": 17805
    },
    {
      "epoch": 0.5194585448392555,
      "grad_norm": 0.954973356348734,
      "learning_rate": 4.927956284228848e-06,
      "loss": 0.1187,
      "step": 17806
    },
    {
      "epoch": 0.519487718069899,
      "grad_norm": 1.031618422626665,
      "learning_rate": 4.927483899438708e-06,
      "loss": 0.1476,
      "step": 17807
    },
    {
      "epoch": 0.5195168913005426,
      "grad_norm": 1.2986421010957385,
      "learning_rate": 4.9270115152959744e-06,
      "loss": 0.1192,
      "step": 17808
    },
    {
      "epoch": 0.5195460645311862,
      "grad_norm": 1.1509890595581167,
      "learning_rate": 4.926539131804867e-06,
      "loss": 0.1427,
      "step": 17809
    },
    {
      "epoch": 0.5195752377618298,
      "grad_norm": 0.9717509027750101,
      "learning_rate": 4.926066748969598e-06,
      "loss": 0.142,
      "step": 17810
    },
    {
      "epoch": 0.5196044109924733,
      "grad_norm": 0.720133769036651,
      "learning_rate": 4.925594366794388e-06,
      "loss": 0.1319,
      "step": 17811
    },
    {
      "epoch": 0.5196335842231169,
      "grad_norm": 0.7808693343070151,
      "learning_rate": 4.925121985283453e-06,
      "loss": 0.1183,
      "step": 17812
    },
    {
      "epoch": 0.5196627574537604,
      "grad_norm": 0.7395202818105009,
      "learning_rate": 4.924649604441012e-06,
      "loss": 0.1172,
      "step": 17813
    },
    {
      "epoch": 0.519691930684404,
      "grad_norm": 0.7640524827593631,
      "learning_rate": 4.9241772242712815e-06,
      "loss": 0.1289,
      "step": 17814
    },
    {
      "epoch": 0.5197211039150476,
      "grad_norm": 0.7583174019718506,
      "learning_rate": 4.9237048447784785e-06,
      "loss": 0.137,
      "step": 17815
    },
    {
      "epoch": 0.5197502771456911,
      "grad_norm": 0.7979366143303395,
      "learning_rate": 4.92323246596682e-06,
      "loss": 0.1201,
      "step": 17816
    },
    {
      "epoch": 0.5197794503763347,
      "grad_norm": 0.6877777126262582,
      "learning_rate": 4.9227600878405255e-06,
      "loss": 0.1341,
      "step": 17817
    },
    {
      "epoch": 0.5198086236069782,
      "grad_norm": 0.8602122677761864,
      "learning_rate": 4.922287710403807e-06,
      "loss": 0.1194,
      "step": 17818
    },
    {
      "epoch": 0.5198377968376218,
      "grad_norm": 0.7375428944619327,
      "learning_rate": 4.921815333660888e-06,
      "loss": 0.137,
      "step": 17819
    },
    {
      "epoch": 0.5198669700682653,
      "grad_norm": 0.9867675926808651,
      "learning_rate": 4.9213429576159815e-06,
      "loss": 0.1333,
      "step": 17820
    },
    {
      "epoch": 0.5198961432989089,
      "grad_norm": 0.7379810370770487,
      "learning_rate": 4.920870582273308e-06,
      "loss": 0.1384,
      "step": 17821
    },
    {
      "epoch": 0.5199253165295524,
      "grad_norm": 0.848173272023187,
      "learning_rate": 4.920398207637082e-06,
      "loss": 0.1087,
      "step": 17822
    },
    {
      "epoch": 0.5199544897601961,
      "grad_norm": 0.9426890226773692,
      "learning_rate": 4.919925833711522e-06,
      "loss": 0.1423,
      "step": 17823
    },
    {
      "epoch": 0.5199836629908396,
      "grad_norm": 0.7548892339920918,
      "learning_rate": 4.919453460500844e-06,
      "loss": 0.1172,
      "step": 17824
    },
    {
      "epoch": 0.5200128362214832,
      "grad_norm": 0.771375472373932,
      "learning_rate": 4.918981088009267e-06,
      "loss": 0.1259,
      "step": 17825
    },
    {
      "epoch": 0.5200420094521268,
      "grad_norm": 0.806062664754491,
      "learning_rate": 4.918508716241009e-06,
      "loss": 0.1294,
      "step": 17826
    },
    {
      "epoch": 0.5200711826827703,
      "grad_norm": 0.918711591193355,
      "learning_rate": 4.918036345200284e-06,
      "loss": 0.1431,
      "step": 17827
    },
    {
      "epoch": 0.5201003559134139,
      "grad_norm": 0.8793300357363661,
      "learning_rate": 4.917563974891311e-06,
      "loss": 0.1093,
      "step": 17828
    },
    {
      "epoch": 0.5201295291440574,
      "grad_norm": 0.6963931385633928,
      "learning_rate": 4.917091605318309e-06,
      "loss": 0.1057,
      "step": 17829
    },
    {
      "epoch": 0.520158702374701,
      "grad_norm": 0.7431056834038489,
      "learning_rate": 4.91661923648549e-06,
      "loss": 0.1095,
      "step": 17830
    },
    {
      "epoch": 0.5201878756053445,
      "grad_norm": 0.8950737012664087,
      "learning_rate": 4.916146868397077e-06,
      "loss": 0.1289,
      "step": 17831
    },
    {
      "epoch": 0.5202170488359881,
      "grad_norm": 0.8267027874287781,
      "learning_rate": 4.915674501057284e-06,
      "loss": 0.1234,
      "step": 17832
    },
    {
      "epoch": 0.5202462220666316,
      "grad_norm": 0.9156664218792455,
      "learning_rate": 4.91520213447033e-06,
      "loss": 0.118,
      "step": 17833
    },
    {
      "epoch": 0.5202753952972752,
      "grad_norm": 0.8229915052361602,
      "learning_rate": 4.914729768640431e-06,
      "loss": 0.1257,
      "step": 17834
    },
    {
      "epoch": 0.5203045685279187,
      "grad_norm": 0.8387745144811057,
      "learning_rate": 4.914257403571803e-06,
      "loss": 0.1486,
      "step": 17835
    },
    {
      "epoch": 0.5203337417585624,
      "grad_norm": 0.8746183836734868,
      "learning_rate": 4.9137850392686635e-06,
      "loss": 0.1214,
      "step": 17836
    },
    {
      "epoch": 0.520362914989206,
      "grad_norm": 0.8686617024439045,
      "learning_rate": 4.913312675735233e-06,
      "loss": 0.1357,
      "step": 17837
    },
    {
      "epoch": 0.5203920882198495,
      "grad_norm": 0.804809133997618,
      "learning_rate": 4.912840312975725e-06,
      "loss": 0.1516,
      "step": 17838
    },
    {
      "epoch": 0.5204212614504931,
      "grad_norm": 0.8651921029484754,
      "learning_rate": 4.912367950994358e-06,
      "loss": 0.1175,
      "step": 17839
    },
    {
      "epoch": 0.5204504346811366,
      "grad_norm": 0.858855350432069,
      "learning_rate": 4.91189558979535e-06,
      "loss": 0.1501,
      "step": 17840
    },
    {
      "epoch": 0.5204796079117802,
      "grad_norm": 0.7022536918286209,
      "learning_rate": 4.911423229382915e-06,
      "loss": 0.1339,
      "step": 17841
    },
    {
      "epoch": 0.5205087811424237,
      "grad_norm": 0.929108569504628,
      "learning_rate": 4.910950869761273e-06,
      "loss": 0.1502,
      "step": 17842
    },
    {
      "epoch": 0.5205379543730673,
      "grad_norm": 0.810850832292827,
      "learning_rate": 4.910478510934642e-06,
      "loss": 0.109,
      "step": 17843
    },
    {
      "epoch": 0.5205671276037108,
      "grad_norm": 0.7207408769937799,
      "learning_rate": 4.9100061529072365e-06,
      "loss": 0.1498,
      "step": 17844
    },
    {
      "epoch": 0.5205963008343544,
      "grad_norm": 0.875596935326705,
      "learning_rate": 4.9095337956832744e-06,
      "loss": 0.1288,
      "step": 17845
    },
    {
      "epoch": 0.5206254740649979,
      "grad_norm": 0.7938756474073557,
      "learning_rate": 4.9090614392669735e-06,
      "loss": 0.1186,
      "step": 17846
    },
    {
      "epoch": 0.5206546472956415,
      "grad_norm": 1.020857028179819,
      "learning_rate": 4.90858908366255e-06,
      "loss": 0.1051,
      "step": 17847
    },
    {
      "epoch": 0.520683820526285,
      "grad_norm": 1.0545725879355818,
      "learning_rate": 4.90811672887422e-06,
      "loss": 0.1255,
      "step": 17848
    },
    {
      "epoch": 0.5207129937569286,
      "grad_norm": 0.7909117978497822,
      "learning_rate": 4.907644374906204e-06,
      "loss": 0.1341,
      "step": 17849
    },
    {
      "epoch": 0.5207421669875723,
      "grad_norm": 0.7248427416592674,
      "learning_rate": 4.907172021762715e-06,
      "loss": 0.1305,
      "step": 17850
    },
    {
      "epoch": 0.5207713402182158,
      "grad_norm": 0.7969163509737802,
      "learning_rate": 4.906699669447975e-06,
      "loss": 0.1369,
      "step": 17851
    },
    {
      "epoch": 0.5208005134488594,
      "grad_norm": 0.7503094517373456,
      "learning_rate": 4.9062273179661965e-06,
      "loss": 0.1368,
      "step": 17852
    },
    {
      "epoch": 0.5208296866795029,
      "grad_norm": 0.764510925958115,
      "learning_rate": 4.9057549673215976e-06,
      "loss": 0.139,
      "step": 17853
    },
    {
      "epoch": 0.5208588599101465,
      "grad_norm": 0.9238158315036814,
      "learning_rate": 4.9052826175183946e-06,
      "loss": 0.1379,
      "step": 17854
    },
    {
      "epoch": 0.52088803314079,
      "grad_norm": 0.8985348784809547,
      "learning_rate": 4.904810268560807e-06,
      "loss": 0.1513,
      "step": 17855
    },
    {
      "epoch": 0.5209172063714336,
      "grad_norm": 0.9198487150224192,
      "learning_rate": 4.904337920453053e-06,
      "loss": 0.1471,
      "step": 17856
    },
    {
      "epoch": 0.5209463796020771,
      "grad_norm": 1.3279149130796433,
      "learning_rate": 4.903865573199344e-06,
      "loss": 0.1306,
      "step": 17857
    },
    {
      "epoch": 0.5209755528327207,
      "grad_norm": 0.9732633830223836,
      "learning_rate": 4.903393226803902e-06,
      "loss": 0.1389,
      "step": 17858
    },
    {
      "epoch": 0.5210047260633642,
      "grad_norm": 0.9224297104087886,
      "learning_rate": 4.902920881270942e-06,
      "loss": 0.146,
      "step": 17859
    },
    {
      "epoch": 0.5210338992940078,
      "grad_norm": 0.9539881656581738,
      "learning_rate": 4.902448536604679e-06,
      "loss": 0.1304,
      "step": 17860
    },
    {
      "epoch": 0.5210630725246513,
      "grad_norm": 0.7559097551927099,
      "learning_rate": 4.901976192809335e-06,
      "loss": 0.123,
      "step": 17861
    },
    {
      "epoch": 0.5210922457552949,
      "grad_norm": 0.9404253632952876,
      "learning_rate": 4.901503849889122e-06,
      "loss": 0.1328,
      "step": 17862
    },
    {
      "epoch": 0.5211214189859386,
      "grad_norm": 0.8255707159001553,
      "learning_rate": 4.901031507848261e-06,
      "loss": 0.1389,
      "step": 17863
    },
    {
      "epoch": 0.5211505922165821,
      "grad_norm": 0.8598240984880731,
      "learning_rate": 4.900559166690968e-06,
      "loss": 0.1184,
      "step": 17864
    },
    {
      "epoch": 0.5211797654472257,
      "grad_norm": 0.7687898219097739,
      "learning_rate": 4.900086826421457e-06,
      "loss": 0.1361,
      "step": 17865
    },
    {
      "epoch": 0.5212089386778692,
      "grad_norm": 1.2607156104616033,
      "learning_rate": 4.899614487043945e-06,
      "loss": 0.136,
      "step": 17866
    },
    {
      "epoch": 0.5212381119085128,
      "grad_norm": 1.1577965033863988,
      "learning_rate": 4.899142148562654e-06,
      "loss": 0.1024,
      "step": 17867
    },
    {
      "epoch": 0.5212672851391563,
      "grad_norm": 0.8993489940128342,
      "learning_rate": 4.8986698109817965e-06,
      "loss": 0.1366,
      "step": 17868
    },
    {
      "epoch": 0.5212964583697999,
      "grad_norm": 0.8256751835174652,
      "learning_rate": 4.8981974743055924e-06,
      "loss": 0.1407,
      "step": 17869
    },
    {
      "epoch": 0.5213256316004434,
      "grad_norm": 1.8909717940180342,
      "learning_rate": 4.897725138538256e-06,
      "loss": 0.1468,
      "step": 17870
    },
    {
      "epoch": 0.521354804831087,
      "grad_norm": 0.7629539101206839,
      "learning_rate": 4.897252803684004e-06,
      "loss": 0.1455,
      "step": 17871
    },
    {
      "epoch": 0.5213839780617305,
      "grad_norm": 0.844166739001318,
      "learning_rate": 4.896780469747055e-06,
      "loss": 0.1389,
      "step": 17872
    },
    {
      "epoch": 0.5214131512923741,
      "grad_norm": 0.8643156654137838,
      "learning_rate": 4.896308136731626e-06,
      "loss": 0.1184,
      "step": 17873
    },
    {
      "epoch": 0.5214423245230176,
      "grad_norm": 0.6863796990266603,
      "learning_rate": 4.895835804641933e-06,
      "loss": 0.1166,
      "step": 17874
    },
    {
      "epoch": 0.5214714977536612,
      "grad_norm": 0.6952077020349884,
      "learning_rate": 4.895363473482193e-06,
      "loss": 0.1326,
      "step": 17875
    },
    {
      "epoch": 0.5215006709843047,
      "grad_norm": 0.8471047800515966,
      "learning_rate": 4.894891143256622e-06,
      "loss": 0.125,
      "step": 17876
    },
    {
      "epoch": 0.5215298442149484,
      "grad_norm": 0.7736863298443879,
      "learning_rate": 4.894418813969441e-06,
      "loss": 0.1225,
      "step": 17877
    },
    {
      "epoch": 0.521559017445592,
      "grad_norm": 0.7933700083617907,
      "learning_rate": 4.893946485624859e-06,
      "loss": 0.1219,
      "step": 17878
    },
    {
      "epoch": 0.5215881906762355,
      "grad_norm": 0.6815658508332655,
      "learning_rate": 4.8934741582271e-06,
      "loss": 0.1132,
      "step": 17879
    },
    {
      "epoch": 0.5216173639068791,
      "grad_norm": 0.7561530162098145,
      "learning_rate": 4.893001831780378e-06,
      "loss": 0.1395,
      "step": 17880
    },
    {
      "epoch": 0.5216465371375226,
      "grad_norm": 0.8749908551044697,
      "learning_rate": 4.892529506288911e-06,
      "loss": 0.1068,
      "step": 17881
    },
    {
      "epoch": 0.5216757103681662,
      "grad_norm": 1.3182560129097096,
      "learning_rate": 4.892057181756914e-06,
      "loss": 0.1061,
      "step": 17882
    },
    {
      "epoch": 0.5217048835988097,
      "grad_norm": 0.6291798547307672,
      "learning_rate": 4.891584858188605e-06,
      "loss": 0.1268,
      "step": 17883
    },
    {
      "epoch": 0.5217340568294533,
      "grad_norm": 0.7589851009998014,
      "learning_rate": 4.891112535588199e-06,
      "loss": 0.1286,
      "step": 17884
    },
    {
      "epoch": 0.5217632300600968,
      "grad_norm": 0.9022577316131594,
      "learning_rate": 4.890640213959915e-06,
      "loss": 0.1268,
      "step": 17885
    },
    {
      "epoch": 0.5217924032907404,
      "grad_norm": 0.9731030876978904,
      "learning_rate": 4.890167893307971e-06,
      "loss": 0.1518,
      "step": 17886
    },
    {
      "epoch": 0.521821576521384,
      "grad_norm": 0.7098412431093563,
      "learning_rate": 4.889695573636581e-06,
      "loss": 0.1295,
      "step": 17887
    },
    {
      "epoch": 0.5218507497520275,
      "grad_norm": 0.9869573351912039,
      "learning_rate": 4.889223254949961e-06,
      "loss": 0.1299,
      "step": 17888
    },
    {
      "epoch": 0.521879922982671,
      "grad_norm": 0.9072045110410829,
      "learning_rate": 4.888750937252332e-06,
      "loss": 0.1259,
      "step": 17889
    },
    {
      "epoch": 0.5219090962133147,
      "grad_norm": 0.8978386946958383,
      "learning_rate": 4.8882786205479035e-06,
      "loss": 0.1377,
      "step": 17890
    },
    {
      "epoch": 0.5219382694439583,
      "grad_norm": 0.8882988365300463,
      "learning_rate": 4.887806304840901e-06,
      "loss": 0.1401,
      "step": 17891
    },
    {
      "epoch": 0.5219674426746018,
      "grad_norm": 0.9094948331594452,
      "learning_rate": 4.887333990135536e-06,
      "loss": 0.1407,
      "step": 17892
    },
    {
      "epoch": 0.5219966159052454,
      "grad_norm": 0.6853262552371552,
      "learning_rate": 4.886861676436026e-06,
      "loss": 0.1167,
      "step": 17893
    },
    {
      "epoch": 0.5220257891358889,
      "grad_norm": 0.7485944270381532,
      "learning_rate": 4.886389363746588e-06,
      "loss": 0.1317,
      "step": 17894
    },
    {
      "epoch": 0.5220549623665325,
      "grad_norm": 0.8338742387687241,
      "learning_rate": 4.885917052071439e-06,
      "loss": 0.1138,
      "step": 17895
    },
    {
      "epoch": 0.522084135597176,
      "grad_norm": 0.7409709048756515,
      "learning_rate": 4.885444741414794e-06,
      "loss": 0.1173,
      "step": 17896
    },
    {
      "epoch": 0.5221133088278196,
      "grad_norm": 0.7580988775971637,
      "learning_rate": 4.884972431780872e-06,
      "loss": 0.1255,
      "step": 17897
    },
    {
      "epoch": 0.5221424820584631,
      "grad_norm": 0.8213209898400915,
      "learning_rate": 4.884500123173888e-06,
      "loss": 0.158,
      "step": 17898
    },
    {
      "epoch": 0.5221716552891067,
      "grad_norm": 0.8303623412264455,
      "learning_rate": 4.884027815598061e-06,
      "loss": 0.1214,
      "step": 17899
    },
    {
      "epoch": 0.5222008285197502,
      "grad_norm": 0.7614129240235559,
      "learning_rate": 4.8835555090576054e-06,
      "loss": 0.108,
      "step": 17900
    },
    {
      "epoch": 0.5222300017503938,
      "grad_norm": 0.860717154105392,
      "learning_rate": 4.883083203556738e-06,
      "loss": 0.1396,
      "step": 17901
    },
    {
      "epoch": 0.5222591749810374,
      "grad_norm": 0.757320014920277,
      "learning_rate": 4.882610899099674e-06,
      "loss": 0.1281,
      "step": 17902
    },
    {
      "epoch": 0.5222883482116809,
      "grad_norm": 0.8023616902442609,
      "learning_rate": 4.882138595690635e-06,
      "loss": 0.1353,
      "step": 17903
    },
    {
      "epoch": 0.5223175214423246,
      "grad_norm": 0.7335446218700364,
      "learning_rate": 4.881666293333832e-06,
      "loss": 0.1366,
      "step": 17904
    },
    {
      "epoch": 0.5223466946729681,
      "grad_norm": 0.6739565189851321,
      "learning_rate": 4.881193992033486e-06,
      "loss": 0.1359,
      "step": 17905
    },
    {
      "epoch": 0.5223758679036117,
      "grad_norm": 0.911529365925771,
      "learning_rate": 4.880721691793812e-06,
      "loss": 0.1478,
      "step": 17906
    },
    {
      "epoch": 0.5224050411342552,
      "grad_norm": 0.8483509395103137,
      "learning_rate": 4.880249392619025e-06,
      "loss": 0.1425,
      "step": 17907
    },
    {
      "epoch": 0.5224342143648988,
      "grad_norm": 0.8178655393409022,
      "learning_rate": 4.879777094513341e-06,
      "loss": 0.1368,
      "step": 17908
    },
    {
      "epoch": 0.5224633875955423,
      "grad_norm": 0.8577307170717238,
      "learning_rate": 4.879304797480981e-06,
      "loss": 0.1404,
      "step": 17909
    },
    {
      "epoch": 0.5224925608261859,
      "grad_norm": 0.9602509403066533,
      "learning_rate": 4.878832501526158e-06,
      "loss": 0.1429,
      "step": 17910
    },
    {
      "epoch": 0.5225217340568294,
      "grad_norm": 0.8733446378812333,
      "learning_rate": 4.878360206653091e-06,
      "loss": 0.15,
      "step": 17911
    },
    {
      "epoch": 0.522550907287473,
      "grad_norm": 0.9655338759400075,
      "learning_rate": 4.877887912865994e-06,
      "loss": 0.1472,
      "step": 17912
    },
    {
      "epoch": 0.5225800805181166,
      "grad_norm": 0.8931499260447278,
      "learning_rate": 4.877415620169084e-06,
      "loss": 0.1356,
      "step": 17913
    },
    {
      "epoch": 0.5226092537487601,
      "grad_norm": 0.9231256882193069,
      "learning_rate": 4.876943328566578e-06,
      "loss": 0.1235,
      "step": 17914
    },
    {
      "epoch": 0.5226384269794037,
      "grad_norm": 1.1325598380436084,
      "learning_rate": 4.876471038062693e-06,
      "loss": 0.1531,
      "step": 17915
    },
    {
      "epoch": 0.5226676002100472,
      "grad_norm": 1.186704811758057,
      "learning_rate": 4.875998748661646e-06,
      "loss": 0.1905,
      "step": 17916
    },
    {
      "epoch": 0.5226967734406909,
      "grad_norm": 0.8180383593609283,
      "learning_rate": 4.875526460367651e-06,
      "loss": 0.1346,
      "step": 17917
    },
    {
      "epoch": 0.5227259466713344,
      "grad_norm": 0.9811199477158028,
      "learning_rate": 4.8750541731849274e-06,
      "loss": 0.1134,
      "step": 17918
    },
    {
      "epoch": 0.522755119901978,
      "grad_norm": 0.9616250190910933,
      "learning_rate": 4.874581887117691e-06,
      "loss": 0.1319,
      "step": 17919
    },
    {
      "epoch": 0.5227842931326215,
      "grad_norm": 0.8495857545743318,
      "learning_rate": 4.874109602170154e-06,
      "loss": 0.1222,
      "step": 17920
    },
    {
      "epoch": 0.5228134663632651,
      "grad_norm": 0.9981163796100289,
      "learning_rate": 4.873637318346539e-06,
      "loss": 0.1475,
      "step": 17921
    },
    {
      "epoch": 0.5228426395939086,
      "grad_norm": 0.9542983875370893,
      "learning_rate": 4.8731650356510605e-06,
      "loss": 0.1239,
      "step": 17922
    },
    {
      "epoch": 0.5228718128245522,
      "grad_norm": 0.8052732719910499,
      "learning_rate": 4.872692754087933e-06,
      "loss": 0.1433,
      "step": 17923
    },
    {
      "epoch": 0.5229009860551957,
      "grad_norm": 0.7535801357383433,
      "learning_rate": 4.872220473661376e-06,
      "loss": 0.1296,
      "step": 17924
    },
    {
      "epoch": 0.5229301592858393,
      "grad_norm": 0.7714976778803393,
      "learning_rate": 4.871748194375602e-06,
      "loss": 0.1479,
      "step": 17925
    },
    {
      "epoch": 0.5229593325164829,
      "grad_norm": 0.8562842980877414,
      "learning_rate": 4.871275916234829e-06,
      "loss": 0.1138,
      "step": 17926
    },
    {
      "epoch": 0.5229885057471264,
      "grad_norm": 0.7413114455314824,
      "learning_rate": 4.870803639243275e-06,
      "loss": 0.1324,
      "step": 17927
    },
    {
      "epoch": 0.52301767897777,
      "grad_norm": 0.7978088224484438,
      "learning_rate": 4.8703313634051555e-06,
      "loss": 0.1557,
      "step": 17928
    },
    {
      "epoch": 0.5230468522084135,
      "grad_norm": 0.7027258225234448,
      "learning_rate": 4.869859088724687e-06,
      "loss": 0.1324,
      "step": 17929
    },
    {
      "epoch": 0.5230760254390571,
      "grad_norm": 0.7510819893563819,
      "learning_rate": 4.8693868152060844e-06,
      "loss": 0.1338,
      "step": 17930
    },
    {
      "epoch": 0.5231051986697007,
      "grad_norm": 0.8155236962523871,
      "learning_rate": 4.868914542853566e-06,
      "loss": 0.1259,
      "step": 17931
    },
    {
      "epoch": 0.5231343719003443,
      "grad_norm": 0.7732616948535456,
      "learning_rate": 4.868442271671346e-06,
      "loss": 0.1276,
      "step": 17932
    },
    {
      "epoch": 0.5231635451309878,
      "grad_norm": 0.833652499312342,
      "learning_rate": 4.867970001663644e-06,
      "loss": 0.1335,
      "step": 17933
    },
    {
      "epoch": 0.5231927183616314,
      "grad_norm": 0.8366874000975631,
      "learning_rate": 4.867497732834673e-06,
      "loss": 0.1387,
      "step": 17934
    },
    {
      "epoch": 0.523221891592275,
      "grad_norm": 0.7508403598543987,
      "learning_rate": 4.867025465188651e-06,
      "loss": 0.1398,
      "step": 17935
    },
    {
      "epoch": 0.5232510648229185,
      "grad_norm": 0.8776091395129162,
      "learning_rate": 4.866553198729795e-06,
      "loss": 0.1144,
      "step": 17936
    },
    {
      "epoch": 0.523280238053562,
      "grad_norm": 0.7561549927851885,
      "learning_rate": 4.866080933462318e-06,
      "loss": 0.1259,
      "step": 17937
    },
    {
      "epoch": 0.5233094112842056,
      "grad_norm": 0.8702700009110349,
      "learning_rate": 4.865608669390439e-06,
      "loss": 0.1182,
      "step": 17938
    },
    {
      "epoch": 0.5233385845148492,
      "grad_norm": 0.8289225573785339,
      "learning_rate": 4.8651364065183735e-06,
      "loss": 0.123,
      "step": 17939
    },
    {
      "epoch": 0.5233677577454927,
      "grad_norm": 2.2467421701393797,
      "learning_rate": 4.864664144850339e-06,
      "loss": 0.1335,
      "step": 17940
    },
    {
      "epoch": 0.5233969309761363,
      "grad_norm": 0.9348183107750612,
      "learning_rate": 4.864191884390551e-06,
      "loss": 0.1268,
      "step": 17941
    },
    {
      "epoch": 0.5234261042067798,
      "grad_norm": 0.8360624824019696,
      "learning_rate": 4.863719625143225e-06,
      "loss": 0.1202,
      "step": 17942
    },
    {
      "epoch": 0.5234552774374234,
      "grad_norm": 0.8640078876565908,
      "learning_rate": 4.8632473671125765e-06,
      "loss": 0.1066,
      "step": 17943
    },
    {
      "epoch": 0.523484450668067,
      "grad_norm": 0.9376836752967381,
      "learning_rate": 4.862775110302823e-06,
      "loss": 0.1333,
      "step": 17944
    },
    {
      "epoch": 0.5235136238987106,
      "grad_norm": 0.819108078445002,
      "learning_rate": 4.862302854718181e-06,
      "loss": 0.1325,
      "step": 17945
    },
    {
      "epoch": 0.5235427971293541,
      "grad_norm": 0.8755362805588103,
      "learning_rate": 4.861830600362868e-06,
      "loss": 0.142,
      "step": 17946
    },
    {
      "epoch": 0.5235719703599977,
      "grad_norm": 0.7584084125841402,
      "learning_rate": 4.861358347241097e-06,
      "loss": 0.1347,
      "step": 17947
    },
    {
      "epoch": 0.5236011435906412,
      "grad_norm": 0.9762185605530063,
      "learning_rate": 4.860886095357085e-06,
      "loss": 0.1266,
      "step": 17948
    },
    {
      "epoch": 0.5236303168212848,
      "grad_norm": 0.850976016020747,
      "learning_rate": 4.860413844715048e-06,
      "loss": 0.1518,
      "step": 17949
    },
    {
      "epoch": 0.5236594900519284,
      "grad_norm": 1.0727609637353392,
      "learning_rate": 4.859941595319204e-06,
      "loss": 0.138,
      "step": 17950
    },
    {
      "epoch": 0.5236886632825719,
      "grad_norm": 0.7412763744903017,
      "learning_rate": 4.859469347173769e-06,
      "loss": 0.1341,
      "step": 17951
    },
    {
      "epoch": 0.5237178365132155,
      "grad_norm": 0.6602935382793448,
      "learning_rate": 4.858997100282958e-06,
      "loss": 0.1229,
      "step": 17952
    },
    {
      "epoch": 0.523747009743859,
      "grad_norm": 0.8813426744458847,
      "learning_rate": 4.858524854650986e-06,
      "loss": 0.0988,
      "step": 17953
    },
    {
      "epoch": 0.5237761829745026,
      "grad_norm": 0.7512036713866219,
      "learning_rate": 4.858052610282072e-06,
      "loss": 0.1381,
      "step": 17954
    },
    {
      "epoch": 0.5238053562051461,
      "grad_norm": 0.9621052485718656,
      "learning_rate": 4.857580367180427e-06,
      "loss": 0.1346,
      "step": 17955
    },
    {
      "epoch": 0.5238345294357897,
      "grad_norm": 0.7738331930747737,
      "learning_rate": 4.857108125350274e-06,
      "loss": 0.1134,
      "step": 17956
    },
    {
      "epoch": 0.5238637026664332,
      "grad_norm": 0.8259746644171821,
      "learning_rate": 4.856635884795824e-06,
      "loss": 0.1252,
      "step": 17957
    },
    {
      "epoch": 0.5238928758970769,
      "grad_norm": 0.731470398207601,
      "learning_rate": 4.856163645521295e-06,
      "loss": 0.1423,
      "step": 17958
    },
    {
      "epoch": 0.5239220491277204,
      "grad_norm": 0.8188305469047616,
      "learning_rate": 4.855691407530903e-06,
      "loss": 0.1247,
      "step": 17959
    },
    {
      "epoch": 0.523951222358364,
      "grad_norm": 0.8393247378678323,
      "learning_rate": 4.855219170828863e-06,
      "loss": 0.136,
      "step": 17960
    },
    {
      "epoch": 0.5239803955890076,
      "grad_norm": 0.7694881940520427,
      "learning_rate": 4.854746935419391e-06,
      "loss": 0.1373,
      "step": 17961
    },
    {
      "epoch": 0.5240095688196511,
      "grad_norm": 0.6646887651826038,
      "learning_rate": 4.8542747013067046e-06,
      "loss": 0.1216,
      "step": 17962
    },
    {
      "epoch": 0.5240387420502947,
      "grad_norm": 0.7698194675114775,
      "learning_rate": 4.85380246849502e-06,
      "loss": 0.1143,
      "step": 17963
    },
    {
      "epoch": 0.5240679152809382,
      "grad_norm": 0.9663948066989679,
      "learning_rate": 4.853330236988551e-06,
      "loss": 0.1528,
      "step": 17964
    },
    {
      "epoch": 0.5240970885115818,
      "grad_norm": 0.8500157732306555,
      "learning_rate": 4.852858006791513e-06,
      "loss": 0.1301,
      "step": 17965
    },
    {
      "epoch": 0.5241262617422253,
      "grad_norm": 0.6561816000060233,
      "learning_rate": 4.852385777908127e-06,
      "loss": 0.1154,
      "step": 17966
    },
    {
      "epoch": 0.5241554349728689,
      "grad_norm": 1.2660261846960206,
      "learning_rate": 4.8519135503426014e-06,
      "loss": 0.1201,
      "step": 17967
    },
    {
      "epoch": 0.5241846082035124,
      "grad_norm": 0.9443999314956311,
      "learning_rate": 4.851441324099159e-06,
      "loss": 0.1088,
      "step": 17968
    },
    {
      "epoch": 0.524213781434156,
      "grad_norm": 0.8122989850323468,
      "learning_rate": 4.850969099182013e-06,
      "loss": 0.1508,
      "step": 17969
    },
    {
      "epoch": 0.5242429546647995,
      "grad_norm": 0.9833396853502512,
      "learning_rate": 4.850496875595379e-06,
      "loss": 0.1233,
      "step": 17970
    },
    {
      "epoch": 0.5242721278954432,
      "grad_norm": 0.986616223271879,
      "learning_rate": 4.850024653343473e-06,
      "loss": 0.1344,
      "step": 17971
    },
    {
      "epoch": 0.5243013011260867,
      "grad_norm": 0.8007862832192146,
      "learning_rate": 4.849552432430512e-06,
      "loss": 0.1372,
      "step": 17972
    },
    {
      "epoch": 0.5243304743567303,
      "grad_norm": 1.0666085204198423,
      "learning_rate": 4.849080212860709e-06,
      "loss": 0.1324,
      "step": 17973
    },
    {
      "epoch": 0.5243596475873739,
      "grad_norm": 0.9058754526315972,
      "learning_rate": 4.848607994638282e-06,
      "loss": 0.1593,
      "step": 17974
    },
    {
      "epoch": 0.5243888208180174,
      "grad_norm": 0.8441704512336876,
      "learning_rate": 4.848135777767447e-06,
      "loss": 0.1104,
      "step": 17975
    },
    {
      "epoch": 0.524417994048661,
      "grad_norm": 0.8940368108007474,
      "learning_rate": 4.847663562252422e-06,
      "loss": 0.1223,
      "step": 17976
    },
    {
      "epoch": 0.5244471672793045,
      "grad_norm": 0.800211833876078,
      "learning_rate": 4.8471913480974184e-06,
      "loss": 0.1291,
      "step": 17977
    },
    {
      "epoch": 0.5244763405099481,
      "grad_norm": 0.7713669762026707,
      "learning_rate": 4.846719135306654e-06,
      "loss": 0.1247,
      "step": 17978
    },
    {
      "epoch": 0.5245055137405916,
      "grad_norm": 0.9492886397041358,
      "learning_rate": 4.846246923884343e-06,
      "loss": 0.136,
      "step": 17979
    },
    {
      "epoch": 0.5245346869712352,
      "grad_norm": 1.0445991721570165,
      "learning_rate": 4.845774713834705e-06,
      "loss": 0.1535,
      "step": 17980
    },
    {
      "epoch": 0.5245638602018787,
      "grad_norm": 0.8926232799278784,
      "learning_rate": 4.845302505161954e-06,
      "loss": 0.1349,
      "step": 17981
    },
    {
      "epoch": 0.5245930334325223,
      "grad_norm": 0.869689043109833,
      "learning_rate": 4.844830297870303e-06,
      "loss": 0.1393,
      "step": 17982
    },
    {
      "epoch": 0.5246222066631658,
      "grad_norm": 0.9578446700207232,
      "learning_rate": 4.844358091963971e-06,
      "loss": 0.1361,
      "step": 17983
    },
    {
      "epoch": 0.5246513798938094,
      "grad_norm": 1.1267656458186959,
      "learning_rate": 4.8438858874471754e-06,
      "loss": 0.1239,
      "step": 17984
    },
    {
      "epoch": 0.524680553124453,
      "grad_norm": 0.9362500665418243,
      "learning_rate": 4.843413684324124e-06,
      "loss": 0.1585,
      "step": 17985
    },
    {
      "epoch": 0.5247097263550966,
      "grad_norm": 0.8570036739285297,
      "learning_rate": 4.842941482599041e-06,
      "loss": 0.1336,
      "step": 17986
    },
    {
      "epoch": 0.5247388995857402,
      "grad_norm": 1.1604667286988664,
      "learning_rate": 4.8424692822761395e-06,
      "loss": 0.1415,
      "step": 17987
    },
    {
      "epoch": 0.5247680728163837,
      "grad_norm": 1.0697962974820818,
      "learning_rate": 4.841997083359634e-06,
      "loss": 0.1177,
      "step": 17988
    },
    {
      "epoch": 0.5247972460470273,
      "grad_norm": 0.7622841840977652,
      "learning_rate": 4.841524885853742e-06,
      "loss": 0.1328,
      "step": 17989
    },
    {
      "epoch": 0.5248264192776708,
      "grad_norm": 0.9267699854793973,
      "learning_rate": 4.841052689762676e-06,
      "loss": 0.1262,
      "step": 17990
    },
    {
      "epoch": 0.5248555925083144,
      "grad_norm": 1.0435908215995608,
      "learning_rate": 4.840580495090654e-06,
      "loss": 0.1209,
      "step": 17991
    },
    {
      "epoch": 0.5248847657389579,
      "grad_norm": 0.9682931262440511,
      "learning_rate": 4.840108301841891e-06,
      "loss": 0.1447,
      "step": 17992
    },
    {
      "epoch": 0.5249139389696015,
      "grad_norm": 0.5997827566998676,
      "learning_rate": 4.839636110020605e-06,
      "loss": 0.1083,
      "step": 17993
    },
    {
      "epoch": 0.524943112200245,
      "grad_norm": 0.8357244523095261,
      "learning_rate": 4.839163919631008e-06,
      "loss": 0.1394,
      "step": 17994
    },
    {
      "epoch": 0.5249722854308886,
      "grad_norm": 0.8528270064538509,
      "learning_rate": 4.8386917306773166e-06,
      "loss": 0.1316,
      "step": 17995
    },
    {
      "epoch": 0.5250014586615321,
      "grad_norm": 0.7857553200242777,
      "learning_rate": 4.838219543163749e-06,
      "loss": 0.1245,
      "step": 17996
    },
    {
      "epoch": 0.5250306318921757,
      "grad_norm": 0.760674877072806,
      "learning_rate": 4.837747357094515e-06,
      "loss": 0.1301,
      "step": 17997
    },
    {
      "epoch": 0.5250598051228194,
      "grad_norm": 0.8523709086749641,
      "learning_rate": 4.837275172473837e-06,
      "loss": 0.177,
      "step": 17998
    },
    {
      "epoch": 0.5250889783534629,
      "grad_norm": 1.2116779135550535,
      "learning_rate": 4.836802989305927e-06,
      "loss": 0.1324,
      "step": 17999
    },
    {
      "epoch": 0.5251181515841065,
      "grad_norm": 0.8755427782452537,
      "learning_rate": 4.836330807595e-06,
      "loss": 0.1252,
      "step": 18000
    },
    {
      "epoch": 0.52514732481475,
      "grad_norm": 0.8016962301962923,
      "learning_rate": 4.835858627345273e-06,
      "loss": 0.1389,
      "step": 18001
    },
    {
      "epoch": 0.5251764980453936,
      "grad_norm": 0.8869979893036224,
      "learning_rate": 4.835386448560961e-06,
      "loss": 0.1498,
      "step": 18002
    },
    {
      "epoch": 0.5252056712760371,
      "grad_norm": 0.9275440747445214,
      "learning_rate": 4.834914271246279e-06,
      "loss": 0.1411,
      "step": 18003
    },
    {
      "epoch": 0.5252348445066807,
      "grad_norm": 0.8299382513115088,
      "learning_rate": 4.834442095405443e-06,
      "loss": 0.1371,
      "step": 18004
    },
    {
      "epoch": 0.5252640177373242,
      "grad_norm": 0.7344429097988446,
      "learning_rate": 4.833969921042669e-06,
      "loss": 0.1354,
      "step": 18005
    },
    {
      "epoch": 0.5252931909679678,
      "grad_norm": 0.8020292187680751,
      "learning_rate": 4.833497748162172e-06,
      "loss": 0.1174,
      "step": 18006
    },
    {
      "epoch": 0.5253223641986113,
      "grad_norm": 0.7589677544688201,
      "learning_rate": 4.833025576768168e-06,
      "loss": 0.1168,
      "step": 18007
    },
    {
      "epoch": 0.5253515374292549,
      "grad_norm": 1.0809993126779969,
      "learning_rate": 4.8325534068648705e-06,
      "loss": 0.134,
      "step": 18008
    },
    {
      "epoch": 0.5253807106598984,
      "grad_norm": 0.8306161921408216,
      "learning_rate": 4.8320812384564955e-06,
      "loss": 0.1252,
      "step": 18009
    },
    {
      "epoch": 0.525409883890542,
      "grad_norm": 0.7180312168582362,
      "learning_rate": 4.83160907154726e-06,
      "loss": 0.1113,
      "step": 18010
    },
    {
      "epoch": 0.5254390571211855,
      "grad_norm": 0.7818417928655756,
      "learning_rate": 4.83113690614138e-06,
      "loss": 0.1431,
      "step": 18011
    },
    {
      "epoch": 0.5254682303518292,
      "grad_norm": 0.9556346596527349,
      "learning_rate": 4.830664742243068e-06,
      "loss": 0.1287,
      "step": 18012
    },
    {
      "epoch": 0.5254974035824728,
      "grad_norm": 1.2172309169139588,
      "learning_rate": 4.830192579856541e-06,
      "loss": 0.135,
      "step": 18013
    },
    {
      "epoch": 0.5255265768131163,
      "grad_norm": 0.9589657292617719,
      "learning_rate": 4.829720418986015e-06,
      "loss": 0.1319,
      "step": 18014
    },
    {
      "epoch": 0.5255557500437599,
      "grad_norm": 1.05892071662572,
      "learning_rate": 4.829248259635701e-06,
      "loss": 0.1404,
      "step": 18015
    },
    {
      "epoch": 0.5255849232744034,
      "grad_norm": 0.9285604008415789,
      "learning_rate": 4.828776101809821e-06,
      "loss": 0.1275,
      "step": 18016
    },
    {
      "epoch": 0.525614096505047,
      "grad_norm": 0.975494494513817,
      "learning_rate": 4.8283039455125865e-06,
      "loss": 0.1325,
      "step": 18017
    },
    {
      "epoch": 0.5256432697356905,
      "grad_norm": 0.7933315168665201,
      "learning_rate": 4.827831790748213e-06,
      "loss": 0.1181,
      "step": 18018
    },
    {
      "epoch": 0.5256724429663341,
      "grad_norm": 1.0561985706953883,
      "learning_rate": 4.827359637520917e-06,
      "loss": 0.1438,
      "step": 18019
    },
    {
      "epoch": 0.5257016161969776,
      "grad_norm": 0.8767416437329977,
      "learning_rate": 4.826887485834913e-06,
      "loss": 0.1425,
      "step": 18020
    },
    {
      "epoch": 0.5257307894276212,
      "grad_norm": 1.030326838227654,
      "learning_rate": 4.826415335694414e-06,
      "loss": 0.1291,
      "step": 18021
    },
    {
      "epoch": 0.5257599626582647,
      "grad_norm": 0.6721898444369182,
      "learning_rate": 4.8259431871036395e-06,
      "loss": 0.1278,
      "step": 18022
    },
    {
      "epoch": 0.5257891358889083,
      "grad_norm": 0.709761200362223,
      "learning_rate": 4.825471040066803e-06,
      "loss": 0.1208,
      "step": 18023
    },
    {
      "epoch": 0.5258183091195519,
      "grad_norm": 0.8058778338832103,
      "learning_rate": 4.824998894588118e-06,
      "loss": 0.1473,
      "step": 18024
    },
    {
      "epoch": 0.5258474823501954,
      "grad_norm": 0.7934397989321833,
      "learning_rate": 4.824526750671802e-06,
      "loss": 0.1096,
      "step": 18025
    },
    {
      "epoch": 0.5258766555808391,
      "grad_norm": 0.7762464528155616,
      "learning_rate": 4.8240546083220705e-06,
      "loss": 0.1274,
      "step": 18026
    },
    {
      "epoch": 0.5259058288114826,
      "grad_norm": 0.8353728222100885,
      "learning_rate": 4.823582467543133e-06,
      "loss": 0.1287,
      "step": 18027
    },
    {
      "epoch": 0.5259350020421262,
      "grad_norm": 1.001478652873019,
      "learning_rate": 4.823110328339213e-06,
      "loss": 0.1248,
      "step": 18028
    },
    {
      "epoch": 0.5259641752727697,
      "grad_norm": 0.7875069717632527,
      "learning_rate": 4.822638190714521e-06,
      "loss": 0.1423,
      "step": 18029
    },
    {
      "epoch": 0.5259933485034133,
      "grad_norm": 1.136917047056234,
      "learning_rate": 4.822166054673273e-06,
      "loss": 0.1286,
      "step": 18030
    },
    {
      "epoch": 0.5260225217340568,
      "grad_norm": 0.8338412718231685,
      "learning_rate": 4.821693920219684e-06,
      "loss": 0.1506,
      "step": 18031
    },
    {
      "epoch": 0.5260516949647004,
      "grad_norm": 0.8810387903852749,
      "learning_rate": 4.821221787357969e-06,
      "loss": 0.1283,
      "step": 18032
    },
    {
      "epoch": 0.5260808681953439,
      "grad_norm": 0.8387559980864229,
      "learning_rate": 4.820749656092342e-06,
      "loss": 0.1445,
      "step": 18033
    },
    {
      "epoch": 0.5261100414259875,
      "grad_norm": 0.6970389163980308,
      "learning_rate": 4.820277526427019e-06,
      "loss": 0.1138,
      "step": 18034
    },
    {
      "epoch": 0.526139214656631,
      "grad_norm": 1.1730141652813515,
      "learning_rate": 4.8198053983662175e-06,
      "loss": 0.1412,
      "step": 18035
    },
    {
      "epoch": 0.5261683878872746,
      "grad_norm": 0.8330160665392379,
      "learning_rate": 4.81933327191415e-06,
      "loss": 0.1441,
      "step": 18036
    },
    {
      "epoch": 0.5261975611179182,
      "grad_norm": 1.1819803911284248,
      "learning_rate": 4.818861147075031e-06,
      "loss": 0.1326,
      "step": 18037
    },
    {
      "epoch": 0.5262267343485617,
      "grad_norm": 0.7637892124219897,
      "learning_rate": 4.818389023853077e-06,
      "loss": 0.1344,
      "step": 18038
    },
    {
      "epoch": 0.5262559075792054,
      "grad_norm": 0.8886704083543827,
      "learning_rate": 4.817916902252501e-06,
      "loss": 0.1468,
      "step": 18039
    },
    {
      "epoch": 0.5262850808098489,
      "grad_norm": 0.738187964168913,
      "learning_rate": 4.817444782277521e-06,
      "loss": 0.129,
      "step": 18040
    },
    {
      "epoch": 0.5263142540404925,
      "grad_norm": 0.739917219648246,
      "learning_rate": 4.8169726639323514e-06,
      "loss": 0.1182,
      "step": 18041
    },
    {
      "epoch": 0.526343427271136,
      "grad_norm": 0.703088669527814,
      "learning_rate": 4.816500547221204e-06,
      "loss": 0.1242,
      "step": 18042
    },
    {
      "epoch": 0.5263726005017796,
      "grad_norm": 0.9440728665811604,
      "learning_rate": 4.816028432148298e-06,
      "loss": 0.1125,
      "step": 18043
    },
    {
      "epoch": 0.5264017737324231,
      "grad_norm": 0.6979161404789853,
      "learning_rate": 4.8155563187178454e-06,
      "loss": 0.1252,
      "step": 18044
    },
    {
      "epoch": 0.5264309469630667,
      "grad_norm": 0.9156033733058908,
      "learning_rate": 4.815084206934059e-06,
      "loss": 0.1681,
      "step": 18045
    },
    {
      "epoch": 0.5264601201937102,
      "grad_norm": 0.8652140379149614,
      "learning_rate": 4.8146120968011605e-06,
      "loss": 0.137,
      "step": 18046
    },
    {
      "epoch": 0.5264892934243538,
      "grad_norm": 1.2206878237891405,
      "learning_rate": 4.81413998832336e-06,
      "loss": 0.1134,
      "step": 18047
    },
    {
      "epoch": 0.5265184666549974,
      "grad_norm": 0.8484945222426621,
      "learning_rate": 4.813667881504872e-06,
      "loss": 0.1251,
      "step": 18048
    },
    {
      "epoch": 0.5265476398856409,
      "grad_norm": 0.8924153261661256,
      "learning_rate": 4.813195776349915e-06,
      "loss": 0.1281,
      "step": 18049
    },
    {
      "epoch": 0.5265768131162845,
      "grad_norm": 0.8985907140185593,
      "learning_rate": 4.8127236728627005e-06,
      "loss": 0.1218,
      "step": 18050
    },
    {
      "epoch": 0.526605986346928,
      "grad_norm": 0.7852840943978696,
      "learning_rate": 4.8122515710474426e-06,
      "loss": 0.1316,
      "step": 18051
    },
    {
      "epoch": 0.5266351595775716,
      "grad_norm": 0.850122060780698,
      "learning_rate": 4.8117794709083595e-06,
      "loss": 0.1353,
      "step": 18052
    },
    {
      "epoch": 0.5266643328082152,
      "grad_norm": 0.7281238097267843,
      "learning_rate": 4.811307372449665e-06,
      "loss": 0.1529,
      "step": 18053
    },
    {
      "epoch": 0.5266935060388588,
      "grad_norm": 0.8030551206498359,
      "learning_rate": 4.810835275675572e-06,
      "loss": 0.1123,
      "step": 18054
    },
    {
      "epoch": 0.5267226792695023,
      "grad_norm": 0.8632120116245481,
      "learning_rate": 4.810363180590298e-06,
      "loss": 0.1318,
      "step": 18055
    },
    {
      "epoch": 0.5267518525001459,
      "grad_norm": 0.7320787543014441,
      "learning_rate": 4.809891087198056e-06,
      "loss": 0.1319,
      "step": 18056
    },
    {
      "epoch": 0.5267810257307894,
      "grad_norm": 0.8081987981181039,
      "learning_rate": 4.8094189955030576e-06,
      "loss": 0.1503,
      "step": 18057
    },
    {
      "epoch": 0.526810198961433,
      "grad_norm": 0.9697511601745913,
      "learning_rate": 4.808946905509524e-06,
      "loss": 0.1378,
      "step": 18058
    },
    {
      "epoch": 0.5268393721920765,
      "grad_norm": 0.7832968130363371,
      "learning_rate": 4.808474817221666e-06,
      "loss": 0.1357,
      "step": 18059
    },
    {
      "epoch": 0.5268685454227201,
      "grad_norm": 0.743714129006087,
      "learning_rate": 4.808002730643699e-06,
      "loss": 0.1348,
      "step": 18060
    },
    {
      "epoch": 0.5268977186533637,
      "grad_norm": 0.8473482931279858,
      "learning_rate": 4.80753064577984e-06,
      "loss": 0.1377,
      "step": 18061
    },
    {
      "epoch": 0.5269268918840072,
      "grad_norm": 0.9650357162729263,
      "learning_rate": 4.807058562634299e-06,
      "loss": 0.1463,
      "step": 18062
    },
    {
      "epoch": 0.5269560651146508,
      "grad_norm": 0.9278197670917236,
      "learning_rate": 4.806586481211293e-06,
      "loss": 0.1289,
      "step": 18063
    },
    {
      "epoch": 0.5269852383452943,
      "grad_norm": 1.0563501491813705,
      "learning_rate": 4.806114401515037e-06,
      "loss": 0.1137,
      "step": 18064
    },
    {
      "epoch": 0.5270144115759379,
      "grad_norm": 0.8898702218224827,
      "learning_rate": 4.805642323549746e-06,
      "loss": 0.1269,
      "step": 18065
    },
    {
      "epoch": 0.5270435848065815,
      "grad_norm": 1.1734522270005805,
      "learning_rate": 4.805170247319634e-06,
      "loss": 0.1188,
      "step": 18066
    },
    {
      "epoch": 0.5270727580372251,
      "grad_norm": 0.9957179615002449,
      "learning_rate": 4.804698172828915e-06,
      "loss": 0.1203,
      "step": 18067
    },
    {
      "epoch": 0.5271019312678686,
      "grad_norm": 0.9750430119631452,
      "learning_rate": 4.804226100081805e-06,
      "loss": 0.1241,
      "step": 18068
    },
    {
      "epoch": 0.5271311044985122,
      "grad_norm": 0.8965316300644635,
      "learning_rate": 4.803754029082516e-06,
      "loss": 0.1403,
      "step": 18069
    },
    {
      "epoch": 0.5271602777291557,
      "grad_norm": 0.9609371465783336,
      "learning_rate": 4.803281959835265e-06,
      "loss": 0.1589,
      "step": 18070
    },
    {
      "epoch": 0.5271894509597993,
      "grad_norm": 1.0046840178298868,
      "learning_rate": 4.802809892344267e-06,
      "loss": 0.1542,
      "step": 18071
    },
    {
      "epoch": 0.5272186241904429,
      "grad_norm": 0.8883119399217058,
      "learning_rate": 4.802337826613733e-06,
      "loss": 0.1275,
      "step": 18072
    },
    {
      "epoch": 0.5272477974210864,
      "grad_norm": 1.129326124691106,
      "learning_rate": 4.801865762647881e-06,
      "loss": 0.1218,
      "step": 18073
    },
    {
      "epoch": 0.52727697065173,
      "grad_norm": 0.8033499427478014,
      "learning_rate": 4.8013937004509255e-06,
      "loss": 0.1454,
      "step": 18074
    },
    {
      "epoch": 0.5273061438823735,
      "grad_norm": 0.838794416406774,
      "learning_rate": 4.800921640027075e-06,
      "loss": 0.1143,
      "step": 18075
    },
    {
      "epoch": 0.5273353171130171,
      "grad_norm": 0.7796150481679842,
      "learning_rate": 4.800449581380553e-06,
      "loss": 0.1467,
      "step": 18076
    },
    {
      "epoch": 0.5273644903436606,
      "grad_norm": 0.7990660761113042,
      "learning_rate": 4.799977524515569e-06,
      "loss": 0.1448,
      "step": 18077
    },
    {
      "epoch": 0.5273936635743042,
      "grad_norm": 0.7281474478754107,
      "learning_rate": 4.799505469436336e-06,
      "loss": 0.1143,
      "step": 18078
    },
    {
      "epoch": 0.5274228368049477,
      "grad_norm": 0.6220072426216634,
      "learning_rate": 4.799033416147072e-06,
      "loss": 0.1214,
      "step": 18079
    },
    {
      "epoch": 0.5274520100355914,
      "grad_norm": 0.8078280407580145,
      "learning_rate": 4.798561364651989e-06,
      "loss": 0.129,
      "step": 18080
    },
    {
      "epoch": 0.5274811832662349,
      "grad_norm": 0.9613274756810768,
      "learning_rate": 4.798089314955301e-06,
      "loss": 0.1186,
      "step": 18081
    },
    {
      "epoch": 0.5275103564968785,
      "grad_norm": 0.9082737903113405,
      "learning_rate": 4.797617267061225e-06,
      "loss": 0.1264,
      "step": 18082
    },
    {
      "epoch": 0.527539529727522,
      "grad_norm": 0.8825640198193014,
      "learning_rate": 4.797145220973974e-06,
      "loss": 0.1178,
      "step": 18083
    },
    {
      "epoch": 0.5275687029581656,
      "grad_norm": 1.0322207175883822,
      "learning_rate": 4.796673176697761e-06,
      "loss": 0.1431,
      "step": 18084
    },
    {
      "epoch": 0.5275978761888092,
      "grad_norm": 0.8572898625618728,
      "learning_rate": 4.796201134236802e-06,
      "loss": 0.1313,
      "step": 18085
    },
    {
      "epoch": 0.5276270494194527,
      "grad_norm": 0.6383544910445332,
      "learning_rate": 4.795729093595311e-06,
      "loss": 0.1033,
      "step": 18086
    },
    {
      "epoch": 0.5276562226500963,
      "grad_norm": 1.1487167821455868,
      "learning_rate": 4.795257054777498e-06,
      "loss": 0.1352,
      "step": 18087
    },
    {
      "epoch": 0.5276853958807398,
      "grad_norm": 1.227802510553886,
      "learning_rate": 4.794785017787586e-06,
      "loss": 0.1281,
      "step": 18088
    },
    {
      "epoch": 0.5277145691113834,
      "grad_norm": 1.0471796945750702,
      "learning_rate": 4.794312982629782e-06,
      "loss": 0.1271,
      "step": 18089
    },
    {
      "epoch": 0.5277437423420269,
      "grad_norm": 1.015561482373048,
      "learning_rate": 4.793840949308303e-06,
      "loss": 0.1337,
      "step": 18090
    },
    {
      "epoch": 0.5277729155726705,
      "grad_norm": 1.2683844195254879,
      "learning_rate": 4.793368917827364e-06,
      "loss": 0.1393,
      "step": 18091
    },
    {
      "epoch": 0.527802088803314,
      "grad_norm": 1.0412588226822705,
      "learning_rate": 4.792896888191178e-06,
      "loss": 0.1199,
      "step": 18092
    },
    {
      "epoch": 0.5278312620339577,
      "grad_norm": 0.8197454736929621,
      "learning_rate": 4.792424860403956e-06,
      "loss": 0.1458,
      "step": 18093
    },
    {
      "epoch": 0.5278604352646012,
      "grad_norm": 0.937736034001576,
      "learning_rate": 4.791952834469918e-06,
      "loss": 0.1415,
      "step": 18094
    },
    {
      "epoch": 0.5278896084952448,
      "grad_norm": 0.7921275622064367,
      "learning_rate": 4.791480810393274e-06,
      "loss": 0.1434,
      "step": 18095
    },
    {
      "epoch": 0.5279187817258884,
      "grad_norm": 0.981332889317743,
      "learning_rate": 4.791008788178242e-06,
      "loss": 0.1139,
      "step": 18096
    },
    {
      "epoch": 0.5279479549565319,
      "grad_norm": 0.7827055333623226,
      "learning_rate": 4.790536767829031e-06,
      "loss": 0.1229,
      "step": 18097
    },
    {
      "epoch": 0.5279771281871755,
      "grad_norm": 0.7327383006507882,
      "learning_rate": 4.790064749349859e-06,
      "loss": 0.1229,
      "step": 18098
    },
    {
      "epoch": 0.528006301417819,
      "grad_norm": 0.9917459690118268,
      "learning_rate": 4.789592732744938e-06,
      "loss": 0.145,
      "step": 18099
    },
    {
      "epoch": 0.5280354746484626,
      "grad_norm": 0.8834634369964915,
      "learning_rate": 4.789120718018483e-06,
      "loss": 0.1365,
      "step": 18100
    },
    {
      "epoch": 0.5280646478791061,
      "grad_norm": 0.8989532761807614,
      "learning_rate": 4.788648705174709e-06,
      "loss": 0.1509,
      "step": 18101
    },
    {
      "epoch": 0.5280938211097497,
      "grad_norm": 0.9894850757227963,
      "learning_rate": 4.788176694217829e-06,
      "loss": 0.1635,
      "step": 18102
    },
    {
      "epoch": 0.5281229943403932,
      "grad_norm": 0.7255062625172629,
      "learning_rate": 4.787704685152056e-06,
      "loss": 0.1191,
      "step": 18103
    },
    {
      "epoch": 0.5281521675710368,
      "grad_norm": 0.9273245366090763,
      "learning_rate": 4.787232677981606e-06,
      "loss": 0.1336,
      "step": 18104
    },
    {
      "epoch": 0.5281813408016803,
      "grad_norm": 1.0659834874039709,
      "learning_rate": 4.786760672710688e-06,
      "loss": 0.1449,
      "step": 18105
    },
    {
      "epoch": 0.5282105140323239,
      "grad_norm": 0.9509628085125836,
      "learning_rate": 4.786288669343524e-06,
      "loss": 0.1389,
      "step": 18106
    },
    {
      "epoch": 0.5282396872629675,
      "grad_norm": 0.8294774142889741,
      "learning_rate": 4.785816667884322e-06,
      "loss": 0.1167,
      "step": 18107
    },
    {
      "epoch": 0.5282688604936111,
      "grad_norm": 0.7845219601055182,
      "learning_rate": 4.785344668337298e-06,
      "loss": 0.1248,
      "step": 18108
    },
    {
      "epoch": 0.5282980337242547,
      "grad_norm": 1.0530402775112828,
      "learning_rate": 4.784872670706667e-06,
      "loss": 0.1316,
      "step": 18109
    },
    {
      "epoch": 0.5283272069548982,
      "grad_norm": 0.7586720307979221,
      "learning_rate": 4.78440067499664e-06,
      "loss": 0.109,
      "step": 18110
    },
    {
      "epoch": 0.5283563801855418,
      "grad_norm": 0.7611260963669129,
      "learning_rate": 4.783928681211431e-06,
      "loss": 0.1457,
      "step": 18111
    },
    {
      "epoch": 0.5283855534161853,
      "grad_norm": 0.7210615560156368,
      "learning_rate": 4.7834566893552566e-06,
      "loss": 0.1175,
      "step": 18112
    },
    {
      "epoch": 0.5284147266468289,
      "grad_norm": 0.7575481673555559,
      "learning_rate": 4.78298469943233e-06,
      "loss": 0.1223,
      "step": 18113
    },
    {
      "epoch": 0.5284438998774724,
      "grad_norm": 0.9555994621082579,
      "learning_rate": 4.782512711446864e-06,
      "loss": 0.1534,
      "step": 18114
    },
    {
      "epoch": 0.528473073108116,
      "grad_norm": 0.604928116169317,
      "learning_rate": 4.782040725403071e-06,
      "loss": 0.1299,
      "step": 18115
    },
    {
      "epoch": 0.5285022463387595,
      "grad_norm": 0.7659277118805252,
      "learning_rate": 4.781568741305168e-06,
      "loss": 0.1377,
      "step": 18116
    },
    {
      "epoch": 0.5285314195694031,
      "grad_norm": 0.8654886039910025,
      "learning_rate": 4.781096759157365e-06,
      "loss": 0.1313,
      "step": 18117
    },
    {
      "epoch": 0.5285605928000466,
      "grad_norm": 0.6664524862480887,
      "learning_rate": 4.78062477896388e-06,
      "loss": 0.1355,
      "step": 18118
    },
    {
      "epoch": 0.5285897660306902,
      "grad_norm": 0.5936013882066392,
      "learning_rate": 4.780152800728924e-06,
      "loss": 0.113,
      "step": 18119
    },
    {
      "epoch": 0.5286189392613339,
      "grad_norm": 0.7703810986902896,
      "learning_rate": 4.779680824456711e-06,
      "loss": 0.1262,
      "step": 18120
    },
    {
      "epoch": 0.5286481124919774,
      "grad_norm": 0.7784612891200107,
      "learning_rate": 4.779208850151456e-06,
      "loss": 0.1179,
      "step": 18121
    },
    {
      "epoch": 0.528677285722621,
      "grad_norm": 0.8464230582609625,
      "learning_rate": 4.778736877817371e-06,
      "loss": 0.1367,
      "step": 18122
    },
    {
      "epoch": 0.5287064589532645,
      "grad_norm": 0.7059243571332273,
      "learning_rate": 4.778264907458669e-06,
      "loss": 0.1244,
      "step": 18123
    },
    {
      "epoch": 0.5287356321839081,
      "grad_norm": 1.0484842853668985,
      "learning_rate": 4.777792939079566e-06,
      "loss": 0.1199,
      "step": 18124
    },
    {
      "epoch": 0.5287648054145516,
      "grad_norm": 0.87445289181572,
      "learning_rate": 4.777320972684275e-06,
      "loss": 0.1375,
      "step": 18125
    },
    {
      "epoch": 0.5287939786451952,
      "grad_norm": 0.955840944166885,
      "learning_rate": 4.77684900827701e-06,
      "loss": 0.1306,
      "step": 18126
    },
    {
      "epoch": 0.5288231518758387,
      "grad_norm": 1.0386475112368847,
      "learning_rate": 4.776377045861983e-06,
      "loss": 0.1288,
      "step": 18127
    },
    {
      "epoch": 0.5288523251064823,
      "grad_norm": 0.7025667394062385,
      "learning_rate": 4.775905085443407e-06,
      "loss": 0.1256,
      "step": 18128
    },
    {
      "epoch": 0.5288814983371258,
      "grad_norm": 1.1036444458007375,
      "learning_rate": 4.775433127025498e-06,
      "loss": 0.1225,
      "step": 18129
    },
    {
      "epoch": 0.5289106715677694,
      "grad_norm": 0.6748829544189402,
      "learning_rate": 4.774961170612468e-06,
      "loss": 0.1239,
      "step": 18130
    },
    {
      "epoch": 0.5289398447984129,
      "grad_norm": 0.7721274296734262,
      "learning_rate": 4.774489216208532e-06,
      "loss": 0.1278,
      "step": 18131
    },
    {
      "epoch": 0.5289690180290565,
      "grad_norm": 0.7973691056935861,
      "learning_rate": 4.774017263817902e-06,
      "loss": 0.1169,
      "step": 18132
    },
    {
      "epoch": 0.5289981912597,
      "grad_norm": 0.8711552955112548,
      "learning_rate": 4.773545313444792e-06,
      "loss": 0.1538,
      "step": 18133
    },
    {
      "epoch": 0.5290273644903437,
      "grad_norm": 0.9545024171331314,
      "learning_rate": 4.773073365093417e-06,
      "loss": 0.1515,
      "step": 18134
    },
    {
      "epoch": 0.5290565377209873,
      "grad_norm": 0.6827698208518546,
      "learning_rate": 4.772601418767983e-06,
      "loss": 0.1116,
      "step": 18135
    },
    {
      "epoch": 0.5290857109516308,
      "grad_norm": 0.927146834457475,
      "learning_rate": 4.772129474472715e-06,
      "loss": 0.1261,
      "step": 18136
    },
    {
      "epoch": 0.5291148841822744,
      "grad_norm": 0.9836776608461734,
      "learning_rate": 4.771657532211819e-06,
      "loss": 0.138,
      "step": 18137
    },
    {
      "epoch": 0.5291440574129179,
      "grad_norm": 0.7881476004287299,
      "learning_rate": 4.77118559198951e-06,
      "loss": 0.1208,
      "step": 18138
    },
    {
      "epoch": 0.5291732306435615,
      "grad_norm": 0.8841536043278565,
      "learning_rate": 4.7707136538100026e-06,
      "loss": 0.1395,
      "step": 18139
    },
    {
      "epoch": 0.529202403874205,
      "grad_norm": 1.2884414593715057,
      "learning_rate": 4.770241717677506e-06,
      "loss": 0.1543,
      "step": 18140
    },
    {
      "epoch": 0.5292315771048486,
      "grad_norm": 0.8431313028877536,
      "learning_rate": 4.769769783596238e-06,
      "loss": 0.1508,
      "step": 18141
    },
    {
      "epoch": 0.5292607503354921,
      "grad_norm": 0.8680004736037638,
      "learning_rate": 4.769297851570411e-06,
      "loss": 0.1198,
      "step": 18142
    },
    {
      "epoch": 0.5292899235661357,
      "grad_norm": 0.9459812532286426,
      "learning_rate": 4.768825921604238e-06,
      "loss": 0.1185,
      "step": 18143
    },
    {
      "epoch": 0.5293190967967792,
      "grad_norm": 1.0297871352150496,
      "learning_rate": 4.768353993701931e-06,
      "loss": 0.1218,
      "step": 18144
    },
    {
      "epoch": 0.5293482700274228,
      "grad_norm": 0.8701244527498139,
      "learning_rate": 4.767882067867705e-06,
      "loss": 0.1291,
      "step": 18145
    },
    {
      "epoch": 0.5293774432580663,
      "grad_norm": 1.066987590461949,
      "learning_rate": 4.7674101441057705e-06,
      "loss": 0.1386,
      "step": 18146
    },
    {
      "epoch": 0.52940661648871,
      "grad_norm": 0.9628167726103253,
      "learning_rate": 4.766938222420344e-06,
      "loss": 0.1346,
      "step": 18147
    },
    {
      "epoch": 0.5294357897193536,
      "grad_norm": 0.9860449775612433,
      "learning_rate": 4.766466302815639e-06,
      "loss": 0.1339,
      "step": 18148
    },
    {
      "epoch": 0.5294649629499971,
      "grad_norm": 0.787033332965655,
      "learning_rate": 4.765994385295865e-06,
      "loss": 0.1201,
      "step": 18149
    },
    {
      "epoch": 0.5294941361806407,
      "grad_norm": 0.7549511063452695,
      "learning_rate": 4.765522469865239e-06,
      "loss": 0.1245,
      "step": 18150
    },
    {
      "epoch": 0.5295233094112842,
      "grad_norm": 0.9348648305971067,
      "learning_rate": 4.765050556527973e-06,
      "loss": 0.1428,
      "step": 18151
    },
    {
      "epoch": 0.5295524826419278,
      "grad_norm": 0.8012189678711377,
      "learning_rate": 4.7645786452882746e-06,
      "loss": 0.1439,
      "step": 18152
    },
    {
      "epoch": 0.5295816558725713,
      "grad_norm": 0.7953172030733313,
      "learning_rate": 4.764106736150367e-06,
      "loss": 0.131,
      "step": 18153
    },
    {
      "epoch": 0.5296108291032149,
      "grad_norm": 0.7893499175576796,
      "learning_rate": 4.7636348291184555e-06,
      "loss": 0.1301,
      "step": 18154
    },
    {
      "epoch": 0.5296400023338584,
      "grad_norm": 0.7676449840213458,
      "learning_rate": 4.763162924196757e-06,
      "loss": 0.1129,
      "step": 18155
    },
    {
      "epoch": 0.529669175564502,
      "grad_norm": 0.8807712011856247,
      "learning_rate": 4.762691021389484e-06,
      "loss": 0.1423,
      "step": 18156
    },
    {
      "epoch": 0.5296983487951455,
      "grad_norm": 0.8807696551720602,
      "learning_rate": 4.762219120700848e-06,
      "loss": 0.1317,
      "step": 18157
    },
    {
      "epoch": 0.5297275220257891,
      "grad_norm": 1.0291963794905532,
      "learning_rate": 4.761747222135062e-06,
      "loss": 0.1134,
      "step": 18158
    },
    {
      "epoch": 0.5297566952564327,
      "grad_norm": 0.8358811957714001,
      "learning_rate": 4.7612753256963405e-06,
      "loss": 0.1367,
      "step": 18159
    },
    {
      "epoch": 0.5297858684870762,
      "grad_norm": 0.932982533203107,
      "learning_rate": 4.760803431388896e-06,
      "loss": 0.1354,
      "step": 18160
    },
    {
      "epoch": 0.5298150417177199,
      "grad_norm": 1.0072010753475942,
      "learning_rate": 4.760331539216943e-06,
      "loss": 0.1431,
      "step": 18161
    },
    {
      "epoch": 0.5298442149483634,
      "grad_norm": 0.9612798982168413,
      "learning_rate": 4.759859649184692e-06,
      "loss": 0.1273,
      "step": 18162
    },
    {
      "epoch": 0.529873388179007,
      "grad_norm": 0.7939814182068036,
      "learning_rate": 4.759387761296355e-06,
      "loss": 0.1472,
      "step": 18163
    },
    {
      "epoch": 0.5299025614096505,
      "grad_norm": 0.6750643649613849,
      "learning_rate": 4.758915875556147e-06,
      "loss": 0.1361,
      "step": 18164
    },
    {
      "epoch": 0.5299317346402941,
      "grad_norm": 1.0139754207777394,
      "learning_rate": 4.758443991968282e-06,
      "loss": 0.1373,
      "step": 18165
    },
    {
      "epoch": 0.5299609078709376,
      "grad_norm": 0.9152048475594219,
      "learning_rate": 4.7579721105369705e-06,
      "loss": 0.1263,
      "step": 18166
    },
    {
      "epoch": 0.5299900811015812,
      "grad_norm": 1.0364722244867663,
      "learning_rate": 4.757500231266427e-06,
      "loss": 0.1678,
      "step": 18167
    },
    {
      "epoch": 0.5300192543322247,
      "grad_norm": 0.8473708298773877,
      "learning_rate": 4.757028354160862e-06,
      "loss": 0.1267,
      "step": 18168
    },
    {
      "epoch": 0.5300484275628683,
      "grad_norm": 0.879045223881066,
      "learning_rate": 4.756556479224493e-06,
      "loss": 0.1385,
      "step": 18169
    },
    {
      "epoch": 0.5300776007935118,
      "grad_norm": 0.9393381422047005,
      "learning_rate": 4.756084606461526e-06,
      "loss": 0.1461,
      "step": 18170
    },
    {
      "epoch": 0.5301067740241554,
      "grad_norm": 1.0444034765539267,
      "learning_rate": 4.7556127358761785e-06,
      "loss": 0.1426,
      "step": 18171
    },
    {
      "epoch": 0.530135947254799,
      "grad_norm": 0.8727968068672698,
      "learning_rate": 4.755140867472663e-06,
      "loss": 0.1317,
      "step": 18172
    },
    {
      "epoch": 0.5301651204854425,
      "grad_norm": 0.9287140610081616,
      "learning_rate": 4.754669001255192e-06,
      "loss": 0.1292,
      "step": 18173
    },
    {
      "epoch": 0.5301942937160862,
      "grad_norm": 1.0607407479073734,
      "learning_rate": 4.754197137227976e-06,
      "loss": 0.1476,
      "step": 18174
    },
    {
      "epoch": 0.5302234669467297,
      "grad_norm": 0.8928006746861766,
      "learning_rate": 4.753725275395229e-06,
      "loss": 0.1408,
      "step": 18175
    },
    {
      "epoch": 0.5302526401773733,
      "grad_norm": 0.8564182428234062,
      "learning_rate": 4.753253415761164e-06,
      "loss": 0.1365,
      "step": 18176
    },
    {
      "epoch": 0.5302818134080168,
      "grad_norm": 0.8327865366363296,
      "learning_rate": 4.752781558329994e-06,
      "loss": 0.1476,
      "step": 18177
    },
    {
      "epoch": 0.5303109866386604,
      "grad_norm": 0.8220980041107818,
      "learning_rate": 4.752309703105933e-06,
      "loss": 0.1617,
      "step": 18178
    },
    {
      "epoch": 0.5303401598693039,
      "grad_norm": 0.8700150925041665,
      "learning_rate": 4.75183785009319e-06,
      "loss": 0.1533,
      "step": 18179
    },
    {
      "epoch": 0.5303693330999475,
      "grad_norm": 0.8551945400335113,
      "learning_rate": 4.7513659992959795e-06,
      "loss": 0.1315,
      "step": 18180
    },
    {
      "epoch": 0.530398506330591,
      "grad_norm": 0.8874755265774167,
      "learning_rate": 4.750894150718516e-06,
      "loss": 0.1223,
      "step": 18181
    },
    {
      "epoch": 0.5304276795612346,
      "grad_norm": 0.9902142340042741,
      "learning_rate": 4.750422304365006e-06,
      "loss": 0.133,
      "step": 18182
    },
    {
      "epoch": 0.5304568527918782,
      "grad_norm": 0.826339138436057,
      "learning_rate": 4.749950460239669e-06,
      "loss": 0.1382,
      "step": 18183
    },
    {
      "epoch": 0.5304860260225217,
      "grad_norm": 0.9591896728975197,
      "learning_rate": 4.749478618346714e-06,
      "loss": 0.1482,
      "step": 18184
    },
    {
      "epoch": 0.5305151992531653,
      "grad_norm": 1.0308127724807137,
      "learning_rate": 4.749006778690354e-06,
      "loss": 0.1308,
      "step": 18185
    },
    {
      "epoch": 0.5305443724838088,
      "grad_norm": 0.8470755796075768,
      "learning_rate": 4.748534941274803e-06,
      "loss": 0.1498,
      "step": 18186
    },
    {
      "epoch": 0.5305735457144524,
      "grad_norm": 0.730272608511541,
      "learning_rate": 4.748063106104271e-06,
      "loss": 0.1125,
      "step": 18187
    },
    {
      "epoch": 0.530602718945096,
      "grad_norm": 0.7683053062257869,
      "learning_rate": 4.74759127318297e-06,
      "loss": 0.1319,
      "step": 18188
    },
    {
      "epoch": 0.5306318921757396,
      "grad_norm": 0.7876833514650872,
      "learning_rate": 4.7471194425151145e-06,
      "loss": 0.1086,
      "step": 18189
    },
    {
      "epoch": 0.5306610654063831,
      "grad_norm": 0.6449356624882449,
      "learning_rate": 4.746647614104917e-06,
      "loss": 0.1102,
      "step": 18190
    },
    {
      "epoch": 0.5306902386370267,
      "grad_norm": 0.7419098809986556,
      "learning_rate": 4.74617578795659e-06,
      "loss": 0.137,
      "step": 18191
    },
    {
      "epoch": 0.5307194118676702,
      "grad_norm": 0.8498207794600134,
      "learning_rate": 4.745703964074344e-06,
      "loss": 0.1456,
      "step": 18192
    },
    {
      "epoch": 0.5307485850983138,
      "grad_norm": 0.8679690788819033,
      "learning_rate": 4.745232142462392e-06,
      "loss": 0.1223,
      "step": 18193
    },
    {
      "epoch": 0.5307777583289573,
      "grad_norm": 0.6259903203854398,
      "learning_rate": 4.744760323124945e-06,
      "loss": 0.1455,
      "step": 18194
    },
    {
      "epoch": 0.5308069315596009,
      "grad_norm": 0.7957118791469273,
      "learning_rate": 4.744288506066219e-06,
      "loss": 0.1343,
      "step": 18195
    },
    {
      "epoch": 0.5308361047902445,
      "grad_norm": 0.8133105330846443,
      "learning_rate": 4.743816691290425e-06,
      "loss": 0.1217,
      "step": 18196
    },
    {
      "epoch": 0.530865278020888,
      "grad_norm": 0.7151682397135266,
      "learning_rate": 4.743344878801773e-06,
      "loss": 0.1258,
      "step": 18197
    },
    {
      "epoch": 0.5308944512515316,
      "grad_norm": 0.761165780384021,
      "learning_rate": 4.742873068604477e-06,
      "loss": 0.1513,
      "step": 18198
    },
    {
      "epoch": 0.5309236244821751,
      "grad_norm": 0.8718161032869127,
      "learning_rate": 4.74240126070275e-06,
      "loss": 0.1281,
      "step": 18199
    },
    {
      "epoch": 0.5309527977128187,
      "grad_norm": 0.8866589715428614,
      "learning_rate": 4.7419294551008e-06,
      "loss": 0.1262,
      "step": 18200
    },
    {
      "epoch": 0.5309819709434623,
      "grad_norm": 0.8178419401134385,
      "learning_rate": 4.741457651802844e-06,
      "loss": 0.1365,
      "step": 18201
    },
    {
      "epoch": 0.5310111441741059,
      "grad_norm": 0.8823398417940378,
      "learning_rate": 4.7409858508130925e-06,
      "loss": 0.1512,
      "step": 18202
    },
    {
      "epoch": 0.5310403174047494,
      "grad_norm": 0.9261725894905353,
      "learning_rate": 4.7405140521357585e-06,
      "loss": 0.145,
      "step": 18203
    },
    {
      "epoch": 0.531069490635393,
      "grad_norm": 0.927896716636194,
      "learning_rate": 4.740042255775052e-06,
      "loss": 0.1288,
      "step": 18204
    },
    {
      "epoch": 0.5310986638660365,
      "grad_norm": 0.7190119095763443,
      "learning_rate": 4.739570461735186e-06,
      "loss": 0.1238,
      "step": 18205
    },
    {
      "epoch": 0.5311278370966801,
      "grad_norm": 0.9254830175686871,
      "learning_rate": 4.739098670020372e-06,
      "loss": 0.1329,
      "step": 18206
    },
    {
      "epoch": 0.5311570103273237,
      "grad_norm": 0.7772275685820007,
      "learning_rate": 4.738626880634823e-06,
      "loss": 0.1603,
      "step": 18207
    },
    {
      "epoch": 0.5311861835579672,
      "grad_norm": 0.9083866916896929,
      "learning_rate": 4.738155093582753e-06,
      "loss": 0.1805,
      "step": 18208
    },
    {
      "epoch": 0.5312153567886108,
      "grad_norm": 0.9578283106622949,
      "learning_rate": 4.73768330886837e-06,
      "loss": 0.1221,
      "step": 18209
    },
    {
      "epoch": 0.5312445300192543,
      "grad_norm": 0.9589037101494035,
      "learning_rate": 4.7372115264958885e-06,
      "loss": 0.1472,
      "step": 18210
    },
    {
      "epoch": 0.5312737032498979,
      "grad_norm": 0.8128078720110864,
      "learning_rate": 4.736739746469521e-06,
      "loss": 0.1292,
      "step": 18211
    },
    {
      "epoch": 0.5313028764805414,
      "grad_norm": 1.1196850412460473,
      "learning_rate": 4.736267968793474e-06,
      "loss": 0.1276,
      "step": 18212
    },
    {
      "epoch": 0.531332049711185,
      "grad_norm": 0.9749254399697772,
      "learning_rate": 4.735796193471967e-06,
      "loss": 0.1386,
      "step": 18213
    },
    {
      "epoch": 0.5313612229418285,
      "grad_norm": 0.8128488652324242,
      "learning_rate": 4.735324420509208e-06,
      "loss": 0.1253,
      "step": 18214
    },
    {
      "epoch": 0.5313903961724722,
      "grad_norm": 0.7719495539185899,
      "learning_rate": 4.734852649909409e-06,
      "loss": 0.1437,
      "step": 18215
    },
    {
      "epoch": 0.5314195694031157,
      "grad_norm": 1.3777910466487588,
      "learning_rate": 4.734380881676783e-06,
      "loss": 0.1249,
      "step": 18216
    },
    {
      "epoch": 0.5314487426337593,
      "grad_norm": 0.9424403854254995,
      "learning_rate": 4.733909115815541e-06,
      "loss": 0.1421,
      "step": 18217
    },
    {
      "epoch": 0.5314779158644029,
      "grad_norm": 0.7700984218747459,
      "learning_rate": 4.733437352329893e-06,
      "loss": 0.128,
      "step": 18218
    },
    {
      "epoch": 0.5315070890950464,
      "grad_norm": 0.7503305025733185,
      "learning_rate": 4.732965591224054e-06,
      "loss": 0.127,
      "step": 18219
    },
    {
      "epoch": 0.53153626232569,
      "grad_norm": 1.0114362003752448,
      "learning_rate": 4.732493832502234e-06,
      "loss": 0.1402,
      "step": 18220
    },
    {
      "epoch": 0.5315654355563335,
      "grad_norm": 0.8115849228481044,
      "learning_rate": 4.7320220761686474e-06,
      "loss": 0.1194,
      "step": 18221
    },
    {
      "epoch": 0.5315946087869771,
      "grad_norm": 0.7141742109317262,
      "learning_rate": 4.731550322227502e-06,
      "loss": 0.1222,
      "step": 18222
    },
    {
      "epoch": 0.5316237820176206,
      "grad_norm": 0.7910402233028568,
      "learning_rate": 4.731078570683011e-06,
      "loss": 0.1375,
      "step": 18223
    },
    {
      "epoch": 0.5316529552482642,
      "grad_norm": 0.866716925506884,
      "learning_rate": 4.730606821539386e-06,
      "loss": 0.1341,
      "step": 18224
    },
    {
      "epoch": 0.5316821284789077,
      "grad_norm": 0.7100187237823343,
      "learning_rate": 4.73013507480084e-06,
      "loss": 0.1265,
      "step": 18225
    },
    {
      "epoch": 0.5317113017095513,
      "grad_norm": 0.7937530420766605,
      "learning_rate": 4.7296633304715834e-06,
      "loss": 0.1542,
      "step": 18226
    },
    {
      "epoch": 0.5317404749401948,
      "grad_norm": 0.8790082634737036,
      "learning_rate": 4.729191588555827e-06,
      "loss": 0.1292,
      "step": 18227
    },
    {
      "epoch": 0.5317696481708385,
      "grad_norm": 0.7977555259832114,
      "learning_rate": 4.728719849057785e-06,
      "loss": 0.11,
      "step": 18228
    },
    {
      "epoch": 0.531798821401482,
      "grad_norm": 0.8802254598764814,
      "learning_rate": 4.7282481119816684e-06,
      "loss": 0.129,
      "step": 18229
    },
    {
      "epoch": 0.5318279946321256,
      "grad_norm": 0.8308378588462866,
      "learning_rate": 4.727776377331685e-06,
      "loss": 0.1318,
      "step": 18230
    },
    {
      "epoch": 0.5318571678627692,
      "grad_norm": 1.0403894789855197,
      "learning_rate": 4.72730464511205e-06,
      "loss": 0.1639,
      "step": 18231
    },
    {
      "epoch": 0.5318863410934127,
      "grad_norm": 0.777906028070433,
      "learning_rate": 4.726832915326974e-06,
      "loss": 0.1096,
      "step": 18232
    },
    {
      "epoch": 0.5319155143240563,
      "grad_norm": 0.6856424421621916,
      "learning_rate": 4.7263611879806694e-06,
      "loss": 0.1511,
      "step": 18233
    },
    {
      "epoch": 0.5319446875546998,
      "grad_norm": 1.054616312461978,
      "learning_rate": 4.725889463077346e-06,
      "loss": 0.1324,
      "step": 18234
    },
    {
      "epoch": 0.5319738607853434,
      "grad_norm": 1.0271182679398891,
      "learning_rate": 4.725417740621217e-06,
      "loss": 0.1371,
      "step": 18235
    },
    {
      "epoch": 0.5320030340159869,
      "grad_norm": 1.0183451635925291,
      "learning_rate": 4.724946020616491e-06,
      "loss": 0.1444,
      "step": 18236
    },
    {
      "epoch": 0.5320322072466305,
      "grad_norm": 1.1563805190256737,
      "learning_rate": 4.724474303067381e-06,
      "loss": 0.1199,
      "step": 18237
    },
    {
      "epoch": 0.532061380477274,
      "grad_norm": 1.378097088732142,
      "learning_rate": 4.724002587978102e-06,
      "loss": 0.1242,
      "step": 18238
    },
    {
      "epoch": 0.5320905537079176,
      "grad_norm": 0.7790409610984101,
      "learning_rate": 4.7235308753528596e-06,
      "loss": 0.1382,
      "step": 18239
    },
    {
      "epoch": 0.5321197269385611,
      "grad_norm": 0.9516491913643963,
      "learning_rate": 4.723059165195868e-06,
      "loss": 0.1451,
      "step": 18240
    },
    {
      "epoch": 0.5321489001692047,
      "grad_norm": 1.070747120837479,
      "learning_rate": 4.722587457511339e-06,
      "loss": 0.1201,
      "step": 18241
    },
    {
      "epoch": 0.5321780733998484,
      "grad_norm": 0.890840183191396,
      "learning_rate": 4.72211575230348e-06,
      "loss": 0.1096,
      "step": 18242
    },
    {
      "epoch": 0.5322072466304919,
      "grad_norm": 0.8084631046858048,
      "learning_rate": 4.721644049576508e-06,
      "loss": 0.1327,
      "step": 18243
    },
    {
      "epoch": 0.5322364198611355,
      "grad_norm": 1.0445416958195826,
      "learning_rate": 4.721172349334631e-06,
      "loss": 0.1206,
      "step": 18244
    },
    {
      "epoch": 0.532265593091779,
      "grad_norm": 0.8798676295080163,
      "learning_rate": 4.72070065158206e-06,
      "loss": 0.145,
      "step": 18245
    },
    {
      "epoch": 0.5322947663224226,
      "grad_norm": 0.8842797310684821,
      "learning_rate": 4.720228956323009e-06,
      "loss": 0.1291,
      "step": 18246
    },
    {
      "epoch": 0.5323239395530661,
      "grad_norm": 0.9091357762333823,
      "learning_rate": 4.719757263561685e-06,
      "loss": 0.1208,
      "step": 18247
    },
    {
      "epoch": 0.5323531127837097,
      "grad_norm": 0.8923979134276224,
      "learning_rate": 4.7192855733023e-06,
      "loss": 0.1412,
      "step": 18248
    },
    {
      "epoch": 0.5323822860143532,
      "grad_norm": 0.8404394908659193,
      "learning_rate": 4.718813885549069e-06,
      "loss": 0.1361,
      "step": 18249
    },
    {
      "epoch": 0.5324114592449968,
      "grad_norm": 0.8390766493099656,
      "learning_rate": 4.718342200306199e-06,
      "loss": 0.1082,
      "step": 18250
    },
    {
      "epoch": 0.5324406324756403,
      "grad_norm": 0.9777215074669852,
      "learning_rate": 4.717870517577904e-06,
      "loss": 0.1215,
      "step": 18251
    },
    {
      "epoch": 0.5324698057062839,
      "grad_norm": 0.6881839018889654,
      "learning_rate": 4.717398837368392e-06,
      "loss": 0.1365,
      "step": 18252
    },
    {
      "epoch": 0.5324989789369274,
      "grad_norm": 0.9999322787493645,
      "learning_rate": 4.716927159681877e-06,
      "loss": 0.1379,
      "step": 18253
    },
    {
      "epoch": 0.532528152167571,
      "grad_norm": 0.8681590806963833,
      "learning_rate": 4.716455484522567e-06,
      "loss": 0.1291,
      "step": 18254
    },
    {
      "epoch": 0.5325573253982147,
      "grad_norm": 0.7197364165320143,
      "learning_rate": 4.715983811894678e-06,
      "loss": 0.1143,
      "step": 18255
    },
    {
      "epoch": 0.5325864986288582,
      "grad_norm": 0.8390255890591161,
      "learning_rate": 4.715512141802415e-06,
      "loss": 0.1334,
      "step": 18256
    },
    {
      "epoch": 0.5326156718595018,
      "grad_norm": 0.8020806753716382,
      "learning_rate": 4.715040474249993e-06,
      "loss": 0.1228,
      "step": 18257
    },
    {
      "epoch": 0.5326448450901453,
      "grad_norm": 0.8289200491529507,
      "learning_rate": 4.714568809241622e-06,
      "loss": 0.13,
      "step": 18258
    },
    {
      "epoch": 0.5326740183207889,
      "grad_norm": 0.9476852145563937,
      "learning_rate": 4.7140971467815115e-06,
      "loss": 0.1385,
      "step": 18259
    },
    {
      "epoch": 0.5327031915514324,
      "grad_norm": 0.8103467856705289,
      "learning_rate": 4.713625486873872e-06,
      "loss": 0.122,
      "step": 18260
    },
    {
      "epoch": 0.532732364782076,
      "grad_norm": 0.8168846218972035,
      "learning_rate": 4.713153829522918e-06,
      "loss": 0.1151,
      "step": 18261
    },
    {
      "epoch": 0.5327615380127195,
      "grad_norm": 0.7159185013828294,
      "learning_rate": 4.712682174732857e-06,
      "loss": 0.128,
      "step": 18262
    },
    {
      "epoch": 0.5327907112433631,
      "grad_norm": 0.7375691227334467,
      "learning_rate": 4.7122105225079015e-06,
      "loss": 0.1491,
      "step": 18263
    },
    {
      "epoch": 0.5328198844740066,
      "grad_norm": 0.9646780648256801,
      "learning_rate": 4.711738872852262e-06,
      "loss": 0.1343,
      "step": 18264
    },
    {
      "epoch": 0.5328490577046502,
      "grad_norm": 0.7199992123139404,
      "learning_rate": 4.711267225770149e-06,
      "loss": 0.1362,
      "step": 18265
    },
    {
      "epoch": 0.5328782309352937,
      "grad_norm": 0.6174057422571153,
      "learning_rate": 4.710795581265772e-06,
      "loss": 0.144,
      "step": 18266
    },
    {
      "epoch": 0.5329074041659373,
      "grad_norm": 0.8041162452665113,
      "learning_rate": 4.710323939343343e-06,
      "loss": 0.1202,
      "step": 18267
    },
    {
      "epoch": 0.5329365773965808,
      "grad_norm": 0.8354353108165564,
      "learning_rate": 4.709852300007075e-06,
      "loss": 0.1137,
      "step": 18268
    },
    {
      "epoch": 0.5329657506272245,
      "grad_norm": 0.9102347114209531,
      "learning_rate": 4.709380663261175e-06,
      "loss": 0.1598,
      "step": 18269
    },
    {
      "epoch": 0.5329949238578681,
      "grad_norm": 0.8691516424405844,
      "learning_rate": 4.7089090291098555e-06,
      "loss": 0.1469,
      "step": 18270
    },
    {
      "epoch": 0.5330240970885116,
      "grad_norm": 0.894379100194545,
      "learning_rate": 4.708437397557327e-06,
      "loss": 0.1528,
      "step": 18271
    },
    {
      "epoch": 0.5330532703191552,
      "grad_norm": 0.7686223797221675,
      "learning_rate": 4.707965768607797e-06,
      "loss": 0.1186,
      "step": 18272
    },
    {
      "epoch": 0.5330824435497987,
      "grad_norm": 0.9922473040053408,
      "learning_rate": 4.7074941422654825e-06,
      "loss": 0.1301,
      "step": 18273
    },
    {
      "epoch": 0.5331116167804423,
      "grad_norm": 0.9553222278714265,
      "learning_rate": 4.7070225185345885e-06,
      "loss": 0.1348,
      "step": 18274
    },
    {
      "epoch": 0.5331407900110858,
      "grad_norm": 0.8780426277561764,
      "learning_rate": 4.706550897419328e-06,
      "loss": 0.1292,
      "step": 18275
    },
    {
      "epoch": 0.5331699632417294,
      "grad_norm": 1.1271964222389244,
      "learning_rate": 4.706079278923912e-06,
      "loss": 0.1277,
      "step": 18276
    },
    {
      "epoch": 0.5331991364723729,
      "grad_norm": 0.7803256032983364,
      "learning_rate": 4.70560766305255e-06,
      "loss": 0.1235,
      "step": 18277
    },
    {
      "epoch": 0.5332283097030165,
      "grad_norm": 1.062307277822807,
      "learning_rate": 4.70513604980945e-06,
      "loss": 0.1201,
      "step": 18278
    },
    {
      "epoch": 0.53325748293366,
      "grad_norm": 0.8432346201306513,
      "learning_rate": 4.704664439198826e-06,
      "loss": 0.1199,
      "step": 18279
    },
    {
      "epoch": 0.5332866561643036,
      "grad_norm": 0.8053567904561258,
      "learning_rate": 4.704192831224888e-06,
      "loss": 0.1382,
      "step": 18280
    },
    {
      "epoch": 0.5333158293949471,
      "grad_norm": 0.7709820122302663,
      "learning_rate": 4.703721225891847e-06,
      "loss": 0.1163,
      "step": 18281
    },
    {
      "epoch": 0.5333450026255907,
      "grad_norm": 0.9814907406419364,
      "learning_rate": 4.703249623203911e-06,
      "loss": 0.1502,
      "step": 18282
    },
    {
      "epoch": 0.5333741758562344,
      "grad_norm": 0.8545745632077943,
      "learning_rate": 4.702778023165291e-06,
      "loss": 0.1574,
      "step": 18283
    },
    {
      "epoch": 0.5334033490868779,
      "grad_norm": 0.7053356035176296,
      "learning_rate": 4.7023064257801976e-06,
      "loss": 0.1506,
      "step": 18284
    },
    {
      "epoch": 0.5334325223175215,
      "grad_norm": 1.1326987087328027,
      "learning_rate": 4.7018348310528424e-06,
      "loss": 0.1292,
      "step": 18285
    },
    {
      "epoch": 0.533461695548165,
      "grad_norm": 0.7508903359106306,
      "learning_rate": 4.701363238987435e-06,
      "loss": 0.1339,
      "step": 18286
    },
    {
      "epoch": 0.5334908687788086,
      "grad_norm": 0.6477469308655547,
      "learning_rate": 4.700891649588185e-06,
      "loss": 0.117,
      "step": 18287
    },
    {
      "epoch": 0.5335200420094521,
      "grad_norm": 0.6742799625847177,
      "learning_rate": 4.700420062859303e-06,
      "loss": 0.1634,
      "step": 18288
    },
    {
      "epoch": 0.5335492152400957,
      "grad_norm": 1.3577615709575646,
      "learning_rate": 4.6999484788049985e-06,
      "loss": 0.1352,
      "step": 18289
    },
    {
      "epoch": 0.5335783884707392,
      "grad_norm": 0.7758935998448669,
      "learning_rate": 4.699476897429481e-06,
      "loss": 0.1581,
      "step": 18290
    },
    {
      "epoch": 0.5336075617013828,
      "grad_norm": 2.846208854867453,
      "learning_rate": 4.699005318736965e-06,
      "loss": 0.1346,
      "step": 18291
    },
    {
      "epoch": 0.5336367349320263,
      "grad_norm": 0.6612591799521408,
      "learning_rate": 4.698533742731655e-06,
      "loss": 0.1322,
      "step": 18292
    },
    {
      "epoch": 0.5336659081626699,
      "grad_norm": 0.8580813226827683,
      "learning_rate": 4.698062169417766e-06,
      "loss": 0.1117,
      "step": 18293
    },
    {
      "epoch": 0.5336950813933135,
      "grad_norm": 0.5712477496198003,
      "learning_rate": 4.697590598799505e-06,
      "loss": 0.1257,
      "step": 18294
    },
    {
      "epoch": 0.533724254623957,
      "grad_norm": 0.895768043690643,
      "learning_rate": 4.697119030881083e-06,
      "loss": 0.1391,
      "step": 18295
    },
    {
      "epoch": 0.5337534278546007,
      "grad_norm": 0.8231409957602026,
      "learning_rate": 4.696647465666709e-06,
      "loss": 0.1208,
      "step": 18296
    },
    {
      "epoch": 0.5337826010852442,
      "grad_norm": 0.794681568119074,
      "learning_rate": 4.6961759031605945e-06,
      "loss": 0.1171,
      "step": 18297
    },
    {
      "epoch": 0.5338117743158878,
      "grad_norm": 0.7995639882069969,
      "learning_rate": 4.695704343366951e-06,
      "loss": 0.1487,
      "step": 18298
    },
    {
      "epoch": 0.5338409475465313,
      "grad_norm": 0.860057475567077,
      "learning_rate": 4.695232786289984e-06,
      "loss": 0.1186,
      "step": 18299
    },
    {
      "epoch": 0.5338701207771749,
      "grad_norm": 0.9380352743996345,
      "learning_rate": 4.694761231933907e-06,
      "loss": 0.1115,
      "step": 18300
    },
    {
      "epoch": 0.5338992940078184,
      "grad_norm": 0.7989324976010658,
      "learning_rate": 4.694289680302929e-06,
      "loss": 0.1485,
      "step": 18301
    },
    {
      "epoch": 0.533928467238462,
      "grad_norm": 0.87914004930729,
      "learning_rate": 4.693818131401258e-06,
      "loss": 0.1346,
      "step": 18302
    },
    {
      "epoch": 0.5339576404691055,
      "grad_norm": 0.7506380667800656,
      "learning_rate": 4.693346585233108e-06,
      "loss": 0.1205,
      "step": 18303
    },
    {
      "epoch": 0.5339868136997491,
      "grad_norm": 1.2642888865243207,
      "learning_rate": 4.692875041802686e-06,
      "loss": 0.1198,
      "step": 18304
    },
    {
      "epoch": 0.5340159869303926,
      "grad_norm": 0.8676706384944769,
      "learning_rate": 4.692403501114201e-06,
      "loss": 0.1275,
      "step": 18305
    },
    {
      "epoch": 0.5340451601610362,
      "grad_norm": 0.7709817409464969,
      "learning_rate": 4.691931963171866e-06,
      "loss": 0.1002,
      "step": 18306
    },
    {
      "epoch": 0.5340743333916798,
      "grad_norm": 1.351751291138029,
      "learning_rate": 4.691460427979888e-06,
      "loss": 0.1518,
      "step": 18307
    },
    {
      "epoch": 0.5341035066223233,
      "grad_norm": 1.1386904341793178,
      "learning_rate": 4.690988895542477e-06,
      "loss": 0.1496,
      "step": 18308
    },
    {
      "epoch": 0.5341326798529669,
      "grad_norm": 1.2382735820396864,
      "learning_rate": 4.690517365863843e-06,
      "loss": 0.1769,
      "step": 18309
    },
    {
      "epoch": 0.5341618530836105,
      "grad_norm": 1.2838804628353189,
      "learning_rate": 4.690045838948197e-06,
      "loss": 0.1269,
      "step": 18310
    },
    {
      "epoch": 0.5341910263142541,
      "grad_norm": 0.948425471216006,
      "learning_rate": 4.689574314799749e-06,
      "loss": 0.1213,
      "step": 18311
    },
    {
      "epoch": 0.5342201995448976,
      "grad_norm": 0.8091162461170046,
      "learning_rate": 4.689102793422706e-06,
      "loss": 0.1259,
      "step": 18312
    },
    {
      "epoch": 0.5342493727755412,
      "grad_norm": 1.2537966712209563,
      "learning_rate": 4.688631274821279e-06,
      "loss": 0.1353,
      "step": 18313
    },
    {
      "epoch": 0.5342785460061847,
      "grad_norm": 0.937753720979893,
      "learning_rate": 4.688159758999676e-06,
      "loss": 0.1308,
      "step": 18314
    },
    {
      "epoch": 0.5343077192368283,
      "grad_norm": 0.8199593811915793,
      "learning_rate": 4.687688245962111e-06,
      "loss": 0.1301,
      "step": 18315
    },
    {
      "epoch": 0.5343368924674718,
      "grad_norm": 1.0522108926739333,
      "learning_rate": 4.68721673571279e-06,
      "loss": 0.1357,
      "step": 18316
    },
    {
      "epoch": 0.5343660656981154,
      "grad_norm": 0.9877584992858726,
      "learning_rate": 4.686745228255923e-06,
      "loss": 0.1278,
      "step": 18317
    },
    {
      "epoch": 0.534395238928759,
      "grad_norm": 0.6561644672374607,
      "learning_rate": 4.686273723595721e-06,
      "loss": 0.1138,
      "step": 18318
    },
    {
      "epoch": 0.5344244121594025,
      "grad_norm": 0.674583807057486,
      "learning_rate": 4.685802221736391e-06,
      "loss": 0.1439,
      "step": 18319
    },
    {
      "epoch": 0.5344535853900461,
      "grad_norm": 0.8126408168353374,
      "learning_rate": 4.685330722682143e-06,
      "loss": 0.1348,
      "step": 18320
    },
    {
      "epoch": 0.5344827586206896,
      "grad_norm": 0.9298237473943516,
      "learning_rate": 4.684859226437188e-06,
      "loss": 0.123,
      "step": 18321
    },
    {
      "epoch": 0.5345119318513332,
      "grad_norm": 0.6441497151952471,
      "learning_rate": 4.684387733005735e-06,
      "loss": 0.1285,
      "step": 18322
    },
    {
      "epoch": 0.5345411050819768,
      "grad_norm": 0.6988583308964291,
      "learning_rate": 4.6839162423919946e-06,
      "loss": 0.1261,
      "step": 18323
    },
    {
      "epoch": 0.5345702783126204,
      "grad_norm": 0.729748063937843,
      "learning_rate": 4.683444754600172e-06,
      "loss": 0.1171,
      "step": 18324
    },
    {
      "epoch": 0.5345994515432639,
      "grad_norm": 0.7203507248612669,
      "learning_rate": 4.6829732696344796e-06,
      "loss": 0.1534,
      "step": 18325
    },
    {
      "epoch": 0.5346286247739075,
      "grad_norm": 0.8065783589834921,
      "learning_rate": 4.6825017874991255e-06,
      "loss": 0.1368,
      "step": 18326
    },
    {
      "epoch": 0.534657798004551,
      "grad_norm": 0.9653649908196572,
      "learning_rate": 4.6820303081983205e-06,
      "loss": 0.1433,
      "step": 18327
    },
    {
      "epoch": 0.5346869712351946,
      "grad_norm": 0.9393678065840173,
      "learning_rate": 4.681558831736274e-06,
      "loss": 0.136,
      "step": 18328
    },
    {
      "epoch": 0.5347161444658382,
      "grad_norm": 0.808057939594074,
      "learning_rate": 4.681087358117193e-06,
      "loss": 0.1583,
      "step": 18329
    },
    {
      "epoch": 0.5347453176964817,
      "grad_norm": 0.7662604504264521,
      "learning_rate": 4.680615887345288e-06,
      "loss": 0.1311,
      "step": 18330
    },
    {
      "epoch": 0.5347744909271253,
      "grad_norm": 0.9298896011013003,
      "learning_rate": 4.680144419424769e-06,
      "loss": 0.1489,
      "step": 18331
    },
    {
      "epoch": 0.5348036641577688,
      "grad_norm": 0.7472354714700918,
      "learning_rate": 4.679672954359842e-06,
      "loss": 0.1452,
      "step": 18332
    },
    {
      "epoch": 0.5348328373884124,
      "grad_norm": 0.8432475776533255,
      "learning_rate": 4.679201492154721e-06,
      "loss": 0.1393,
      "step": 18333
    },
    {
      "epoch": 0.5348620106190559,
      "grad_norm": 0.8881170093770653,
      "learning_rate": 4.678730032813611e-06,
      "loss": 0.1253,
      "step": 18334
    },
    {
      "epoch": 0.5348911838496995,
      "grad_norm": 0.9069534303549787,
      "learning_rate": 4.678258576340723e-06,
      "loss": 0.117,
      "step": 18335
    },
    {
      "epoch": 0.534920357080343,
      "grad_norm": 0.7528468968079108,
      "learning_rate": 4.677787122740267e-06,
      "loss": 0.1175,
      "step": 18336
    },
    {
      "epoch": 0.5349495303109867,
      "grad_norm": 0.9679569993903948,
      "learning_rate": 4.677315672016446e-06,
      "loss": 0.1154,
      "step": 18337
    },
    {
      "epoch": 0.5349787035416302,
      "grad_norm": 1.2178409028124713,
      "learning_rate": 4.6768442241734785e-06,
      "loss": 0.1319,
      "step": 18338
    },
    {
      "epoch": 0.5350078767722738,
      "grad_norm": 1.094091458256245,
      "learning_rate": 4.676372779215568e-06,
      "loss": 0.1321,
      "step": 18339
    },
    {
      "epoch": 0.5350370500029173,
      "grad_norm": 0.82715505011348,
      "learning_rate": 4.675901337146922e-06,
      "loss": 0.1224,
      "step": 18340
    },
    {
      "epoch": 0.5350662232335609,
      "grad_norm": 1.0231909253261822,
      "learning_rate": 4.675429897971754e-06,
      "loss": 0.1355,
      "step": 18341
    },
    {
      "epoch": 0.5350953964642045,
      "grad_norm": 0.9893810869493288,
      "learning_rate": 4.674958461694269e-06,
      "loss": 0.147,
      "step": 18342
    },
    {
      "epoch": 0.535124569694848,
      "grad_norm": 0.7506321777001245,
      "learning_rate": 4.674487028318676e-06,
      "loss": 0.117,
      "step": 18343
    },
    {
      "epoch": 0.5351537429254916,
      "grad_norm": 1.0344483586868616,
      "learning_rate": 4.674015597849186e-06,
      "loss": 0.1283,
      "step": 18344
    },
    {
      "epoch": 0.5351829161561351,
      "grad_norm": 0.9586845928124975,
      "learning_rate": 4.673544170290009e-06,
      "loss": 0.1486,
      "step": 18345
    },
    {
      "epoch": 0.5352120893867787,
      "grad_norm": 0.7940525509400088,
      "learning_rate": 4.673072745645349e-06,
      "loss": 0.1306,
      "step": 18346
    },
    {
      "epoch": 0.5352412626174222,
      "grad_norm": 1.3334246405174768,
      "learning_rate": 4.672601323919419e-06,
      "loss": 0.1287,
      "step": 18347
    },
    {
      "epoch": 0.5352704358480658,
      "grad_norm": 1.078371798384496,
      "learning_rate": 4.6721299051164265e-06,
      "loss": 0.1197,
      "step": 18348
    },
    {
      "epoch": 0.5352996090787093,
      "grad_norm": 0.9785701120685548,
      "learning_rate": 4.671658489240577e-06,
      "loss": 0.1278,
      "step": 18349
    },
    {
      "epoch": 0.535328782309353,
      "grad_norm": 0.7759886837248762,
      "learning_rate": 4.671187076296085e-06,
      "loss": 0.109,
      "step": 18350
    },
    {
      "epoch": 0.5353579555399965,
      "grad_norm": 1.1109812964350543,
      "learning_rate": 4.670715666287156e-06,
      "loss": 0.1415,
      "step": 18351
    },
    {
      "epoch": 0.5353871287706401,
      "grad_norm": 1.2929363377866827,
      "learning_rate": 4.670244259217998e-06,
      "loss": 0.1513,
      "step": 18352
    },
    {
      "epoch": 0.5354163020012837,
      "grad_norm": 0.9022509312476248,
      "learning_rate": 4.669772855092822e-06,
      "loss": 0.1476,
      "step": 18353
    },
    {
      "epoch": 0.5354454752319272,
      "grad_norm": 0.988257571648059,
      "learning_rate": 4.6693014539158345e-06,
      "loss": 0.1361,
      "step": 18354
    },
    {
      "epoch": 0.5354746484625708,
      "grad_norm": 1.0434758164432263,
      "learning_rate": 4.668830055691243e-06,
      "loss": 0.1454,
      "step": 18355
    },
    {
      "epoch": 0.5355038216932143,
      "grad_norm": 0.9014227433768871,
      "learning_rate": 4.668358660423259e-06,
      "loss": 0.1285,
      "step": 18356
    },
    {
      "epoch": 0.5355329949238579,
      "grad_norm": 0.8525085880182496,
      "learning_rate": 4.66788726811609e-06,
      "loss": 0.1295,
      "step": 18357
    },
    {
      "epoch": 0.5355621681545014,
      "grad_norm": 0.9098629368849164,
      "learning_rate": 4.667415878773945e-06,
      "loss": 0.1453,
      "step": 18358
    },
    {
      "epoch": 0.535591341385145,
      "grad_norm": 0.8745548385921665,
      "learning_rate": 4.6669444924010305e-06,
      "loss": 0.1233,
      "step": 18359
    },
    {
      "epoch": 0.5356205146157885,
      "grad_norm": 0.8206904657070538,
      "learning_rate": 4.666473109001556e-06,
      "loss": 0.1677,
      "step": 18360
    },
    {
      "epoch": 0.5356496878464321,
      "grad_norm": 0.8218439636259399,
      "learning_rate": 4.666001728579729e-06,
      "loss": 0.1466,
      "step": 18361
    },
    {
      "epoch": 0.5356788610770756,
      "grad_norm": 0.9085453548345437,
      "learning_rate": 4.66553035113976e-06,
      "loss": 0.1272,
      "step": 18362
    },
    {
      "epoch": 0.5357080343077192,
      "grad_norm": 0.8833952600137825,
      "learning_rate": 4.665058976685857e-06,
      "loss": 0.1234,
      "step": 18363
    },
    {
      "epoch": 0.5357372075383628,
      "grad_norm": 1.0060277647472204,
      "learning_rate": 4.664587605222226e-06,
      "loss": 0.1292,
      "step": 18364
    },
    {
      "epoch": 0.5357663807690064,
      "grad_norm": 1.0899336140879554,
      "learning_rate": 4.6641162367530775e-06,
      "loss": 0.1033,
      "step": 18365
    },
    {
      "epoch": 0.53579555399965,
      "grad_norm": 1.2235143866002742,
      "learning_rate": 4.66364487128262e-06,
      "loss": 0.1169,
      "step": 18366
    },
    {
      "epoch": 0.5358247272302935,
      "grad_norm": 0.8596889715983336,
      "learning_rate": 4.663173508815058e-06,
      "loss": 0.1535,
      "step": 18367
    },
    {
      "epoch": 0.5358539004609371,
      "grad_norm": 0.9054611873990683,
      "learning_rate": 4.662702149354605e-06,
      "loss": 0.1192,
      "step": 18368
    },
    {
      "epoch": 0.5358830736915806,
      "grad_norm": 0.9634229690797912,
      "learning_rate": 4.662230792905465e-06,
      "loss": 0.118,
      "step": 18369
    },
    {
      "epoch": 0.5359122469222242,
      "grad_norm": 1.0595127571269867,
      "learning_rate": 4.66175943947185e-06,
      "loss": 0.1328,
      "step": 18370
    },
    {
      "epoch": 0.5359414201528677,
      "grad_norm": 1.0649972455942749,
      "learning_rate": 4.661288089057965e-06,
      "loss": 0.1497,
      "step": 18371
    },
    {
      "epoch": 0.5359705933835113,
      "grad_norm": 0.8622703140674189,
      "learning_rate": 4.660816741668019e-06,
      "loss": 0.1228,
      "step": 18372
    },
    {
      "epoch": 0.5359997666141548,
      "grad_norm": 0.9624169763035911,
      "learning_rate": 4.660345397306219e-06,
      "loss": 0.1315,
      "step": 18373
    },
    {
      "epoch": 0.5360289398447984,
      "grad_norm": 0.8373174658170262,
      "learning_rate": 4.659874055976775e-06,
      "loss": 0.1505,
      "step": 18374
    },
    {
      "epoch": 0.5360581130754419,
      "grad_norm": 0.7389060289683349,
      "learning_rate": 4.6594027176838955e-06,
      "loss": 0.1159,
      "step": 18375
    },
    {
      "epoch": 0.5360872863060855,
      "grad_norm": 0.6855628089539305,
      "learning_rate": 4.658931382431786e-06,
      "loss": 0.1153,
      "step": 18376
    },
    {
      "epoch": 0.5361164595367292,
      "grad_norm": 0.9392507510743571,
      "learning_rate": 4.658460050224656e-06,
      "loss": 0.1362,
      "step": 18377
    },
    {
      "epoch": 0.5361456327673727,
      "grad_norm": 0.9069703368660548,
      "learning_rate": 4.657988721066714e-06,
      "loss": 0.1269,
      "step": 18378
    },
    {
      "epoch": 0.5361748059980163,
      "grad_norm": 0.8773801036238599,
      "learning_rate": 4.657517394962164e-06,
      "loss": 0.1242,
      "step": 18379
    },
    {
      "epoch": 0.5362039792286598,
      "grad_norm": 0.7399085306556702,
      "learning_rate": 4.65704607191522e-06,
      "loss": 0.1503,
      "step": 18380
    },
    {
      "epoch": 0.5362331524593034,
      "grad_norm": 1.0119142077092582,
      "learning_rate": 4.656574751930085e-06,
      "loss": 0.118,
      "step": 18381
    },
    {
      "epoch": 0.5362623256899469,
      "grad_norm": 1.5654223764628867,
      "learning_rate": 4.65610343501097e-06,
      "loss": 0.152,
      "step": 18382
    },
    {
      "epoch": 0.5362914989205905,
      "grad_norm": 0.8274538592274344,
      "learning_rate": 4.655632121162082e-06,
      "loss": 0.1457,
      "step": 18383
    },
    {
      "epoch": 0.536320672151234,
      "grad_norm": 1.2318373194834533,
      "learning_rate": 4.6551608103876275e-06,
      "loss": 0.1415,
      "step": 18384
    },
    {
      "epoch": 0.5363498453818776,
      "grad_norm": 0.9486937306815246,
      "learning_rate": 4.654689502691813e-06,
      "loss": 0.1259,
      "step": 18385
    },
    {
      "epoch": 0.5363790186125211,
      "grad_norm": 0.7057054803619004,
      "learning_rate": 4.65421819807885e-06,
      "loss": 0.1167,
      "step": 18386
    },
    {
      "epoch": 0.5364081918431647,
      "grad_norm": 1.0210682161050608,
      "learning_rate": 4.653746896552944e-06,
      "loss": 0.1405,
      "step": 18387
    },
    {
      "epoch": 0.5364373650738082,
      "grad_norm": 0.8450355942890404,
      "learning_rate": 4.653275598118304e-06,
      "loss": 0.1375,
      "step": 18388
    },
    {
      "epoch": 0.5364665383044518,
      "grad_norm": 0.743774645346137,
      "learning_rate": 4.652804302779136e-06,
      "loss": 0.1071,
      "step": 18389
    },
    {
      "epoch": 0.5364957115350953,
      "grad_norm": 0.7219983884839402,
      "learning_rate": 4.652333010539648e-06,
      "loss": 0.1317,
      "step": 18390
    },
    {
      "epoch": 0.536524884765739,
      "grad_norm": 1.6790037943494809,
      "learning_rate": 4.651861721404047e-06,
      "loss": 0.1561,
      "step": 18391
    },
    {
      "epoch": 0.5365540579963826,
      "grad_norm": 0.8807400125325477,
      "learning_rate": 4.651390435376543e-06,
      "loss": 0.1159,
      "step": 18392
    },
    {
      "epoch": 0.5365832312270261,
      "grad_norm": 0.9634830345396149,
      "learning_rate": 4.650919152461342e-06,
      "loss": 0.1004,
      "step": 18393
    },
    {
      "epoch": 0.5366124044576697,
      "grad_norm": 0.8857535844913024,
      "learning_rate": 4.650447872662651e-06,
      "loss": 0.1382,
      "step": 18394
    },
    {
      "epoch": 0.5366415776883132,
      "grad_norm": 0.8063250823663929,
      "learning_rate": 4.649976595984678e-06,
      "loss": 0.1289,
      "step": 18395
    },
    {
      "epoch": 0.5366707509189568,
      "grad_norm": 0.8622685643994183,
      "learning_rate": 4.649505322431631e-06,
      "loss": 0.1182,
      "step": 18396
    },
    {
      "epoch": 0.5366999241496003,
      "grad_norm": 1.0892713923536639,
      "learning_rate": 4.649034052007714e-06,
      "loss": 0.1478,
      "step": 18397
    },
    {
      "epoch": 0.5367290973802439,
      "grad_norm": 0.8673382068332008,
      "learning_rate": 4.648562784717141e-06,
      "loss": 0.1265,
      "step": 18398
    },
    {
      "epoch": 0.5367582706108874,
      "grad_norm": 0.8608091835670153,
      "learning_rate": 4.648091520564114e-06,
      "loss": 0.1288,
      "step": 18399
    },
    {
      "epoch": 0.536787443841531,
      "grad_norm": 0.856921635053501,
      "learning_rate": 4.647620259552841e-06,
      "loss": 0.134,
      "step": 18400
    },
    {
      "epoch": 0.5368166170721745,
      "grad_norm": 0.8035283308925599,
      "learning_rate": 4.647149001687532e-06,
      "loss": 0.1229,
      "step": 18401
    },
    {
      "epoch": 0.5368457903028181,
      "grad_norm": 0.7661735637383169,
      "learning_rate": 4.6466777469723916e-06,
      "loss": 0.134,
      "step": 18402
    },
    {
      "epoch": 0.5368749635334616,
      "grad_norm": 1.0260182917262406,
      "learning_rate": 4.646206495411627e-06,
      "loss": 0.1318,
      "step": 18403
    },
    {
      "epoch": 0.5369041367641053,
      "grad_norm": 0.7281531016042366,
      "learning_rate": 4.645735247009447e-06,
      "loss": 0.1312,
      "step": 18404
    },
    {
      "epoch": 0.5369333099947489,
      "grad_norm": 0.9359715608461058,
      "learning_rate": 4.645264001770059e-06,
      "loss": 0.1369,
      "step": 18405
    },
    {
      "epoch": 0.5369624832253924,
      "grad_norm": 0.7474998995425077,
      "learning_rate": 4.6447927596976685e-06,
      "loss": 0.1356,
      "step": 18406
    },
    {
      "epoch": 0.536991656456036,
      "grad_norm": 0.9396636845455977,
      "learning_rate": 4.644321520796484e-06,
      "loss": 0.1235,
      "step": 18407
    },
    {
      "epoch": 0.5370208296866795,
      "grad_norm": 0.900428593419211,
      "learning_rate": 4.6438502850707125e-06,
      "loss": 0.1396,
      "step": 18408
    },
    {
      "epoch": 0.5370500029173231,
      "grad_norm": 0.7501024115615466,
      "learning_rate": 4.643379052524557e-06,
      "loss": 0.1275,
      "step": 18409
    },
    {
      "epoch": 0.5370791761479666,
      "grad_norm": 0.8911013787200006,
      "learning_rate": 4.642907823162232e-06,
      "loss": 0.1263,
      "step": 18410
    },
    {
      "epoch": 0.5371083493786102,
      "grad_norm": 1.267359403901331,
      "learning_rate": 4.642436596987939e-06,
      "loss": 0.1241,
      "step": 18411
    },
    {
      "epoch": 0.5371375226092537,
      "grad_norm": 1.2193543182524031,
      "learning_rate": 4.6419653740058875e-06,
      "loss": 0.1349,
      "step": 18412
    },
    {
      "epoch": 0.5371666958398973,
      "grad_norm": 0.869884365322348,
      "learning_rate": 4.6414941542202854e-06,
      "loss": 0.1479,
      "step": 18413
    },
    {
      "epoch": 0.5371958690705408,
      "grad_norm": 0.9270581440670198,
      "learning_rate": 4.6410229376353355e-06,
      "loss": 0.1255,
      "step": 18414
    },
    {
      "epoch": 0.5372250423011844,
      "grad_norm": 0.8335069150306736,
      "learning_rate": 4.6405517242552465e-06,
      "loss": 0.1413,
      "step": 18415
    },
    {
      "epoch": 0.537254215531828,
      "grad_norm": 0.9004884093575155,
      "learning_rate": 4.640080514084227e-06,
      "loss": 0.1376,
      "step": 18416
    },
    {
      "epoch": 0.5372833887624715,
      "grad_norm": 0.9495672066905021,
      "learning_rate": 4.639609307126483e-06,
      "loss": 0.1207,
      "step": 18417
    },
    {
      "epoch": 0.5373125619931152,
      "grad_norm": 0.8584298778242642,
      "learning_rate": 4.639138103386222e-06,
      "loss": 0.1182,
      "step": 18418
    },
    {
      "epoch": 0.5373417352237587,
      "grad_norm": 0.9689798923975219,
      "learning_rate": 4.638666902867649e-06,
      "loss": 0.1518,
      "step": 18419
    },
    {
      "epoch": 0.5373709084544023,
      "grad_norm": 0.8713494961194964,
      "learning_rate": 4.63819570557497e-06,
      "loss": 0.1625,
      "step": 18420
    },
    {
      "epoch": 0.5374000816850458,
      "grad_norm": 0.8097312640427291,
      "learning_rate": 4.637724511512394e-06,
      "loss": 0.1019,
      "step": 18421
    },
    {
      "epoch": 0.5374292549156894,
      "grad_norm": 0.7818637297060073,
      "learning_rate": 4.637253320684128e-06,
      "loss": 0.1411,
      "step": 18422
    },
    {
      "epoch": 0.5374584281463329,
      "grad_norm": 0.7725537560170543,
      "learning_rate": 4.636782133094379e-06,
      "loss": 0.1358,
      "step": 18423
    },
    {
      "epoch": 0.5374876013769765,
      "grad_norm": 0.9685417795608715,
      "learning_rate": 4.636310948747351e-06,
      "loss": 0.1161,
      "step": 18424
    },
    {
      "epoch": 0.53751677460762,
      "grad_norm": 0.8661703798486129,
      "learning_rate": 4.6358397676472514e-06,
      "loss": 0.1179,
      "step": 18425
    },
    {
      "epoch": 0.5375459478382636,
      "grad_norm": 0.9748207706114557,
      "learning_rate": 4.63536858979829e-06,
      "loss": 0.1328,
      "step": 18426
    },
    {
      "epoch": 0.5375751210689071,
      "grad_norm": 0.8068977563207552,
      "learning_rate": 4.634897415204665e-06,
      "loss": 0.1255,
      "step": 18427
    },
    {
      "epoch": 0.5376042942995507,
      "grad_norm": 0.9814118356836369,
      "learning_rate": 4.6344262438705945e-06,
      "loss": 0.1238,
      "step": 18428
    },
    {
      "epoch": 0.5376334675301943,
      "grad_norm": 0.8697629592549748,
      "learning_rate": 4.633955075800277e-06,
      "loss": 0.1236,
      "step": 18429
    },
    {
      "epoch": 0.5376626407608378,
      "grad_norm": 0.818823172399622,
      "learning_rate": 4.633483910997921e-06,
      "loss": 0.1373,
      "step": 18430
    },
    {
      "epoch": 0.5376918139914815,
      "grad_norm": 1.1620154208251865,
      "learning_rate": 4.633012749467735e-06,
      "loss": 0.1327,
      "step": 18431
    },
    {
      "epoch": 0.537720987222125,
      "grad_norm": 0.9945830533717772,
      "learning_rate": 4.632541591213922e-06,
      "loss": 0.1381,
      "step": 18432
    },
    {
      "epoch": 0.5377501604527686,
      "grad_norm": 1.0759595745860047,
      "learning_rate": 4.6320704362406895e-06,
      "loss": 0.1394,
      "step": 18433
    },
    {
      "epoch": 0.5377793336834121,
      "grad_norm": 1.0332282875284455,
      "learning_rate": 4.6315992845522445e-06,
      "loss": 0.1262,
      "step": 18434
    },
    {
      "epoch": 0.5378085069140557,
      "grad_norm": 1.0009817731707173,
      "learning_rate": 4.631128136152795e-06,
      "loss": 0.1424,
      "step": 18435
    },
    {
      "epoch": 0.5378376801446992,
      "grad_norm": 0.829985548869482,
      "learning_rate": 4.6306569910465435e-06,
      "loss": 0.1438,
      "step": 18436
    },
    {
      "epoch": 0.5378668533753428,
      "grad_norm": 0.9543572266061056,
      "learning_rate": 4.630185849237699e-06,
      "loss": 0.1326,
      "step": 18437
    },
    {
      "epoch": 0.5378960266059863,
      "grad_norm": 1.1472750665460945,
      "learning_rate": 4.629714710730468e-06,
      "loss": 0.1523,
      "step": 18438
    },
    {
      "epoch": 0.5379251998366299,
      "grad_norm": 0.9609814818823639,
      "learning_rate": 4.629243575529052e-06,
      "loss": 0.1196,
      "step": 18439
    },
    {
      "epoch": 0.5379543730672735,
      "grad_norm": 1.0839039490434084,
      "learning_rate": 4.628772443637664e-06,
      "loss": 0.1168,
      "step": 18440
    },
    {
      "epoch": 0.537983546297917,
      "grad_norm": 0.9058019196393359,
      "learning_rate": 4.628301315060506e-06,
      "loss": 0.127,
      "step": 18441
    },
    {
      "epoch": 0.5380127195285606,
      "grad_norm": 0.7880115419121816,
      "learning_rate": 4.627830189801785e-06,
      "loss": 0.1108,
      "step": 18442
    },
    {
      "epoch": 0.5380418927592041,
      "grad_norm": 0.8383131103373016,
      "learning_rate": 4.627359067865709e-06,
      "loss": 0.1246,
      "step": 18443
    },
    {
      "epoch": 0.5380710659898477,
      "grad_norm": 0.7706655732006188,
      "learning_rate": 4.6268879492564815e-06,
      "loss": 0.1326,
      "step": 18444
    },
    {
      "epoch": 0.5381002392204913,
      "grad_norm": 0.6893824246782932,
      "learning_rate": 4.626416833978307e-06,
      "loss": 0.1111,
      "step": 18445
    },
    {
      "epoch": 0.5381294124511349,
      "grad_norm": 0.7094778064231246,
      "learning_rate": 4.6259457220353955e-06,
      "loss": 0.1244,
      "step": 18446
    },
    {
      "epoch": 0.5381585856817784,
      "grad_norm": 0.816157148753955,
      "learning_rate": 4.625474613431951e-06,
      "loss": 0.1257,
      "step": 18447
    },
    {
      "epoch": 0.538187758912422,
      "grad_norm": 0.7448126542639731,
      "learning_rate": 4.625003508172181e-06,
      "loss": 0.1234,
      "step": 18448
    },
    {
      "epoch": 0.5382169321430655,
      "grad_norm": 0.7032555178388354,
      "learning_rate": 4.624532406260289e-06,
      "loss": 0.1298,
      "step": 18449
    },
    {
      "epoch": 0.5382461053737091,
      "grad_norm": 0.7566557780826755,
      "learning_rate": 4.6240613077004825e-06,
      "loss": 0.1118,
      "step": 18450
    },
    {
      "epoch": 0.5382752786043526,
      "grad_norm": 1.024552593339411,
      "learning_rate": 4.623590212496966e-06,
      "loss": 0.1501,
      "step": 18451
    },
    {
      "epoch": 0.5383044518349962,
      "grad_norm": 1.002637863063864,
      "learning_rate": 4.6231191206539464e-06,
      "loss": 0.1304,
      "step": 18452
    },
    {
      "epoch": 0.5383336250656398,
      "grad_norm": 0.8508473027450237,
      "learning_rate": 4.622648032175631e-06,
      "loss": 0.1146,
      "step": 18453
    },
    {
      "epoch": 0.5383627982962833,
      "grad_norm": 0.7604976770223463,
      "learning_rate": 4.622176947066223e-06,
      "loss": 0.1287,
      "step": 18454
    },
    {
      "epoch": 0.5383919715269269,
      "grad_norm": 0.8756015731827811,
      "learning_rate": 4.621705865329928e-06,
      "loss": 0.1435,
      "step": 18455
    },
    {
      "epoch": 0.5384211447575704,
      "grad_norm": 0.9054175597379397,
      "learning_rate": 4.621234786970955e-06,
      "loss": 0.1207,
      "step": 18456
    },
    {
      "epoch": 0.538450317988214,
      "grad_norm": 0.7993475119439174,
      "learning_rate": 4.620763711993504e-06,
      "loss": 0.1231,
      "step": 18457
    },
    {
      "epoch": 0.5384794912188576,
      "grad_norm": 1.0069903678745922,
      "learning_rate": 4.620292640401786e-06,
      "loss": 0.1306,
      "step": 18458
    },
    {
      "epoch": 0.5385086644495012,
      "grad_norm": 0.7675858475618413,
      "learning_rate": 4.619821572200005e-06,
      "loss": 0.1371,
      "step": 18459
    },
    {
      "epoch": 0.5385378376801447,
      "grad_norm": 0.9962535852556412,
      "learning_rate": 4.6193505073923655e-06,
      "loss": 0.1272,
      "step": 18460
    },
    {
      "epoch": 0.5385670109107883,
      "grad_norm": 0.7955796040305474,
      "learning_rate": 4.6188794459830756e-06,
      "loss": 0.1281,
      "step": 18461
    },
    {
      "epoch": 0.5385961841414318,
      "grad_norm": 0.803729078148684,
      "learning_rate": 4.618408387976337e-06,
      "loss": 0.131,
      "step": 18462
    },
    {
      "epoch": 0.5386253573720754,
      "grad_norm": 0.8122718679589646,
      "learning_rate": 4.617937333376356e-06,
      "loss": 0.1207,
      "step": 18463
    },
    {
      "epoch": 0.538654530602719,
      "grad_norm": 0.9027543324881537,
      "learning_rate": 4.617466282187341e-06,
      "loss": 0.1115,
      "step": 18464
    },
    {
      "epoch": 0.5386837038333625,
      "grad_norm": 1.1345576211791466,
      "learning_rate": 4.616995234413498e-06,
      "loss": 0.13,
      "step": 18465
    },
    {
      "epoch": 0.5387128770640061,
      "grad_norm": 0.9931104688525274,
      "learning_rate": 4.616524190059028e-06,
      "loss": 0.1356,
      "step": 18466
    },
    {
      "epoch": 0.5387420502946496,
      "grad_norm": 0.9092572288132816,
      "learning_rate": 4.616053149128137e-06,
      "loss": 0.1498,
      "step": 18467
    },
    {
      "epoch": 0.5387712235252932,
      "grad_norm": 0.8681668748462004,
      "learning_rate": 4.615582111625035e-06,
      "loss": 0.1703,
      "step": 18468
    },
    {
      "epoch": 0.5388003967559367,
      "grad_norm": 0.9380233133435865,
      "learning_rate": 4.61511107755392e-06,
      "loss": 0.1305,
      "step": 18469
    },
    {
      "epoch": 0.5388295699865803,
      "grad_norm": 0.8526693956400522,
      "learning_rate": 4.614640046919004e-06,
      "loss": 0.1352,
      "step": 18470
    },
    {
      "epoch": 0.5388587432172238,
      "grad_norm": 0.8469171229904489,
      "learning_rate": 4.6141690197244895e-06,
      "loss": 0.1688,
      "step": 18471
    },
    {
      "epoch": 0.5388879164478675,
      "grad_norm": 1.0020668817402003,
      "learning_rate": 4.613697995974582e-06,
      "loss": 0.1278,
      "step": 18472
    },
    {
      "epoch": 0.538917089678511,
      "grad_norm": 0.84438952777186,
      "learning_rate": 4.613226975673488e-06,
      "loss": 0.1405,
      "step": 18473
    },
    {
      "epoch": 0.5389462629091546,
      "grad_norm": 0.945618628826909,
      "learning_rate": 4.61275595882541e-06,
      "loss": 0.1465,
      "step": 18474
    },
    {
      "epoch": 0.5389754361397981,
      "grad_norm": 1.0235841023439358,
      "learning_rate": 4.612284945434552e-06,
      "loss": 0.1443,
      "step": 18475
    },
    {
      "epoch": 0.5390046093704417,
      "grad_norm": 0.6546548950283324,
      "learning_rate": 4.611813935505124e-06,
      "loss": 0.1225,
      "step": 18476
    },
    {
      "epoch": 0.5390337826010853,
      "grad_norm": 1.0493183644750592,
      "learning_rate": 4.611342929041327e-06,
      "loss": 0.1713,
      "step": 18477
    },
    {
      "epoch": 0.5390629558317288,
      "grad_norm": 0.9463567859642354,
      "learning_rate": 4.61087192604737e-06,
      "loss": 0.1094,
      "step": 18478
    },
    {
      "epoch": 0.5390921290623724,
      "grad_norm": 0.772709685839752,
      "learning_rate": 4.610400926527454e-06,
      "loss": 0.1027,
      "step": 18479
    },
    {
      "epoch": 0.5391213022930159,
      "grad_norm": 0.7836107240767829,
      "learning_rate": 4.609929930485785e-06,
      "loss": 0.1312,
      "step": 18480
    },
    {
      "epoch": 0.5391504755236595,
      "grad_norm": 1.0118977320199922,
      "learning_rate": 4.609458937926568e-06,
      "loss": 0.1449,
      "step": 18481
    },
    {
      "epoch": 0.539179648754303,
      "grad_norm": 0.7708828717475101,
      "learning_rate": 4.608987948854009e-06,
      "loss": 0.1142,
      "step": 18482
    },
    {
      "epoch": 0.5392088219849466,
      "grad_norm": 0.7905709797869556,
      "learning_rate": 4.608516963272314e-06,
      "loss": 0.118,
      "step": 18483
    },
    {
      "epoch": 0.5392379952155901,
      "grad_norm": 0.8069241387258512,
      "learning_rate": 4.6080459811856845e-06,
      "loss": 0.1328,
      "step": 18484
    },
    {
      "epoch": 0.5392671684462338,
      "grad_norm": 1.0309158279777073,
      "learning_rate": 4.6075750025983274e-06,
      "loss": 0.1236,
      "step": 18485
    },
    {
      "epoch": 0.5392963416768773,
      "grad_norm": 0.7039157364644283,
      "learning_rate": 4.607104027514448e-06,
      "loss": 0.1254,
      "step": 18486
    },
    {
      "epoch": 0.5393255149075209,
      "grad_norm": 0.8343523265323943,
      "learning_rate": 4.606633055938247e-06,
      "loss": 0.1404,
      "step": 18487
    },
    {
      "epoch": 0.5393546881381645,
      "grad_norm": 0.6620159980620091,
      "learning_rate": 4.606162087873934e-06,
      "loss": 0.1576,
      "step": 18488
    },
    {
      "epoch": 0.539383861368808,
      "grad_norm": 0.7441419946854502,
      "learning_rate": 4.605691123325712e-06,
      "loss": 0.1205,
      "step": 18489
    },
    {
      "epoch": 0.5394130345994516,
      "grad_norm": 1.0306287315275824,
      "learning_rate": 4.605220162297785e-06,
      "loss": 0.1245,
      "step": 18490
    },
    {
      "epoch": 0.5394422078300951,
      "grad_norm": 0.9544581723560933,
      "learning_rate": 4.60474920479436e-06,
      "loss": 0.1322,
      "step": 18491
    },
    {
      "epoch": 0.5394713810607387,
      "grad_norm": 0.9809156334129635,
      "learning_rate": 4.604278250819638e-06,
      "loss": 0.1248,
      "step": 18492
    },
    {
      "epoch": 0.5395005542913822,
      "grad_norm": 0.9011113477017791,
      "learning_rate": 4.603807300377825e-06,
      "loss": 0.1178,
      "step": 18493
    },
    {
      "epoch": 0.5395297275220258,
      "grad_norm": 0.8838891360815239,
      "learning_rate": 4.603336353473126e-06,
      "loss": 0.134,
      "step": 18494
    },
    {
      "epoch": 0.5395589007526693,
      "grad_norm": 0.9477505501652712,
      "learning_rate": 4.602865410109747e-06,
      "loss": 0.1588,
      "step": 18495
    },
    {
      "epoch": 0.5395880739833129,
      "grad_norm": 1.1140434308745006,
      "learning_rate": 4.602394470291889e-06,
      "loss": 0.1084,
      "step": 18496
    },
    {
      "epoch": 0.5396172472139564,
      "grad_norm": 0.8818063594020238,
      "learning_rate": 4.601923534023759e-06,
      "loss": 0.0971,
      "step": 18497
    },
    {
      "epoch": 0.5396464204446,
      "grad_norm": 0.8463094940252679,
      "learning_rate": 4.601452601309562e-06,
      "loss": 0.1281,
      "step": 18498
    },
    {
      "epoch": 0.5396755936752436,
      "grad_norm": 0.8236064778402271,
      "learning_rate": 4.600981672153497e-06,
      "loss": 0.1254,
      "step": 18499
    },
    {
      "epoch": 0.5397047669058872,
      "grad_norm": 0.762362504079061,
      "learning_rate": 4.600510746559776e-06,
      "loss": 0.1093,
      "step": 18500
    },
    {
      "epoch": 0.5397339401365308,
      "grad_norm": 0.9572674060564242,
      "learning_rate": 4.600039824532599e-06,
      "loss": 0.1193,
      "step": 18501
    },
    {
      "epoch": 0.5397631133671743,
      "grad_norm": 0.7105556118142405,
      "learning_rate": 4.599568906076169e-06,
      "loss": 0.1038,
      "step": 18502
    },
    {
      "epoch": 0.5397922865978179,
      "grad_norm": 1.4230552122953444,
      "learning_rate": 4.599097991194695e-06,
      "loss": 0.1558,
      "step": 18503
    },
    {
      "epoch": 0.5398214598284614,
      "grad_norm": 1.0784991990489363,
      "learning_rate": 4.598627079892378e-06,
      "loss": 0.1072,
      "step": 18504
    },
    {
      "epoch": 0.539850633059105,
      "grad_norm": 0.7736688661207487,
      "learning_rate": 4.59815617217342e-06,
      "loss": 0.1289,
      "step": 18505
    },
    {
      "epoch": 0.5398798062897485,
      "grad_norm": 0.9221273996752505,
      "learning_rate": 4.59768526804203e-06,
      "loss": 0.1189,
      "step": 18506
    },
    {
      "epoch": 0.5399089795203921,
      "grad_norm": 0.821146265980746,
      "learning_rate": 4.597214367502409e-06,
      "loss": 0.1321,
      "step": 18507
    },
    {
      "epoch": 0.5399381527510356,
      "grad_norm": 0.7993447388617366,
      "learning_rate": 4.596743470558764e-06,
      "loss": 0.1314,
      "step": 18508
    },
    {
      "epoch": 0.5399673259816792,
      "grad_norm": 0.7863949008157836,
      "learning_rate": 4.596272577215295e-06,
      "loss": 0.1096,
      "step": 18509
    },
    {
      "epoch": 0.5399964992123227,
      "grad_norm": 0.7868174842687745,
      "learning_rate": 4.595801687476209e-06,
      "loss": 0.1194,
      "step": 18510
    },
    {
      "epoch": 0.5400256724429663,
      "grad_norm": 0.8563130500686809,
      "learning_rate": 4.595330801345707e-06,
      "loss": 0.1379,
      "step": 18511
    },
    {
      "epoch": 0.5400548456736098,
      "grad_norm": 0.8570011607210023,
      "learning_rate": 4.594859918827996e-06,
      "loss": 0.1217,
      "step": 18512
    },
    {
      "epoch": 0.5400840189042535,
      "grad_norm": 0.7020689895705245,
      "learning_rate": 4.594389039927281e-06,
      "loss": 0.1245,
      "step": 18513
    },
    {
      "epoch": 0.5401131921348971,
      "grad_norm": 0.7365351004202181,
      "learning_rate": 4.593918164647763e-06,
      "loss": 0.1227,
      "step": 18514
    },
    {
      "epoch": 0.5401423653655406,
      "grad_norm": 0.9497995167706723,
      "learning_rate": 4.593447292993645e-06,
      "loss": 0.1248,
      "step": 18515
    },
    {
      "epoch": 0.5401715385961842,
      "grad_norm": 0.9012064912899808,
      "learning_rate": 4.592976424969135e-06,
      "loss": 0.1325,
      "step": 18516
    },
    {
      "epoch": 0.5402007118268277,
      "grad_norm": 1.0477580990927593,
      "learning_rate": 4.592505560578431e-06,
      "loss": 0.1188,
      "step": 18517
    },
    {
      "epoch": 0.5402298850574713,
      "grad_norm": 0.8948818940261655,
      "learning_rate": 4.592034699825743e-06,
      "loss": 0.1408,
      "step": 18518
    },
    {
      "epoch": 0.5402590582881148,
      "grad_norm": 0.8467170548211386,
      "learning_rate": 4.59156384271527e-06,
      "loss": 0.137,
      "step": 18519
    },
    {
      "epoch": 0.5402882315187584,
      "grad_norm": 0.811727248192837,
      "learning_rate": 4.591092989251219e-06,
      "loss": 0.1321,
      "step": 18520
    },
    {
      "epoch": 0.5403174047494019,
      "grad_norm": 1.043395801216942,
      "learning_rate": 4.590622139437792e-06,
      "loss": 0.1317,
      "step": 18521
    },
    {
      "epoch": 0.5403465779800455,
      "grad_norm": 1.1229914141576245,
      "learning_rate": 4.590151293279192e-06,
      "loss": 0.1393,
      "step": 18522
    },
    {
      "epoch": 0.540375751210689,
      "grad_norm": 0.8158467164834956,
      "learning_rate": 4.589680450779622e-06,
      "loss": 0.1455,
      "step": 18523
    },
    {
      "epoch": 0.5404049244413326,
      "grad_norm": 0.84450730775348,
      "learning_rate": 4.589209611943289e-06,
      "loss": 0.134,
      "step": 18524
    },
    {
      "epoch": 0.5404340976719761,
      "grad_norm": 0.8061892052318363,
      "learning_rate": 4.5887387767743955e-06,
      "loss": 0.1312,
      "step": 18525
    },
    {
      "epoch": 0.5404632709026198,
      "grad_norm": 0.8457112123241478,
      "learning_rate": 4.588267945277142e-06,
      "loss": 0.1329,
      "step": 18526
    },
    {
      "epoch": 0.5404924441332634,
      "grad_norm": 0.8079282516340712,
      "learning_rate": 4.587797117455735e-06,
      "loss": 0.1353,
      "step": 18527
    },
    {
      "epoch": 0.5405216173639069,
      "grad_norm": 0.9582440122427279,
      "learning_rate": 4.587326293314378e-06,
      "loss": 0.1143,
      "step": 18528
    },
    {
      "epoch": 0.5405507905945505,
      "grad_norm": 0.9462693153503788,
      "learning_rate": 4.586855472857269e-06,
      "loss": 0.1228,
      "step": 18529
    },
    {
      "epoch": 0.540579963825194,
      "grad_norm": 0.7384460324234042,
      "learning_rate": 4.58638465608862e-06,
      "loss": 0.1174,
      "step": 18530
    },
    {
      "epoch": 0.5406091370558376,
      "grad_norm": 0.7686077928824532,
      "learning_rate": 4.585913843012628e-06,
      "loss": 0.1306,
      "step": 18531
    },
    {
      "epoch": 0.5406383102864811,
      "grad_norm": 0.9531871625500782,
      "learning_rate": 4.5854430336335e-06,
      "loss": 0.1323,
      "step": 18532
    },
    {
      "epoch": 0.5406674835171247,
      "grad_norm": 0.8085206343946517,
      "learning_rate": 4.584972227955437e-06,
      "loss": 0.1663,
      "step": 18533
    },
    {
      "epoch": 0.5406966567477682,
      "grad_norm": 0.9274307278452015,
      "learning_rate": 4.584501425982641e-06,
      "loss": 0.1396,
      "step": 18534
    },
    {
      "epoch": 0.5407258299784118,
      "grad_norm": 0.8118635885139049,
      "learning_rate": 4.584030627719319e-06,
      "loss": 0.1288,
      "step": 18535
    },
    {
      "epoch": 0.5407550032090553,
      "grad_norm": 0.6881775262599032,
      "learning_rate": 4.5835598331696725e-06,
      "loss": 0.1127,
      "step": 18536
    },
    {
      "epoch": 0.5407841764396989,
      "grad_norm": 0.8751145615556514,
      "learning_rate": 4.5830890423379035e-06,
      "loss": 0.1332,
      "step": 18537
    },
    {
      "epoch": 0.5408133496703424,
      "grad_norm": 0.9482218663944888,
      "learning_rate": 4.582618255228218e-06,
      "loss": 0.1207,
      "step": 18538
    },
    {
      "epoch": 0.540842522900986,
      "grad_norm": 0.8477653576217563,
      "learning_rate": 4.582147471844814e-06,
      "loss": 0.1437,
      "step": 18539
    },
    {
      "epoch": 0.5408716961316297,
      "grad_norm": 0.7862490789715032,
      "learning_rate": 4.581676692191899e-06,
      "loss": 0.1286,
      "step": 18540
    },
    {
      "epoch": 0.5409008693622732,
      "grad_norm": 0.7373498074330986,
      "learning_rate": 4.581205916273675e-06,
      "loss": 0.1359,
      "step": 18541
    },
    {
      "epoch": 0.5409300425929168,
      "grad_norm": 0.8518693891851921,
      "learning_rate": 4.580735144094343e-06,
      "loss": 0.1095,
      "step": 18542
    },
    {
      "epoch": 0.5409592158235603,
      "grad_norm": 0.7929335141174001,
      "learning_rate": 4.58026437565811e-06,
      "loss": 0.1223,
      "step": 18543
    },
    {
      "epoch": 0.5409883890542039,
      "grad_norm": 0.949890133436839,
      "learning_rate": 4.579793610969175e-06,
      "loss": 0.1477,
      "step": 18544
    },
    {
      "epoch": 0.5410175622848474,
      "grad_norm": 0.6923494122570102,
      "learning_rate": 4.579322850031743e-06,
      "loss": 0.1268,
      "step": 18545
    },
    {
      "epoch": 0.541046735515491,
      "grad_norm": 0.9872506294900837,
      "learning_rate": 4.578852092850014e-06,
      "loss": 0.1211,
      "step": 18546
    },
    {
      "epoch": 0.5410759087461345,
      "grad_norm": 0.7901695596699595,
      "learning_rate": 4.578381339428197e-06,
      "loss": 0.1559,
      "step": 18547
    },
    {
      "epoch": 0.5411050819767781,
      "grad_norm": 0.7477654018944593,
      "learning_rate": 4.5779105897704874e-06,
      "loss": 0.1287,
      "step": 18548
    },
    {
      "epoch": 0.5411342552074216,
      "grad_norm": 1.0079866343956774,
      "learning_rate": 4.577439843881093e-06,
      "loss": 0.1306,
      "step": 18549
    },
    {
      "epoch": 0.5411634284380652,
      "grad_norm": 0.8164972594869021,
      "learning_rate": 4.5769691017642135e-06,
      "loss": 0.1234,
      "step": 18550
    },
    {
      "epoch": 0.5411926016687088,
      "grad_norm": 0.7352347377635433,
      "learning_rate": 4.5764983634240554e-06,
      "loss": 0.1489,
      "step": 18551
    },
    {
      "epoch": 0.5412217748993523,
      "grad_norm": 0.9238793751207289,
      "learning_rate": 4.576027628864815e-06,
      "loss": 0.1356,
      "step": 18552
    },
    {
      "epoch": 0.541250948129996,
      "grad_norm": 0.9410922114238712,
      "learning_rate": 4.575556898090701e-06,
      "loss": 0.1336,
      "step": 18553
    },
    {
      "epoch": 0.5412801213606395,
      "grad_norm": 0.890025217300632,
      "learning_rate": 4.575086171105913e-06,
      "loss": 0.1284,
      "step": 18554
    },
    {
      "epoch": 0.5413092945912831,
      "grad_norm": 0.7760769456198849,
      "learning_rate": 4.574615447914656e-06,
      "loss": 0.1368,
      "step": 18555
    },
    {
      "epoch": 0.5413384678219266,
      "grad_norm": 0.8683222818050699,
      "learning_rate": 4.574144728521129e-06,
      "loss": 0.1483,
      "step": 18556
    },
    {
      "epoch": 0.5413676410525702,
      "grad_norm": 0.7828015637307851,
      "learning_rate": 4.573674012929537e-06,
      "loss": 0.1134,
      "step": 18557
    },
    {
      "epoch": 0.5413968142832137,
      "grad_norm": 0.772556403125213,
      "learning_rate": 4.57320330114408e-06,
      "loss": 0.1245,
      "step": 18558
    },
    {
      "epoch": 0.5414259875138573,
      "grad_norm": 0.8124959768576329,
      "learning_rate": 4.572732593168963e-06,
      "loss": 0.1484,
      "step": 18559
    },
    {
      "epoch": 0.5414551607445008,
      "grad_norm": 0.8928366475724265,
      "learning_rate": 4.5722618890083886e-06,
      "loss": 0.1437,
      "step": 18560
    },
    {
      "epoch": 0.5414843339751444,
      "grad_norm": 0.8576042037903062,
      "learning_rate": 4.571791188666556e-06,
      "loss": 0.1161,
      "step": 18561
    },
    {
      "epoch": 0.541513507205788,
      "grad_norm": 0.6334579477388942,
      "learning_rate": 4.571320492147671e-06,
      "loss": 0.1439,
      "step": 18562
    },
    {
      "epoch": 0.5415426804364315,
      "grad_norm": 1.2423093574568127,
      "learning_rate": 4.570849799455935e-06,
      "loss": 0.1575,
      "step": 18563
    },
    {
      "epoch": 0.541571853667075,
      "grad_norm": 1.0674356324885308,
      "learning_rate": 4.5703791105955465e-06,
      "loss": 0.1241,
      "step": 18564
    },
    {
      "epoch": 0.5416010268977186,
      "grad_norm": 0.8917905360190724,
      "learning_rate": 4.5699084255707135e-06,
      "loss": 0.1091,
      "step": 18565
    },
    {
      "epoch": 0.5416302001283622,
      "grad_norm": 0.9229579882678267,
      "learning_rate": 4.569437744385634e-06,
      "loss": 0.1418,
      "step": 18566
    },
    {
      "epoch": 0.5416593733590058,
      "grad_norm": 0.7628392567268024,
      "learning_rate": 4.568967067044512e-06,
      "loss": 0.1292,
      "step": 18567
    },
    {
      "epoch": 0.5416885465896494,
      "grad_norm": 1.3246170348709403,
      "learning_rate": 4.56849639355155e-06,
      "loss": 0.0971,
      "step": 18568
    },
    {
      "epoch": 0.5417177198202929,
      "grad_norm": 0.845091498366653,
      "learning_rate": 4.568025723910948e-06,
      "loss": 0.1319,
      "step": 18569
    },
    {
      "epoch": 0.5417468930509365,
      "grad_norm": 0.6984478644385249,
      "learning_rate": 4.567555058126909e-06,
      "loss": 0.131,
      "step": 18570
    },
    {
      "epoch": 0.54177606628158,
      "grad_norm": 1.070303298446535,
      "learning_rate": 4.567084396203636e-06,
      "loss": 0.1449,
      "step": 18571
    },
    {
      "epoch": 0.5418052395122236,
      "grad_norm": 1.1262681782985426,
      "learning_rate": 4.566613738145329e-06,
      "loss": 0.1249,
      "step": 18572
    },
    {
      "epoch": 0.5418344127428671,
      "grad_norm": 0.8433029057458278,
      "learning_rate": 4.566143083956193e-06,
      "loss": 0.1589,
      "step": 18573
    },
    {
      "epoch": 0.5418635859735107,
      "grad_norm": 0.7064582794321326,
      "learning_rate": 4.565672433640428e-06,
      "loss": 0.1134,
      "step": 18574
    },
    {
      "epoch": 0.5418927592041543,
      "grad_norm": 0.951049905265706,
      "learning_rate": 4.565201787202234e-06,
      "loss": 0.1102,
      "step": 18575
    },
    {
      "epoch": 0.5419219324347978,
      "grad_norm": 0.8282929047926882,
      "learning_rate": 4.564731144645814e-06,
      "loss": 0.148,
      "step": 18576
    },
    {
      "epoch": 0.5419511056654414,
      "grad_norm": 0.7453905798825045,
      "learning_rate": 4.564260505975373e-06,
      "loss": 0.1303,
      "step": 18577
    },
    {
      "epoch": 0.5419802788960849,
      "grad_norm": 0.7995034987138877,
      "learning_rate": 4.5637898711951086e-06,
      "loss": 0.1259,
      "step": 18578
    },
    {
      "epoch": 0.5420094521267285,
      "grad_norm": 0.8275938207800875,
      "learning_rate": 4.563319240309225e-06,
      "loss": 0.1276,
      "step": 18579
    },
    {
      "epoch": 0.5420386253573721,
      "grad_norm": 0.7407051337221364,
      "learning_rate": 4.562848613321922e-06,
      "loss": 0.1536,
      "step": 18580
    },
    {
      "epoch": 0.5420677985880157,
      "grad_norm": 0.7538172452892475,
      "learning_rate": 4.562377990237404e-06,
      "loss": 0.1321,
      "step": 18581
    },
    {
      "epoch": 0.5420969718186592,
      "grad_norm": 1.905030882722415,
      "learning_rate": 4.561907371059868e-06,
      "loss": 0.1274,
      "step": 18582
    },
    {
      "epoch": 0.5421261450493028,
      "grad_norm": 1.0625206190115513,
      "learning_rate": 4.5614367557935205e-06,
      "loss": 0.1152,
      "step": 18583
    },
    {
      "epoch": 0.5421553182799463,
      "grad_norm": 1.0209787428644586,
      "learning_rate": 4.56096614444256e-06,
      "loss": 0.155,
      "step": 18584
    },
    {
      "epoch": 0.5421844915105899,
      "grad_norm": 0.7645543365119774,
      "learning_rate": 4.560495537011191e-06,
      "loss": 0.1311,
      "step": 18585
    },
    {
      "epoch": 0.5422136647412334,
      "grad_norm": 1.1025132199310623,
      "learning_rate": 4.560024933503611e-06,
      "loss": 0.1313,
      "step": 18586
    },
    {
      "epoch": 0.542242837971877,
      "grad_norm": 0.9262771547987466,
      "learning_rate": 4.559554333924024e-06,
      "loss": 0.1334,
      "step": 18587
    },
    {
      "epoch": 0.5422720112025206,
      "grad_norm": 0.8751620781825704,
      "learning_rate": 4.559083738276629e-06,
      "loss": 0.1463,
      "step": 18588
    },
    {
      "epoch": 0.5423011844331641,
      "grad_norm": 0.9838603475520283,
      "learning_rate": 4.55861314656563e-06,
      "loss": 0.1289,
      "step": 18589
    },
    {
      "epoch": 0.5423303576638077,
      "grad_norm": 1.013986579247909,
      "learning_rate": 4.558142558795229e-06,
      "loss": 0.1137,
      "step": 18590
    },
    {
      "epoch": 0.5423595308944512,
      "grad_norm": 0.9428278662199534,
      "learning_rate": 4.5576719749696255e-06,
      "loss": 0.1319,
      "step": 18591
    },
    {
      "epoch": 0.5423887041250948,
      "grad_norm": 0.9500908104131973,
      "learning_rate": 4.55720139509302e-06,
      "loss": 0.1423,
      "step": 18592
    },
    {
      "epoch": 0.5424178773557383,
      "grad_norm": 0.7443225951884858,
      "learning_rate": 4.556730819169617e-06,
      "loss": 0.1269,
      "step": 18593
    },
    {
      "epoch": 0.542447050586382,
      "grad_norm": 0.8387766165065289,
      "learning_rate": 4.556260247203611e-06,
      "loss": 0.1208,
      "step": 18594
    },
    {
      "epoch": 0.5424762238170255,
      "grad_norm": 0.8724240408580441,
      "learning_rate": 4.55578967919921e-06,
      "loss": 0.1294,
      "step": 18595
    },
    {
      "epoch": 0.5425053970476691,
      "grad_norm": 0.9397541649578579,
      "learning_rate": 4.555319115160613e-06,
      "loss": 0.1193,
      "step": 18596
    },
    {
      "epoch": 0.5425345702783126,
      "grad_norm": 0.8715328430253131,
      "learning_rate": 4.554848555092021e-06,
      "loss": 0.1328,
      "step": 18597
    },
    {
      "epoch": 0.5425637435089562,
      "grad_norm": 1.0141080824533748,
      "learning_rate": 4.554377998997635e-06,
      "loss": 0.1373,
      "step": 18598
    },
    {
      "epoch": 0.5425929167395998,
      "grad_norm": 0.977833134828352,
      "learning_rate": 4.553907446881655e-06,
      "loss": 0.1457,
      "step": 18599
    },
    {
      "epoch": 0.5426220899702433,
      "grad_norm": 0.7314597419648076,
      "learning_rate": 4.553436898748283e-06,
      "loss": 0.1321,
      "step": 18600
    },
    {
      "epoch": 0.5426512632008869,
      "grad_norm": 0.7464298405060341,
      "learning_rate": 4.552966354601719e-06,
      "loss": 0.13,
      "step": 18601
    },
    {
      "epoch": 0.5426804364315304,
      "grad_norm": 0.9192476285587113,
      "learning_rate": 4.552495814446165e-06,
      "loss": 0.124,
      "step": 18602
    },
    {
      "epoch": 0.542709609662174,
      "grad_norm": 0.9471931251716911,
      "learning_rate": 4.552025278285823e-06,
      "loss": 0.1412,
      "step": 18603
    },
    {
      "epoch": 0.5427387828928175,
      "grad_norm": 0.7028199390854473,
      "learning_rate": 4.551554746124891e-06,
      "loss": 0.1122,
      "step": 18604
    },
    {
      "epoch": 0.5427679561234611,
      "grad_norm": 0.8186137046998053,
      "learning_rate": 4.551084217967573e-06,
      "loss": 0.1385,
      "step": 18605
    },
    {
      "epoch": 0.5427971293541046,
      "grad_norm": 0.9767308062095534,
      "learning_rate": 4.550613693818064e-06,
      "loss": 0.1273,
      "step": 18606
    },
    {
      "epoch": 0.5428263025847483,
      "grad_norm": 0.8968856658488537,
      "learning_rate": 4.550143173680573e-06,
      "loss": 0.1382,
      "step": 18607
    },
    {
      "epoch": 0.5428554758153918,
      "grad_norm": 0.8372639466185194,
      "learning_rate": 4.549672657559294e-06,
      "loss": 0.114,
      "step": 18608
    },
    {
      "epoch": 0.5428846490460354,
      "grad_norm": 0.9577222514170669,
      "learning_rate": 4.54920214545843e-06,
      "loss": 0.1362,
      "step": 18609
    },
    {
      "epoch": 0.542913822276679,
      "grad_norm": 0.8554428290139974,
      "learning_rate": 4.5487316373821834e-06,
      "loss": 0.1324,
      "step": 18610
    },
    {
      "epoch": 0.5429429955073225,
      "grad_norm": 0.8378792662146597,
      "learning_rate": 4.548261133334753e-06,
      "loss": 0.1033,
      "step": 18611
    },
    {
      "epoch": 0.542972168737966,
      "grad_norm": 0.8425684330542159,
      "learning_rate": 4.547790633320336e-06,
      "loss": 0.1192,
      "step": 18612
    },
    {
      "epoch": 0.5430013419686096,
      "grad_norm": 0.86600847540191,
      "learning_rate": 4.547320137343138e-06,
      "loss": 0.1343,
      "step": 18613
    },
    {
      "epoch": 0.5430305151992532,
      "grad_norm": 0.8970330089790087,
      "learning_rate": 4.546849645407359e-06,
      "loss": 0.1306,
      "step": 18614
    },
    {
      "epoch": 0.5430596884298967,
      "grad_norm": 1.2082125477194066,
      "learning_rate": 4.546379157517198e-06,
      "loss": 0.1082,
      "step": 18615
    },
    {
      "epoch": 0.5430888616605403,
      "grad_norm": 0.8747763423161303,
      "learning_rate": 4.545908673676855e-06,
      "loss": 0.1427,
      "step": 18616
    },
    {
      "epoch": 0.5431180348911838,
      "grad_norm": 0.9298206824513096,
      "learning_rate": 4.545438193890531e-06,
      "loss": 0.1235,
      "step": 18617
    },
    {
      "epoch": 0.5431472081218274,
      "grad_norm": 0.897925886878897,
      "learning_rate": 4.544967718162425e-06,
      "loss": 0.1105,
      "step": 18618
    },
    {
      "epoch": 0.5431763813524709,
      "grad_norm": 0.7862787817646996,
      "learning_rate": 4.544497246496741e-06,
      "loss": 0.1265,
      "step": 18619
    },
    {
      "epoch": 0.5432055545831145,
      "grad_norm": 0.7829494278367473,
      "learning_rate": 4.544026778897676e-06,
      "loss": 0.1247,
      "step": 18620
    },
    {
      "epoch": 0.5432347278137581,
      "grad_norm": 0.8739761744992527,
      "learning_rate": 4.54355631536943e-06,
      "loss": 0.1229,
      "step": 18621
    },
    {
      "epoch": 0.5432639010444017,
      "grad_norm": 0.7817666258065223,
      "learning_rate": 4.543085855916205e-06,
      "loss": 0.1522,
      "step": 18622
    },
    {
      "epoch": 0.5432930742750453,
      "grad_norm": 0.8743724793823562,
      "learning_rate": 4.542615400542202e-06,
      "loss": 0.1376,
      "step": 18623
    },
    {
      "epoch": 0.5433222475056888,
      "grad_norm": 0.8417347293161281,
      "learning_rate": 4.542144949251615e-06,
      "loss": 0.1304,
      "step": 18624
    },
    {
      "epoch": 0.5433514207363324,
      "grad_norm": 0.8349549582346112,
      "learning_rate": 4.541674502048653e-06,
      "loss": 0.1122,
      "step": 18625
    },
    {
      "epoch": 0.5433805939669759,
      "grad_norm": 0.9844219872835687,
      "learning_rate": 4.54120405893751e-06,
      "loss": 0.1391,
      "step": 18626
    },
    {
      "epoch": 0.5434097671976195,
      "grad_norm": 0.9521398611024925,
      "learning_rate": 4.540733619922388e-06,
      "loss": 0.1398,
      "step": 18627
    },
    {
      "epoch": 0.543438940428263,
      "grad_norm": 1.403520958876466,
      "learning_rate": 4.540263185007487e-06,
      "loss": 0.1415,
      "step": 18628
    },
    {
      "epoch": 0.5434681136589066,
      "grad_norm": 1.0320447509130322,
      "learning_rate": 4.539792754197006e-06,
      "loss": 0.1218,
      "step": 18629
    },
    {
      "epoch": 0.5434972868895501,
      "grad_norm": 1.2343273669027426,
      "learning_rate": 4.539322327495144e-06,
      "loss": 0.1466,
      "step": 18630
    },
    {
      "epoch": 0.5435264601201937,
      "grad_norm": 0.8574991551732334,
      "learning_rate": 4.538851904906103e-06,
      "loss": 0.1337,
      "step": 18631
    },
    {
      "epoch": 0.5435556333508372,
      "grad_norm": 0.8387819903807323,
      "learning_rate": 4.538381486434083e-06,
      "loss": 0.116,
      "step": 18632
    },
    {
      "epoch": 0.5435848065814808,
      "grad_norm": 1.2056660015842662,
      "learning_rate": 4.537911072083284e-06,
      "loss": 0.1494,
      "step": 18633
    },
    {
      "epoch": 0.5436139798121244,
      "grad_norm": 0.9794726032666625,
      "learning_rate": 4.537440661857903e-06,
      "loss": 0.1473,
      "step": 18634
    },
    {
      "epoch": 0.543643153042768,
      "grad_norm": 1.100841704205615,
      "learning_rate": 4.536970255762142e-06,
      "loss": 0.1292,
      "step": 18635
    },
    {
      "epoch": 0.5436723262734116,
      "grad_norm": 0.8550148477909255,
      "learning_rate": 4.536499853800198e-06,
      "loss": 0.137,
      "step": 18636
    },
    {
      "epoch": 0.5437014995040551,
      "grad_norm": 0.8420445117887098,
      "learning_rate": 4.536029455976276e-06,
      "loss": 0.1206,
      "step": 18637
    },
    {
      "epoch": 0.5437306727346987,
      "grad_norm": 1.0208099243470854,
      "learning_rate": 4.53555906229457e-06,
      "loss": 0.1314,
      "step": 18638
    },
    {
      "epoch": 0.5437598459653422,
      "grad_norm": 0.9248521454814603,
      "learning_rate": 4.5350886727592824e-06,
      "loss": 0.119,
      "step": 18639
    },
    {
      "epoch": 0.5437890191959858,
      "grad_norm": 0.8622934723177336,
      "learning_rate": 4.534618287374613e-06,
      "loss": 0.1101,
      "step": 18640
    },
    {
      "epoch": 0.5438181924266293,
      "grad_norm": 0.930709123492009,
      "learning_rate": 4.53414790614476e-06,
      "loss": 0.1512,
      "step": 18641
    },
    {
      "epoch": 0.5438473656572729,
      "grad_norm": 1.0691718997838953,
      "learning_rate": 4.533677529073921e-06,
      "loss": 0.1463,
      "step": 18642
    },
    {
      "epoch": 0.5438765388879164,
      "grad_norm": 0.8207725725808066,
      "learning_rate": 4.5332071561663e-06,
      "loss": 0.1404,
      "step": 18643
    },
    {
      "epoch": 0.54390571211856,
      "grad_norm": 0.8619108900355196,
      "learning_rate": 4.532736787426093e-06,
      "loss": 0.1499,
      "step": 18644
    },
    {
      "epoch": 0.5439348853492035,
      "grad_norm": 0.7963081018313473,
      "learning_rate": 4.5322664228575024e-06,
      "loss": 0.1343,
      "step": 18645
    },
    {
      "epoch": 0.5439640585798471,
      "grad_norm": 1.052160880951865,
      "learning_rate": 4.531796062464724e-06,
      "loss": 0.1287,
      "step": 18646
    },
    {
      "epoch": 0.5439932318104906,
      "grad_norm": 0.8674671496535811,
      "learning_rate": 4.531325706251959e-06,
      "loss": 0.1266,
      "step": 18647
    },
    {
      "epoch": 0.5440224050411343,
      "grad_norm": 0.7351219368697155,
      "learning_rate": 4.530855354223405e-06,
      "loss": 0.1191,
      "step": 18648
    },
    {
      "epoch": 0.5440515782717779,
      "grad_norm": 0.8441911575644822,
      "learning_rate": 4.530385006383263e-06,
      "loss": 0.1256,
      "step": 18649
    },
    {
      "epoch": 0.5440807515024214,
      "grad_norm": 0.8942356063346709,
      "learning_rate": 4.5299146627357325e-06,
      "loss": 0.1166,
      "step": 18650
    },
    {
      "epoch": 0.544109924733065,
      "grad_norm": 0.8453180381197173,
      "learning_rate": 4.5294443232850115e-06,
      "loss": 0.1316,
      "step": 18651
    },
    {
      "epoch": 0.5441390979637085,
      "grad_norm": 0.9194448328534545,
      "learning_rate": 4.528973988035299e-06,
      "loss": 0.118,
      "step": 18652
    },
    {
      "epoch": 0.5441682711943521,
      "grad_norm": 0.8963468507354588,
      "learning_rate": 4.528503656990794e-06,
      "loss": 0.1229,
      "step": 18653
    },
    {
      "epoch": 0.5441974444249956,
      "grad_norm": 0.9618334380729433,
      "learning_rate": 4.528033330155694e-06,
      "loss": 0.1295,
      "step": 18654
    },
    {
      "epoch": 0.5442266176556392,
      "grad_norm": 0.8519382505129798,
      "learning_rate": 4.527563007534203e-06,
      "loss": 0.1303,
      "step": 18655
    },
    {
      "epoch": 0.5442557908862827,
      "grad_norm": 0.9808986787414548,
      "learning_rate": 4.527092689130515e-06,
      "loss": 0.1404,
      "step": 18656
    },
    {
      "epoch": 0.5442849641169263,
      "grad_norm": 0.8217557534225198,
      "learning_rate": 4.526622374948831e-06,
      "loss": 0.1258,
      "step": 18657
    },
    {
      "epoch": 0.5443141373475698,
      "grad_norm": 0.8524935676331612,
      "learning_rate": 4.526152064993351e-06,
      "loss": 0.1387,
      "step": 18658
    },
    {
      "epoch": 0.5443433105782134,
      "grad_norm": 0.7730121827305676,
      "learning_rate": 4.525681759268271e-06,
      "loss": 0.1263,
      "step": 18659
    },
    {
      "epoch": 0.5443724838088569,
      "grad_norm": 0.9901844344913672,
      "learning_rate": 4.525211457777789e-06,
      "loss": 0.1393,
      "step": 18660
    },
    {
      "epoch": 0.5444016570395006,
      "grad_norm": 0.8078128701754148,
      "learning_rate": 4.524741160526107e-06,
      "loss": 0.1363,
      "step": 18661
    },
    {
      "epoch": 0.5444308302701442,
      "grad_norm": 0.8081108537745513,
      "learning_rate": 4.524270867517423e-06,
      "loss": 0.13,
      "step": 18662
    },
    {
      "epoch": 0.5444600035007877,
      "grad_norm": 0.7951821217886085,
      "learning_rate": 4.523800578755936e-06,
      "loss": 0.1267,
      "step": 18663
    },
    {
      "epoch": 0.5444891767314313,
      "grad_norm": 0.7916764994858416,
      "learning_rate": 4.523330294245843e-06,
      "loss": 0.1246,
      "step": 18664
    },
    {
      "epoch": 0.5445183499620748,
      "grad_norm": 0.6801620602796086,
      "learning_rate": 4.522860013991343e-06,
      "loss": 0.115,
      "step": 18665
    },
    {
      "epoch": 0.5445475231927184,
      "grad_norm": 0.8694378580903237,
      "learning_rate": 4.522389737996634e-06,
      "loss": 0.1383,
      "step": 18666
    },
    {
      "epoch": 0.5445766964233619,
      "grad_norm": 0.7734560245462737,
      "learning_rate": 4.5219194662659175e-06,
      "loss": 0.1304,
      "step": 18667
    },
    {
      "epoch": 0.5446058696540055,
      "grad_norm": 0.6987330885457839,
      "learning_rate": 4.521449198803388e-06,
      "loss": 0.1278,
      "step": 18668
    },
    {
      "epoch": 0.544635042884649,
      "grad_norm": 0.7725151046455366,
      "learning_rate": 4.5209789356132475e-06,
      "loss": 0.114,
      "step": 18669
    },
    {
      "epoch": 0.5446642161152926,
      "grad_norm": 0.9217262555492821,
      "learning_rate": 4.520508676699692e-06,
      "loss": 0.1464,
      "step": 18670
    },
    {
      "epoch": 0.5446933893459361,
      "grad_norm": 0.8209866915718372,
      "learning_rate": 4.5200384220669204e-06,
      "loss": 0.1393,
      "step": 18671
    },
    {
      "epoch": 0.5447225625765797,
      "grad_norm": 0.7208983744735961,
      "learning_rate": 4.519568171719131e-06,
      "loss": 0.1274,
      "step": 18672
    },
    {
      "epoch": 0.5447517358072232,
      "grad_norm": 0.8469303619525171,
      "learning_rate": 4.519097925660522e-06,
      "loss": 0.1345,
      "step": 18673
    },
    {
      "epoch": 0.5447809090378668,
      "grad_norm": 0.8605894598785747,
      "learning_rate": 4.518627683895292e-06,
      "loss": 0.1304,
      "step": 18674
    },
    {
      "epoch": 0.5448100822685105,
      "grad_norm": 0.9702323629730008,
      "learning_rate": 4.518157446427641e-06,
      "loss": 0.1364,
      "step": 18675
    },
    {
      "epoch": 0.544839255499154,
      "grad_norm": 0.8255608939055838,
      "learning_rate": 4.517687213261763e-06,
      "loss": 0.1324,
      "step": 18676
    },
    {
      "epoch": 0.5448684287297976,
      "grad_norm": 1.2221890049807367,
      "learning_rate": 4.517216984401859e-06,
      "loss": 0.1551,
      "step": 18677
    },
    {
      "epoch": 0.5448976019604411,
      "grad_norm": 0.958857925699575,
      "learning_rate": 4.5167467598521255e-06,
      "loss": 0.1141,
      "step": 18678
    },
    {
      "epoch": 0.5449267751910847,
      "grad_norm": 0.7141807691434207,
      "learning_rate": 4.516276539616763e-06,
      "loss": 0.1264,
      "step": 18679
    },
    {
      "epoch": 0.5449559484217282,
      "grad_norm": 0.8791452586420072,
      "learning_rate": 4.51580632369997e-06,
      "loss": 0.1384,
      "step": 18680
    },
    {
      "epoch": 0.5449851216523718,
      "grad_norm": 1.1676846362190259,
      "learning_rate": 4.51533611210594e-06,
      "loss": 0.1313,
      "step": 18681
    },
    {
      "epoch": 0.5450142948830153,
      "grad_norm": 0.8867457317922832,
      "learning_rate": 4.514865904838873e-06,
      "loss": 0.1302,
      "step": 18682
    },
    {
      "epoch": 0.5450434681136589,
      "grad_norm": 0.7035972351414402,
      "learning_rate": 4.51439570190297e-06,
      "loss": 0.1455,
      "step": 18683
    },
    {
      "epoch": 0.5450726413443024,
      "grad_norm": 0.9727928675577748,
      "learning_rate": 4.513925503302422e-06,
      "loss": 0.136,
      "step": 18684
    },
    {
      "epoch": 0.545101814574946,
      "grad_norm": 0.7911244894352147,
      "learning_rate": 4.513455309041435e-06,
      "loss": 0.1173,
      "step": 18685
    },
    {
      "epoch": 0.5451309878055896,
      "grad_norm": 0.5604570224485353,
      "learning_rate": 4.512985119124201e-06,
      "loss": 0.1074,
      "step": 18686
    },
    {
      "epoch": 0.5451601610362331,
      "grad_norm": 0.8242058665485705,
      "learning_rate": 4.51251493355492e-06,
      "loss": 0.1403,
      "step": 18687
    },
    {
      "epoch": 0.5451893342668768,
      "grad_norm": 0.7932939935576612,
      "learning_rate": 4.512044752337791e-06,
      "loss": 0.1559,
      "step": 18688
    },
    {
      "epoch": 0.5452185074975203,
      "grad_norm": 0.8979089486879996,
      "learning_rate": 4.511574575477008e-06,
      "loss": 0.1326,
      "step": 18689
    },
    {
      "epoch": 0.5452476807281639,
      "grad_norm": 0.71343820691617,
      "learning_rate": 4.51110440297677e-06,
      "loss": 0.1248,
      "step": 18690
    },
    {
      "epoch": 0.5452768539588074,
      "grad_norm": 0.6444381535690982,
      "learning_rate": 4.510634234841276e-06,
      "loss": 0.1209,
      "step": 18691
    },
    {
      "epoch": 0.545306027189451,
      "grad_norm": 0.713407745145363,
      "learning_rate": 4.510164071074722e-06,
      "loss": 0.1417,
      "step": 18692
    },
    {
      "epoch": 0.5453352004200945,
      "grad_norm": 0.7842246343689397,
      "learning_rate": 4.509693911681309e-06,
      "loss": 0.142,
      "step": 18693
    },
    {
      "epoch": 0.5453643736507381,
      "grad_norm": 0.7797764613995918,
      "learning_rate": 4.509223756665229e-06,
      "loss": 0.1373,
      "step": 18694
    },
    {
      "epoch": 0.5453935468813816,
      "grad_norm": 0.9279253897875245,
      "learning_rate": 4.508753606030683e-06,
      "loss": 0.141,
      "step": 18695
    },
    {
      "epoch": 0.5454227201120252,
      "grad_norm": 1.2388409268297025,
      "learning_rate": 4.508283459781866e-06,
      "loss": 0.1272,
      "step": 18696
    },
    {
      "epoch": 0.5454518933426687,
      "grad_norm": 0.8143512484818307,
      "learning_rate": 4.50781331792298e-06,
      "loss": 0.1023,
      "step": 18697
    },
    {
      "epoch": 0.5454810665733123,
      "grad_norm": 0.8197034669807962,
      "learning_rate": 4.507343180458217e-06,
      "loss": 0.1316,
      "step": 18698
    },
    {
      "epoch": 0.5455102398039559,
      "grad_norm": 0.9597287125437736,
      "learning_rate": 4.5068730473917775e-06,
      "loss": 0.1202,
      "step": 18699
    },
    {
      "epoch": 0.5455394130345994,
      "grad_norm": 1.746329316587141,
      "learning_rate": 4.506402918727858e-06,
      "loss": 0.111,
      "step": 18700
    },
    {
      "epoch": 0.545568586265243,
      "grad_norm": 0.8173571891349378,
      "learning_rate": 4.505932794470655e-06,
      "loss": 0.1242,
      "step": 18701
    },
    {
      "epoch": 0.5455977594958866,
      "grad_norm": 0.9096773756712586,
      "learning_rate": 4.505462674624364e-06,
      "loss": 0.1152,
      "step": 18702
    },
    {
      "epoch": 0.5456269327265302,
      "grad_norm": 0.6762544356314814,
      "learning_rate": 4.504992559193186e-06,
      "loss": 0.1144,
      "step": 18703
    },
    {
      "epoch": 0.5456561059571737,
      "grad_norm": 0.7441982050700876,
      "learning_rate": 4.504522448181317e-06,
      "loss": 0.1221,
      "step": 18704
    },
    {
      "epoch": 0.5456852791878173,
      "grad_norm": 0.6798766630157737,
      "learning_rate": 4.504052341592953e-06,
      "loss": 0.1209,
      "step": 18705
    },
    {
      "epoch": 0.5457144524184608,
      "grad_norm": 0.8271821107677139,
      "learning_rate": 4.503582239432291e-06,
      "loss": 0.1394,
      "step": 18706
    },
    {
      "epoch": 0.5457436256491044,
      "grad_norm": 0.8143690873232363,
      "learning_rate": 4.503112141703528e-06,
      "loss": 0.12,
      "step": 18707
    },
    {
      "epoch": 0.545772798879748,
      "grad_norm": 0.916131360435358,
      "learning_rate": 4.50264204841086e-06,
      "loss": 0.1279,
      "step": 18708
    },
    {
      "epoch": 0.5458019721103915,
      "grad_norm": 1.0924993975321524,
      "learning_rate": 4.502171959558486e-06,
      "loss": 0.1228,
      "step": 18709
    },
    {
      "epoch": 0.545831145341035,
      "grad_norm": 0.9329447919396929,
      "learning_rate": 4.501701875150604e-06,
      "loss": 0.1409,
      "step": 18710
    },
    {
      "epoch": 0.5458603185716786,
      "grad_norm": 1.0741350878115938,
      "learning_rate": 4.501231795191406e-06,
      "loss": 0.138,
      "step": 18711
    },
    {
      "epoch": 0.5458894918023222,
      "grad_norm": 0.917101561364549,
      "learning_rate": 4.500761719685093e-06,
      "loss": 0.1525,
      "step": 18712
    },
    {
      "epoch": 0.5459186650329657,
      "grad_norm": 0.8087043155629119,
      "learning_rate": 4.50029164863586e-06,
      "loss": 0.1264,
      "step": 18713
    },
    {
      "epoch": 0.5459478382636093,
      "grad_norm": 0.855974439774273,
      "learning_rate": 4.499821582047902e-06,
      "loss": 0.1116,
      "step": 18714
    },
    {
      "epoch": 0.5459770114942529,
      "grad_norm": 0.8794473717442446,
      "learning_rate": 4.4993515199254196e-06,
      "loss": 0.1304,
      "step": 18715
    },
    {
      "epoch": 0.5460061847248965,
      "grad_norm": 0.946860807346336,
      "learning_rate": 4.498881462272607e-06,
      "loss": 0.1559,
      "step": 18716
    },
    {
      "epoch": 0.54603535795554,
      "grad_norm": 0.8348987097597831,
      "learning_rate": 4.49841140909366e-06,
      "loss": 0.1204,
      "step": 18717
    },
    {
      "epoch": 0.5460645311861836,
      "grad_norm": 0.780462143600301,
      "learning_rate": 4.497941360392778e-06,
      "loss": 0.1174,
      "step": 18718
    },
    {
      "epoch": 0.5460937044168271,
      "grad_norm": 0.765716565392337,
      "learning_rate": 4.4974713161741545e-06,
      "loss": 0.1352,
      "step": 18719
    },
    {
      "epoch": 0.5461228776474707,
      "grad_norm": 1.0252641668279476,
      "learning_rate": 4.497001276441986e-06,
      "loss": 0.1359,
      "step": 18720
    },
    {
      "epoch": 0.5461520508781142,
      "grad_norm": 0.7572138221774259,
      "learning_rate": 4.496531241200472e-06,
      "loss": 0.1298,
      "step": 18721
    },
    {
      "epoch": 0.5461812241087578,
      "grad_norm": 0.6336423310001275,
      "learning_rate": 4.496061210453806e-06,
      "loss": 0.0975,
      "step": 18722
    },
    {
      "epoch": 0.5462103973394014,
      "grad_norm": 0.8224626514752715,
      "learning_rate": 4.4955911842061864e-06,
      "loss": 0.1116,
      "step": 18723
    },
    {
      "epoch": 0.5462395705700449,
      "grad_norm": 0.9424743288141211,
      "learning_rate": 4.4951211624618065e-06,
      "loss": 0.1461,
      "step": 18724
    },
    {
      "epoch": 0.5462687438006885,
      "grad_norm": 1.1566218562939485,
      "learning_rate": 4.494651145224864e-06,
      "loss": 0.1468,
      "step": 18725
    },
    {
      "epoch": 0.546297917031332,
      "grad_norm": 0.8847982767943576,
      "learning_rate": 4.494181132499557e-06,
      "loss": 0.1124,
      "step": 18726
    },
    {
      "epoch": 0.5463270902619756,
      "grad_norm": 0.9788825134811449,
      "learning_rate": 4.493711124290081e-06,
      "loss": 0.1251,
      "step": 18727
    },
    {
      "epoch": 0.5463562634926191,
      "grad_norm": 1.0327831909363185,
      "learning_rate": 4.493241120600629e-06,
      "loss": 0.1202,
      "step": 18728
    },
    {
      "epoch": 0.5463854367232628,
      "grad_norm": 1.04227725466138,
      "learning_rate": 4.4927711214354005e-06,
      "loss": 0.1282,
      "step": 18729
    },
    {
      "epoch": 0.5464146099539063,
      "grad_norm": 1.0892908731422253,
      "learning_rate": 4.492301126798591e-06,
      "loss": 0.1474,
      "step": 18730
    },
    {
      "epoch": 0.5464437831845499,
      "grad_norm": 1.445404712102127,
      "learning_rate": 4.491831136694393e-06,
      "loss": 0.1357,
      "step": 18731
    },
    {
      "epoch": 0.5464729564151934,
      "grad_norm": 0.8457448446123691,
      "learning_rate": 4.491361151127008e-06,
      "loss": 0.1188,
      "step": 18732
    },
    {
      "epoch": 0.546502129645837,
      "grad_norm": 0.698862966818781,
      "learning_rate": 4.490891170100629e-06,
      "loss": 0.1282,
      "step": 18733
    },
    {
      "epoch": 0.5465313028764806,
      "grad_norm": 1.075784804102348,
      "learning_rate": 4.490421193619451e-06,
      "loss": 0.136,
      "step": 18734
    },
    {
      "epoch": 0.5465604761071241,
      "grad_norm": 0.9105765184802457,
      "learning_rate": 4.489951221687672e-06,
      "loss": 0.1176,
      "step": 18735
    },
    {
      "epoch": 0.5465896493377677,
      "grad_norm": 0.8338228380624473,
      "learning_rate": 4.489481254309486e-06,
      "loss": 0.1463,
      "step": 18736
    },
    {
      "epoch": 0.5466188225684112,
      "grad_norm": 0.8184887980584117,
      "learning_rate": 4.489011291489089e-06,
      "loss": 0.1343,
      "step": 18737
    },
    {
      "epoch": 0.5466479957990548,
      "grad_norm": 1.0756234626517278,
      "learning_rate": 4.488541333230678e-06,
      "loss": 0.147,
      "step": 18738
    },
    {
      "epoch": 0.5466771690296983,
      "grad_norm": 1.1569508670024704,
      "learning_rate": 4.488071379538447e-06,
      "loss": 0.138,
      "step": 18739
    },
    {
      "epoch": 0.5467063422603419,
      "grad_norm": 1.0553493069012183,
      "learning_rate": 4.487601430416595e-06,
      "loss": 0.1382,
      "step": 18740
    },
    {
      "epoch": 0.5467355154909854,
      "grad_norm": 0.73847327189784,
      "learning_rate": 4.487131485869313e-06,
      "loss": 0.1461,
      "step": 18741
    },
    {
      "epoch": 0.5467646887216291,
      "grad_norm": 1.2745943829878523,
      "learning_rate": 4.486661545900799e-06,
      "loss": 0.1039,
      "step": 18742
    },
    {
      "epoch": 0.5467938619522726,
      "grad_norm": 0.9256812935285194,
      "learning_rate": 4.486191610515247e-06,
      "loss": 0.1368,
      "step": 18743
    },
    {
      "epoch": 0.5468230351829162,
      "grad_norm": 1.0334598848100578,
      "learning_rate": 4.485721679716855e-06,
      "loss": 0.1497,
      "step": 18744
    },
    {
      "epoch": 0.5468522084135597,
      "grad_norm": 0.9181275271618506,
      "learning_rate": 4.485251753509818e-06,
      "loss": 0.126,
      "step": 18745
    },
    {
      "epoch": 0.5468813816442033,
      "grad_norm": 0.8469800679286065,
      "learning_rate": 4.484781831898329e-06,
      "loss": 0.1555,
      "step": 18746
    },
    {
      "epoch": 0.5469105548748469,
      "grad_norm": 0.8484698155359741,
      "learning_rate": 4.484311914886585e-06,
      "loss": 0.1281,
      "step": 18747
    },
    {
      "epoch": 0.5469397281054904,
      "grad_norm": 1.2317439107696468,
      "learning_rate": 4.483842002478783e-06,
      "loss": 0.1352,
      "step": 18748
    },
    {
      "epoch": 0.546968901336134,
      "grad_norm": 1.071208504436812,
      "learning_rate": 4.483372094679112e-06,
      "loss": 0.122,
      "step": 18749
    },
    {
      "epoch": 0.5469980745667775,
      "grad_norm": 0.7195957535184634,
      "learning_rate": 4.482902191491775e-06,
      "loss": 0.1161,
      "step": 18750
    },
    {
      "epoch": 0.5470272477974211,
      "grad_norm": 0.7326552436370776,
      "learning_rate": 4.482432292920963e-06,
      "loss": 0.1303,
      "step": 18751
    },
    {
      "epoch": 0.5470564210280646,
      "grad_norm": 0.7813913349897524,
      "learning_rate": 4.481962398970872e-06,
      "loss": 0.1177,
      "step": 18752
    },
    {
      "epoch": 0.5470855942587082,
      "grad_norm": 1.783801470151127,
      "learning_rate": 4.481492509645698e-06,
      "loss": 0.1144,
      "step": 18753
    },
    {
      "epoch": 0.5471147674893517,
      "grad_norm": 0.9595920176408392,
      "learning_rate": 4.481022624949635e-06,
      "loss": 0.1535,
      "step": 18754
    },
    {
      "epoch": 0.5471439407199953,
      "grad_norm": 0.7438172546724363,
      "learning_rate": 4.480552744886876e-06,
      "loss": 0.1077,
      "step": 18755
    },
    {
      "epoch": 0.547173113950639,
      "grad_norm": 0.7688152617798003,
      "learning_rate": 4.4800828694616195e-06,
      "loss": 0.1364,
      "step": 18756
    },
    {
      "epoch": 0.5472022871812825,
      "grad_norm": 0.9405447055173943,
      "learning_rate": 4.479612998678059e-06,
      "loss": 0.1283,
      "step": 18757
    },
    {
      "epoch": 0.547231460411926,
      "grad_norm": 0.7854322612157514,
      "learning_rate": 4.47914313254039e-06,
      "loss": 0.1116,
      "step": 18758
    },
    {
      "epoch": 0.5472606336425696,
      "grad_norm": 0.8431792024419208,
      "learning_rate": 4.478673271052806e-06,
      "loss": 0.1489,
      "step": 18759
    },
    {
      "epoch": 0.5472898068732132,
      "grad_norm": 0.7991888279351618,
      "learning_rate": 4.478203414219503e-06,
      "loss": 0.1252,
      "step": 18760
    },
    {
      "epoch": 0.5473189801038567,
      "grad_norm": 0.8240294087855686,
      "learning_rate": 4.477733562044673e-06,
      "loss": 0.1249,
      "step": 18761
    },
    {
      "epoch": 0.5473481533345003,
      "grad_norm": 0.9701580584668879,
      "learning_rate": 4.477263714532517e-06,
      "loss": 0.1343,
      "step": 18762
    },
    {
      "epoch": 0.5473773265651438,
      "grad_norm": 0.9308008446272982,
      "learning_rate": 4.476793871687224e-06,
      "loss": 0.1444,
      "step": 18763
    },
    {
      "epoch": 0.5474064997957874,
      "grad_norm": 0.9927858917002728,
      "learning_rate": 4.4763240335129895e-06,
      "loss": 0.1063,
      "step": 18764
    },
    {
      "epoch": 0.5474356730264309,
      "grad_norm": 0.7792231841900343,
      "learning_rate": 4.475854200014011e-06,
      "loss": 0.1179,
      "step": 18765
    },
    {
      "epoch": 0.5474648462570745,
      "grad_norm": 0.9923921341333932,
      "learning_rate": 4.47538437119448e-06,
      "loss": 0.1396,
      "step": 18766
    },
    {
      "epoch": 0.547494019487718,
      "grad_norm": 1.1559385992876254,
      "learning_rate": 4.474914547058591e-06,
      "loss": 0.154,
      "step": 18767
    },
    {
      "epoch": 0.5475231927183616,
      "grad_norm": 0.6660916757162116,
      "learning_rate": 4.4744447276105405e-06,
      "loss": 0.1185,
      "step": 18768
    },
    {
      "epoch": 0.5475523659490051,
      "grad_norm": 1.0079416046977872,
      "learning_rate": 4.473974912854522e-06,
      "loss": 0.1483,
      "step": 18769
    },
    {
      "epoch": 0.5475815391796488,
      "grad_norm": 0.9979158077808288,
      "learning_rate": 4.473505102794731e-06,
      "loss": 0.1447,
      "step": 18770
    },
    {
      "epoch": 0.5476107124102924,
      "grad_norm": 0.6353070059279827,
      "learning_rate": 4.4730352974353595e-06,
      "loss": 0.1156,
      "step": 18771
    },
    {
      "epoch": 0.5476398856409359,
      "grad_norm": 0.8250928276288193,
      "learning_rate": 4.472565496780603e-06,
      "loss": 0.1376,
      "step": 18772
    },
    {
      "epoch": 0.5476690588715795,
      "grad_norm": 0.8017297030902534,
      "learning_rate": 4.472095700834655e-06,
      "loss": 0.1239,
      "step": 18773
    },
    {
      "epoch": 0.547698232102223,
      "grad_norm": 0.827653124740946,
      "learning_rate": 4.471625909601712e-06,
      "loss": 0.1395,
      "step": 18774
    },
    {
      "epoch": 0.5477274053328666,
      "grad_norm": 0.714812532505758,
      "learning_rate": 4.471156123085968e-06,
      "loss": 0.1034,
      "step": 18775
    },
    {
      "epoch": 0.5477565785635101,
      "grad_norm": 0.94154036922442,
      "learning_rate": 4.470686341291614e-06,
      "loss": 0.1239,
      "step": 18776
    },
    {
      "epoch": 0.5477857517941537,
      "grad_norm": 0.8996131738315101,
      "learning_rate": 4.470216564222846e-06,
      "loss": 0.1094,
      "step": 18777
    },
    {
      "epoch": 0.5478149250247972,
      "grad_norm": 0.8582004083861352,
      "learning_rate": 4.469746791883859e-06,
      "loss": 0.1262,
      "step": 18778
    },
    {
      "epoch": 0.5478440982554408,
      "grad_norm": 0.9166084305535637,
      "learning_rate": 4.469277024278844e-06,
      "loss": 0.1077,
      "step": 18779
    },
    {
      "epoch": 0.5478732714860843,
      "grad_norm": 1.2600268115693165,
      "learning_rate": 4.468807261412e-06,
      "loss": 0.1574,
      "step": 18780
    },
    {
      "epoch": 0.5479024447167279,
      "grad_norm": 0.8629881857398644,
      "learning_rate": 4.468337503287516e-06,
      "loss": 0.1545,
      "step": 18781
    },
    {
      "epoch": 0.5479316179473714,
      "grad_norm": 0.9043997551778598,
      "learning_rate": 4.467867749909588e-06,
      "loss": 0.104,
      "step": 18782
    },
    {
      "epoch": 0.5479607911780151,
      "grad_norm": 1.0151971542149922,
      "learning_rate": 4.4673980012824106e-06,
      "loss": 0.1412,
      "step": 18783
    },
    {
      "epoch": 0.5479899644086587,
      "grad_norm": 1.1540785596688323,
      "learning_rate": 4.466928257410176e-06,
      "loss": 0.178,
      "step": 18784
    },
    {
      "epoch": 0.5480191376393022,
      "grad_norm": 1.4450616556343472,
      "learning_rate": 4.466458518297078e-06,
      "loss": 0.1141,
      "step": 18785
    },
    {
      "epoch": 0.5480483108699458,
      "grad_norm": 0.8877658422972836,
      "learning_rate": 4.465988783947311e-06,
      "loss": 0.1152,
      "step": 18786
    },
    {
      "epoch": 0.5480774841005893,
      "grad_norm": 1.0677561065489143,
      "learning_rate": 4.465519054365071e-06,
      "loss": 0.1195,
      "step": 18787
    },
    {
      "epoch": 0.5481066573312329,
      "grad_norm": 1.0366187645591158,
      "learning_rate": 4.4650493295545475e-06,
      "loss": 0.1147,
      "step": 18788
    },
    {
      "epoch": 0.5481358305618764,
      "grad_norm": 0.9566943292061366,
      "learning_rate": 4.464579609519936e-06,
      "loss": 0.1167,
      "step": 18789
    },
    {
      "epoch": 0.54816500379252,
      "grad_norm": 1.015505908262457,
      "learning_rate": 4.464109894265431e-06,
      "loss": 0.1141,
      "step": 18790
    },
    {
      "epoch": 0.5481941770231635,
      "grad_norm": 1.0564093438562037,
      "learning_rate": 4.463640183795222e-06,
      "loss": 0.1225,
      "step": 18791
    },
    {
      "epoch": 0.5482233502538071,
      "grad_norm": 1.5484185495907816,
      "learning_rate": 4.463170478113509e-06,
      "loss": 0.1517,
      "step": 18792
    },
    {
      "epoch": 0.5482525234844506,
      "grad_norm": 0.9621266986658162,
      "learning_rate": 4.462700777224479e-06,
      "loss": 0.1299,
      "step": 18793
    },
    {
      "epoch": 0.5482816967150942,
      "grad_norm": 1.0427061543192153,
      "learning_rate": 4.46223108113233e-06,
      "loss": 0.1438,
      "step": 18794
    },
    {
      "epoch": 0.5483108699457377,
      "grad_norm": 0.7867770643621365,
      "learning_rate": 4.4617613898412534e-06,
      "loss": 0.1329,
      "step": 18795
    },
    {
      "epoch": 0.5483400431763813,
      "grad_norm": 0.6628114665732285,
      "learning_rate": 4.461291703355443e-06,
      "loss": 0.1289,
      "step": 18796
    },
    {
      "epoch": 0.548369216407025,
      "grad_norm": 0.8931836886938811,
      "learning_rate": 4.460822021679089e-06,
      "loss": 0.1222,
      "step": 18797
    },
    {
      "epoch": 0.5483983896376685,
      "grad_norm": 0.9416935724452078,
      "learning_rate": 4.4603523448163894e-06,
      "loss": 0.1166,
      "step": 18798
    },
    {
      "epoch": 0.5484275628683121,
      "grad_norm": 0.660218947610981,
      "learning_rate": 4.459882672771535e-06,
      "loss": 0.1337,
      "step": 18799
    },
    {
      "epoch": 0.5484567360989556,
      "grad_norm": 0.83965462166203,
      "learning_rate": 4.45941300554872e-06,
      "loss": 0.1134,
      "step": 18800
    },
    {
      "epoch": 0.5484859093295992,
      "grad_norm": 1.0496612007027168,
      "learning_rate": 4.4589433431521356e-06,
      "loss": 0.1134,
      "step": 18801
    },
    {
      "epoch": 0.5485150825602427,
      "grad_norm": 0.8463714576445494,
      "learning_rate": 4.458473685585976e-06,
      "loss": 0.1342,
      "step": 18802
    },
    {
      "epoch": 0.5485442557908863,
      "grad_norm": 0.8421325209491768,
      "learning_rate": 4.458004032854432e-06,
      "loss": 0.1328,
      "step": 18803
    },
    {
      "epoch": 0.5485734290215298,
      "grad_norm": 0.9108583905917115,
      "learning_rate": 4.457534384961701e-06,
      "loss": 0.1343,
      "step": 18804
    },
    {
      "epoch": 0.5486026022521734,
      "grad_norm": 0.9689609962829522,
      "learning_rate": 4.457064741911974e-06,
      "loss": 0.1279,
      "step": 18805
    },
    {
      "epoch": 0.5486317754828169,
      "grad_norm": 0.7041795736720375,
      "learning_rate": 4.456595103709443e-06,
      "loss": 0.1352,
      "step": 18806
    },
    {
      "epoch": 0.5486609487134605,
      "grad_norm": 0.8467823049388483,
      "learning_rate": 4.456125470358301e-06,
      "loss": 0.1292,
      "step": 18807
    },
    {
      "epoch": 0.548690121944104,
      "grad_norm": 0.7456565169942629,
      "learning_rate": 4.455655841862742e-06,
      "loss": 0.1455,
      "step": 18808
    },
    {
      "epoch": 0.5487192951747476,
      "grad_norm": 0.8075679281034006,
      "learning_rate": 4.455186218226953e-06,
      "loss": 0.1343,
      "step": 18809
    },
    {
      "epoch": 0.5487484684053913,
      "grad_norm": 0.7829420297078291,
      "learning_rate": 4.454716599455137e-06,
      "loss": 0.1273,
      "step": 18810
    },
    {
      "epoch": 0.5487776416360348,
      "grad_norm": 0.8074066550865667,
      "learning_rate": 4.454246985551478e-06,
      "loss": 0.1107,
      "step": 18811
    },
    {
      "epoch": 0.5488068148666784,
      "grad_norm": 0.877346506703811,
      "learning_rate": 4.453777376520173e-06,
      "loss": 0.1243,
      "step": 18812
    },
    {
      "epoch": 0.5488359880973219,
      "grad_norm": 0.8439324148329742,
      "learning_rate": 4.4533077723654134e-06,
      "loss": 0.1441,
      "step": 18813
    },
    {
      "epoch": 0.5488651613279655,
      "grad_norm": 0.9160281794188151,
      "learning_rate": 4.452838173091391e-06,
      "loss": 0.1096,
      "step": 18814
    },
    {
      "epoch": 0.548894334558609,
      "grad_norm": 1.2818435066609408,
      "learning_rate": 4.452368578702297e-06,
      "loss": 0.1427,
      "step": 18815
    },
    {
      "epoch": 0.5489235077892526,
      "grad_norm": 0.8903131888603528,
      "learning_rate": 4.451898989202327e-06,
      "loss": 0.1206,
      "step": 18816
    },
    {
      "epoch": 0.5489526810198961,
      "grad_norm": 1.0707974982028392,
      "learning_rate": 4.451429404595673e-06,
      "loss": 0.1292,
      "step": 18817
    },
    {
      "epoch": 0.5489818542505397,
      "grad_norm": 1.068831289893814,
      "learning_rate": 4.450959824886525e-06,
      "loss": 0.1226,
      "step": 18818
    },
    {
      "epoch": 0.5490110274811832,
      "grad_norm": 0.9618543277054882,
      "learning_rate": 4.450490250079077e-06,
      "loss": 0.1305,
      "step": 18819
    },
    {
      "epoch": 0.5490402007118268,
      "grad_norm": 0.7889767428814896,
      "learning_rate": 4.450020680177522e-06,
      "loss": 0.1379,
      "step": 18820
    },
    {
      "epoch": 0.5490693739424704,
      "grad_norm": 0.9420117986656027,
      "learning_rate": 4.449551115186049e-06,
      "loss": 0.1456,
      "step": 18821
    },
    {
      "epoch": 0.5490985471731139,
      "grad_norm": 0.9483556160076674,
      "learning_rate": 4.4490815551088535e-06,
      "loss": 0.1174,
      "step": 18822
    },
    {
      "epoch": 0.5491277204037575,
      "grad_norm": 0.8420100584383265,
      "learning_rate": 4.448611999950126e-06,
      "loss": 0.12,
      "step": 18823
    },
    {
      "epoch": 0.5491568936344011,
      "grad_norm": 0.753450656429226,
      "learning_rate": 4.448142449714059e-06,
      "loss": 0.1085,
      "step": 18824
    },
    {
      "epoch": 0.5491860668650447,
      "grad_norm": 0.7376249513628443,
      "learning_rate": 4.447672904404846e-06,
      "loss": 0.1207,
      "step": 18825
    },
    {
      "epoch": 0.5492152400956882,
      "grad_norm": 0.6532449538371288,
      "learning_rate": 4.447203364026675e-06,
      "loss": 0.1288,
      "step": 18826
    },
    {
      "epoch": 0.5492444133263318,
      "grad_norm": 0.7441042072903009,
      "learning_rate": 4.44673382858374e-06,
      "loss": 0.1357,
      "step": 18827
    },
    {
      "epoch": 0.5492735865569753,
      "grad_norm": 0.7667964860350982,
      "learning_rate": 4.446264298080235e-06,
      "loss": 0.1356,
      "step": 18828
    },
    {
      "epoch": 0.5493027597876189,
      "grad_norm": 0.7521707498130297,
      "learning_rate": 4.44579477252035e-06,
      "loss": 0.1316,
      "step": 18829
    },
    {
      "epoch": 0.5493319330182624,
      "grad_norm": 0.799486513772945,
      "learning_rate": 4.4453252519082775e-06,
      "loss": 0.1362,
      "step": 18830
    },
    {
      "epoch": 0.549361106248906,
      "grad_norm": 0.7546343097194946,
      "learning_rate": 4.444855736248208e-06,
      "loss": 0.1221,
      "step": 18831
    },
    {
      "epoch": 0.5493902794795495,
      "grad_norm": 0.7752573259250796,
      "learning_rate": 4.444386225544334e-06,
      "loss": 0.1556,
      "step": 18832
    },
    {
      "epoch": 0.5494194527101931,
      "grad_norm": 0.685234704736772,
      "learning_rate": 4.443916719800846e-06,
      "loss": 0.1318,
      "step": 18833
    },
    {
      "epoch": 0.5494486259408367,
      "grad_norm": 0.8198630358754666,
      "learning_rate": 4.443447219021938e-06,
      "loss": 0.1556,
      "step": 18834
    },
    {
      "epoch": 0.5494777991714802,
      "grad_norm": 0.7886386679426491,
      "learning_rate": 4.442977723211801e-06,
      "loss": 0.1151,
      "step": 18835
    },
    {
      "epoch": 0.5495069724021238,
      "grad_norm": 0.9014790405505484,
      "learning_rate": 4.442508232374625e-06,
      "loss": 0.1099,
      "step": 18836
    },
    {
      "epoch": 0.5495361456327674,
      "grad_norm": 0.8411352161674372,
      "learning_rate": 4.442038746514603e-06,
      "loss": 0.1483,
      "step": 18837
    },
    {
      "epoch": 0.549565318863411,
      "grad_norm": 0.6923289850293326,
      "learning_rate": 4.441569265635927e-06,
      "loss": 0.1189,
      "step": 18838
    },
    {
      "epoch": 0.5495944920940545,
      "grad_norm": 0.78649112074162,
      "learning_rate": 4.441099789742783e-06,
      "loss": 0.103,
      "step": 18839
    },
    {
      "epoch": 0.5496236653246981,
      "grad_norm": 0.7811436076507703,
      "learning_rate": 4.440630318839371e-06,
      "loss": 0.1345,
      "step": 18840
    },
    {
      "epoch": 0.5496528385553416,
      "grad_norm": 0.7639822127543663,
      "learning_rate": 4.4401608529298755e-06,
      "loss": 0.1333,
      "step": 18841
    },
    {
      "epoch": 0.5496820117859852,
      "grad_norm": 0.9188100326365559,
      "learning_rate": 4.439691392018492e-06,
      "loss": 0.1568,
      "step": 18842
    },
    {
      "epoch": 0.5497111850166287,
      "grad_norm": 0.7253342840516087,
      "learning_rate": 4.439221936109409e-06,
      "loss": 0.1204,
      "step": 18843
    },
    {
      "epoch": 0.5497403582472723,
      "grad_norm": 0.9615458196395555,
      "learning_rate": 4.438752485206819e-06,
      "loss": 0.1289,
      "step": 18844
    },
    {
      "epoch": 0.5497695314779159,
      "grad_norm": 0.8532075732843499,
      "learning_rate": 4.438283039314912e-06,
      "loss": 0.1226,
      "step": 18845
    },
    {
      "epoch": 0.5497987047085594,
      "grad_norm": 0.8816727263528346,
      "learning_rate": 4.437813598437881e-06,
      "loss": 0.1205,
      "step": 18846
    },
    {
      "epoch": 0.549827877939203,
      "grad_norm": 0.9298808247796456,
      "learning_rate": 4.437344162579917e-06,
      "loss": 0.1403,
      "step": 18847
    },
    {
      "epoch": 0.5498570511698465,
      "grad_norm": 0.7830926199444167,
      "learning_rate": 4.4368747317452075e-06,
      "loss": 0.1482,
      "step": 18848
    },
    {
      "epoch": 0.5498862244004901,
      "grad_norm": 0.7605544103110462,
      "learning_rate": 4.436405305937947e-06,
      "loss": 0.1236,
      "step": 18849
    },
    {
      "epoch": 0.5499153976311336,
      "grad_norm": 0.8261668690686542,
      "learning_rate": 4.435935885162327e-06,
      "loss": 0.1159,
      "step": 18850
    },
    {
      "epoch": 0.5499445708617773,
      "grad_norm": 1.0097377594927257,
      "learning_rate": 4.435466469422533e-06,
      "loss": 0.1247,
      "step": 18851
    },
    {
      "epoch": 0.5499737440924208,
      "grad_norm": 0.9728248677311301,
      "learning_rate": 4.434997058722762e-06,
      "loss": 0.1353,
      "step": 18852
    },
    {
      "epoch": 0.5500029173230644,
      "grad_norm": 0.7422217850705474,
      "learning_rate": 4.434527653067203e-06,
      "loss": 0.113,
      "step": 18853
    },
    {
      "epoch": 0.5500320905537079,
      "grad_norm": 0.9094787481848825,
      "learning_rate": 4.434058252460045e-06,
      "loss": 0.1263,
      "step": 18854
    },
    {
      "epoch": 0.5500612637843515,
      "grad_norm": 0.6841078949401904,
      "learning_rate": 4.433588856905481e-06,
      "loss": 0.1326,
      "step": 18855
    },
    {
      "epoch": 0.550090437014995,
      "grad_norm": 0.8828866013868408,
      "learning_rate": 4.4331194664077e-06,
      "loss": 0.1367,
      "step": 18856
    },
    {
      "epoch": 0.5501196102456386,
      "grad_norm": 0.7830079930268402,
      "learning_rate": 4.432650080970891e-06,
      "loss": 0.1462,
      "step": 18857
    },
    {
      "epoch": 0.5501487834762822,
      "grad_norm": 0.8156306303494512,
      "learning_rate": 4.432180700599248e-06,
      "loss": 0.1238,
      "step": 18858
    },
    {
      "epoch": 0.5501779567069257,
      "grad_norm": 0.7823258402850047,
      "learning_rate": 4.431711325296961e-06,
      "loss": 0.1343,
      "step": 18859
    },
    {
      "epoch": 0.5502071299375693,
      "grad_norm": 0.8022561333768476,
      "learning_rate": 4.43124195506822e-06,
      "loss": 0.1164,
      "step": 18860
    },
    {
      "epoch": 0.5502363031682128,
      "grad_norm": 0.8583481960707582,
      "learning_rate": 4.430772589917214e-06,
      "loss": 0.1213,
      "step": 18861
    },
    {
      "epoch": 0.5502654763988564,
      "grad_norm": 0.954864727716638,
      "learning_rate": 4.4303032298481344e-06,
      "loss": 0.1365,
      "step": 18862
    },
    {
      "epoch": 0.5502946496294999,
      "grad_norm": 0.8819247378086011,
      "learning_rate": 4.429833874865171e-06,
      "loss": 0.1229,
      "step": 18863
    },
    {
      "epoch": 0.5503238228601436,
      "grad_norm": 0.9175827146331453,
      "learning_rate": 4.429364524972516e-06,
      "loss": 0.1375,
      "step": 18864
    },
    {
      "epoch": 0.5503529960907871,
      "grad_norm": 0.7254745234518502,
      "learning_rate": 4.428895180174358e-06,
      "loss": 0.1235,
      "step": 18865
    },
    {
      "epoch": 0.5503821693214307,
      "grad_norm": 0.9335599154474736,
      "learning_rate": 4.428425840474888e-06,
      "loss": 0.1351,
      "step": 18866
    },
    {
      "epoch": 0.5504113425520742,
      "grad_norm": 1.048875958370967,
      "learning_rate": 4.427956505878294e-06,
      "loss": 0.148,
      "step": 18867
    },
    {
      "epoch": 0.5504405157827178,
      "grad_norm": 0.6447775964429916,
      "learning_rate": 4.42748717638877e-06,
      "loss": 0.1099,
      "step": 18868
    },
    {
      "epoch": 0.5504696890133614,
      "grad_norm": 0.9248577677453754,
      "learning_rate": 4.4270178520105e-06,
      "loss": 0.1192,
      "step": 18869
    },
    {
      "epoch": 0.5504988622440049,
      "grad_norm": 0.8270878868959397,
      "learning_rate": 4.426548532747681e-06,
      "loss": 0.1286,
      "step": 18870
    },
    {
      "epoch": 0.5505280354746485,
      "grad_norm": 0.7425708562237897,
      "learning_rate": 4.426079218604499e-06,
      "loss": 0.1607,
      "step": 18871
    },
    {
      "epoch": 0.550557208705292,
      "grad_norm": 1.4173539162091753,
      "learning_rate": 4.4256099095851455e-06,
      "loss": 0.1403,
      "step": 18872
    },
    {
      "epoch": 0.5505863819359356,
      "grad_norm": 0.9591979772681544,
      "learning_rate": 4.42514060569381e-06,
      "loss": 0.1249,
      "step": 18873
    },
    {
      "epoch": 0.5506155551665791,
      "grad_norm": 0.9629079639441862,
      "learning_rate": 4.424671306934681e-06,
      "loss": 0.1343,
      "step": 18874
    },
    {
      "epoch": 0.5506447283972227,
      "grad_norm": 0.6543564246871194,
      "learning_rate": 4.424202013311947e-06,
      "loss": 0.1111,
      "step": 18875
    },
    {
      "epoch": 0.5506739016278662,
      "grad_norm": 0.9133388080681468,
      "learning_rate": 4.423732724829802e-06,
      "loss": 0.1119,
      "step": 18876
    },
    {
      "epoch": 0.5507030748585098,
      "grad_norm": 0.8416084456937348,
      "learning_rate": 4.423263441492436e-06,
      "loss": 0.1339,
      "step": 18877
    },
    {
      "epoch": 0.5507322480891534,
      "grad_norm": 0.7324154434440577,
      "learning_rate": 4.4227941633040335e-06,
      "loss": 0.1335,
      "step": 18878
    },
    {
      "epoch": 0.550761421319797,
      "grad_norm": 0.8143888661673182,
      "learning_rate": 4.422324890268787e-06,
      "loss": 0.1416,
      "step": 18879
    },
    {
      "epoch": 0.5507905945504405,
      "grad_norm": 0.8755486013829306,
      "learning_rate": 4.421855622390887e-06,
      "loss": 0.1256,
      "step": 18880
    },
    {
      "epoch": 0.5508197677810841,
      "grad_norm": 0.9647428164434501,
      "learning_rate": 4.42138635967452e-06,
      "loss": 0.1195,
      "step": 18881
    },
    {
      "epoch": 0.5508489410117277,
      "grad_norm": 0.6968924654914538,
      "learning_rate": 4.420917102123879e-06,
      "loss": 0.1073,
      "step": 18882
    },
    {
      "epoch": 0.5508781142423712,
      "grad_norm": 0.6776839132508738,
      "learning_rate": 4.420447849743152e-06,
      "loss": 0.1218,
      "step": 18883
    },
    {
      "epoch": 0.5509072874730148,
      "grad_norm": 0.9780392719740577,
      "learning_rate": 4.419978602536529e-06,
      "loss": 0.1301,
      "step": 18884
    },
    {
      "epoch": 0.5509364607036583,
      "grad_norm": 1.269366287466131,
      "learning_rate": 4.419509360508198e-06,
      "loss": 0.1227,
      "step": 18885
    },
    {
      "epoch": 0.5509656339343019,
      "grad_norm": 0.988261489618055,
      "learning_rate": 4.419040123662348e-06,
      "loss": 0.1373,
      "step": 18886
    },
    {
      "epoch": 0.5509948071649454,
      "grad_norm": 0.998681512237093,
      "learning_rate": 4.418570892003169e-06,
      "loss": 0.1255,
      "step": 18887
    },
    {
      "epoch": 0.551023980395589,
      "grad_norm": 0.9859257190037307,
      "learning_rate": 4.418101665534851e-06,
      "loss": 0.1302,
      "step": 18888
    },
    {
      "epoch": 0.5510531536262325,
      "grad_norm": 0.7435861505082985,
      "learning_rate": 4.417632444261582e-06,
      "loss": 0.1201,
      "step": 18889
    },
    {
      "epoch": 0.5510823268568761,
      "grad_norm": 0.9044645211984617,
      "learning_rate": 4.417163228187552e-06,
      "loss": 0.1296,
      "step": 18890
    },
    {
      "epoch": 0.5511115000875197,
      "grad_norm": 0.6673412854286197,
      "learning_rate": 4.41669401731695e-06,
      "loss": 0.1198,
      "step": 18891
    },
    {
      "epoch": 0.5511406733181633,
      "grad_norm": 1.015387950005781,
      "learning_rate": 4.416224811653963e-06,
      "loss": 0.1262,
      "step": 18892
    },
    {
      "epoch": 0.5511698465488069,
      "grad_norm": 0.7409449697376059,
      "learning_rate": 4.415755611202782e-06,
      "loss": 0.1135,
      "step": 18893
    },
    {
      "epoch": 0.5511990197794504,
      "grad_norm": 0.8683402773180618,
      "learning_rate": 4.415286415967596e-06,
      "loss": 0.1353,
      "step": 18894
    },
    {
      "epoch": 0.551228193010094,
      "grad_norm": 0.8628280057573453,
      "learning_rate": 4.414817225952594e-06,
      "loss": 0.1204,
      "step": 18895
    },
    {
      "epoch": 0.5512573662407375,
      "grad_norm": 0.6847709045708442,
      "learning_rate": 4.414348041161963e-06,
      "loss": 0.1202,
      "step": 18896
    },
    {
      "epoch": 0.5512865394713811,
      "grad_norm": 0.8020461498396281,
      "learning_rate": 4.413878861599893e-06,
      "loss": 0.1239,
      "step": 18897
    },
    {
      "epoch": 0.5513157127020246,
      "grad_norm": 1.0200723866121209,
      "learning_rate": 4.413409687270574e-06,
      "loss": 0.1431,
      "step": 18898
    },
    {
      "epoch": 0.5513448859326682,
      "grad_norm": 1.0289337290145673,
      "learning_rate": 4.412940518178191e-06,
      "loss": 0.1361,
      "step": 18899
    },
    {
      "epoch": 0.5513740591633117,
      "grad_norm": 1.095416605994027,
      "learning_rate": 4.412471354326936e-06,
      "loss": 0.1303,
      "step": 18900
    },
    {
      "epoch": 0.5514032323939553,
      "grad_norm": 1.0664787186450981,
      "learning_rate": 4.412002195720996e-06,
      "loss": 0.1484,
      "step": 18901
    },
    {
      "epoch": 0.5514324056245988,
      "grad_norm": 1.1820260823233966,
      "learning_rate": 4.41153304236456e-06,
      "loss": 0.1453,
      "step": 18902
    },
    {
      "epoch": 0.5514615788552424,
      "grad_norm": 1.0926917410889156,
      "learning_rate": 4.411063894261818e-06,
      "loss": 0.1195,
      "step": 18903
    },
    {
      "epoch": 0.5514907520858859,
      "grad_norm": 0.8201506636812573,
      "learning_rate": 4.410594751416956e-06,
      "loss": 0.1208,
      "step": 18904
    },
    {
      "epoch": 0.5515199253165296,
      "grad_norm": 0.9268618846094715,
      "learning_rate": 4.410125613834162e-06,
      "loss": 0.1236,
      "step": 18905
    },
    {
      "epoch": 0.5515490985471732,
      "grad_norm": 0.8709982156274102,
      "learning_rate": 4.409656481517627e-06,
      "loss": 0.1423,
      "step": 18906
    },
    {
      "epoch": 0.5515782717778167,
      "grad_norm": 1.0799782334146752,
      "learning_rate": 4.409187354471539e-06,
      "loss": 0.151,
      "step": 18907
    },
    {
      "epoch": 0.5516074450084603,
      "grad_norm": 0.8726156079233057,
      "learning_rate": 4.4087182327000845e-06,
      "loss": 0.1437,
      "step": 18908
    },
    {
      "epoch": 0.5516366182391038,
      "grad_norm": 0.880339981058879,
      "learning_rate": 4.408249116207452e-06,
      "loss": 0.133,
      "step": 18909
    },
    {
      "epoch": 0.5516657914697474,
      "grad_norm": 1.0609898160043045,
      "learning_rate": 4.407780004997831e-06,
      "loss": 0.1209,
      "step": 18910
    },
    {
      "epoch": 0.5516949647003909,
      "grad_norm": 0.8067179625470575,
      "learning_rate": 4.407310899075406e-06,
      "loss": 0.1126,
      "step": 18911
    },
    {
      "epoch": 0.5517241379310345,
      "grad_norm": 0.8214607414672347,
      "learning_rate": 4.406841798444371e-06,
      "loss": 0.1423,
      "step": 18912
    },
    {
      "epoch": 0.551753311161678,
      "grad_norm": 0.9296961843547017,
      "learning_rate": 4.40637270310891e-06,
      "loss": 0.1279,
      "step": 18913
    },
    {
      "epoch": 0.5517824843923216,
      "grad_norm": 1.0150414366375669,
      "learning_rate": 4.4059036130732115e-06,
      "loss": 0.1354,
      "step": 18914
    },
    {
      "epoch": 0.5518116576229651,
      "grad_norm": 0.8882836837030985,
      "learning_rate": 4.4054345283414645e-06,
      "loss": 0.1278,
      "step": 18915
    },
    {
      "epoch": 0.5518408308536087,
      "grad_norm": 0.8148474926750665,
      "learning_rate": 4.404965448917855e-06,
      "loss": 0.1229,
      "step": 18916
    },
    {
      "epoch": 0.5518700040842522,
      "grad_norm": 0.9414039923913972,
      "learning_rate": 4.4044963748065716e-06,
      "loss": 0.111,
      "step": 18917
    },
    {
      "epoch": 0.5518991773148959,
      "grad_norm": 0.8041724061132699,
      "learning_rate": 4.404027306011804e-06,
      "loss": 0.1262,
      "step": 18918
    },
    {
      "epoch": 0.5519283505455395,
      "grad_norm": 0.9318898626723681,
      "learning_rate": 4.403558242537737e-06,
      "loss": 0.1236,
      "step": 18919
    },
    {
      "epoch": 0.551957523776183,
      "grad_norm": 1.1528299941047244,
      "learning_rate": 4.40308918438856e-06,
      "loss": 0.1519,
      "step": 18920
    },
    {
      "epoch": 0.5519866970068266,
      "grad_norm": 0.8343848843723077,
      "learning_rate": 4.402620131568461e-06,
      "loss": 0.1046,
      "step": 18921
    },
    {
      "epoch": 0.5520158702374701,
      "grad_norm": 0.8668240869909689,
      "learning_rate": 4.402151084081625e-06,
      "loss": 0.1454,
      "step": 18922
    },
    {
      "epoch": 0.5520450434681137,
      "grad_norm": 0.6814790320732358,
      "learning_rate": 4.401682041932243e-06,
      "loss": 0.1235,
      "step": 18923
    },
    {
      "epoch": 0.5520742166987572,
      "grad_norm": 0.8920669265290863,
      "learning_rate": 4.4012130051245e-06,
      "loss": 0.1225,
      "step": 18924
    },
    {
      "epoch": 0.5521033899294008,
      "grad_norm": 0.7313361740763353,
      "learning_rate": 4.400743973662586e-06,
      "loss": 0.1241,
      "step": 18925
    },
    {
      "epoch": 0.5521325631600443,
      "grad_norm": 0.7913429969906784,
      "learning_rate": 4.400274947550685e-06,
      "loss": 0.1375,
      "step": 18926
    },
    {
      "epoch": 0.5521617363906879,
      "grad_norm": 0.8398671674310144,
      "learning_rate": 4.3998059267929875e-06,
      "loss": 0.1427,
      "step": 18927
    },
    {
      "epoch": 0.5521909096213314,
      "grad_norm": 0.9810835566100664,
      "learning_rate": 4.3993369113936765e-06,
      "loss": 0.1091,
      "step": 18928
    },
    {
      "epoch": 0.552220082851975,
      "grad_norm": 0.6773255354104609,
      "learning_rate": 4.3988679013569455e-06,
      "loss": 0.1066,
      "step": 18929
    },
    {
      "epoch": 0.5522492560826185,
      "grad_norm": 0.7687159863426298,
      "learning_rate": 4.398398896686977e-06,
      "loss": 0.1345,
      "step": 18930
    },
    {
      "epoch": 0.5522784293132621,
      "grad_norm": 0.854676951390436,
      "learning_rate": 4.39792989738796e-06,
      "loss": 0.1345,
      "step": 18931
    },
    {
      "epoch": 0.5523076025439058,
      "grad_norm": 0.6842095574049585,
      "learning_rate": 4.39746090346408e-06,
      "loss": 0.0965,
      "step": 18932
    },
    {
      "epoch": 0.5523367757745493,
      "grad_norm": 0.7438169389739349,
      "learning_rate": 4.396991914919528e-06,
      "loss": 0.1126,
      "step": 18933
    },
    {
      "epoch": 0.5523659490051929,
      "grad_norm": 0.7650484211780397,
      "learning_rate": 4.3965229317584846e-06,
      "loss": 0.1277,
      "step": 18934
    },
    {
      "epoch": 0.5523951222358364,
      "grad_norm": 0.9242249311460413,
      "learning_rate": 4.396053953985142e-06,
      "loss": 0.1393,
      "step": 18935
    },
    {
      "epoch": 0.55242429546648,
      "grad_norm": 0.8117106767305521,
      "learning_rate": 4.395584981603686e-06,
      "loss": 0.1209,
      "step": 18936
    },
    {
      "epoch": 0.5524534686971235,
      "grad_norm": 0.6625364024823419,
      "learning_rate": 4.395116014618303e-06,
      "loss": 0.1255,
      "step": 18937
    },
    {
      "epoch": 0.5524826419277671,
      "grad_norm": 1.0487966281576853,
      "learning_rate": 4.39464705303318e-06,
      "loss": 0.1352,
      "step": 18938
    },
    {
      "epoch": 0.5525118151584106,
      "grad_norm": 0.8082036924009987,
      "learning_rate": 4.394178096852503e-06,
      "loss": 0.1269,
      "step": 18939
    },
    {
      "epoch": 0.5525409883890542,
      "grad_norm": 0.822389565897582,
      "learning_rate": 4.393709146080458e-06,
      "loss": 0.1131,
      "step": 18940
    },
    {
      "epoch": 0.5525701616196977,
      "grad_norm": 1.035314430128407,
      "learning_rate": 4.393240200721234e-06,
      "loss": 0.1193,
      "step": 18941
    },
    {
      "epoch": 0.5525993348503413,
      "grad_norm": 0.9707725354811764,
      "learning_rate": 4.392771260779018e-06,
      "loss": 0.1564,
      "step": 18942
    },
    {
      "epoch": 0.5526285080809848,
      "grad_norm": 0.9636631736749641,
      "learning_rate": 4.392302326257995e-06,
      "loss": 0.1372,
      "step": 18943
    },
    {
      "epoch": 0.5526576813116284,
      "grad_norm": 1.0086431948465115,
      "learning_rate": 4.39183339716235e-06,
      "loss": 0.1284,
      "step": 18944
    },
    {
      "epoch": 0.5526868545422721,
      "grad_norm": 0.8029281205526639,
      "learning_rate": 4.391364473496273e-06,
      "loss": 0.1226,
      "step": 18945
    },
    {
      "epoch": 0.5527160277729156,
      "grad_norm": 0.8376520467839277,
      "learning_rate": 4.390895555263946e-06,
      "loss": 0.1369,
      "step": 18946
    },
    {
      "epoch": 0.5527452010035592,
      "grad_norm": 0.8621087899813299,
      "learning_rate": 4.390426642469561e-06,
      "loss": 0.1348,
      "step": 18947
    },
    {
      "epoch": 0.5527743742342027,
      "grad_norm": 1.2560347738458584,
      "learning_rate": 4.3899577351173005e-06,
      "loss": 0.1398,
      "step": 18948
    },
    {
      "epoch": 0.5528035474648463,
      "grad_norm": 0.8478268100757564,
      "learning_rate": 4.389488833211351e-06,
      "loss": 0.1362,
      "step": 18949
    },
    {
      "epoch": 0.5528327206954898,
      "grad_norm": 0.9655586142279446,
      "learning_rate": 4.389019936755902e-06,
      "loss": 0.1502,
      "step": 18950
    },
    {
      "epoch": 0.5528618939261334,
      "grad_norm": 0.7419266281202673,
      "learning_rate": 4.388551045755135e-06,
      "loss": 0.1205,
      "step": 18951
    },
    {
      "epoch": 0.5528910671567769,
      "grad_norm": 0.8711008103723668,
      "learning_rate": 4.388082160213237e-06,
      "loss": 0.1182,
      "step": 18952
    },
    {
      "epoch": 0.5529202403874205,
      "grad_norm": 0.7856337236933707,
      "learning_rate": 4.387613280134397e-06,
      "loss": 0.1286,
      "step": 18953
    },
    {
      "epoch": 0.552949413618064,
      "grad_norm": 1.0391416184434505,
      "learning_rate": 4.3871444055228e-06,
      "loss": 0.1136,
      "step": 18954
    },
    {
      "epoch": 0.5529785868487076,
      "grad_norm": 0.750986333806838,
      "learning_rate": 4.386675536382631e-06,
      "loss": 0.1304,
      "step": 18955
    },
    {
      "epoch": 0.5530077600793512,
      "grad_norm": 1.249896122674722,
      "learning_rate": 4.3862066727180765e-06,
      "loss": 0.1202,
      "step": 18956
    },
    {
      "epoch": 0.5530369333099947,
      "grad_norm": 0.8660143694255064,
      "learning_rate": 4.385737814533322e-06,
      "loss": 0.1326,
      "step": 18957
    },
    {
      "epoch": 0.5530661065406383,
      "grad_norm": 0.7776825646497837,
      "learning_rate": 4.385268961832553e-06,
      "loss": 0.124,
      "step": 18958
    },
    {
      "epoch": 0.5530952797712819,
      "grad_norm": 0.7515619600990452,
      "learning_rate": 4.384800114619957e-06,
      "loss": 0.1178,
      "step": 18959
    },
    {
      "epoch": 0.5531244530019255,
      "grad_norm": 0.8631802248534036,
      "learning_rate": 4.384331272899718e-06,
      "loss": 0.1156,
      "step": 18960
    },
    {
      "epoch": 0.553153626232569,
      "grad_norm": 0.7442139612714272,
      "learning_rate": 4.383862436676023e-06,
      "loss": 0.1208,
      "step": 18961
    },
    {
      "epoch": 0.5531827994632126,
      "grad_norm": 1.070272932987149,
      "learning_rate": 4.383393605953057e-06,
      "loss": 0.1499,
      "step": 18962
    },
    {
      "epoch": 0.5532119726938561,
      "grad_norm": 0.869308725151239,
      "learning_rate": 4.382924780735007e-06,
      "loss": 0.1342,
      "step": 18963
    },
    {
      "epoch": 0.5532411459244997,
      "grad_norm": 0.9966991000253045,
      "learning_rate": 4.3824559610260545e-06,
      "loss": 0.1653,
      "step": 18964
    },
    {
      "epoch": 0.5532703191551432,
      "grad_norm": 0.708567997809416,
      "learning_rate": 4.381987146830389e-06,
      "loss": 0.1305,
      "step": 18965
    },
    {
      "epoch": 0.5532994923857868,
      "grad_norm": 1.0003084806399476,
      "learning_rate": 4.381518338152195e-06,
      "loss": 0.1593,
      "step": 18966
    },
    {
      "epoch": 0.5533286656164303,
      "grad_norm": 0.8213124015520988,
      "learning_rate": 4.381049534995658e-06,
      "loss": 0.1015,
      "step": 18967
    },
    {
      "epoch": 0.5533578388470739,
      "grad_norm": 0.8604842656028665,
      "learning_rate": 4.380580737364962e-06,
      "loss": 0.1271,
      "step": 18968
    },
    {
      "epoch": 0.5533870120777175,
      "grad_norm": 0.7736718020630616,
      "learning_rate": 4.380111945264294e-06,
      "loss": 0.1241,
      "step": 18969
    },
    {
      "epoch": 0.553416185308361,
      "grad_norm": 0.8683832202446158,
      "learning_rate": 4.379643158697837e-06,
      "loss": 0.1601,
      "step": 18970
    },
    {
      "epoch": 0.5534453585390046,
      "grad_norm": 0.6773294747814594,
      "learning_rate": 4.3791743776697795e-06,
      "loss": 0.1154,
      "step": 18971
    },
    {
      "epoch": 0.5534745317696482,
      "grad_norm": 0.9849618752089566,
      "learning_rate": 4.378705602184306e-06,
      "loss": 0.14,
      "step": 18972
    },
    {
      "epoch": 0.5535037050002918,
      "grad_norm": 0.7972406615254845,
      "learning_rate": 4.3782368322455985e-06,
      "loss": 0.1167,
      "step": 18973
    },
    {
      "epoch": 0.5535328782309353,
      "grad_norm": 0.8624819543776043,
      "learning_rate": 4.377768067857845e-06,
      "loss": 0.166,
      "step": 18974
    },
    {
      "epoch": 0.5535620514615789,
      "grad_norm": 0.7968597633134373,
      "learning_rate": 4.37729930902523e-06,
      "loss": 0.1241,
      "step": 18975
    },
    {
      "epoch": 0.5535912246922224,
      "grad_norm": 0.8832363884273774,
      "learning_rate": 4.376830555751935e-06,
      "loss": 0.1237,
      "step": 18976
    },
    {
      "epoch": 0.553620397922866,
      "grad_norm": 0.8994375649384976,
      "learning_rate": 4.376361808042152e-06,
      "loss": 0.1342,
      "step": 18977
    },
    {
      "epoch": 0.5536495711535095,
      "grad_norm": 0.8993620114859945,
      "learning_rate": 4.37589306590006e-06,
      "loss": 0.1263,
      "step": 18978
    },
    {
      "epoch": 0.5536787443841531,
      "grad_norm": 0.8789984648277301,
      "learning_rate": 4.375424329329847e-06,
      "loss": 0.1319,
      "step": 18979
    },
    {
      "epoch": 0.5537079176147967,
      "grad_norm": 0.700312367315295,
      "learning_rate": 4.374955598335696e-06,
      "loss": 0.1208,
      "step": 18980
    },
    {
      "epoch": 0.5537370908454402,
      "grad_norm": 0.9916636045107491,
      "learning_rate": 4.374486872921792e-06,
      "loss": 0.1459,
      "step": 18981
    },
    {
      "epoch": 0.5537662640760838,
      "grad_norm": 0.7780606346331178,
      "learning_rate": 4.374018153092319e-06,
      "loss": 0.1222,
      "step": 18982
    },
    {
      "epoch": 0.5537954373067273,
      "grad_norm": 0.8056706125102487,
      "learning_rate": 4.373549438851463e-06,
      "loss": 0.1285,
      "step": 18983
    },
    {
      "epoch": 0.5538246105373709,
      "grad_norm": 0.842929800563625,
      "learning_rate": 4.373080730203408e-06,
      "loss": 0.1195,
      "step": 18984
    },
    {
      "epoch": 0.5538537837680144,
      "grad_norm": 0.6976156825232113,
      "learning_rate": 4.37261202715234e-06,
      "loss": 0.1091,
      "step": 18985
    },
    {
      "epoch": 0.5538829569986581,
      "grad_norm": 0.7394111334881569,
      "learning_rate": 4.372143329702441e-06,
      "loss": 0.1117,
      "step": 18986
    },
    {
      "epoch": 0.5539121302293016,
      "grad_norm": 0.7721938028571832,
      "learning_rate": 4.371674637857896e-06,
      "loss": 0.126,
      "step": 18987
    },
    {
      "epoch": 0.5539413034599452,
      "grad_norm": 0.8670435034129449,
      "learning_rate": 4.371205951622889e-06,
      "loss": 0.1457,
      "step": 18988
    },
    {
      "epoch": 0.5539704766905887,
      "grad_norm": 0.7389260962426617,
      "learning_rate": 4.370737271001607e-06,
      "loss": 0.125,
      "step": 18989
    },
    {
      "epoch": 0.5539996499212323,
      "grad_norm": 0.8865940446498207,
      "learning_rate": 4.3702685959982326e-06,
      "loss": 0.1394,
      "step": 18990
    },
    {
      "epoch": 0.5540288231518758,
      "grad_norm": 0.9553162359285411,
      "learning_rate": 4.369799926616949e-06,
      "loss": 0.1293,
      "step": 18991
    },
    {
      "epoch": 0.5540579963825194,
      "grad_norm": 0.8915793746933419,
      "learning_rate": 4.369331262861942e-06,
      "loss": 0.1063,
      "step": 18992
    },
    {
      "epoch": 0.554087169613163,
      "grad_norm": 0.871306471435351,
      "learning_rate": 4.368862604737395e-06,
      "loss": 0.1222,
      "step": 18993
    },
    {
      "epoch": 0.5541163428438065,
      "grad_norm": 0.8436957957319473,
      "learning_rate": 4.368393952247489e-06,
      "loss": 0.1238,
      "step": 18994
    },
    {
      "epoch": 0.5541455160744501,
      "grad_norm": 0.8121602703143961,
      "learning_rate": 4.367925305396414e-06,
      "loss": 0.1301,
      "step": 18995
    },
    {
      "epoch": 0.5541746893050936,
      "grad_norm": 0.736501040908026,
      "learning_rate": 4.36745666418835e-06,
      "loss": 0.1354,
      "step": 18996
    },
    {
      "epoch": 0.5542038625357372,
      "grad_norm": 0.9146198369061643,
      "learning_rate": 4.366988028627484e-06,
      "loss": 0.1357,
      "step": 18997
    },
    {
      "epoch": 0.5542330357663807,
      "grad_norm": 1.0272053538843235,
      "learning_rate": 4.366519398717995e-06,
      "loss": 0.1233,
      "step": 18998
    },
    {
      "epoch": 0.5542622089970244,
      "grad_norm": 3.134792651135183,
      "learning_rate": 4.366050774464071e-06,
      "loss": 0.1599,
      "step": 18999
    },
    {
      "epoch": 0.5542913822276679,
      "grad_norm": 0.6636392571345271,
      "learning_rate": 4.365582155869892e-06,
      "loss": 0.1047,
      "step": 19000
    },
    {
      "epoch": 0.5543205554583115,
      "grad_norm": 0.8338426930577785,
      "learning_rate": 4.365113542939646e-06,
      "loss": 0.122,
      "step": 19001
    },
    {
      "epoch": 0.554349728688955,
      "grad_norm": 1.0225088274138192,
      "learning_rate": 4.364644935677516e-06,
      "loss": 0.1524,
      "step": 19002
    },
    {
      "epoch": 0.5543789019195986,
      "grad_norm": 0.8356938730103732,
      "learning_rate": 4.364176334087683e-06,
      "loss": 0.1135,
      "step": 19003
    },
    {
      "epoch": 0.5544080751502422,
      "grad_norm": 0.827849731435746,
      "learning_rate": 4.363707738174331e-06,
      "loss": 0.1275,
      "step": 19004
    },
    {
      "epoch": 0.5544372483808857,
      "grad_norm": 0.758940524020363,
      "learning_rate": 4.363239147941647e-06,
      "loss": 0.1048,
      "step": 19005
    },
    {
      "epoch": 0.5544664216115293,
      "grad_norm": 0.9455837093178442,
      "learning_rate": 4.362770563393808e-06,
      "loss": 0.1345,
      "step": 19006
    },
    {
      "epoch": 0.5544955948421728,
      "grad_norm": 1.010327736798248,
      "learning_rate": 4.362301984535005e-06,
      "loss": 0.1163,
      "step": 19007
    },
    {
      "epoch": 0.5545247680728164,
      "grad_norm": 1.5037815722126615,
      "learning_rate": 4.361833411369415e-06,
      "loss": 0.1444,
      "step": 19008
    },
    {
      "epoch": 0.5545539413034599,
      "grad_norm": 1.0660801195113891,
      "learning_rate": 4.361364843901226e-06,
      "loss": 0.1411,
      "step": 19009
    },
    {
      "epoch": 0.5545831145341035,
      "grad_norm": 0.8827088248975826,
      "learning_rate": 4.360896282134619e-06,
      "loss": 0.1292,
      "step": 19010
    },
    {
      "epoch": 0.554612287764747,
      "grad_norm": 0.8612085820432255,
      "learning_rate": 4.360427726073776e-06,
      "loss": 0.1429,
      "step": 19011
    },
    {
      "epoch": 0.5546414609953906,
      "grad_norm": 0.9299876808958056,
      "learning_rate": 4.359959175722881e-06,
      "loss": 0.1332,
      "step": 19012
    },
    {
      "epoch": 0.5546706342260342,
      "grad_norm": 1.0010153111031155,
      "learning_rate": 4.3594906310861195e-06,
      "loss": 0.1662,
      "step": 19013
    },
    {
      "epoch": 0.5546998074566778,
      "grad_norm": 0.8387242150310642,
      "learning_rate": 4.359022092167672e-06,
      "loss": 0.1316,
      "step": 19014
    },
    {
      "epoch": 0.5547289806873213,
      "grad_norm": 0.8570485680581119,
      "learning_rate": 4.358553558971723e-06,
      "loss": 0.1184,
      "step": 19015
    },
    {
      "epoch": 0.5547581539179649,
      "grad_norm": 0.8463602905838344,
      "learning_rate": 4.358085031502455e-06,
      "loss": 0.1257,
      "step": 19016
    },
    {
      "epoch": 0.5547873271486085,
      "grad_norm": 0.9155141260660282,
      "learning_rate": 4.35761650976405e-06,
      "loss": 0.1378,
      "step": 19017
    },
    {
      "epoch": 0.554816500379252,
      "grad_norm": 0.8735324406153037,
      "learning_rate": 4.35714799376069e-06,
      "loss": 0.1275,
      "step": 19018
    },
    {
      "epoch": 0.5548456736098956,
      "grad_norm": 0.6755818049046284,
      "learning_rate": 4.3566794834965616e-06,
      "loss": 0.1326,
      "step": 19019
    },
    {
      "epoch": 0.5548748468405391,
      "grad_norm": 0.9367120460741722,
      "learning_rate": 4.3562109789758435e-06,
      "loss": 0.1461,
      "step": 19020
    },
    {
      "epoch": 0.5549040200711827,
      "grad_norm": 0.8767740890279596,
      "learning_rate": 4.355742480202721e-06,
      "loss": 0.1312,
      "step": 19021
    },
    {
      "epoch": 0.5549331933018262,
      "grad_norm": 0.8035646964027161,
      "learning_rate": 4.355273987181376e-06,
      "loss": 0.1407,
      "step": 19022
    },
    {
      "epoch": 0.5549623665324698,
      "grad_norm": 0.807426950519346,
      "learning_rate": 4.354805499915991e-06,
      "loss": 0.1398,
      "step": 19023
    },
    {
      "epoch": 0.5549915397631133,
      "grad_norm": 0.9118753614808602,
      "learning_rate": 4.354337018410747e-06,
      "loss": 0.1381,
      "step": 19024
    },
    {
      "epoch": 0.5550207129937569,
      "grad_norm": 0.576043163429003,
      "learning_rate": 4.353868542669828e-06,
      "loss": 0.0961,
      "step": 19025
    },
    {
      "epoch": 0.5550498862244004,
      "grad_norm": 0.6594052129528744,
      "learning_rate": 4.353400072697418e-06,
      "loss": 0.1216,
      "step": 19026
    },
    {
      "epoch": 0.5550790594550441,
      "grad_norm": 0.7585226301717503,
      "learning_rate": 4.352931608497698e-06,
      "loss": 0.1382,
      "step": 19027
    },
    {
      "epoch": 0.5551082326856877,
      "grad_norm": 0.6750017083918501,
      "learning_rate": 4.3524631500748495e-06,
      "loss": 0.1293,
      "step": 19028
    },
    {
      "epoch": 0.5551374059163312,
      "grad_norm": 0.6961170966180397,
      "learning_rate": 4.351994697433055e-06,
      "loss": 0.1114,
      "step": 19029
    },
    {
      "epoch": 0.5551665791469748,
      "grad_norm": 0.7360047297017995,
      "learning_rate": 4.351526250576496e-06,
      "loss": 0.1234,
      "step": 19030
    },
    {
      "epoch": 0.5551957523776183,
      "grad_norm": 0.74104320568258,
      "learning_rate": 4.351057809509357e-06,
      "loss": 0.1342,
      "step": 19031
    },
    {
      "epoch": 0.5552249256082619,
      "grad_norm": 1.1616859823897157,
      "learning_rate": 4.35058937423582e-06,
      "loss": 0.1241,
      "step": 19032
    },
    {
      "epoch": 0.5552540988389054,
      "grad_norm": 0.7534353068898046,
      "learning_rate": 4.350120944760065e-06,
      "loss": 0.1146,
      "step": 19033
    },
    {
      "epoch": 0.555283272069549,
      "grad_norm": 0.7359607953267799,
      "learning_rate": 4.349652521086275e-06,
      "loss": 0.1346,
      "step": 19034
    },
    {
      "epoch": 0.5553124453001925,
      "grad_norm": 0.7887782773626593,
      "learning_rate": 4.349184103218633e-06,
      "loss": 0.1226,
      "step": 19035
    },
    {
      "epoch": 0.5553416185308361,
      "grad_norm": 0.7908862976458872,
      "learning_rate": 4.348715691161317e-06,
      "loss": 0.1244,
      "step": 19036
    },
    {
      "epoch": 0.5553707917614796,
      "grad_norm": 0.7437358248556468,
      "learning_rate": 4.348247284918515e-06,
      "loss": 0.1386,
      "step": 19037
    },
    {
      "epoch": 0.5553999649921232,
      "grad_norm": 1.1328661443383496,
      "learning_rate": 4.347778884494405e-06,
      "loss": 0.1319,
      "step": 19038
    },
    {
      "epoch": 0.5554291382227667,
      "grad_norm": 0.712735036495089,
      "learning_rate": 4.347310489893169e-06,
      "loss": 0.1029,
      "step": 19039
    },
    {
      "epoch": 0.5554583114534104,
      "grad_norm": 0.7744272683956761,
      "learning_rate": 4.346842101118991e-06,
      "loss": 0.1421,
      "step": 19040
    },
    {
      "epoch": 0.555487484684054,
      "grad_norm": 0.778418849903219,
      "learning_rate": 4.346373718176049e-06,
      "loss": 0.1211,
      "step": 19041
    },
    {
      "epoch": 0.5555166579146975,
      "grad_norm": 0.9379960028819022,
      "learning_rate": 4.345905341068525e-06,
      "loss": 0.1376,
      "step": 19042
    },
    {
      "epoch": 0.5555458311453411,
      "grad_norm": 0.7194867018599489,
      "learning_rate": 4.345436969800603e-06,
      "loss": 0.1185,
      "step": 19043
    },
    {
      "epoch": 0.5555750043759846,
      "grad_norm": 0.7764692817658038,
      "learning_rate": 4.344968604376465e-06,
      "loss": 0.1294,
      "step": 19044
    },
    {
      "epoch": 0.5556041776066282,
      "grad_norm": 1.0001105838790252,
      "learning_rate": 4.34450024480029e-06,
      "loss": 0.1275,
      "step": 19045
    },
    {
      "epoch": 0.5556333508372717,
      "grad_norm": 0.724988975142468,
      "learning_rate": 4.34403189107626e-06,
      "loss": 0.1137,
      "step": 19046
    },
    {
      "epoch": 0.5556625240679153,
      "grad_norm": 0.7300662716982012,
      "learning_rate": 4.343563543208557e-06,
      "loss": 0.1538,
      "step": 19047
    },
    {
      "epoch": 0.5556916972985588,
      "grad_norm": 0.8290228078879128,
      "learning_rate": 4.343095201201361e-06,
      "loss": 0.1399,
      "step": 19048
    },
    {
      "epoch": 0.5557208705292024,
      "grad_norm": 0.8955325333341915,
      "learning_rate": 4.342626865058856e-06,
      "loss": 0.1302,
      "step": 19049
    },
    {
      "epoch": 0.5557500437598459,
      "grad_norm": 0.7533257227519368,
      "learning_rate": 4.34215853478522e-06,
      "loss": 0.1293,
      "step": 19050
    },
    {
      "epoch": 0.5557792169904895,
      "grad_norm": 0.8504209059512653,
      "learning_rate": 4.341690210384636e-06,
      "loss": 0.1293,
      "step": 19051
    },
    {
      "epoch": 0.555808390221133,
      "grad_norm": 0.8174216954485575,
      "learning_rate": 4.341221891861286e-06,
      "loss": 0.1199,
      "step": 19052
    },
    {
      "epoch": 0.5558375634517766,
      "grad_norm": 0.9584904670466751,
      "learning_rate": 4.340753579219349e-06,
      "loss": 0.1335,
      "step": 19053
    },
    {
      "epoch": 0.5558667366824203,
      "grad_norm": 0.7971548088341041,
      "learning_rate": 4.340285272463005e-06,
      "loss": 0.132,
      "step": 19054
    },
    {
      "epoch": 0.5558959099130638,
      "grad_norm": 1.372057586506304,
      "learning_rate": 4.339816971596438e-06,
      "loss": 0.1306,
      "step": 19055
    },
    {
      "epoch": 0.5559250831437074,
      "grad_norm": 1.035451505593037,
      "learning_rate": 4.339348676623826e-06,
      "loss": 0.1601,
      "step": 19056
    },
    {
      "epoch": 0.5559542563743509,
      "grad_norm": 0.6946171225751947,
      "learning_rate": 4.3388803875493536e-06,
      "loss": 0.1075,
      "step": 19057
    },
    {
      "epoch": 0.5559834296049945,
      "grad_norm": 0.748041898500976,
      "learning_rate": 4.338412104377198e-06,
      "loss": 0.1052,
      "step": 19058
    },
    {
      "epoch": 0.556012602835638,
      "grad_norm": 0.8847162211447822,
      "learning_rate": 4.337943827111542e-06,
      "loss": 0.1302,
      "step": 19059
    },
    {
      "epoch": 0.5560417760662816,
      "grad_norm": 0.9788611723862909,
      "learning_rate": 4.337475555756563e-06,
      "loss": 0.1221,
      "step": 19060
    },
    {
      "epoch": 0.5560709492969251,
      "grad_norm": 0.7934920805269242,
      "learning_rate": 4.3370072903164466e-06,
      "loss": 0.1163,
      "step": 19061
    },
    {
      "epoch": 0.5561001225275687,
      "grad_norm": 0.8060432545753894,
      "learning_rate": 4.33653903079537e-06,
      "loss": 0.1303,
      "step": 19062
    },
    {
      "epoch": 0.5561292957582122,
      "grad_norm": 0.8694569583121312,
      "learning_rate": 4.3360707771975154e-06,
      "loss": 0.1045,
      "step": 19063
    },
    {
      "epoch": 0.5561584689888558,
      "grad_norm": 0.9722308929764426,
      "learning_rate": 4.335602529527061e-06,
      "loss": 0.1185,
      "step": 19064
    },
    {
      "epoch": 0.5561876422194993,
      "grad_norm": 0.7641553324482699,
      "learning_rate": 4.335134287788191e-06,
      "loss": 0.1224,
      "step": 19065
    },
    {
      "epoch": 0.5562168154501429,
      "grad_norm": 1.0635532818329378,
      "learning_rate": 4.334666051985079e-06,
      "loss": 0.1439,
      "step": 19066
    },
    {
      "epoch": 0.5562459886807866,
      "grad_norm": 0.6870946879967289,
      "learning_rate": 4.334197822121913e-06,
      "loss": 0.1094,
      "step": 19067
    },
    {
      "epoch": 0.5562751619114301,
      "grad_norm": 0.8234267835962014,
      "learning_rate": 4.333729598202869e-06,
      "loss": 0.1229,
      "step": 19068
    },
    {
      "epoch": 0.5563043351420737,
      "grad_norm": 0.9770794180998047,
      "learning_rate": 4.333261380232129e-06,
      "loss": 0.1426,
      "step": 19069
    },
    {
      "epoch": 0.5563335083727172,
      "grad_norm": 0.9482585300221368,
      "learning_rate": 4.3327931682138725e-06,
      "loss": 0.1423,
      "step": 19070
    },
    {
      "epoch": 0.5563626816033608,
      "grad_norm": 0.9467254690847229,
      "learning_rate": 4.3323249621522785e-06,
      "loss": 0.1296,
      "step": 19071
    },
    {
      "epoch": 0.5563918548340043,
      "grad_norm": 0.8977881860793262,
      "learning_rate": 4.331856762051526e-06,
      "loss": 0.1211,
      "step": 19072
    },
    {
      "epoch": 0.5564210280646479,
      "grad_norm": 0.9323535480483929,
      "learning_rate": 4.331388567915799e-06,
      "loss": 0.1155,
      "step": 19073
    },
    {
      "epoch": 0.5564502012952914,
      "grad_norm": 0.8592387935246127,
      "learning_rate": 4.330920379749274e-06,
      "loss": 0.1324,
      "step": 19074
    },
    {
      "epoch": 0.556479374525935,
      "grad_norm": 1.0401763103605797,
      "learning_rate": 4.330452197556134e-06,
      "loss": 0.1341,
      "step": 19075
    },
    {
      "epoch": 0.5565085477565785,
      "grad_norm": 0.8327513069803256,
      "learning_rate": 4.329984021340557e-06,
      "loss": 0.1264,
      "step": 19076
    },
    {
      "epoch": 0.5565377209872221,
      "grad_norm": 0.7398323747336847,
      "learning_rate": 4.329515851106721e-06,
      "loss": 0.1326,
      "step": 19077
    },
    {
      "epoch": 0.5565668942178656,
      "grad_norm": 0.8631278579939401,
      "learning_rate": 4.329047686858807e-06,
      "loss": 0.1504,
      "step": 19078
    },
    {
      "epoch": 0.5565960674485092,
      "grad_norm": 0.9173061038077363,
      "learning_rate": 4.328579528600997e-06,
      "loss": 0.1046,
      "step": 19079
    },
    {
      "epoch": 0.5566252406791528,
      "grad_norm": 0.8053387977027799,
      "learning_rate": 4.328111376337468e-06,
      "loss": 0.1299,
      "step": 19080
    },
    {
      "epoch": 0.5566544139097964,
      "grad_norm": 0.7943544478077899,
      "learning_rate": 4.3276432300723995e-06,
      "loss": 0.1152,
      "step": 19081
    },
    {
      "epoch": 0.55668358714044,
      "grad_norm": 0.684900982092863,
      "learning_rate": 4.327175089809973e-06,
      "loss": 0.1134,
      "step": 19082
    },
    {
      "epoch": 0.5567127603710835,
      "grad_norm": 1.0556320969726258,
      "learning_rate": 4.3267069555543665e-06,
      "loss": 0.1289,
      "step": 19083
    },
    {
      "epoch": 0.5567419336017271,
      "grad_norm": 0.8466057433921833,
      "learning_rate": 4.326238827309758e-06,
      "loss": 0.1095,
      "step": 19084
    },
    {
      "epoch": 0.5567711068323706,
      "grad_norm": 0.7338247525685471,
      "learning_rate": 4.3257707050803285e-06,
      "loss": 0.1267,
      "step": 19085
    },
    {
      "epoch": 0.5568002800630142,
      "grad_norm": 0.7557776443026418,
      "learning_rate": 4.325302588870258e-06,
      "loss": 0.11,
      "step": 19086
    },
    {
      "epoch": 0.5568294532936577,
      "grad_norm": 0.8162613754395548,
      "learning_rate": 4.324834478683726e-06,
      "loss": 0.1316,
      "step": 19087
    },
    {
      "epoch": 0.5568586265243013,
      "grad_norm": 0.7112984568497187,
      "learning_rate": 4.32436637452491e-06,
      "loss": 0.1252,
      "step": 19088
    },
    {
      "epoch": 0.5568877997549448,
      "grad_norm": 0.945271187261099,
      "learning_rate": 4.32389827639799e-06,
      "loss": 0.1239,
      "step": 19089
    },
    {
      "epoch": 0.5569169729855884,
      "grad_norm": 0.8873115463856694,
      "learning_rate": 4.323430184307143e-06,
      "loss": 0.1198,
      "step": 19090
    },
    {
      "epoch": 0.556946146216232,
      "grad_norm": 0.8925186333145753,
      "learning_rate": 4.3229620982565505e-06,
      "loss": 0.1345,
      "step": 19091
    },
    {
      "epoch": 0.5569753194468755,
      "grad_norm": 1.267998504717158,
      "learning_rate": 4.322494018250392e-06,
      "loss": 0.1319,
      "step": 19092
    },
    {
      "epoch": 0.5570044926775191,
      "grad_norm": 0.913261745842966,
      "learning_rate": 4.322025944292845e-06,
      "loss": 0.119,
      "step": 19093
    },
    {
      "epoch": 0.5570336659081627,
      "grad_norm": 0.7564078256122583,
      "learning_rate": 4.321557876388087e-06,
      "loss": 0.1337,
      "step": 19094
    },
    {
      "epoch": 0.5570628391388063,
      "grad_norm": 0.9747580617501804,
      "learning_rate": 4.321089814540301e-06,
      "loss": 0.1181,
      "step": 19095
    },
    {
      "epoch": 0.5570920123694498,
      "grad_norm": 0.9551944684281286,
      "learning_rate": 4.320621758753659e-06,
      "loss": 0.1315,
      "step": 19096
    },
    {
      "epoch": 0.5571211856000934,
      "grad_norm": 0.7604880208963859,
      "learning_rate": 4.320153709032347e-06,
      "loss": 0.1283,
      "step": 19097
    },
    {
      "epoch": 0.5571503588307369,
      "grad_norm": 0.8382857647702464,
      "learning_rate": 4.319685665380539e-06,
      "loss": 0.1295,
      "step": 19098
    },
    {
      "epoch": 0.5571795320613805,
      "grad_norm": 1.203360923454032,
      "learning_rate": 4.319217627802415e-06,
      "loss": 0.1235,
      "step": 19099
    },
    {
      "epoch": 0.557208705292024,
      "grad_norm": 0.7630826778051315,
      "learning_rate": 4.318749596302155e-06,
      "loss": 0.1329,
      "step": 19100
    },
    {
      "epoch": 0.5572378785226676,
      "grad_norm": 0.8794901446033415,
      "learning_rate": 4.318281570883935e-06,
      "loss": 0.1294,
      "step": 19101
    },
    {
      "epoch": 0.5572670517533111,
      "grad_norm": 0.9443425549623128,
      "learning_rate": 4.3178135515519336e-06,
      "loss": 0.1419,
      "step": 19102
    },
    {
      "epoch": 0.5572962249839547,
      "grad_norm": 0.8137379571082658,
      "learning_rate": 4.317345538310331e-06,
      "loss": 0.1355,
      "step": 19103
    },
    {
      "epoch": 0.5573253982145983,
      "grad_norm": 0.8853168441729572,
      "learning_rate": 4.316877531163304e-06,
      "loss": 0.1333,
      "step": 19104
    },
    {
      "epoch": 0.5573545714452418,
      "grad_norm": 0.8321714517997488,
      "learning_rate": 4.3164095301150325e-06,
      "loss": 0.1289,
      "step": 19105
    },
    {
      "epoch": 0.5573837446758854,
      "grad_norm": 0.9104922555381546,
      "learning_rate": 4.315941535169692e-06,
      "loss": 0.1392,
      "step": 19106
    },
    {
      "epoch": 0.5574129179065289,
      "grad_norm": 0.7053054006086941,
      "learning_rate": 4.315473546331463e-06,
      "loss": 0.1003,
      "step": 19107
    },
    {
      "epoch": 0.5574420911371726,
      "grad_norm": 0.7700656288780094,
      "learning_rate": 4.315005563604521e-06,
      "loss": 0.1179,
      "step": 19108
    },
    {
      "epoch": 0.5574712643678161,
      "grad_norm": 0.7747077764319444,
      "learning_rate": 4.314537586993048e-06,
      "loss": 0.1391,
      "step": 19109
    },
    {
      "epoch": 0.5575004375984597,
      "grad_norm": 0.8512534505870097,
      "learning_rate": 4.314069616501219e-06,
      "loss": 0.116,
      "step": 19110
    },
    {
      "epoch": 0.5575296108291032,
      "grad_norm": 1.1915308090824996,
      "learning_rate": 4.313601652133213e-06,
      "loss": 0.1532,
      "step": 19111
    },
    {
      "epoch": 0.5575587840597468,
      "grad_norm": 0.7630239928530291,
      "learning_rate": 4.3131336938932085e-06,
      "loss": 0.1264,
      "step": 19112
    },
    {
      "epoch": 0.5575879572903903,
      "grad_norm": 0.9180348198619798,
      "learning_rate": 4.312665741785379e-06,
      "loss": 0.1414,
      "step": 19113
    },
    {
      "epoch": 0.5576171305210339,
      "grad_norm": 0.9850176224492503,
      "learning_rate": 4.312197795813909e-06,
      "loss": 0.1146,
      "step": 19114
    },
    {
      "epoch": 0.5576463037516775,
      "grad_norm": 0.7933498422687582,
      "learning_rate": 4.311729855982972e-06,
      "loss": 0.1441,
      "step": 19115
    },
    {
      "epoch": 0.557675476982321,
      "grad_norm": 0.9139270458622918,
      "learning_rate": 4.311261922296746e-06,
      "loss": 0.1252,
      "step": 19116
    },
    {
      "epoch": 0.5577046502129646,
      "grad_norm": 1.2188145977938503,
      "learning_rate": 4.310793994759411e-06,
      "loss": 0.1584,
      "step": 19117
    },
    {
      "epoch": 0.5577338234436081,
      "grad_norm": 0.9690350110217198,
      "learning_rate": 4.310326073375141e-06,
      "loss": 0.1238,
      "step": 19118
    },
    {
      "epoch": 0.5577629966742517,
      "grad_norm": 1.1264086158332027,
      "learning_rate": 4.309858158148114e-06,
      "loss": 0.1283,
      "step": 19119
    },
    {
      "epoch": 0.5577921699048952,
      "grad_norm": 0.8654237947195278,
      "learning_rate": 4.30939024908251e-06,
      "loss": 0.1215,
      "step": 19120
    },
    {
      "epoch": 0.5578213431355389,
      "grad_norm": 0.7326600570244524,
      "learning_rate": 4.308922346182505e-06,
      "loss": 0.1098,
      "step": 19121
    },
    {
      "epoch": 0.5578505163661824,
      "grad_norm": 0.7202669208213778,
      "learning_rate": 4.308454449452277e-06,
      "loss": 0.1186,
      "step": 19122
    },
    {
      "epoch": 0.557879689596826,
      "grad_norm": 1.0690084370132689,
      "learning_rate": 4.3079865588960014e-06,
      "loss": 0.1271,
      "step": 19123
    },
    {
      "epoch": 0.5579088628274695,
      "grad_norm": 1.1007812742797274,
      "learning_rate": 4.307518674517858e-06,
      "loss": 0.1361,
      "step": 19124
    },
    {
      "epoch": 0.5579380360581131,
      "grad_norm": 0.8787765921807975,
      "learning_rate": 4.3070507963220195e-06,
      "loss": 0.1235,
      "step": 19125
    },
    {
      "epoch": 0.5579672092887566,
      "grad_norm": 0.8996433276085365,
      "learning_rate": 4.3065829243126685e-06,
      "loss": 0.1509,
      "step": 19126
    },
    {
      "epoch": 0.5579963825194002,
      "grad_norm": 0.8878376716646226,
      "learning_rate": 4.306115058493981e-06,
      "loss": 0.1583,
      "step": 19127
    },
    {
      "epoch": 0.5580255557500438,
      "grad_norm": 1.0487540761799035,
      "learning_rate": 4.305647198870131e-06,
      "loss": 0.1302,
      "step": 19128
    },
    {
      "epoch": 0.5580547289806873,
      "grad_norm": 0.8405901697246108,
      "learning_rate": 4.305179345445297e-06,
      "loss": 0.1521,
      "step": 19129
    },
    {
      "epoch": 0.5580839022113309,
      "grad_norm": 0.888072930235096,
      "learning_rate": 4.304711498223656e-06,
      "loss": 0.1411,
      "step": 19130
    },
    {
      "epoch": 0.5581130754419744,
      "grad_norm": 0.9305898421135123,
      "learning_rate": 4.304243657209383e-06,
      "loss": 0.1314,
      "step": 19131
    },
    {
      "epoch": 0.558142248672618,
      "grad_norm": 0.8085351979341941,
      "learning_rate": 4.30377582240666e-06,
      "loss": 0.1215,
      "step": 19132
    },
    {
      "epoch": 0.5581714219032615,
      "grad_norm": 0.8572758553739566,
      "learning_rate": 4.303307993819657e-06,
      "loss": 0.1419,
      "step": 19133
    },
    {
      "epoch": 0.5582005951339051,
      "grad_norm": 0.922083938679305,
      "learning_rate": 4.302840171452556e-06,
      "loss": 0.1329,
      "step": 19134
    },
    {
      "epoch": 0.5582297683645487,
      "grad_norm": 0.8911884182439094,
      "learning_rate": 4.302372355309532e-06,
      "loss": 0.1179,
      "step": 19135
    },
    {
      "epoch": 0.5582589415951923,
      "grad_norm": 0.7111043411041209,
      "learning_rate": 4.301904545394761e-06,
      "loss": 0.141,
      "step": 19136
    },
    {
      "epoch": 0.5582881148258358,
      "grad_norm": 1.0692574535871096,
      "learning_rate": 4.301436741712417e-06,
      "loss": 0.1394,
      "step": 19137
    },
    {
      "epoch": 0.5583172880564794,
      "grad_norm": 1.1114995483858565,
      "learning_rate": 4.30096894426668e-06,
      "loss": 0.1555,
      "step": 19138
    },
    {
      "epoch": 0.558346461287123,
      "grad_norm": 0.9175644226687056,
      "learning_rate": 4.3005011530617275e-06,
      "loss": 0.1215,
      "step": 19139
    },
    {
      "epoch": 0.5583756345177665,
      "grad_norm": 0.7594943854632864,
      "learning_rate": 4.300033368101732e-06,
      "loss": 0.1245,
      "step": 19140
    },
    {
      "epoch": 0.5584048077484101,
      "grad_norm": 0.980480244557107,
      "learning_rate": 4.299565589390872e-06,
      "loss": 0.1371,
      "step": 19141
    },
    {
      "epoch": 0.5584339809790536,
      "grad_norm": 0.9394728822043553,
      "learning_rate": 4.299097816933323e-06,
      "loss": 0.1414,
      "step": 19142
    },
    {
      "epoch": 0.5584631542096972,
      "grad_norm": 0.8717233172501493,
      "learning_rate": 4.29863005073326e-06,
      "loss": 0.17,
      "step": 19143
    },
    {
      "epoch": 0.5584923274403407,
      "grad_norm": 0.825546529492615,
      "learning_rate": 4.2981622907948625e-06,
      "loss": 0.1597,
      "step": 19144
    },
    {
      "epoch": 0.5585215006709843,
      "grad_norm": 1.3135694938829492,
      "learning_rate": 4.297694537122304e-06,
      "loss": 0.121,
      "step": 19145
    },
    {
      "epoch": 0.5585506739016278,
      "grad_norm": 0.808713343832058,
      "learning_rate": 4.297226789719761e-06,
      "loss": 0.1132,
      "step": 19146
    },
    {
      "epoch": 0.5585798471322714,
      "grad_norm": 0.7237285483951936,
      "learning_rate": 4.29675904859141e-06,
      "loss": 0.1202,
      "step": 19147
    },
    {
      "epoch": 0.558609020362915,
      "grad_norm": 0.943673714823329,
      "learning_rate": 4.296291313741425e-06,
      "loss": 0.1301,
      "step": 19148
    },
    {
      "epoch": 0.5586381935935586,
      "grad_norm": 0.8394084451252932,
      "learning_rate": 4.295823585173983e-06,
      "loss": 0.1457,
      "step": 19149
    },
    {
      "epoch": 0.5586673668242021,
      "grad_norm": 0.7437816288736493,
      "learning_rate": 4.29535586289326e-06,
      "loss": 0.1091,
      "step": 19150
    },
    {
      "epoch": 0.5586965400548457,
      "grad_norm": 0.7471769505431773,
      "learning_rate": 4.294888146903433e-06,
      "loss": 0.1285,
      "step": 19151
    },
    {
      "epoch": 0.5587257132854893,
      "grad_norm": 0.9376425501717874,
      "learning_rate": 4.294420437208677e-06,
      "loss": 0.1166,
      "step": 19152
    },
    {
      "epoch": 0.5587548865161328,
      "grad_norm": 0.9560575967084503,
      "learning_rate": 4.2939527338131654e-06,
      "loss": 0.1237,
      "step": 19153
    },
    {
      "epoch": 0.5587840597467764,
      "grad_norm": 1.322551694300141,
      "learning_rate": 4.293485036721075e-06,
      "loss": 0.1278,
      "step": 19154
    },
    {
      "epoch": 0.5588132329774199,
      "grad_norm": 1.3297083397872205,
      "learning_rate": 4.293017345936581e-06,
      "loss": 0.1293,
      "step": 19155
    },
    {
      "epoch": 0.5588424062080635,
      "grad_norm": 0.8807202298756728,
      "learning_rate": 4.29254966146386e-06,
      "loss": 0.1148,
      "step": 19156
    },
    {
      "epoch": 0.558871579438707,
      "grad_norm": 0.884337355599638,
      "learning_rate": 4.292081983307088e-06,
      "loss": 0.1433,
      "step": 19157
    },
    {
      "epoch": 0.5589007526693506,
      "grad_norm": 0.8870220382296279,
      "learning_rate": 4.291614311470438e-06,
      "loss": 0.1265,
      "step": 19158
    },
    {
      "epoch": 0.5589299258999941,
      "grad_norm": 1.1241527600915935,
      "learning_rate": 4.291146645958087e-06,
      "loss": 0.1561,
      "step": 19159
    },
    {
      "epoch": 0.5589590991306377,
      "grad_norm": 1.0388864135881575,
      "learning_rate": 4.29067898677421e-06,
      "loss": 0.139,
      "step": 19160
    },
    {
      "epoch": 0.5589882723612812,
      "grad_norm": 0.7738380209356532,
      "learning_rate": 4.2902113339229774e-06,
      "loss": 0.1153,
      "step": 19161
    },
    {
      "epoch": 0.5590174455919249,
      "grad_norm": 0.819588683616226,
      "learning_rate": 4.2897436874085735e-06,
      "loss": 0.1336,
      "step": 19162
    },
    {
      "epoch": 0.5590466188225685,
      "grad_norm": 1.1735502412721954,
      "learning_rate": 4.289276047235167e-06,
      "loss": 0.1464,
      "step": 19163
    },
    {
      "epoch": 0.559075792053212,
      "grad_norm": 0.8005612962686929,
      "learning_rate": 4.2888084134069335e-06,
      "loss": 0.1264,
      "step": 19164
    },
    {
      "epoch": 0.5591049652838556,
      "grad_norm": 0.8830956224221792,
      "learning_rate": 4.28834078592805e-06,
      "loss": 0.1118,
      "step": 19165
    },
    {
      "epoch": 0.5591341385144991,
      "grad_norm": 0.9955461519220135,
      "learning_rate": 4.28787316480269e-06,
      "loss": 0.133,
      "step": 19166
    },
    {
      "epoch": 0.5591633117451427,
      "grad_norm": 0.7082792680425669,
      "learning_rate": 4.287405550035026e-06,
      "loss": 0.1243,
      "step": 19167
    },
    {
      "epoch": 0.5591924849757862,
      "grad_norm": 0.7681017037077732,
      "learning_rate": 4.286937941629237e-06,
      "loss": 0.1194,
      "step": 19168
    },
    {
      "epoch": 0.5592216582064298,
      "grad_norm": 0.8187444008121079,
      "learning_rate": 4.286470339589497e-06,
      "loss": 0.1609,
      "step": 19169
    },
    {
      "epoch": 0.5592508314370733,
      "grad_norm": 0.7842607479180885,
      "learning_rate": 4.286002743919977e-06,
      "loss": 0.1358,
      "step": 19170
    },
    {
      "epoch": 0.5592800046677169,
      "grad_norm": 0.8434677490486692,
      "learning_rate": 4.2855351546248555e-06,
      "loss": 0.1118,
      "step": 19171
    },
    {
      "epoch": 0.5593091778983604,
      "grad_norm": 0.6987501642828052,
      "learning_rate": 4.285067571708307e-06,
      "loss": 0.1221,
      "step": 19172
    },
    {
      "epoch": 0.559338351129004,
      "grad_norm": 0.9471406513575809,
      "learning_rate": 4.2845999951744995e-06,
      "loss": 0.1206,
      "step": 19173
    },
    {
      "epoch": 0.5593675243596475,
      "grad_norm": 1.0593603487317498,
      "learning_rate": 4.284132425027617e-06,
      "loss": 0.1422,
      "step": 19174
    },
    {
      "epoch": 0.5593966975902912,
      "grad_norm": 0.7583069093625302,
      "learning_rate": 4.283664861271829e-06,
      "loss": 0.1103,
      "step": 19175
    },
    {
      "epoch": 0.5594258708209348,
      "grad_norm": 0.8101424809285023,
      "learning_rate": 4.283197303911308e-06,
      "loss": 0.1276,
      "step": 19176
    },
    {
      "epoch": 0.5594550440515783,
      "grad_norm": 0.943264193812396,
      "learning_rate": 4.282729752950233e-06,
      "loss": 0.1434,
      "step": 19177
    },
    {
      "epoch": 0.5594842172822219,
      "grad_norm": 0.824926061015487,
      "learning_rate": 4.282262208392775e-06,
      "loss": 0.1203,
      "step": 19178
    },
    {
      "epoch": 0.5595133905128654,
      "grad_norm": 0.7954025563054928,
      "learning_rate": 4.281794670243106e-06,
      "loss": 0.1026,
      "step": 19179
    },
    {
      "epoch": 0.559542563743509,
      "grad_norm": 0.8435891204851553,
      "learning_rate": 4.281327138505404e-06,
      "loss": 0.1344,
      "step": 19180
    },
    {
      "epoch": 0.5595717369741525,
      "grad_norm": 0.8847702424844895,
      "learning_rate": 4.2808596131838425e-06,
      "loss": 0.1279,
      "step": 19181
    },
    {
      "epoch": 0.5596009102047961,
      "grad_norm": 1.370102585499779,
      "learning_rate": 4.280392094282596e-06,
      "loss": 0.1199,
      "step": 19182
    },
    {
      "epoch": 0.5596300834354396,
      "grad_norm": 0.6293253879548771,
      "learning_rate": 4.2799245818058345e-06,
      "loss": 0.1456,
      "step": 19183
    },
    {
      "epoch": 0.5596592566660832,
      "grad_norm": 1.211199340368652,
      "learning_rate": 4.279457075757736e-06,
      "loss": 0.1244,
      "step": 19184
    },
    {
      "epoch": 0.5596884298967267,
      "grad_norm": 0.8115015571584155,
      "learning_rate": 4.278989576142471e-06,
      "loss": 0.1265,
      "step": 19185
    },
    {
      "epoch": 0.5597176031273703,
      "grad_norm": 0.7559107067971785,
      "learning_rate": 4.278522082964216e-06,
      "loss": 0.1106,
      "step": 19186
    },
    {
      "epoch": 0.5597467763580138,
      "grad_norm": 0.7125935775902695,
      "learning_rate": 4.278054596227144e-06,
      "loss": 0.1351,
      "step": 19187
    },
    {
      "epoch": 0.5597759495886574,
      "grad_norm": 0.8288329012682432,
      "learning_rate": 4.277587115935429e-06,
      "loss": 0.12,
      "step": 19188
    },
    {
      "epoch": 0.5598051228193011,
      "grad_norm": 0.7560790380864659,
      "learning_rate": 4.277119642093242e-06,
      "loss": 0.1122,
      "step": 19189
    },
    {
      "epoch": 0.5598342960499446,
      "grad_norm": 0.8941174187321133,
      "learning_rate": 4.276652174704761e-06,
      "loss": 0.1538,
      "step": 19190
    },
    {
      "epoch": 0.5598634692805882,
      "grad_norm": 0.8135442525526468,
      "learning_rate": 4.276184713774152e-06,
      "loss": 0.1106,
      "step": 19191
    },
    {
      "epoch": 0.5598926425112317,
      "grad_norm": 0.9668779603652046,
      "learning_rate": 4.275717259305596e-06,
      "loss": 0.1329,
      "step": 19192
    },
    {
      "epoch": 0.5599218157418753,
      "grad_norm": 0.9930314287723587,
      "learning_rate": 4.275249811303265e-06,
      "loss": 0.1079,
      "step": 19193
    },
    {
      "epoch": 0.5599509889725188,
      "grad_norm": 0.7501561529837268,
      "learning_rate": 4.274782369771328e-06,
      "loss": 0.1409,
      "step": 19194
    },
    {
      "epoch": 0.5599801622031624,
      "grad_norm": 1.0002464672194624,
      "learning_rate": 4.2743149347139624e-06,
      "loss": 0.1446,
      "step": 19195
    },
    {
      "epoch": 0.5600093354338059,
      "grad_norm": 0.8574637381494113,
      "learning_rate": 4.27384750613534e-06,
      "loss": 0.1245,
      "step": 19196
    },
    {
      "epoch": 0.5600385086644495,
      "grad_norm": 0.7935332889830304,
      "learning_rate": 4.273380084039631e-06,
      "loss": 0.1222,
      "step": 19197
    },
    {
      "epoch": 0.560067681895093,
      "grad_norm": 0.8225165613848285,
      "learning_rate": 4.2729126684310136e-06,
      "loss": 0.1355,
      "step": 19198
    },
    {
      "epoch": 0.5600968551257366,
      "grad_norm": 1.0342259246357455,
      "learning_rate": 4.272445259313659e-06,
      "loss": 0.1423,
      "step": 19199
    },
    {
      "epoch": 0.5601260283563801,
      "grad_norm": 0.9432765790313263,
      "learning_rate": 4.271977856691738e-06,
      "loss": 0.1444,
      "step": 19200
    },
    {
      "epoch": 0.5601552015870237,
      "grad_norm": 0.803940543852876,
      "learning_rate": 4.271510460569425e-06,
      "loss": 0.1287,
      "step": 19201
    },
    {
      "epoch": 0.5601843748176674,
      "grad_norm": 0.9773705130442183,
      "learning_rate": 4.271043070950894e-06,
      "loss": 0.1278,
      "step": 19202
    },
    {
      "epoch": 0.5602135480483109,
      "grad_norm": 0.7344465941221047,
      "learning_rate": 4.270575687840312e-06,
      "loss": 0.1094,
      "step": 19203
    },
    {
      "epoch": 0.5602427212789545,
      "grad_norm": 0.8838434826550736,
      "learning_rate": 4.270108311241861e-06,
      "loss": 0.1309,
      "step": 19204
    },
    {
      "epoch": 0.560271894509598,
      "grad_norm": 0.8994255625391175,
      "learning_rate": 4.269640941159707e-06,
      "loss": 0.141,
      "step": 19205
    },
    {
      "epoch": 0.5603010677402416,
      "grad_norm": 0.9732707594536684,
      "learning_rate": 4.269173577598025e-06,
      "loss": 0.1323,
      "step": 19206
    },
    {
      "epoch": 0.5603302409708851,
      "grad_norm": 0.7352430477521541,
      "learning_rate": 4.268706220560988e-06,
      "loss": 0.1206,
      "step": 19207
    },
    {
      "epoch": 0.5603594142015287,
      "grad_norm": 1.1259196559999454,
      "learning_rate": 4.268238870052765e-06,
      "loss": 0.1341,
      "step": 19208
    },
    {
      "epoch": 0.5603885874321722,
      "grad_norm": 1.0097374425626127,
      "learning_rate": 4.26777152607753e-06,
      "loss": 0.1619,
      "step": 19209
    },
    {
      "epoch": 0.5604177606628158,
      "grad_norm": 0.9265512576160485,
      "learning_rate": 4.2673041886394575e-06,
      "loss": 0.1451,
      "step": 19210
    },
    {
      "epoch": 0.5604469338934593,
      "grad_norm": 0.9950666042745812,
      "learning_rate": 4.266836857742718e-06,
      "loss": 0.1206,
      "step": 19211
    },
    {
      "epoch": 0.5604761071241029,
      "grad_norm": 1.1128922622827953,
      "learning_rate": 4.266369533391485e-06,
      "loss": 0.1251,
      "step": 19212
    },
    {
      "epoch": 0.5605052803547464,
      "grad_norm": 0.9989686876612985,
      "learning_rate": 4.265902215589929e-06,
      "loss": 0.1301,
      "step": 19213
    },
    {
      "epoch": 0.56053445358539,
      "grad_norm": 0.7475467204018343,
      "learning_rate": 4.265434904342223e-06,
      "loss": 0.1113,
      "step": 19214
    },
    {
      "epoch": 0.5605636268160336,
      "grad_norm": 0.9076958632300759,
      "learning_rate": 4.264967599652537e-06,
      "loss": 0.1591,
      "step": 19215
    },
    {
      "epoch": 0.5605928000466772,
      "grad_norm": 1.1850395940739917,
      "learning_rate": 4.264500301525047e-06,
      "loss": 0.1467,
      "step": 19216
    },
    {
      "epoch": 0.5606219732773208,
      "grad_norm": 0.8431486816914163,
      "learning_rate": 4.264033009963922e-06,
      "loss": 0.1459,
      "step": 19217
    },
    {
      "epoch": 0.5606511465079643,
      "grad_norm": 0.8584084368418102,
      "learning_rate": 4.263565724973335e-06,
      "loss": 0.1377,
      "step": 19218
    },
    {
      "epoch": 0.5606803197386079,
      "grad_norm": 0.9314764561785516,
      "learning_rate": 4.2630984465574565e-06,
      "loss": 0.1095,
      "step": 19219
    },
    {
      "epoch": 0.5607094929692514,
      "grad_norm": 0.8418468200486386,
      "learning_rate": 4.262631174720461e-06,
      "loss": 0.1238,
      "step": 19220
    },
    {
      "epoch": 0.560738666199895,
      "grad_norm": 0.8403701956729118,
      "learning_rate": 4.262163909466514e-06,
      "loss": 0.1225,
      "step": 19221
    },
    {
      "epoch": 0.5607678394305385,
      "grad_norm": 1.029647917924995,
      "learning_rate": 4.261696650799796e-06,
      "loss": 0.1478,
      "step": 19222
    },
    {
      "epoch": 0.5607970126611821,
      "grad_norm": 0.6898737406749272,
      "learning_rate": 4.2612293987244724e-06,
      "loss": 0.1488,
      "step": 19223
    },
    {
      "epoch": 0.5608261858918256,
      "grad_norm": 0.899877041059888,
      "learning_rate": 4.2607621532447165e-06,
      "loss": 0.1485,
      "step": 19224
    },
    {
      "epoch": 0.5608553591224692,
      "grad_norm": 0.7953760056174247,
      "learning_rate": 4.260294914364701e-06,
      "loss": 0.1338,
      "step": 19225
    },
    {
      "epoch": 0.5608845323531128,
      "grad_norm": 0.5909386877772858,
      "learning_rate": 4.259827682088594e-06,
      "loss": 0.123,
      "step": 19226
    },
    {
      "epoch": 0.5609137055837563,
      "grad_norm": 0.6730042108219717,
      "learning_rate": 4.259360456420568e-06,
      "loss": 0.1089,
      "step": 19227
    },
    {
      "epoch": 0.5609428788143999,
      "grad_norm": 0.8464745304806932,
      "learning_rate": 4.258893237364796e-06,
      "loss": 0.1294,
      "step": 19228
    },
    {
      "epoch": 0.5609720520450435,
      "grad_norm": 0.6476066777922179,
      "learning_rate": 4.25842602492545e-06,
      "loss": 0.1055,
      "step": 19229
    },
    {
      "epoch": 0.5610012252756871,
      "grad_norm": 1.07844491737013,
      "learning_rate": 4.257958819106698e-06,
      "loss": 0.141,
      "step": 19230
    },
    {
      "epoch": 0.5610303985063306,
      "grad_norm": 0.7563219447049566,
      "learning_rate": 4.257491619912712e-06,
      "loss": 0.1366,
      "step": 19231
    },
    {
      "epoch": 0.5610595717369742,
      "grad_norm": 0.7512824230749573,
      "learning_rate": 4.257024427347665e-06,
      "loss": 0.1219,
      "step": 19232
    },
    {
      "epoch": 0.5610887449676177,
      "grad_norm": 0.7878563773671461,
      "learning_rate": 4.256557241415724e-06,
      "loss": 0.1316,
      "step": 19233
    },
    {
      "epoch": 0.5611179181982613,
      "grad_norm": 0.7928603201442495,
      "learning_rate": 4.256090062121065e-06,
      "loss": 0.1207,
      "step": 19234
    },
    {
      "epoch": 0.5611470914289048,
      "grad_norm": 0.8322238051894852,
      "learning_rate": 4.255622889467855e-06,
      "loss": 0.1445,
      "step": 19235
    },
    {
      "epoch": 0.5611762646595484,
      "grad_norm": 0.8572578634645005,
      "learning_rate": 4.255155723460267e-06,
      "loss": 0.1194,
      "step": 19236
    },
    {
      "epoch": 0.561205437890192,
      "grad_norm": 1.0490382995322687,
      "learning_rate": 4.254688564102471e-06,
      "loss": 0.1489,
      "step": 19237
    },
    {
      "epoch": 0.5612346111208355,
      "grad_norm": 0.7490484906454387,
      "learning_rate": 4.254221411398637e-06,
      "loss": 0.1455,
      "step": 19238
    },
    {
      "epoch": 0.5612637843514791,
      "grad_norm": 0.7888836109931429,
      "learning_rate": 4.253754265352936e-06,
      "loss": 0.1337,
      "step": 19239
    },
    {
      "epoch": 0.5612929575821226,
      "grad_norm": 0.7320539147918943,
      "learning_rate": 4.253287125969539e-06,
      "loss": 0.1223,
      "step": 19240
    },
    {
      "epoch": 0.5613221308127662,
      "grad_norm": 0.9379249120352995,
      "learning_rate": 4.252819993252616e-06,
      "loss": 0.1507,
      "step": 19241
    },
    {
      "epoch": 0.5613513040434097,
      "grad_norm": 0.6936519850642795,
      "learning_rate": 4.252352867206339e-06,
      "loss": 0.1166,
      "step": 19242
    },
    {
      "epoch": 0.5613804772740534,
      "grad_norm": 0.7324403816158332,
      "learning_rate": 4.251885747834876e-06,
      "loss": 0.1036,
      "step": 19243
    },
    {
      "epoch": 0.5614096505046969,
      "grad_norm": 1.0492171542944357,
      "learning_rate": 4.251418635142399e-06,
      "loss": 0.1256,
      "step": 19244
    },
    {
      "epoch": 0.5614388237353405,
      "grad_norm": 0.6966023763959978,
      "learning_rate": 4.250951529133076e-06,
      "loss": 0.1481,
      "step": 19245
    },
    {
      "epoch": 0.561467996965984,
      "grad_norm": 0.8778409784254735,
      "learning_rate": 4.25048442981108e-06,
      "loss": 0.128,
      "step": 19246
    },
    {
      "epoch": 0.5614971701966276,
      "grad_norm": 0.8473416623500546,
      "learning_rate": 4.250017337180582e-06,
      "loss": 0.1057,
      "step": 19247
    },
    {
      "epoch": 0.5615263434272711,
      "grad_norm": 0.6725343787609986,
      "learning_rate": 4.249550251245748e-06,
      "loss": 0.1228,
      "step": 19248
    },
    {
      "epoch": 0.5615555166579147,
      "grad_norm": 0.7639821480113129,
      "learning_rate": 4.2490831720107514e-06,
      "loss": 0.1308,
      "step": 19249
    },
    {
      "epoch": 0.5615846898885583,
      "grad_norm": 0.8546762631832446,
      "learning_rate": 4.248616099479761e-06,
      "loss": 0.1282,
      "step": 19250
    },
    {
      "epoch": 0.5616138631192018,
      "grad_norm": 0.7126137860181593,
      "learning_rate": 4.248149033656944e-06,
      "loss": 0.1381,
      "step": 19251
    },
    {
      "epoch": 0.5616430363498454,
      "grad_norm": 0.8070827937807697,
      "learning_rate": 4.247681974546476e-06,
      "loss": 0.1258,
      "step": 19252
    },
    {
      "epoch": 0.5616722095804889,
      "grad_norm": 0.8765076472300789,
      "learning_rate": 4.247214922152523e-06,
      "loss": 0.1285,
      "step": 19253
    },
    {
      "epoch": 0.5617013828111325,
      "grad_norm": 0.7976205097523037,
      "learning_rate": 4.246747876479255e-06,
      "loss": 0.1351,
      "step": 19254
    },
    {
      "epoch": 0.561730556041776,
      "grad_norm": 0.7308835986813395,
      "learning_rate": 4.246280837530843e-06,
      "loss": 0.1523,
      "step": 19255
    },
    {
      "epoch": 0.5617597292724196,
      "grad_norm": 0.8392817980715473,
      "learning_rate": 4.245813805311455e-06,
      "loss": 0.1277,
      "step": 19256
    },
    {
      "epoch": 0.5617889025030632,
      "grad_norm": 0.6953263947730987,
      "learning_rate": 4.245346779825261e-06,
      "loss": 0.1427,
      "step": 19257
    },
    {
      "epoch": 0.5618180757337068,
      "grad_norm": 0.7919335657714905,
      "learning_rate": 4.244879761076431e-06,
      "loss": 0.1386,
      "step": 19258
    },
    {
      "epoch": 0.5618472489643503,
      "grad_norm": 0.7850194959804362,
      "learning_rate": 4.244412749069136e-06,
      "loss": 0.141,
      "step": 19259
    },
    {
      "epoch": 0.5618764221949939,
      "grad_norm": 0.8078149370581779,
      "learning_rate": 4.2439457438075415e-06,
      "loss": 0.1231,
      "step": 19260
    },
    {
      "epoch": 0.5619055954256374,
      "grad_norm": 0.8211010678317224,
      "learning_rate": 4.243478745295819e-06,
      "loss": 0.1432,
      "step": 19261
    },
    {
      "epoch": 0.561934768656281,
      "grad_norm": 0.7373873299260599,
      "learning_rate": 4.243011753538139e-06,
      "loss": 0.1204,
      "step": 19262
    },
    {
      "epoch": 0.5619639418869246,
      "grad_norm": 0.9526800720565995,
      "learning_rate": 4.242544768538667e-06,
      "loss": 0.1326,
      "step": 19263
    },
    {
      "epoch": 0.5619931151175681,
      "grad_norm": 0.8399968829161019,
      "learning_rate": 4.2420777903015765e-06,
      "loss": 0.1221,
      "step": 19264
    },
    {
      "epoch": 0.5620222883482117,
      "grad_norm": 0.8356319202526975,
      "learning_rate": 4.241610818831034e-06,
      "loss": 0.1368,
      "step": 19265
    },
    {
      "epoch": 0.5620514615788552,
      "grad_norm": 0.8500670034443689,
      "learning_rate": 4.241143854131209e-06,
      "loss": 0.1593,
      "step": 19266
    },
    {
      "epoch": 0.5620806348094988,
      "grad_norm": 0.7891021294403209,
      "learning_rate": 4.240676896206272e-06,
      "loss": 0.1282,
      "step": 19267
    },
    {
      "epoch": 0.5621098080401423,
      "grad_norm": 0.7078533875865706,
      "learning_rate": 4.240209945060389e-06,
      "loss": 0.1212,
      "step": 19268
    },
    {
      "epoch": 0.5621389812707859,
      "grad_norm": 1.0192459342862867,
      "learning_rate": 4.239743000697729e-06,
      "loss": 0.1327,
      "step": 19269
    },
    {
      "epoch": 0.5621681545014295,
      "grad_norm": 1.266716519773206,
      "learning_rate": 4.2392760631224635e-06,
      "loss": 0.1189,
      "step": 19270
    },
    {
      "epoch": 0.5621973277320731,
      "grad_norm": 0.9068333846726923,
      "learning_rate": 4.2388091323387595e-06,
      "loss": 0.1145,
      "step": 19271
    },
    {
      "epoch": 0.5622265009627166,
      "grad_norm": 0.7834339806082172,
      "learning_rate": 4.238342208350786e-06,
      "loss": 0.1341,
      "step": 19272
    },
    {
      "epoch": 0.5622556741933602,
      "grad_norm": 1.0955118180741146,
      "learning_rate": 4.237875291162712e-06,
      "loss": 0.1173,
      "step": 19273
    },
    {
      "epoch": 0.5622848474240038,
      "grad_norm": 1.1646390911415243,
      "learning_rate": 4.237408380778705e-06,
      "loss": 0.1154,
      "step": 19274
    },
    {
      "epoch": 0.5623140206546473,
      "grad_norm": 0.6779539983296421,
      "learning_rate": 4.236941477202932e-06,
      "loss": 0.124,
      "step": 19275
    },
    {
      "epoch": 0.5623431938852909,
      "grad_norm": 1.0207774300517027,
      "learning_rate": 4.236474580439565e-06,
      "loss": 0.1137,
      "step": 19276
    },
    {
      "epoch": 0.5623723671159344,
      "grad_norm": 0.871406900651337,
      "learning_rate": 4.236007690492772e-06,
      "loss": 0.1435,
      "step": 19277
    },
    {
      "epoch": 0.562401540346578,
      "grad_norm": 0.9679563557813425,
      "learning_rate": 4.2355408073667185e-06,
      "loss": 0.1303,
      "step": 19278
    },
    {
      "epoch": 0.5624307135772215,
      "grad_norm": 0.9602230166493325,
      "learning_rate": 4.235073931065574e-06,
      "loss": 0.1423,
      "step": 19279
    },
    {
      "epoch": 0.5624598868078651,
      "grad_norm": 0.7242996991793469,
      "learning_rate": 4.234607061593508e-06,
      "loss": 0.1333,
      "step": 19280
    },
    {
      "epoch": 0.5624890600385086,
      "grad_norm": 0.9829676891679778,
      "learning_rate": 4.234140198954686e-06,
      "loss": 0.1326,
      "step": 19281
    },
    {
      "epoch": 0.5625182332691522,
      "grad_norm": 0.8301560375608132,
      "learning_rate": 4.233673343153278e-06,
      "loss": 0.1111,
      "step": 19282
    },
    {
      "epoch": 0.5625474064997957,
      "grad_norm": 0.7619115022470225,
      "learning_rate": 4.233206494193452e-06,
      "loss": 0.147,
      "step": 19283
    },
    {
      "epoch": 0.5625765797304394,
      "grad_norm": 0.7479633971938229,
      "learning_rate": 4.232739652079374e-06,
      "loss": 0.1466,
      "step": 19284
    },
    {
      "epoch": 0.562605752961083,
      "grad_norm": 0.8556559103942638,
      "learning_rate": 4.232272816815215e-06,
      "loss": 0.1273,
      "step": 19285
    },
    {
      "epoch": 0.5626349261917265,
      "grad_norm": 0.7495067380402984,
      "learning_rate": 4.23180598840514e-06,
      "loss": 0.1483,
      "step": 19286
    },
    {
      "epoch": 0.5626640994223701,
      "grad_norm": 0.7616907057061205,
      "learning_rate": 4.2313391668533175e-06,
      "loss": 0.1179,
      "step": 19287
    },
    {
      "epoch": 0.5626932726530136,
      "grad_norm": 1.0785160648338838,
      "learning_rate": 4.230872352163915e-06,
      "loss": 0.1388,
      "step": 19288
    },
    {
      "epoch": 0.5627224458836572,
      "grad_norm": 0.9011776848206084,
      "learning_rate": 4.230405544341103e-06,
      "loss": 0.1333,
      "step": 19289
    },
    {
      "epoch": 0.5627516191143007,
      "grad_norm": 0.859255093890849,
      "learning_rate": 4.229938743389045e-06,
      "loss": 0.1311,
      "step": 19290
    },
    {
      "epoch": 0.5627807923449443,
      "grad_norm": 0.8478954436826731,
      "learning_rate": 4.229471949311909e-06,
      "loss": 0.1317,
      "step": 19291
    },
    {
      "epoch": 0.5628099655755878,
      "grad_norm": 0.6805753181191991,
      "learning_rate": 4.229005162113866e-06,
      "loss": 0.1081,
      "step": 19292
    },
    {
      "epoch": 0.5628391388062314,
      "grad_norm": 1.1931656618607633,
      "learning_rate": 4.228538381799077e-06,
      "loss": 0.1148,
      "step": 19293
    },
    {
      "epoch": 0.5628683120368749,
      "grad_norm": 0.7888856696280375,
      "learning_rate": 4.228071608371717e-06,
      "loss": 0.1324,
      "step": 19294
    },
    {
      "epoch": 0.5628974852675185,
      "grad_norm": 0.8647763043827412,
      "learning_rate": 4.227604841835948e-06,
      "loss": 0.1335,
      "step": 19295
    },
    {
      "epoch": 0.562926658498162,
      "grad_norm": 1.0025047658229442,
      "learning_rate": 4.227138082195939e-06,
      "loss": 0.1331,
      "step": 19296
    },
    {
      "epoch": 0.5629558317288057,
      "grad_norm": 0.8419858458809792,
      "learning_rate": 4.226671329455856e-06,
      "loss": 0.1216,
      "step": 19297
    },
    {
      "epoch": 0.5629850049594493,
      "grad_norm": 0.9431387653014118,
      "learning_rate": 4.226204583619868e-06,
      "loss": 0.1449,
      "step": 19298
    },
    {
      "epoch": 0.5630141781900928,
      "grad_norm": 1.0837020257541334,
      "learning_rate": 4.225737844692138e-06,
      "loss": 0.1242,
      "step": 19299
    },
    {
      "epoch": 0.5630433514207364,
      "grad_norm": 0.8552785217938188,
      "learning_rate": 4.225271112676837e-06,
      "loss": 0.1235,
      "step": 19300
    },
    {
      "epoch": 0.5630725246513799,
      "grad_norm": 0.7848543573552564,
      "learning_rate": 4.224804387578131e-06,
      "loss": 0.1374,
      "step": 19301
    },
    {
      "epoch": 0.5631016978820235,
      "grad_norm": 0.8830283325085005,
      "learning_rate": 4.224337669400188e-06,
      "loss": 0.1422,
      "step": 19302
    },
    {
      "epoch": 0.563130871112667,
      "grad_norm": 0.9473238107477748,
      "learning_rate": 4.223870958147171e-06,
      "loss": 0.1308,
      "step": 19303
    },
    {
      "epoch": 0.5631600443433106,
      "grad_norm": 0.8144301208569833,
      "learning_rate": 4.22340425382325e-06,
      "loss": 0.1341,
      "step": 19304
    },
    {
      "epoch": 0.5631892175739541,
      "grad_norm": 0.912128089231077,
      "learning_rate": 4.222937556432588e-06,
      "loss": 0.1387,
      "step": 19305
    },
    {
      "epoch": 0.5632183908045977,
      "grad_norm": 1.0681510926519333,
      "learning_rate": 4.222470865979356e-06,
      "loss": 0.1506,
      "step": 19306
    },
    {
      "epoch": 0.5632475640352412,
      "grad_norm": 1.0162766848870537,
      "learning_rate": 4.2220041824677194e-06,
      "loss": 0.1311,
      "step": 19307
    },
    {
      "epoch": 0.5632767372658848,
      "grad_norm": 0.8632845629407573,
      "learning_rate": 4.221537505901843e-06,
      "loss": 0.1523,
      "step": 19308
    },
    {
      "epoch": 0.5633059104965283,
      "grad_norm": 0.9065762068080127,
      "learning_rate": 4.221070836285893e-06,
      "loss": 0.117,
      "step": 19309
    },
    {
      "epoch": 0.5633350837271719,
      "grad_norm": 0.9711847233830024,
      "learning_rate": 4.220604173624036e-06,
      "loss": 0.1302,
      "step": 19310
    },
    {
      "epoch": 0.5633642569578156,
      "grad_norm": 0.9597026554698108,
      "learning_rate": 4.22013751792044e-06,
      "loss": 0.139,
      "step": 19311
    },
    {
      "epoch": 0.5633934301884591,
      "grad_norm": 0.9967824934046925,
      "learning_rate": 4.219670869179271e-06,
      "loss": 0.1472,
      "step": 19312
    },
    {
      "epoch": 0.5634226034191027,
      "grad_norm": 0.920229970603909,
      "learning_rate": 4.219204227404693e-06,
      "loss": 0.129,
      "step": 19313
    },
    {
      "epoch": 0.5634517766497462,
      "grad_norm": 0.8940024553661092,
      "learning_rate": 4.218737592600873e-06,
      "loss": 0.1218,
      "step": 19314
    },
    {
      "epoch": 0.5634809498803898,
      "grad_norm": 0.9780462343756766,
      "learning_rate": 4.218270964771979e-06,
      "loss": 0.1385,
      "step": 19315
    },
    {
      "epoch": 0.5635101231110333,
      "grad_norm": 0.7950557499939994,
      "learning_rate": 4.217804343922173e-06,
      "loss": 0.1597,
      "step": 19316
    },
    {
      "epoch": 0.5635392963416769,
      "grad_norm": 1.1356372175957845,
      "learning_rate": 4.217337730055624e-06,
      "loss": 0.1535,
      "step": 19317
    },
    {
      "epoch": 0.5635684695723204,
      "grad_norm": 0.926451669767488,
      "learning_rate": 4.216871123176498e-06,
      "loss": 0.106,
      "step": 19318
    },
    {
      "epoch": 0.563597642802964,
      "grad_norm": 0.9967468346324517,
      "learning_rate": 4.21640452328896e-06,
      "loss": 0.1296,
      "step": 19319
    },
    {
      "epoch": 0.5636268160336075,
      "grad_norm": 1.1076081985968935,
      "learning_rate": 4.215937930397173e-06,
      "loss": 0.1277,
      "step": 19320
    },
    {
      "epoch": 0.5636559892642511,
      "grad_norm": 0.8887507150342753,
      "learning_rate": 4.215471344505307e-06,
      "loss": 0.1316,
      "step": 19321
    },
    {
      "epoch": 0.5636851624948946,
      "grad_norm": 0.8451591389386952,
      "learning_rate": 4.215004765617522e-06,
      "loss": 0.117,
      "step": 19322
    },
    {
      "epoch": 0.5637143357255382,
      "grad_norm": 1.0267875290904167,
      "learning_rate": 4.21453819373799e-06,
      "loss": 0.1291,
      "step": 19323
    },
    {
      "epoch": 0.5637435089561819,
      "grad_norm": 1.0539471781510779,
      "learning_rate": 4.214071628870874e-06,
      "loss": 0.1288,
      "step": 19324
    },
    {
      "epoch": 0.5637726821868254,
      "grad_norm": 0.8185991522525249,
      "learning_rate": 4.213605071020338e-06,
      "loss": 0.1271,
      "step": 19325
    },
    {
      "epoch": 0.563801855417469,
      "grad_norm": 1.2374200149474295,
      "learning_rate": 4.213138520190548e-06,
      "loss": 0.1244,
      "step": 19326
    },
    {
      "epoch": 0.5638310286481125,
      "grad_norm": 1.0267053419097691,
      "learning_rate": 4.212671976385671e-06,
      "loss": 0.1441,
      "step": 19327
    },
    {
      "epoch": 0.5638602018787561,
      "grad_norm": 0.7783242735958762,
      "learning_rate": 4.212205439609868e-06,
      "loss": 0.1242,
      "step": 19328
    },
    {
      "epoch": 0.5638893751093996,
      "grad_norm": 1.0591920770891756,
      "learning_rate": 4.211738909867309e-06,
      "loss": 0.1207,
      "step": 19329
    },
    {
      "epoch": 0.5639185483400432,
      "grad_norm": 0.9302342414014498,
      "learning_rate": 4.211272387162155e-06,
      "loss": 0.1296,
      "step": 19330
    },
    {
      "epoch": 0.5639477215706867,
      "grad_norm": 0.8021722036322011,
      "learning_rate": 4.210805871498575e-06,
      "loss": 0.1134,
      "step": 19331
    },
    {
      "epoch": 0.5639768948013303,
      "grad_norm": 0.8071094129094918,
      "learning_rate": 4.210339362880731e-06,
      "loss": 0.1402,
      "step": 19332
    },
    {
      "epoch": 0.5640060680319738,
      "grad_norm": 1.104945245826796,
      "learning_rate": 4.209872861312788e-06,
      "loss": 0.1511,
      "step": 19333
    },
    {
      "epoch": 0.5640352412626174,
      "grad_norm": 1.158047827388993,
      "learning_rate": 4.209406366798911e-06,
      "loss": 0.1208,
      "step": 19334
    },
    {
      "epoch": 0.564064414493261,
      "grad_norm": 0.7866838173179109,
      "learning_rate": 4.208939879343266e-06,
      "loss": 0.1337,
      "step": 19335
    },
    {
      "epoch": 0.5640935877239045,
      "grad_norm": 0.9365522579795171,
      "learning_rate": 4.208473398950016e-06,
      "loss": 0.139,
      "step": 19336
    },
    {
      "epoch": 0.564122760954548,
      "grad_norm": 1.0414850520402614,
      "learning_rate": 4.208006925623329e-06,
      "loss": 0.1389,
      "step": 19337
    },
    {
      "epoch": 0.5641519341851917,
      "grad_norm": 0.8206167461272429,
      "learning_rate": 4.207540459367365e-06,
      "loss": 0.1438,
      "step": 19338
    },
    {
      "epoch": 0.5641811074158353,
      "grad_norm": 1.0190539656046538,
      "learning_rate": 4.207074000186291e-06,
      "loss": 0.1191,
      "step": 19339
    },
    {
      "epoch": 0.5642102806464788,
      "grad_norm": 1.101606784957744,
      "learning_rate": 4.20660754808427e-06,
      "loss": 0.1266,
      "step": 19340
    },
    {
      "epoch": 0.5642394538771224,
      "grad_norm": 0.8097156329379198,
      "learning_rate": 4.20614110306547e-06,
      "loss": 0.1157,
      "step": 19341
    },
    {
      "epoch": 0.5642686271077659,
      "grad_norm": 1.0365985115715617,
      "learning_rate": 4.205674665134051e-06,
      "loss": 0.111,
      "step": 19342
    },
    {
      "epoch": 0.5642978003384095,
      "grad_norm": 0.9027453339980929,
      "learning_rate": 4.205208234294179e-06,
      "loss": 0.1238,
      "step": 19343
    },
    {
      "epoch": 0.564326973569053,
      "grad_norm": 0.9278063332020796,
      "learning_rate": 4.204741810550018e-06,
      "loss": 0.1302,
      "step": 19344
    },
    {
      "epoch": 0.5643561467996966,
      "grad_norm": 0.9972173466492001,
      "learning_rate": 4.204275393905734e-06,
      "loss": 0.1413,
      "step": 19345
    },
    {
      "epoch": 0.5643853200303401,
      "grad_norm": 0.8389099482289466,
      "learning_rate": 4.203808984365487e-06,
      "loss": 0.1205,
      "step": 19346
    },
    {
      "epoch": 0.5644144932609837,
      "grad_norm": 0.8919946807713786,
      "learning_rate": 4.203342581933444e-06,
      "loss": 0.1502,
      "step": 19347
    },
    {
      "epoch": 0.5644436664916272,
      "grad_norm": 0.9556445667187019,
      "learning_rate": 4.202876186613769e-06,
      "loss": 0.1357,
      "step": 19348
    },
    {
      "epoch": 0.5644728397222708,
      "grad_norm": 0.9808822001374411,
      "learning_rate": 4.2024097984106254e-06,
      "loss": 0.13,
      "step": 19349
    },
    {
      "epoch": 0.5645020129529144,
      "grad_norm": 0.692672238631294,
      "learning_rate": 4.201943417328176e-06,
      "loss": 0.1101,
      "step": 19350
    },
    {
      "epoch": 0.564531186183558,
      "grad_norm": 1.051160649074656,
      "learning_rate": 4.2014770433705856e-06,
      "loss": 0.1102,
      "step": 19351
    },
    {
      "epoch": 0.5645603594142016,
      "grad_norm": 0.7781561212765957,
      "learning_rate": 4.201010676542016e-06,
      "loss": 0.1401,
      "step": 19352
    },
    {
      "epoch": 0.5645895326448451,
      "grad_norm": 0.9035506015524689,
      "learning_rate": 4.200544316846633e-06,
      "loss": 0.143,
      "step": 19353
    },
    {
      "epoch": 0.5646187058754887,
      "grad_norm": 0.9088489089560662,
      "learning_rate": 4.200077964288601e-06,
      "loss": 0.1137,
      "step": 19354
    },
    {
      "epoch": 0.5646478791061322,
      "grad_norm": 0.7259117047558888,
      "learning_rate": 4.199611618872081e-06,
      "loss": 0.1347,
      "step": 19355
    },
    {
      "epoch": 0.5646770523367758,
      "grad_norm": 0.9965548007664105,
      "learning_rate": 4.199145280601238e-06,
      "loss": 0.1262,
      "step": 19356
    },
    {
      "epoch": 0.5647062255674193,
      "grad_norm": 0.8341149270966275,
      "learning_rate": 4.1986789494802345e-06,
      "loss": 0.1371,
      "step": 19357
    },
    {
      "epoch": 0.5647353987980629,
      "grad_norm": 0.7118071097143382,
      "learning_rate": 4.198212625513232e-06,
      "loss": 0.1264,
      "step": 19358
    },
    {
      "epoch": 0.5647645720287064,
      "grad_norm": 0.9634689955515569,
      "learning_rate": 4.197746308704399e-06,
      "loss": 0.1302,
      "step": 19359
    },
    {
      "epoch": 0.56479374525935,
      "grad_norm": 0.9066515208854374,
      "learning_rate": 4.1972799990578934e-06,
      "loss": 0.1461,
      "step": 19360
    },
    {
      "epoch": 0.5648229184899936,
      "grad_norm": 0.9874836309580389,
      "learning_rate": 4.1968136965778805e-06,
      "loss": 0.1336,
      "step": 19361
    },
    {
      "epoch": 0.5648520917206371,
      "grad_norm": 0.9584657928992774,
      "learning_rate": 4.196347401268525e-06,
      "loss": 0.1101,
      "step": 19362
    },
    {
      "epoch": 0.5648812649512807,
      "grad_norm": 0.8677620847531943,
      "learning_rate": 4.195881113133986e-06,
      "loss": 0.1432,
      "step": 19363
    },
    {
      "epoch": 0.5649104381819242,
      "grad_norm": 0.8374568733142175,
      "learning_rate": 4.1954148321784285e-06,
      "loss": 0.1142,
      "step": 19364
    },
    {
      "epoch": 0.5649396114125679,
      "grad_norm": 0.6635602068403426,
      "learning_rate": 4.1949485584060155e-06,
      "loss": 0.1411,
      "step": 19365
    },
    {
      "epoch": 0.5649687846432114,
      "grad_norm": 0.8396410835996723,
      "learning_rate": 4.19448229182091e-06,
      "loss": 0.1189,
      "step": 19366
    },
    {
      "epoch": 0.564997957873855,
      "grad_norm": 0.8711314366463256,
      "learning_rate": 4.194016032427275e-06,
      "loss": 0.13,
      "step": 19367
    },
    {
      "epoch": 0.5650271311044985,
      "grad_norm": 0.7495674259968295,
      "learning_rate": 4.193549780229273e-06,
      "loss": 0.0982,
      "step": 19368
    },
    {
      "epoch": 0.5650563043351421,
      "grad_norm": 0.994828317206556,
      "learning_rate": 4.193083535231064e-06,
      "loss": 0.1193,
      "step": 19369
    },
    {
      "epoch": 0.5650854775657856,
      "grad_norm": 0.8031371144023249,
      "learning_rate": 4.192617297436812e-06,
      "loss": 0.1428,
      "step": 19370
    },
    {
      "epoch": 0.5651146507964292,
      "grad_norm": 1.078601565853731,
      "learning_rate": 4.192151066850682e-06,
      "loss": 0.1676,
      "step": 19371
    },
    {
      "epoch": 0.5651438240270727,
      "grad_norm": 0.9624650306696038,
      "learning_rate": 4.191684843476834e-06,
      "loss": 0.1523,
      "step": 19372
    },
    {
      "epoch": 0.5651729972577163,
      "grad_norm": 0.757665184886242,
      "learning_rate": 4.191218627319431e-06,
      "loss": 0.1431,
      "step": 19373
    },
    {
      "epoch": 0.5652021704883599,
      "grad_norm": 0.694972450711904,
      "learning_rate": 4.190752418382635e-06,
      "loss": 0.1185,
      "step": 19374
    },
    {
      "epoch": 0.5652313437190034,
      "grad_norm": 0.8081352924546684,
      "learning_rate": 4.190286216670608e-06,
      "loss": 0.1151,
      "step": 19375
    },
    {
      "epoch": 0.565260516949647,
      "grad_norm": 0.903095796203024,
      "learning_rate": 4.189820022187511e-06,
      "loss": 0.1361,
      "step": 19376
    },
    {
      "epoch": 0.5652896901802905,
      "grad_norm": 1.3446850188910406,
      "learning_rate": 4.189353834937509e-06,
      "loss": 0.1439,
      "step": 19377
    },
    {
      "epoch": 0.5653188634109342,
      "grad_norm": 0.8425675405138412,
      "learning_rate": 4.188887654924761e-06,
      "loss": 0.1177,
      "step": 19378
    },
    {
      "epoch": 0.5653480366415777,
      "grad_norm": 0.7948812385773764,
      "learning_rate": 4.1884214821534334e-06,
      "loss": 0.1101,
      "step": 19379
    },
    {
      "epoch": 0.5653772098722213,
      "grad_norm": 0.7755867743772799,
      "learning_rate": 4.187955316627683e-06,
      "loss": 0.1312,
      "step": 19380
    },
    {
      "epoch": 0.5654063831028648,
      "grad_norm": 1.140046919943364,
      "learning_rate": 4.187489158351674e-06,
      "loss": 0.1558,
      "step": 19381
    },
    {
      "epoch": 0.5654355563335084,
      "grad_norm": 0.8086468288477293,
      "learning_rate": 4.187023007329566e-06,
      "loss": 0.1373,
      "step": 19382
    },
    {
      "epoch": 0.565464729564152,
      "grad_norm": 0.7529757380862866,
      "learning_rate": 4.186556863565524e-06,
      "loss": 0.1286,
      "step": 19383
    },
    {
      "epoch": 0.5654939027947955,
      "grad_norm": 1.3346562066612684,
      "learning_rate": 4.18609072706371e-06,
      "loss": 0.1224,
      "step": 19384
    },
    {
      "epoch": 0.565523076025439,
      "grad_norm": 0.7560453375313204,
      "learning_rate": 4.185624597828282e-06,
      "loss": 0.1085,
      "step": 19385
    },
    {
      "epoch": 0.5655522492560826,
      "grad_norm": 0.7212282462561252,
      "learning_rate": 4.185158475863403e-06,
      "loss": 0.1275,
      "step": 19386
    },
    {
      "epoch": 0.5655814224867262,
      "grad_norm": 0.7374292874245314,
      "learning_rate": 4.184692361173236e-06,
      "loss": 0.1114,
      "step": 19387
    },
    {
      "epoch": 0.5656105957173697,
      "grad_norm": 0.9391013064283164,
      "learning_rate": 4.184226253761937e-06,
      "loss": 0.1291,
      "step": 19388
    },
    {
      "epoch": 0.5656397689480133,
      "grad_norm": 0.7647587171548582,
      "learning_rate": 4.183760153633675e-06,
      "loss": 0.1228,
      "step": 19389
    },
    {
      "epoch": 0.5656689421786568,
      "grad_norm": 0.9987100527782217,
      "learning_rate": 4.183294060792606e-06,
      "loss": 0.1368,
      "step": 19390
    },
    {
      "epoch": 0.5656981154093004,
      "grad_norm": 0.8860530588359792,
      "learning_rate": 4.182827975242894e-06,
      "loss": 0.134,
      "step": 19391
    },
    {
      "epoch": 0.565727288639944,
      "grad_norm": 1.0873575091132723,
      "learning_rate": 4.182361896988699e-06,
      "loss": 0.1132,
      "step": 19392
    },
    {
      "epoch": 0.5657564618705876,
      "grad_norm": 0.842081833139939,
      "learning_rate": 4.18189582603418e-06,
      "loss": 0.14,
      "step": 19393
    },
    {
      "epoch": 0.5657856351012311,
      "grad_norm": 0.8752384891404035,
      "learning_rate": 4.1814297623835e-06,
      "loss": 0.1287,
      "step": 19394
    },
    {
      "epoch": 0.5658148083318747,
      "grad_norm": 0.8753769206838534,
      "learning_rate": 4.18096370604082e-06,
      "loss": 0.1291,
      "step": 19395
    },
    {
      "epoch": 0.5658439815625183,
      "grad_norm": 1.5124093889687904,
      "learning_rate": 4.1804976570103e-06,
      "loss": 0.1511,
      "step": 19396
    },
    {
      "epoch": 0.5658731547931618,
      "grad_norm": 0.8637210905796692,
      "learning_rate": 4.180031615296103e-06,
      "loss": 0.1413,
      "step": 19397
    },
    {
      "epoch": 0.5659023280238054,
      "grad_norm": 0.9549525126186816,
      "learning_rate": 4.179565580902387e-06,
      "loss": 0.1313,
      "step": 19398
    },
    {
      "epoch": 0.5659315012544489,
      "grad_norm": 0.7949894880690509,
      "learning_rate": 4.179099553833314e-06,
      "loss": 0.1209,
      "step": 19399
    },
    {
      "epoch": 0.5659606744850925,
      "grad_norm": 0.8830952411828071,
      "learning_rate": 4.178633534093043e-06,
      "loss": 0.1121,
      "step": 19400
    },
    {
      "epoch": 0.565989847715736,
      "grad_norm": 0.8183061514539053,
      "learning_rate": 4.178167521685737e-06,
      "loss": 0.1196,
      "step": 19401
    },
    {
      "epoch": 0.5660190209463796,
      "grad_norm": 0.8907210383788373,
      "learning_rate": 4.177701516615555e-06,
      "loss": 0.1609,
      "step": 19402
    },
    {
      "epoch": 0.5660481941770231,
      "grad_norm": 0.7892265729820176,
      "learning_rate": 4.177235518886657e-06,
      "loss": 0.1333,
      "step": 19403
    },
    {
      "epoch": 0.5660773674076667,
      "grad_norm": 0.7893912092104693,
      "learning_rate": 4.176769528503205e-06,
      "loss": 0.1162,
      "step": 19404
    },
    {
      "epoch": 0.5661065406383103,
      "grad_norm": 0.8847707860248508,
      "learning_rate": 4.176303545469358e-06,
      "loss": 0.1257,
      "step": 19405
    },
    {
      "epoch": 0.5661357138689539,
      "grad_norm": 1.0207630139430863,
      "learning_rate": 4.175837569789274e-06,
      "loss": 0.1441,
      "step": 19406
    },
    {
      "epoch": 0.5661648870995974,
      "grad_norm": 0.714577325556803,
      "learning_rate": 4.175371601467117e-06,
      "loss": 0.1155,
      "step": 19407
    },
    {
      "epoch": 0.566194060330241,
      "grad_norm": 0.9138541251039776,
      "learning_rate": 4.1749056405070455e-06,
      "loss": 0.1312,
      "step": 19408
    },
    {
      "epoch": 0.5662232335608846,
      "grad_norm": 0.8673457643088875,
      "learning_rate": 4.1744396869132205e-06,
      "loss": 0.1313,
      "step": 19409
    },
    {
      "epoch": 0.5662524067915281,
      "grad_norm": 1.0043121989356136,
      "learning_rate": 4.1739737406898e-06,
      "loss": 0.1313,
      "step": 19410
    },
    {
      "epoch": 0.5662815800221717,
      "grad_norm": 0.8286278643874674,
      "learning_rate": 4.173507801840945e-06,
      "loss": 0.1325,
      "step": 19411
    },
    {
      "epoch": 0.5663107532528152,
      "grad_norm": 1.0226878988018635,
      "learning_rate": 4.173041870370813e-06,
      "loss": 0.1376,
      "step": 19412
    },
    {
      "epoch": 0.5663399264834588,
      "grad_norm": 0.9393169483809087,
      "learning_rate": 4.1725759462835674e-06,
      "loss": 0.1266,
      "step": 19413
    },
    {
      "epoch": 0.5663690997141023,
      "grad_norm": 0.8350351829037099,
      "learning_rate": 4.172110029583368e-06,
      "loss": 0.1118,
      "step": 19414
    },
    {
      "epoch": 0.5663982729447459,
      "grad_norm": 1.0055518985598915,
      "learning_rate": 4.171644120274371e-06,
      "loss": 0.1327,
      "step": 19415
    },
    {
      "epoch": 0.5664274461753894,
      "grad_norm": 0.8402808204010609,
      "learning_rate": 4.171178218360737e-06,
      "loss": 0.1214,
      "step": 19416
    },
    {
      "epoch": 0.566456619406033,
      "grad_norm": 0.9098556148388088,
      "learning_rate": 4.170712323846628e-06,
      "loss": 0.1282,
      "step": 19417
    },
    {
      "epoch": 0.5664857926366765,
      "grad_norm": 0.9596209886171775,
      "learning_rate": 4.170246436736198e-06,
      "loss": 0.1071,
      "step": 19418
    },
    {
      "epoch": 0.5665149658673202,
      "grad_norm": 0.8622785281876078,
      "learning_rate": 4.169780557033612e-06,
      "loss": 0.1278,
      "step": 19419
    },
    {
      "epoch": 0.5665441390979638,
      "grad_norm": 0.7480267302996062,
      "learning_rate": 4.169314684743027e-06,
      "loss": 0.1252,
      "step": 19420
    },
    {
      "epoch": 0.5665733123286073,
      "grad_norm": 0.8038013696081622,
      "learning_rate": 4.168848819868601e-06,
      "loss": 0.1316,
      "step": 19421
    },
    {
      "epoch": 0.5666024855592509,
      "grad_norm": 1.1371605648032899,
      "learning_rate": 4.168382962414496e-06,
      "loss": 0.127,
      "step": 19422
    },
    {
      "epoch": 0.5666316587898944,
      "grad_norm": 1.2734743904923689,
      "learning_rate": 4.167917112384869e-06,
      "loss": 0.1498,
      "step": 19423
    },
    {
      "epoch": 0.566660832020538,
      "grad_norm": 1.0318764991978846,
      "learning_rate": 4.167451269783878e-06,
      "loss": 0.1565,
      "step": 19424
    },
    {
      "epoch": 0.5666900052511815,
      "grad_norm": 0.9522348872724322,
      "learning_rate": 4.166985434615683e-06,
      "loss": 0.1351,
      "step": 19425
    },
    {
      "epoch": 0.5667191784818251,
      "grad_norm": 0.9402296360804302,
      "learning_rate": 4.166519606884445e-06,
      "loss": 0.1187,
      "step": 19426
    },
    {
      "epoch": 0.5667483517124686,
      "grad_norm": 0.9181844994976865,
      "learning_rate": 4.166053786594322e-06,
      "loss": 0.1351,
      "step": 19427
    },
    {
      "epoch": 0.5667775249431122,
      "grad_norm": 0.8209798989749533,
      "learning_rate": 4.16558797374947e-06,
      "loss": 0.1388,
      "step": 19428
    },
    {
      "epoch": 0.5668066981737557,
      "grad_norm": 0.898912837960602,
      "learning_rate": 4.165122168354049e-06,
      "loss": 0.1321,
      "step": 19429
    },
    {
      "epoch": 0.5668358714043993,
      "grad_norm": 0.9807008826655649,
      "learning_rate": 4.164656370412218e-06,
      "loss": 0.1385,
      "step": 19430
    },
    {
      "epoch": 0.5668650446350428,
      "grad_norm": 0.8622015768296055,
      "learning_rate": 4.164190579928137e-06,
      "loss": 0.1286,
      "step": 19431
    },
    {
      "epoch": 0.5668942178656865,
      "grad_norm": 0.7912732824512433,
      "learning_rate": 4.163724796905961e-06,
      "loss": 0.1061,
      "step": 19432
    },
    {
      "epoch": 0.56692339109633,
      "grad_norm": 0.833736347636523,
      "learning_rate": 4.163259021349852e-06,
      "loss": 0.1215,
      "step": 19433
    },
    {
      "epoch": 0.5669525643269736,
      "grad_norm": 0.7638380756488233,
      "learning_rate": 4.162793253263967e-06,
      "loss": 0.1131,
      "step": 19434
    },
    {
      "epoch": 0.5669817375576172,
      "grad_norm": 0.7640489158978119,
      "learning_rate": 4.162327492652463e-06,
      "loss": 0.1177,
      "step": 19435
    },
    {
      "epoch": 0.5670109107882607,
      "grad_norm": 1.0578159091295236,
      "learning_rate": 4.161861739519498e-06,
      "loss": 0.139,
      "step": 19436
    },
    {
      "epoch": 0.5670400840189043,
      "grad_norm": 0.810072989231744,
      "learning_rate": 4.161395993869232e-06,
      "loss": 0.1262,
      "step": 19437
    },
    {
      "epoch": 0.5670692572495478,
      "grad_norm": 0.9400030183406941,
      "learning_rate": 4.160930255705824e-06,
      "loss": 0.1678,
      "step": 19438
    },
    {
      "epoch": 0.5670984304801914,
      "grad_norm": 0.6954126633205726,
      "learning_rate": 4.16046452503343e-06,
      "loss": 0.1263,
      "step": 19439
    },
    {
      "epoch": 0.5671276037108349,
      "grad_norm": 0.7525121649948221,
      "learning_rate": 4.159998801856207e-06,
      "loss": 0.1477,
      "step": 19440
    },
    {
      "epoch": 0.5671567769414785,
      "grad_norm": 0.9595718520300346,
      "learning_rate": 4.1595330861783145e-06,
      "loss": 0.1292,
      "step": 19441
    },
    {
      "epoch": 0.567185950172122,
      "grad_norm": 0.7302068471788169,
      "learning_rate": 4.15906737800391e-06,
      "loss": 0.1202,
      "step": 19442
    },
    {
      "epoch": 0.5672151234027656,
      "grad_norm": 0.7817238203916237,
      "learning_rate": 4.158601677337151e-06,
      "loss": 0.1483,
      "step": 19443
    },
    {
      "epoch": 0.5672442966334091,
      "grad_norm": 0.6014948314245426,
      "learning_rate": 4.158135984182197e-06,
      "loss": 0.1125,
      "step": 19444
    },
    {
      "epoch": 0.5672734698640527,
      "grad_norm": 0.9995087962528116,
      "learning_rate": 4.157670298543203e-06,
      "loss": 0.1526,
      "step": 19445
    },
    {
      "epoch": 0.5673026430946964,
      "grad_norm": 0.8340204683298592,
      "learning_rate": 4.157204620424326e-06,
      "loss": 0.1301,
      "step": 19446
    },
    {
      "epoch": 0.5673318163253399,
      "grad_norm": 1.2041059476698694,
      "learning_rate": 4.156738949829728e-06,
      "loss": 0.1205,
      "step": 19447
    },
    {
      "epoch": 0.5673609895559835,
      "grad_norm": 0.7920740428638681,
      "learning_rate": 4.156273286763559e-06,
      "loss": 0.1312,
      "step": 19448
    },
    {
      "epoch": 0.567390162786627,
      "grad_norm": 0.7831400996347803,
      "learning_rate": 4.155807631229984e-06,
      "loss": 0.1199,
      "step": 19449
    },
    {
      "epoch": 0.5674193360172706,
      "grad_norm": 0.8910101049727717,
      "learning_rate": 4.155341983233156e-06,
      "loss": 0.1336,
      "step": 19450
    },
    {
      "epoch": 0.5674485092479141,
      "grad_norm": 0.730621387194202,
      "learning_rate": 4.154876342777234e-06,
      "loss": 0.1135,
      "step": 19451
    },
    {
      "epoch": 0.5674776824785577,
      "grad_norm": 0.7208364369338753,
      "learning_rate": 4.154410709866374e-06,
      "loss": 0.1188,
      "step": 19452
    },
    {
      "epoch": 0.5675068557092012,
      "grad_norm": 0.8334785944597244,
      "learning_rate": 4.153945084504733e-06,
      "loss": 0.1418,
      "step": 19453
    },
    {
      "epoch": 0.5675360289398448,
      "grad_norm": 0.805382737106822,
      "learning_rate": 4.153479466696467e-06,
      "loss": 0.1191,
      "step": 19454
    },
    {
      "epoch": 0.5675652021704883,
      "grad_norm": 1.1381496668325919,
      "learning_rate": 4.153013856445736e-06,
      "loss": 0.1263,
      "step": 19455
    },
    {
      "epoch": 0.5675943754011319,
      "grad_norm": 0.892138197681285,
      "learning_rate": 4.152548253756694e-06,
      "loss": 0.1167,
      "step": 19456
    },
    {
      "epoch": 0.5676235486317754,
      "grad_norm": 0.8309719989376817,
      "learning_rate": 4.152082658633501e-06,
      "loss": 0.1322,
      "step": 19457
    },
    {
      "epoch": 0.567652721862419,
      "grad_norm": 0.856713290710731,
      "learning_rate": 4.15161707108031e-06,
      "loss": 0.129,
      "step": 19458
    },
    {
      "epoch": 0.5676818950930627,
      "grad_norm": 0.9598761879946143,
      "learning_rate": 4.151151491101279e-06,
      "loss": 0.123,
      "step": 19459
    },
    {
      "epoch": 0.5677110683237062,
      "grad_norm": 1.011005379721408,
      "learning_rate": 4.150685918700565e-06,
      "loss": 0.1304,
      "step": 19460
    },
    {
      "epoch": 0.5677402415543498,
      "grad_norm": 0.8848972179178022,
      "learning_rate": 4.150220353882325e-06,
      "loss": 0.146,
      "step": 19461
    },
    {
      "epoch": 0.5677694147849933,
      "grad_norm": 0.7194590108526638,
      "learning_rate": 4.149754796650714e-06,
      "loss": 0.1146,
      "step": 19462
    },
    {
      "epoch": 0.5677985880156369,
      "grad_norm": 0.9134909870263048,
      "learning_rate": 4.14928924700989e-06,
      "loss": 0.1268,
      "step": 19463
    },
    {
      "epoch": 0.5678277612462804,
      "grad_norm": 0.6926311306615194,
      "learning_rate": 4.148823704964009e-06,
      "loss": 0.1002,
      "step": 19464
    },
    {
      "epoch": 0.567856934476924,
      "grad_norm": 0.8663371143277986,
      "learning_rate": 4.148358170517226e-06,
      "loss": 0.1044,
      "step": 19465
    },
    {
      "epoch": 0.5678861077075675,
      "grad_norm": 0.8497971341096269,
      "learning_rate": 4.147892643673696e-06,
      "loss": 0.1273,
      "step": 19466
    },
    {
      "epoch": 0.5679152809382111,
      "grad_norm": 0.906068927483276,
      "learning_rate": 4.147427124437579e-06,
      "loss": 0.1627,
      "step": 19467
    },
    {
      "epoch": 0.5679444541688546,
      "grad_norm": 0.9524640293613119,
      "learning_rate": 4.146961612813029e-06,
      "loss": 0.1137,
      "step": 19468
    },
    {
      "epoch": 0.5679736273994982,
      "grad_norm": 0.8292784079540674,
      "learning_rate": 4.1464961088042035e-06,
      "loss": 0.1472,
      "step": 19469
    },
    {
      "epoch": 0.5680028006301417,
      "grad_norm": 0.8191775191554431,
      "learning_rate": 4.146030612415256e-06,
      "loss": 0.1408,
      "step": 19470
    },
    {
      "epoch": 0.5680319738607853,
      "grad_norm": 0.9411945831136156,
      "learning_rate": 4.145565123650342e-06,
      "loss": 0.1292,
      "step": 19471
    },
    {
      "epoch": 0.5680611470914289,
      "grad_norm": 0.8442802249400984,
      "learning_rate": 4.1450996425136184e-06,
      "loss": 0.1166,
      "step": 19472
    },
    {
      "epoch": 0.5680903203220725,
      "grad_norm": 0.9372724918897899,
      "learning_rate": 4.144634169009243e-06,
      "loss": 0.1366,
      "step": 19473
    },
    {
      "epoch": 0.5681194935527161,
      "grad_norm": 0.7893050022050192,
      "learning_rate": 4.1441687031413695e-06,
      "loss": 0.1188,
      "step": 19474
    },
    {
      "epoch": 0.5681486667833596,
      "grad_norm": 0.7700591593083678,
      "learning_rate": 4.143703244914152e-06,
      "loss": 0.1133,
      "step": 19475
    },
    {
      "epoch": 0.5681778400140032,
      "grad_norm": 0.9653185623356487,
      "learning_rate": 4.143237794331749e-06,
      "loss": 0.1221,
      "step": 19476
    },
    {
      "epoch": 0.5682070132446467,
      "grad_norm": 1.0229825103184051,
      "learning_rate": 4.142772351398314e-06,
      "loss": 0.1407,
      "step": 19477
    },
    {
      "epoch": 0.5682361864752903,
      "grad_norm": 0.9353775145276803,
      "learning_rate": 4.142306916118e-06,
      "loss": 0.1001,
      "step": 19478
    },
    {
      "epoch": 0.5682653597059338,
      "grad_norm": 0.8305059203098825,
      "learning_rate": 4.141841488494969e-06,
      "loss": 0.1303,
      "step": 19479
    },
    {
      "epoch": 0.5682945329365774,
      "grad_norm": 0.879799059625502,
      "learning_rate": 4.1413760685333714e-06,
      "loss": 0.1237,
      "step": 19480
    },
    {
      "epoch": 0.5683237061672209,
      "grad_norm": 0.786556193206934,
      "learning_rate": 4.140910656237363e-06,
      "loss": 0.1327,
      "step": 19481
    },
    {
      "epoch": 0.5683528793978645,
      "grad_norm": 0.6985346634610555,
      "learning_rate": 4.1404452516111e-06,
      "loss": 0.1044,
      "step": 19482
    },
    {
      "epoch": 0.568382052628508,
      "grad_norm": 0.873493223399064,
      "learning_rate": 4.139979854658735e-06,
      "loss": 0.1329,
      "step": 19483
    },
    {
      "epoch": 0.5684112258591516,
      "grad_norm": 0.833281339853357,
      "learning_rate": 4.139514465384424e-06,
      "loss": 0.1522,
      "step": 19484
    },
    {
      "epoch": 0.5684403990897952,
      "grad_norm": 0.7788355824123007,
      "learning_rate": 4.139049083792324e-06,
      "loss": 0.1097,
      "step": 19485
    },
    {
      "epoch": 0.5684695723204388,
      "grad_norm": 0.8406256788633911,
      "learning_rate": 4.1385837098865874e-06,
      "loss": 0.1326,
      "step": 19486
    },
    {
      "epoch": 0.5684987455510824,
      "grad_norm": 0.5863643463359757,
      "learning_rate": 4.138118343671372e-06,
      "loss": 0.1214,
      "step": 19487
    },
    {
      "epoch": 0.5685279187817259,
      "grad_norm": 0.813258095451415,
      "learning_rate": 4.137652985150829e-06,
      "loss": 0.1474,
      "step": 19488
    },
    {
      "epoch": 0.5685570920123695,
      "grad_norm": 0.745323317296243,
      "learning_rate": 4.137187634329114e-06,
      "loss": 0.1179,
      "step": 19489
    },
    {
      "epoch": 0.568586265243013,
      "grad_norm": 0.7667490617432795,
      "learning_rate": 4.13672229121038e-06,
      "loss": 0.1435,
      "step": 19490
    },
    {
      "epoch": 0.5686154384736566,
      "grad_norm": 0.8094175844997595,
      "learning_rate": 4.136256955798786e-06,
      "loss": 0.1411,
      "step": 19491
    },
    {
      "epoch": 0.5686446117043001,
      "grad_norm": 0.6636350950499006,
      "learning_rate": 4.135791628098483e-06,
      "loss": 0.1429,
      "step": 19492
    },
    {
      "epoch": 0.5686737849349437,
      "grad_norm": 0.7253760280737869,
      "learning_rate": 4.135326308113625e-06,
      "loss": 0.1086,
      "step": 19493
    },
    {
      "epoch": 0.5687029581655872,
      "grad_norm": 0.8037861905000112,
      "learning_rate": 4.13486099584837e-06,
      "loss": 0.1229,
      "step": 19494
    },
    {
      "epoch": 0.5687321313962308,
      "grad_norm": 0.8480607830202082,
      "learning_rate": 4.134395691306868e-06,
      "loss": 0.1313,
      "step": 19495
    },
    {
      "epoch": 0.5687613046268744,
      "grad_norm": 0.7440739430597881,
      "learning_rate": 4.133930394493272e-06,
      "loss": 0.1035,
      "step": 19496
    },
    {
      "epoch": 0.5687904778575179,
      "grad_norm": 0.7217697385956728,
      "learning_rate": 4.1334651054117404e-06,
      "loss": 0.1252,
      "step": 19497
    },
    {
      "epoch": 0.5688196510881615,
      "grad_norm": 0.7103858199884849,
      "learning_rate": 4.132999824066426e-06,
      "loss": 0.1448,
      "step": 19498
    },
    {
      "epoch": 0.568848824318805,
      "grad_norm": 0.8064217001025907,
      "learning_rate": 4.132534550461484e-06,
      "loss": 0.1435,
      "step": 19499
    },
    {
      "epoch": 0.5688779975494487,
      "grad_norm": 0.7874102313578355,
      "learning_rate": 4.1320692846010645e-06,
      "loss": 0.1089,
      "step": 19500
    },
    {
      "epoch": 0.5689071707800922,
      "grad_norm": 0.8832892212667347,
      "learning_rate": 4.131604026489322e-06,
      "loss": 0.1293,
      "step": 19501
    },
    {
      "epoch": 0.5689363440107358,
      "grad_norm": 0.8844026983146775,
      "learning_rate": 4.131138776130413e-06,
      "loss": 0.1291,
      "step": 19502
    },
    {
      "epoch": 0.5689655172413793,
      "grad_norm": 0.7443751926414968,
      "learning_rate": 4.130673533528489e-06,
      "loss": 0.1262,
      "step": 19503
    },
    {
      "epoch": 0.5689946904720229,
      "grad_norm": 0.7058878160954616,
      "learning_rate": 4.130208298687705e-06,
      "loss": 0.1101,
      "step": 19504
    },
    {
      "epoch": 0.5690238637026664,
      "grad_norm": 0.7290644904028006,
      "learning_rate": 4.129743071612214e-06,
      "loss": 0.1101,
      "step": 19505
    },
    {
      "epoch": 0.56905303693331,
      "grad_norm": 0.890508438194241,
      "learning_rate": 4.129277852306169e-06,
      "loss": 0.1313,
      "step": 19506
    },
    {
      "epoch": 0.5690822101639536,
      "grad_norm": 0.7590479402233686,
      "learning_rate": 4.128812640773721e-06,
      "loss": 0.1234,
      "step": 19507
    },
    {
      "epoch": 0.5691113833945971,
      "grad_norm": 0.6992276627467836,
      "learning_rate": 4.128347437019028e-06,
      "loss": 0.1051,
      "step": 19508
    },
    {
      "epoch": 0.5691405566252407,
      "grad_norm": 0.77158180994644,
      "learning_rate": 4.127882241046241e-06,
      "loss": 0.1197,
      "step": 19509
    },
    {
      "epoch": 0.5691697298558842,
      "grad_norm": 0.7894896966601396,
      "learning_rate": 4.127417052859513e-06,
      "loss": 0.1358,
      "step": 19510
    },
    {
      "epoch": 0.5691989030865278,
      "grad_norm": 0.7574734746844207,
      "learning_rate": 4.126951872462997e-06,
      "loss": 0.1215,
      "step": 19511
    },
    {
      "epoch": 0.5692280763171713,
      "grad_norm": 0.8685129970062757,
      "learning_rate": 4.1264866998608476e-06,
      "loss": 0.1421,
      "step": 19512
    },
    {
      "epoch": 0.5692572495478149,
      "grad_norm": 0.870514576773545,
      "learning_rate": 4.126021535057213e-06,
      "loss": 0.1387,
      "step": 19513
    },
    {
      "epoch": 0.5692864227784585,
      "grad_norm": 0.8267546453740713,
      "learning_rate": 4.125556378056252e-06,
      "loss": 0.118,
      "step": 19514
    },
    {
      "epoch": 0.5693155960091021,
      "grad_norm": 0.7105776430547567,
      "learning_rate": 4.125091228862115e-06,
      "loss": 0.1399,
      "step": 19515
    },
    {
      "epoch": 0.5693447692397456,
      "grad_norm": 0.939084837621374,
      "learning_rate": 4.124626087478954e-06,
      "loss": 0.1272,
      "step": 19516
    },
    {
      "epoch": 0.5693739424703892,
      "grad_norm": 0.8669677573714532,
      "learning_rate": 4.124160953910923e-06,
      "loss": 0.1237,
      "step": 19517
    },
    {
      "epoch": 0.5694031157010327,
      "grad_norm": 0.9734128272672662,
      "learning_rate": 4.1236958281621735e-06,
      "loss": 0.1136,
      "step": 19518
    },
    {
      "epoch": 0.5694322889316763,
      "grad_norm": 0.9261766815643392,
      "learning_rate": 4.123230710236857e-06,
      "loss": 0.1129,
      "step": 19519
    },
    {
      "epoch": 0.5694614621623199,
      "grad_norm": 1.0987557919086146,
      "learning_rate": 4.122765600139129e-06,
      "loss": 0.1273,
      "step": 19520
    },
    {
      "epoch": 0.5694906353929634,
      "grad_norm": 1.0520802567736014,
      "learning_rate": 4.122300497873141e-06,
      "loss": 0.1248,
      "step": 19521
    },
    {
      "epoch": 0.569519808623607,
      "grad_norm": 0.7404176701961205,
      "learning_rate": 4.121835403443044e-06,
      "loss": 0.1204,
      "step": 19522
    },
    {
      "epoch": 0.5695489818542505,
      "grad_norm": 0.8641522715390688,
      "learning_rate": 4.1213703168529905e-06,
      "loss": 0.1306,
      "step": 19523
    },
    {
      "epoch": 0.5695781550848941,
      "grad_norm": 0.7196633741119154,
      "learning_rate": 4.120905238107134e-06,
      "loss": 0.1193,
      "step": 19524
    },
    {
      "epoch": 0.5696073283155376,
      "grad_norm": 0.8359834060471374,
      "learning_rate": 4.120440167209623e-06,
      "loss": 0.106,
      "step": 19525
    },
    {
      "epoch": 0.5696365015461812,
      "grad_norm": 0.7430056591631837,
      "learning_rate": 4.119975104164616e-06,
      "loss": 0.1342,
      "step": 19526
    },
    {
      "epoch": 0.5696656747768248,
      "grad_norm": 0.78401092675671,
      "learning_rate": 4.119510048976258e-06,
      "loss": 0.1089,
      "step": 19527
    },
    {
      "epoch": 0.5696948480074684,
      "grad_norm": 0.737065396274654,
      "learning_rate": 4.119045001648705e-06,
      "loss": 0.1071,
      "step": 19528
    },
    {
      "epoch": 0.5697240212381119,
      "grad_norm": 0.8720979256625068,
      "learning_rate": 4.11857996218611e-06,
      "loss": 0.1268,
      "step": 19529
    },
    {
      "epoch": 0.5697531944687555,
      "grad_norm": 0.9327166649629202,
      "learning_rate": 4.118114930592621e-06,
      "loss": 0.14,
      "step": 19530
    },
    {
      "epoch": 0.569782367699399,
      "grad_norm": 0.8726103788430802,
      "learning_rate": 4.1176499068723895e-06,
      "loss": 0.1158,
      "step": 19531
    },
    {
      "epoch": 0.5698115409300426,
      "grad_norm": 1.0831975917678167,
      "learning_rate": 4.117184891029571e-06,
      "loss": 0.1213,
      "step": 19532
    },
    {
      "epoch": 0.5698407141606862,
      "grad_norm": 0.6362389314916833,
      "learning_rate": 4.116719883068315e-06,
      "loss": 0.119,
      "step": 19533
    },
    {
      "epoch": 0.5698698873913297,
      "grad_norm": 1.022191520372765,
      "learning_rate": 4.116254882992774e-06,
      "loss": 0.1398,
      "step": 19534
    },
    {
      "epoch": 0.5698990606219733,
      "grad_norm": 0.7810039189547336,
      "learning_rate": 4.115789890807097e-06,
      "loss": 0.1176,
      "step": 19535
    },
    {
      "epoch": 0.5699282338526168,
      "grad_norm": 0.9425448543692836,
      "learning_rate": 4.115324906515438e-06,
      "loss": 0.1217,
      "step": 19536
    },
    {
      "epoch": 0.5699574070832604,
      "grad_norm": 0.6867779750577592,
      "learning_rate": 4.114859930121944e-06,
      "loss": 0.1335,
      "step": 19537
    },
    {
      "epoch": 0.5699865803139039,
      "grad_norm": 0.7868889556038507,
      "learning_rate": 4.1143949616307725e-06,
      "loss": 0.1545,
      "step": 19538
    },
    {
      "epoch": 0.5700157535445475,
      "grad_norm": 0.7451587901962556,
      "learning_rate": 4.1139300010460705e-06,
      "loss": 0.1317,
      "step": 19539
    },
    {
      "epoch": 0.570044926775191,
      "grad_norm": 0.9621247044829014,
      "learning_rate": 4.11346504837199e-06,
      "loss": 0.1348,
      "step": 19540
    },
    {
      "epoch": 0.5700741000058347,
      "grad_norm": 0.8226151313164166,
      "learning_rate": 4.113000103612681e-06,
      "loss": 0.144,
      "step": 19541
    },
    {
      "epoch": 0.5701032732364782,
      "grad_norm": 0.7845961869456818,
      "learning_rate": 4.112535166772297e-06,
      "loss": 0.138,
      "step": 19542
    },
    {
      "epoch": 0.5701324464671218,
      "grad_norm": 0.7521358174870113,
      "learning_rate": 4.112070237854984e-06,
      "loss": 0.12,
      "step": 19543
    },
    {
      "epoch": 0.5701616196977654,
      "grad_norm": 0.7869389188886107,
      "learning_rate": 4.111605316864899e-06,
      "loss": 0.1438,
      "step": 19544
    },
    {
      "epoch": 0.5701907929284089,
      "grad_norm": 0.891113661257238,
      "learning_rate": 4.1111404038061895e-06,
      "loss": 0.1334,
      "step": 19545
    },
    {
      "epoch": 0.5702199661590525,
      "grad_norm": 0.8058385390983227,
      "learning_rate": 4.110675498683005e-06,
      "loss": 0.1496,
      "step": 19546
    },
    {
      "epoch": 0.570249139389696,
      "grad_norm": 0.891585900716879,
      "learning_rate": 4.1102106014994994e-06,
      "loss": 0.1202,
      "step": 19547
    },
    {
      "epoch": 0.5702783126203396,
      "grad_norm": 0.8010862708971639,
      "learning_rate": 4.109745712259819e-06,
      "loss": 0.1373,
      "step": 19548
    },
    {
      "epoch": 0.5703074858509831,
      "grad_norm": 0.6890893052186399,
      "learning_rate": 4.109280830968116e-06,
      "loss": 0.1086,
      "step": 19549
    },
    {
      "epoch": 0.5703366590816267,
      "grad_norm": 0.9517040546287387,
      "learning_rate": 4.108815957628542e-06,
      "loss": 0.1444,
      "step": 19550
    },
    {
      "epoch": 0.5703658323122702,
      "grad_norm": 0.740405320141188,
      "learning_rate": 4.108351092245248e-06,
      "loss": 0.1496,
      "step": 19551
    },
    {
      "epoch": 0.5703950055429138,
      "grad_norm": 0.803242782272997,
      "learning_rate": 4.107886234822381e-06,
      "loss": 0.1322,
      "step": 19552
    },
    {
      "epoch": 0.5704241787735573,
      "grad_norm": 0.9112748207641433,
      "learning_rate": 4.107421385364093e-06,
      "loss": 0.1404,
      "step": 19553
    },
    {
      "epoch": 0.570453352004201,
      "grad_norm": 0.8018854481429659,
      "learning_rate": 4.106956543874534e-06,
      "loss": 0.1231,
      "step": 19554
    },
    {
      "epoch": 0.5704825252348446,
      "grad_norm": 0.983309130673319,
      "learning_rate": 4.106491710357851e-06,
      "loss": 0.1554,
      "step": 19555
    },
    {
      "epoch": 0.5705116984654881,
      "grad_norm": 0.9327751668367343,
      "learning_rate": 4.106026884818201e-06,
      "loss": 0.1263,
      "step": 19556
    },
    {
      "epoch": 0.5705408716961317,
      "grad_norm": 0.8019383722303732,
      "learning_rate": 4.105562067259726e-06,
      "loss": 0.1025,
      "step": 19557
    },
    {
      "epoch": 0.5705700449267752,
      "grad_norm": 0.7746129361333384,
      "learning_rate": 4.1050972576865824e-06,
      "loss": 0.1471,
      "step": 19558
    },
    {
      "epoch": 0.5705992181574188,
      "grad_norm": 0.9636544945578799,
      "learning_rate": 4.104632456102916e-06,
      "loss": 0.1232,
      "step": 19559
    },
    {
      "epoch": 0.5706283913880623,
      "grad_norm": 0.7462668806815042,
      "learning_rate": 4.104167662512877e-06,
      "loss": 0.1153,
      "step": 19560
    },
    {
      "epoch": 0.5706575646187059,
      "grad_norm": 1.3059020968712054,
      "learning_rate": 4.103702876920614e-06,
      "loss": 0.1338,
      "step": 19561
    },
    {
      "epoch": 0.5706867378493494,
      "grad_norm": 0.8451109525651166,
      "learning_rate": 4.103238099330279e-06,
      "loss": 0.1371,
      "step": 19562
    },
    {
      "epoch": 0.570715911079993,
      "grad_norm": 0.9030798489373518,
      "learning_rate": 4.102773329746019e-06,
      "loss": 0.1467,
      "step": 19563
    },
    {
      "epoch": 0.5707450843106365,
      "grad_norm": 0.8529831916062731,
      "learning_rate": 4.102308568171987e-06,
      "loss": 0.1363,
      "step": 19564
    },
    {
      "epoch": 0.5707742575412801,
      "grad_norm": 0.860329533972752,
      "learning_rate": 4.101843814612328e-06,
      "loss": 0.1321,
      "step": 19565
    },
    {
      "epoch": 0.5708034307719236,
      "grad_norm": 0.8707392296522488,
      "learning_rate": 4.101379069071193e-06,
      "loss": 0.1359,
      "step": 19566
    },
    {
      "epoch": 0.5708326040025672,
      "grad_norm": 1.100558595633123,
      "learning_rate": 4.100914331552731e-06,
      "loss": 0.1099,
      "step": 19567
    },
    {
      "epoch": 0.5708617772332109,
      "grad_norm": 1.036221664048366,
      "learning_rate": 4.100449602061091e-06,
      "loss": 0.1076,
      "step": 19568
    },
    {
      "epoch": 0.5708909504638544,
      "grad_norm": 0.670950051219072,
      "learning_rate": 4.0999848806004235e-06,
      "loss": 0.132,
      "step": 19569
    },
    {
      "epoch": 0.570920123694498,
      "grad_norm": 0.8106256922639987,
      "learning_rate": 4.099520167174876e-06,
      "loss": 0.1364,
      "step": 19570
    },
    {
      "epoch": 0.5709492969251415,
      "grad_norm": 0.8510338740647666,
      "learning_rate": 4.0990554617885965e-06,
      "loss": 0.1247,
      "step": 19571
    },
    {
      "epoch": 0.5709784701557851,
      "grad_norm": 0.8043332398153833,
      "learning_rate": 4.098590764445737e-06,
      "loss": 0.1202,
      "step": 19572
    },
    {
      "epoch": 0.5710076433864286,
      "grad_norm": 1.0271611484031353,
      "learning_rate": 4.0981260751504394e-06,
      "loss": 0.1464,
      "step": 19573
    },
    {
      "epoch": 0.5710368166170722,
      "grad_norm": 0.9118783140012819,
      "learning_rate": 4.097661393906861e-06,
      "loss": 0.1546,
      "step": 19574
    },
    {
      "epoch": 0.5710659898477157,
      "grad_norm": 0.7993818994107283,
      "learning_rate": 4.097196720719146e-06,
      "loss": 0.1238,
      "step": 19575
    },
    {
      "epoch": 0.5710951630783593,
      "grad_norm": 0.8956912654468556,
      "learning_rate": 4.096732055591442e-06,
      "loss": 0.1291,
      "step": 19576
    },
    {
      "epoch": 0.5711243363090028,
      "grad_norm": 1.2382410713120202,
      "learning_rate": 4.096267398527899e-06,
      "loss": 0.146,
      "step": 19577
    },
    {
      "epoch": 0.5711535095396464,
      "grad_norm": 0.8038197701147217,
      "learning_rate": 4.095802749532665e-06,
      "loss": 0.1366,
      "step": 19578
    },
    {
      "epoch": 0.5711826827702899,
      "grad_norm": 0.9035561096904182,
      "learning_rate": 4.095338108609887e-06,
      "loss": 0.1472,
      "step": 19579
    },
    {
      "epoch": 0.5712118560009335,
      "grad_norm": 0.9322470645117651,
      "learning_rate": 4.0948734757637145e-06,
      "loss": 0.1278,
      "step": 19580
    },
    {
      "epoch": 0.5712410292315772,
      "grad_norm": 0.7384723552150703,
      "learning_rate": 4.094408850998298e-06,
      "loss": 0.1332,
      "step": 19581
    },
    {
      "epoch": 0.5712702024622207,
      "grad_norm": 0.7524179745350091,
      "learning_rate": 4.093944234317781e-06,
      "loss": 0.1198,
      "step": 19582
    },
    {
      "epoch": 0.5712993756928643,
      "grad_norm": 0.8528641395279695,
      "learning_rate": 4.093479625726314e-06,
      "loss": 0.1108,
      "step": 19583
    },
    {
      "epoch": 0.5713285489235078,
      "grad_norm": 0.7550134162929518,
      "learning_rate": 4.093015025228045e-06,
      "loss": 0.145,
      "step": 19584
    },
    {
      "epoch": 0.5713577221541514,
      "grad_norm": 0.7579223993536571,
      "learning_rate": 4.092550432827119e-06,
      "loss": 0.1188,
      "step": 19585
    },
    {
      "epoch": 0.5713868953847949,
      "grad_norm": 0.8729008044637647,
      "learning_rate": 4.092085848527689e-06,
      "loss": 0.1212,
      "step": 19586
    },
    {
      "epoch": 0.5714160686154385,
      "grad_norm": 0.6287918891569605,
      "learning_rate": 4.091621272333899e-06,
      "loss": 0.1219,
      "step": 19587
    },
    {
      "epoch": 0.571445241846082,
      "grad_norm": 0.9254186992449724,
      "learning_rate": 4.091156704249897e-06,
      "loss": 0.1547,
      "step": 19588
    },
    {
      "epoch": 0.5714744150767256,
      "grad_norm": 0.86534759301468,
      "learning_rate": 4.090692144279832e-06,
      "loss": 0.1208,
      "step": 19589
    },
    {
      "epoch": 0.5715035883073691,
      "grad_norm": 0.7307745436742721,
      "learning_rate": 4.0902275924278494e-06,
      "loss": 0.1158,
      "step": 19590
    },
    {
      "epoch": 0.5715327615380127,
      "grad_norm": 0.7691039628841517,
      "learning_rate": 4.0897630486980975e-06,
      "loss": 0.151,
      "step": 19591
    },
    {
      "epoch": 0.5715619347686562,
      "grad_norm": 0.9721164989121035,
      "learning_rate": 4.089298513094724e-06,
      "loss": 0.1145,
      "step": 19592
    },
    {
      "epoch": 0.5715911079992998,
      "grad_norm": 0.9281174769800552,
      "learning_rate": 4.088833985621876e-06,
      "loss": 0.1523,
      "step": 19593
    },
    {
      "epoch": 0.5716202812299433,
      "grad_norm": 0.8749704575707429,
      "learning_rate": 4.0883694662837015e-06,
      "loss": 0.1438,
      "step": 19594
    },
    {
      "epoch": 0.571649454460587,
      "grad_norm": 0.8404845980285226,
      "learning_rate": 4.087904955084346e-06,
      "loss": 0.1105,
      "step": 19595
    },
    {
      "epoch": 0.5716786276912306,
      "grad_norm": 0.6669387434807595,
      "learning_rate": 4.087440452027958e-06,
      "loss": 0.1161,
      "step": 19596
    },
    {
      "epoch": 0.5717078009218741,
      "grad_norm": 0.8796561362075194,
      "learning_rate": 4.086975957118682e-06,
      "loss": 0.1091,
      "step": 19597
    },
    {
      "epoch": 0.5717369741525177,
      "grad_norm": 0.8085861200201783,
      "learning_rate": 4.0865114703606675e-06,
      "loss": 0.1313,
      "step": 19598
    },
    {
      "epoch": 0.5717661473831612,
      "grad_norm": 0.9494960421333559,
      "learning_rate": 4.0860469917580625e-06,
      "loss": 0.1413,
      "step": 19599
    },
    {
      "epoch": 0.5717953206138048,
      "grad_norm": 1.016402525698968,
      "learning_rate": 4.085582521315011e-06,
      "loss": 0.1327,
      "step": 19600
    },
    {
      "epoch": 0.5718244938444483,
      "grad_norm": 0.8810885581902279,
      "learning_rate": 4.085118059035659e-06,
      "loss": 0.1316,
      "step": 19601
    },
    {
      "epoch": 0.5718536670750919,
      "grad_norm": 1.295599828780416,
      "learning_rate": 4.084653604924156e-06,
      "loss": 0.134,
      "step": 19602
    },
    {
      "epoch": 0.5718828403057354,
      "grad_norm": 0.8965368382451819,
      "learning_rate": 4.084189158984644e-06,
      "loss": 0.1147,
      "step": 19603
    },
    {
      "epoch": 0.571912013536379,
      "grad_norm": 0.7570945154397575,
      "learning_rate": 4.083724721221276e-06,
      "loss": 0.1389,
      "step": 19604
    },
    {
      "epoch": 0.5719411867670225,
      "grad_norm": 0.7008189567842064,
      "learning_rate": 4.083260291638194e-06,
      "loss": 0.1326,
      "step": 19605
    },
    {
      "epoch": 0.5719703599976661,
      "grad_norm": 0.8957014734114848,
      "learning_rate": 4.082795870239546e-06,
      "loss": 0.1096,
      "step": 19606
    },
    {
      "epoch": 0.5719995332283097,
      "grad_norm": 0.9645052959832401,
      "learning_rate": 4.082331457029477e-06,
      "loss": 0.1223,
      "step": 19607
    },
    {
      "epoch": 0.5720287064589533,
      "grad_norm": 0.8505067561797681,
      "learning_rate": 4.081867052012133e-06,
      "loss": 0.1306,
      "step": 19608
    },
    {
      "epoch": 0.5720578796895969,
      "grad_norm": 1.1036601142215465,
      "learning_rate": 4.081402655191661e-06,
      "loss": 0.1362,
      "step": 19609
    },
    {
      "epoch": 0.5720870529202404,
      "grad_norm": 1.0219819072813023,
      "learning_rate": 4.080938266572206e-06,
      "loss": 0.1426,
      "step": 19610
    },
    {
      "epoch": 0.572116226150884,
      "grad_norm": 0.7664604229633002,
      "learning_rate": 4.080473886157917e-06,
      "loss": 0.1237,
      "step": 19611
    },
    {
      "epoch": 0.5721453993815275,
      "grad_norm": 4.583120225789219,
      "learning_rate": 4.080009513952937e-06,
      "loss": 0.1405,
      "step": 19612
    },
    {
      "epoch": 0.5721745726121711,
      "grad_norm": 1.1148858841638805,
      "learning_rate": 4.079545149961411e-06,
      "loss": 0.1221,
      "step": 19613
    },
    {
      "epoch": 0.5722037458428146,
      "grad_norm": 1.1982993190446523,
      "learning_rate": 4.079080794187488e-06,
      "loss": 0.1311,
      "step": 19614
    },
    {
      "epoch": 0.5722329190734582,
      "grad_norm": 0.7716507717834155,
      "learning_rate": 4.078616446635309e-06,
      "loss": 0.1475,
      "step": 19615
    },
    {
      "epoch": 0.5722620923041017,
      "grad_norm": 1.3592832215293333,
      "learning_rate": 4.078152107309025e-06,
      "loss": 0.1188,
      "step": 19616
    },
    {
      "epoch": 0.5722912655347453,
      "grad_norm": 0.8898626691653115,
      "learning_rate": 4.0776877762127786e-06,
      "loss": 0.1348,
      "step": 19617
    },
    {
      "epoch": 0.5723204387653889,
      "grad_norm": 0.7912808512138472,
      "learning_rate": 4.077223453350715e-06,
      "loss": 0.1321,
      "step": 19618
    },
    {
      "epoch": 0.5723496119960324,
      "grad_norm": 0.7352657412409985,
      "learning_rate": 4.076759138726981e-06,
      "loss": 0.1377,
      "step": 19619
    },
    {
      "epoch": 0.572378785226676,
      "grad_norm": 1.0250168066845917,
      "learning_rate": 4.07629483234572e-06,
      "loss": 0.1192,
      "step": 19620
    },
    {
      "epoch": 0.5724079584573195,
      "grad_norm": 0.9129310045646087,
      "learning_rate": 4.075830534211077e-06,
      "loss": 0.1102,
      "step": 19621
    },
    {
      "epoch": 0.5724371316879632,
      "grad_norm": 1.0136528664078577,
      "learning_rate": 4.075366244327201e-06,
      "loss": 0.1046,
      "step": 19622
    },
    {
      "epoch": 0.5724663049186067,
      "grad_norm": 0.9626198268203121,
      "learning_rate": 4.074901962698233e-06,
      "loss": 0.1152,
      "step": 19623
    },
    {
      "epoch": 0.5724954781492503,
      "grad_norm": 0.9867479245628938,
      "learning_rate": 4.07443768932832e-06,
      "loss": 0.1452,
      "step": 19624
    },
    {
      "epoch": 0.5725246513798938,
      "grad_norm": 1.1981208406394535,
      "learning_rate": 4.073973424221606e-06,
      "loss": 0.1342,
      "step": 19625
    },
    {
      "epoch": 0.5725538246105374,
      "grad_norm": 1.103538368144145,
      "learning_rate": 4.073509167382237e-06,
      "loss": 0.1255,
      "step": 19626
    },
    {
      "epoch": 0.5725829978411809,
      "grad_norm": 0.9205918551145647,
      "learning_rate": 4.073044918814355e-06,
      "loss": 0.1307,
      "step": 19627
    },
    {
      "epoch": 0.5726121710718245,
      "grad_norm": 0.75994579522841,
      "learning_rate": 4.072580678522108e-06,
      "loss": 0.1203,
      "step": 19628
    },
    {
      "epoch": 0.572641344302468,
      "grad_norm": 0.7800455976184955,
      "learning_rate": 4.07211644650964e-06,
      "loss": 0.1274,
      "step": 19629
    },
    {
      "epoch": 0.5726705175331116,
      "grad_norm": 0.7479296977296234,
      "learning_rate": 4.071652222781095e-06,
      "loss": 0.1067,
      "step": 19630
    },
    {
      "epoch": 0.5726996907637552,
      "grad_norm": 0.935377227041055,
      "learning_rate": 4.071188007340616e-06,
      "loss": 0.144,
      "step": 19631
    },
    {
      "epoch": 0.5727288639943987,
      "grad_norm": 1.086846693480443,
      "learning_rate": 4.07072380019235e-06,
      "loss": 0.1249,
      "step": 19632
    },
    {
      "epoch": 0.5727580372250423,
      "grad_norm": 0.7920414969886806,
      "learning_rate": 4.070259601340438e-06,
      "loss": 0.1228,
      "step": 19633
    },
    {
      "epoch": 0.5727872104556858,
      "grad_norm": 0.9641235197837658,
      "learning_rate": 4.069795410789028e-06,
      "loss": 0.13,
      "step": 19634
    },
    {
      "epoch": 0.5728163836863295,
      "grad_norm": 1.4582623863303132,
      "learning_rate": 4.069331228542262e-06,
      "loss": 0.1367,
      "step": 19635
    },
    {
      "epoch": 0.572845556916973,
      "grad_norm": 0.7596637382779614,
      "learning_rate": 4.0688670546042846e-06,
      "loss": 0.1163,
      "step": 19636
    },
    {
      "epoch": 0.5728747301476166,
      "grad_norm": 0.9131849863806129,
      "learning_rate": 4.0684028889792414e-06,
      "loss": 0.1384,
      "step": 19637
    },
    {
      "epoch": 0.5729039033782601,
      "grad_norm": 0.9595573296767251,
      "learning_rate": 4.067938731671273e-06,
      "loss": 0.1303,
      "step": 19638
    },
    {
      "epoch": 0.5729330766089037,
      "grad_norm": 1.3266108150947125,
      "learning_rate": 4.0674745826845245e-06,
      "loss": 0.1196,
      "step": 19639
    },
    {
      "epoch": 0.5729622498395472,
      "grad_norm": 0.8504605757416285,
      "learning_rate": 4.0670104420231415e-06,
      "loss": 0.1297,
      "step": 19640
    },
    {
      "epoch": 0.5729914230701908,
      "grad_norm": 0.9627670484871081,
      "learning_rate": 4.066546309691267e-06,
      "loss": 0.1481,
      "step": 19641
    },
    {
      "epoch": 0.5730205963008344,
      "grad_norm": 0.8181036989347299,
      "learning_rate": 4.066082185693044e-06,
      "loss": 0.1216,
      "step": 19642
    },
    {
      "epoch": 0.5730497695314779,
      "grad_norm": 0.8190177665164526,
      "learning_rate": 4.065618070032616e-06,
      "loss": 0.144,
      "step": 19643
    },
    {
      "epoch": 0.5730789427621215,
      "grad_norm": 0.9405307383188206,
      "learning_rate": 4.065153962714128e-06,
      "loss": 0.1456,
      "step": 19644
    },
    {
      "epoch": 0.573108115992765,
      "grad_norm": 0.8718385256673635,
      "learning_rate": 4.064689863741718e-06,
      "loss": 0.1353,
      "step": 19645
    },
    {
      "epoch": 0.5731372892234086,
      "grad_norm": 0.8696890164201897,
      "learning_rate": 4.0642257731195386e-06,
      "loss": 0.1209,
      "step": 19646
    },
    {
      "epoch": 0.5731664624540521,
      "grad_norm": 0.9092921157919418,
      "learning_rate": 4.063761690851726e-06,
      "loss": 0.1263,
      "step": 19647
    },
    {
      "epoch": 0.5731956356846957,
      "grad_norm": 0.8009606291683495,
      "learning_rate": 4.063297616942425e-06,
      "loss": 0.1369,
      "step": 19648
    },
    {
      "epoch": 0.5732248089153393,
      "grad_norm": 0.7824878711572439,
      "learning_rate": 4.062833551395781e-06,
      "loss": 0.1216,
      "step": 19649
    },
    {
      "epoch": 0.5732539821459829,
      "grad_norm": 0.9747094430619917,
      "learning_rate": 4.062369494215935e-06,
      "loss": 0.1305,
      "step": 19650
    },
    {
      "epoch": 0.5732831553766264,
      "grad_norm": 0.7065736982856796,
      "learning_rate": 4.061905445407028e-06,
      "loss": 0.1564,
      "step": 19651
    },
    {
      "epoch": 0.57331232860727,
      "grad_norm": 0.9489454903107976,
      "learning_rate": 4.061441404973207e-06,
      "loss": 0.1394,
      "step": 19652
    },
    {
      "epoch": 0.5733415018379135,
      "grad_norm": 0.9276366532392195,
      "learning_rate": 4.0609773729186126e-06,
      "loss": 0.1237,
      "step": 19653
    },
    {
      "epoch": 0.5733706750685571,
      "grad_norm": 0.8067987872644518,
      "learning_rate": 4.060513349247389e-06,
      "loss": 0.1281,
      "step": 19654
    },
    {
      "epoch": 0.5733998482992007,
      "grad_norm": 0.7668286174008708,
      "learning_rate": 4.060049333963677e-06,
      "loss": 0.1068,
      "step": 19655
    },
    {
      "epoch": 0.5734290215298442,
      "grad_norm": 1.0409118486005096,
      "learning_rate": 4.059585327071622e-06,
      "loss": 0.1418,
      "step": 19656
    },
    {
      "epoch": 0.5734581947604878,
      "grad_norm": 0.7098214044664539,
      "learning_rate": 4.059121328575361e-06,
      "loss": 0.1323,
      "step": 19657
    },
    {
      "epoch": 0.5734873679911313,
      "grad_norm": 0.8026493491423402,
      "learning_rate": 4.058657338479043e-06,
      "loss": 0.1486,
      "step": 19658
    },
    {
      "epoch": 0.5735165412217749,
      "grad_norm": 0.9952965179770774,
      "learning_rate": 4.058193356786808e-06,
      "loss": 0.1272,
      "step": 19659
    },
    {
      "epoch": 0.5735457144524184,
      "grad_norm": 1.004014463737942,
      "learning_rate": 4.057729383502797e-06,
      "loss": 0.1364,
      "step": 19660
    },
    {
      "epoch": 0.573574887683062,
      "grad_norm": 0.9945880244164974,
      "learning_rate": 4.057265418631152e-06,
      "loss": 0.1146,
      "step": 19661
    },
    {
      "epoch": 0.5736040609137056,
      "grad_norm": 0.7428496129006911,
      "learning_rate": 4.056801462176018e-06,
      "loss": 0.1241,
      "step": 19662
    },
    {
      "epoch": 0.5736332341443492,
      "grad_norm": 0.9291305056916853,
      "learning_rate": 4.056337514141534e-06,
      "loss": 0.1445,
      "step": 19663
    },
    {
      "epoch": 0.5736624073749927,
      "grad_norm": 0.9821001367157365,
      "learning_rate": 4.055873574531844e-06,
      "loss": 0.1317,
      "step": 19664
    },
    {
      "epoch": 0.5736915806056363,
      "grad_norm": 0.7703487540931252,
      "learning_rate": 4.055409643351089e-06,
      "loss": 0.1415,
      "step": 19665
    },
    {
      "epoch": 0.5737207538362799,
      "grad_norm": 1.0669377839748413,
      "learning_rate": 4.054945720603412e-06,
      "loss": 0.1429,
      "step": 19666
    },
    {
      "epoch": 0.5737499270669234,
      "grad_norm": 0.7835812262511627,
      "learning_rate": 4.054481806292954e-06,
      "loss": 0.116,
      "step": 19667
    },
    {
      "epoch": 0.573779100297567,
      "grad_norm": 1.0901860140491364,
      "learning_rate": 4.054017900423857e-06,
      "loss": 0.1337,
      "step": 19668
    },
    {
      "epoch": 0.5738082735282105,
      "grad_norm": 0.7255252750423824,
      "learning_rate": 4.05355400300026e-06,
      "loss": 0.1477,
      "step": 19669
    },
    {
      "epoch": 0.5738374467588541,
      "grad_norm": 0.8568672255002934,
      "learning_rate": 4.0530901140263086e-06,
      "loss": 0.1453,
      "step": 19670
    },
    {
      "epoch": 0.5738666199894976,
      "grad_norm": 0.8489136617872457,
      "learning_rate": 4.052626233506144e-06,
      "loss": 0.1052,
      "step": 19671
    },
    {
      "epoch": 0.5738957932201412,
      "grad_norm": 0.8732673269822866,
      "learning_rate": 4.052162361443905e-06,
      "loss": 0.1258,
      "step": 19672
    },
    {
      "epoch": 0.5739249664507847,
      "grad_norm": 0.7741536434094117,
      "learning_rate": 4.051698497843733e-06,
      "loss": 0.1313,
      "step": 19673
    },
    {
      "epoch": 0.5739541396814283,
      "grad_norm": 0.906186888405836,
      "learning_rate": 4.0512346427097725e-06,
      "loss": 0.1226,
      "step": 19674
    },
    {
      "epoch": 0.5739833129120718,
      "grad_norm": 0.8287054882482325,
      "learning_rate": 4.05077079604616e-06,
      "loss": 0.1276,
      "step": 19675
    },
    {
      "epoch": 0.5740124861427155,
      "grad_norm": 0.7243895749605813,
      "learning_rate": 4.050306957857041e-06,
      "loss": 0.1492,
      "step": 19676
    },
    {
      "epoch": 0.574041659373359,
      "grad_norm": 0.7510583587606688,
      "learning_rate": 4.049843128146555e-06,
      "loss": 0.1217,
      "step": 19677
    },
    {
      "epoch": 0.5740708326040026,
      "grad_norm": 0.9605537640545965,
      "learning_rate": 4.0493793069188425e-06,
      "loss": 0.137,
      "step": 19678
    },
    {
      "epoch": 0.5741000058346462,
      "grad_norm": 0.6980832287768168,
      "learning_rate": 4.0489154941780455e-06,
      "loss": 0.1043,
      "step": 19679
    },
    {
      "epoch": 0.5741291790652897,
      "grad_norm": 0.9968242679545044,
      "learning_rate": 4.048451689928302e-06,
      "loss": 0.1273,
      "step": 19680
    },
    {
      "epoch": 0.5741583522959333,
      "grad_norm": 0.9241971501512688,
      "learning_rate": 4.047987894173755e-06,
      "loss": 0.107,
      "step": 19681
    },
    {
      "epoch": 0.5741875255265768,
      "grad_norm": 0.7666690507839959,
      "learning_rate": 4.047524106918545e-06,
      "loss": 0.1272,
      "step": 19682
    },
    {
      "epoch": 0.5742166987572204,
      "grad_norm": 0.8163723685752113,
      "learning_rate": 4.047060328166813e-06,
      "loss": 0.121,
      "step": 19683
    },
    {
      "epoch": 0.5742458719878639,
      "grad_norm": 0.7700777990182223,
      "learning_rate": 4.0465965579227e-06,
      "loss": 0.1152,
      "step": 19684
    },
    {
      "epoch": 0.5742750452185075,
      "grad_norm": 0.855520884905867,
      "learning_rate": 4.046132796190344e-06,
      "loss": 0.1389,
      "step": 19685
    },
    {
      "epoch": 0.574304218449151,
      "grad_norm": 0.7351823772848916,
      "learning_rate": 4.045669042973886e-06,
      "loss": 0.1194,
      "step": 19686
    },
    {
      "epoch": 0.5743333916797946,
      "grad_norm": 0.7907259457199423,
      "learning_rate": 4.045205298277466e-06,
      "loss": 0.1168,
      "step": 19687
    },
    {
      "epoch": 0.5743625649104381,
      "grad_norm": 0.8827805223309084,
      "learning_rate": 4.044741562105227e-06,
      "loss": 0.1407,
      "step": 19688
    },
    {
      "epoch": 0.5743917381410818,
      "grad_norm": 0.9773334181207157,
      "learning_rate": 4.044277834461308e-06,
      "loss": 0.1307,
      "step": 19689
    },
    {
      "epoch": 0.5744209113717254,
      "grad_norm": 0.812223302005619,
      "learning_rate": 4.043814115349848e-06,
      "loss": 0.1304,
      "step": 19690
    },
    {
      "epoch": 0.5744500846023689,
      "grad_norm": 1.0275950849839577,
      "learning_rate": 4.043350404774986e-06,
      "loss": 0.1404,
      "step": 19691
    },
    {
      "epoch": 0.5744792578330125,
      "grad_norm": 0.8937972180652621,
      "learning_rate": 4.042886702740865e-06,
      "loss": 0.1111,
      "step": 19692
    },
    {
      "epoch": 0.574508431063656,
      "grad_norm": 0.7054348300623637,
      "learning_rate": 4.042423009251622e-06,
      "loss": 0.1158,
      "step": 19693
    },
    {
      "epoch": 0.5745376042942996,
      "grad_norm": 0.9100222587938743,
      "learning_rate": 4.041959324311397e-06,
      "loss": 0.1319,
      "step": 19694
    },
    {
      "epoch": 0.5745667775249431,
      "grad_norm": 0.9097507057876928,
      "learning_rate": 4.041495647924331e-06,
      "loss": 0.1177,
      "step": 19695
    },
    {
      "epoch": 0.5745959507555867,
      "grad_norm": 0.7082270876472646,
      "learning_rate": 4.041031980094563e-06,
      "loss": 0.1385,
      "step": 19696
    },
    {
      "epoch": 0.5746251239862302,
      "grad_norm": 0.9263768877658136,
      "learning_rate": 4.040568320826234e-06,
      "loss": 0.1304,
      "step": 19697
    },
    {
      "epoch": 0.5746542972168738,
      "grad_norm": 1.0005896366809788,
      "learning_rate": 4.0401046701234795e-06,
      "loss": 0.1345,
      "step": 19698
    },
    {
      "epoch": 0.5746834704475173,
      "grad_norm": 0.9350469304411964,
      "learning_rate": 4.039641027990443e-06,
      "loss": 0.1477,
      "step": 19699
    },
    {
      "epoch": 0.5747126436781609,
      "grad_norm": 0.9217337418915221,
      "learning_rate": 4.039177394431262e-06,
      "loss": 0.1346,
      "step": 19700
    },
    {
      "epoch": 0.5747418169088044,
      "grad_norm": 0.7962343875316714,
      "learning_rate": 4.038713769450076e-06,
      "loss": 0.1296,
      "step": 19701
    },
    {
      "epoch": 0.574770990139448,
      "grad_norm": 0.8335662634199607,
      "learning_rate": 4.038250153051024e-06,
      "loss": 0.1381,
      "step": 19702
    },
    {
      "epoch": 0.5748001633700917,
      "grad_norm": 1.0278937183408707,
      "learning_rate": 4.0377865452382444e-06,
      "loss": 0.1231,
      "step": 19703
    },
    {
      "epoch": 0.5748293366007352,
      "grad_norm": 0.7782265105030279,
      "learning_rate": 4.037322946015876e-06,
      "loss": 0.1235,
      "step": 19704
    },
    {
      "epoch": 0.5748585098313788,
      "grad_norm": 0.8048237422203832,
      "learning_rate": 4.03685935538806e-06,
      "loss": 0.1368,
      "step": 19705
    },
    {
      "epoch": 0.5748876830620223,
      "grad_norm": 0.9199416165871753,
      "learning_rate": 4.036395773358934e-06,
      "loss": 0.1317,
      "step": 19706
    },
    {
      "epoch": 0.5749168562926659,
      "grad_norm": 0.799419880980573,
      "learning_rate": 4.035932199932636e-06,
      "loss": 0.1103,
      "step": 19707
    },
    {
      "epoch": 0.5749460295233094,
      "grad_norm": 1.0907092432324073,
      "learning_rate": 4.0354686351133055e-06,
      "loss": 0.1421,
      "step": 19708
    },
    {
      "epoch": 0.574975202753953,
      "grad_norm": 1.0981836394400115,
      "learning_rate": 4.035005078905081e-06,
      "loss": 0.121,
      "step": 19709
    },
    {
      "epoch": 0.5750043759845965,
      "grad_norm": 0.8183032746130807,
      "learning_rate": 4.034541531312099e-06,
      "loss": 0.1446,
      "step": 19710
    },
    {
      "epoch": 0.5750335492152401,
      "grad_norm": 0.8314492661481785,
      "learning_rate": 4.034077992338501e-06,
      "loss": 0.1396,
      "step": 19711
    },
    {
      "epoch": 0.5750627224458836,
      "grad_norm": 0.9401287982859894,
      "learning_rate": 4.0336144619884236e-06,
      "loss": 0.1409,
      "step": 19712
    },
    {
      "epoch": 0.5750918956765272,
      "grad_norm": 0.8138107409108649,
      "learning_rate": 4.0331509402660066e-06,
      "loss": 0.1176,
      "step": 19713
    },
    {
      "epoch": 0.5751210689071707,
      "grad_norm": 0.7815657198400805,
      "learning_rate": 4.032687427175387e-06,
      "loss": 0.1194,
      "step": 19714
    },
    {
      "epoch": 0.5751502421378143,
      "grad_norm": 0.9751414630695675,
      "learning_rate": 4.0322239227207025e-06,
      "loss": 0.155,
      "step": 19715
    },
    {
      "epoch": 0.575179415368458,
      "grad_norm": 0.752295778092462,
      "learning_rate": 4.031760426906091e-06,
      "loss": 0.1291,
      "step": 19716
    },
    {
      "epoch": 0.5752085885991015,
      "grad_norm": 0.9306219494970849,
      "learning_rate": 4.031296939735693e-06,
      "loss": 0.1291,
      "step": 19717
    },
    {
      "epoch": 0.5752377618297451,
      "grad_norm": 0.7988325175986996,
      "learning_rate": 4.0308334612136435e-06,
      "loss": 0.1179,
      "step": 19718
    },
    {
      "epoch": 0.5752669350603886,
      "grad_norm": 1.0465718626053495,
      "learning_rate": 4.030369991344083e-06,
      "loss": 0.1448,
      "step": 19719
    },
    {
      "epoch": 0.5752961082910322,
      "grad_norm": 0.8277484942582721,
      "learning_rate": 4.029906530131147e-06,
      "loss": 0.1443,
      "step": 19720
    },
    {
      "epoch": 0.5753252815216757,
      "grad_norm": 0.8403907840133241,
      "learning_rate": 4.0294430775789735e-06,
      "loss": 0.1201,
      "step": 19721
    },
    {
      "epoch": 0.5753544547523193,
      "grad_norm": 0.7523851084725544,
      "learning_rate": 4.028979633691699e-06,
      "loss": 0.1275,
      "step": 19722
    },
    {
      "epoch": 0.5753836279829628,
      "grad_norm": 0.742304283264885,
      "learning_rate": 4.028516198473465e-06,
      "loss": 0.1091,
      "step": 19723
    },
    {
      "epoch": 0.5754128012136064,
      "grad_norm": 0.8433274774611195,
      "learning_rate": 4.028052771928406e-06,
      "loss": 0.1223,
      "step": 19724
    },
    {
      "epoch": 0.5754419744442499,
      "grad_norm": 0.8413010352489224,
      "learning_rate": 4.027589354060659e-06,
      "loss": 0.1155,
      "step": 19725
    },
    {
      "epoch": 0.5754711476748935,
      "grad_norm": 0.854479311239559,
      "learning_rate": 4.027125944874364e-06,
      "loss": 0.1365,
      "step": 19726
    },
    {
      "epoch": 0.575500320905537,
      "grad_norm": 0.8284366003951291,
      "learning_rate": 4.0266625443736555e-06,
      "loss": 0.1311,
      "step": 19727
    },
    {
      "epoch": 0.5755294941361806,
      "grad_norm": 0.8834802623640456,
      "learning_rate": 4.0261991525626696e-06,
      "loss": 0.127,
      "step": 19728
    },
    {
      "epoch": 0.5755586673668242,
      "grad_norm": 0.6915509347636353,
      "learning_rate": 4.025735769445546e-06,
      "loss": 0.141,
      "step": 19729
    },
    {
      "epoch": 0.5755878405974678,
      "grad_norm": 0.96567692670703,
      "learning_rate": 4.025272395026421e-06,
      "loss": 0.1306,
      "step": 19730
    },
    {
      "epoch": 0.5756170138281114,
      "grad_norm": 1.0906606888868646,
      "learning_rate": 4.024809029309433e-06,
      "loss": 0.1569,
      "step": 19731
    },
    {
      "epoch": 0.5756461870587549,
      "grad_norm": 0.6957580102975305,
      "learning_rate": 4.024345672298716e-06,
      "loss": 0.1121,
      "step": 19732
    },
    {
      "epoch": 0.5756753602893985,
      "grad_norm": 0.7463641590409569,
      "learning_rate": 4.023882323998408e-06,
      "loss": 0.1072,
      "step": 19733
    },
    {
      "epoch": 0.575704533520042,
      "grad_norm": 0.8857683952020571,
      "learning_rate": 4.023418984412644e-06,
      "loss": 0.1393,
      "step": 19734
    },
    {
      "epoch": 0.5757337067506856,
      "grad_norm": 0.8309607274026838,
      "learning_rate": 4.022955653545563e-06,
      "loss": 0.1102,
      "step": 19735
    },
    {
      "epoch": 0.5757628799813291,
      "grad_norm": 1.0748862367884913,
      "learning_rate": 4.0224923314013025e-06,
      "loss": 0.1309,
      "step": 19736
    },
    {
      "epoch": 0.5757920532119727,
      "grad_norm": 0.8220081336407107,
      "learning_rate": 4.022029017983996e-06,
      "loss": 0.116,
      "step": 19737
    },
    {
      "epoch": 0.5758212264426162,
      "grad_norm": 0.7963877708342001,
      "learning_rate": 4.0215657132977806e-06,
      "loss": 0.135,
      "step": 19738
    },
    {
      "epoch": 0.5758503996732598,
      "grad_norm": 0.9767353315039462,
      "learning_rate": 4.021102417346794e-06,
      "loss": 0.1059,
      "step": 19739
    },
    {
      "epoch": 0.5758795729039033,
      "grad_norm": 1.1487649578395094,
      "learning_rate": 4.020639130135169e-06,
      "loss": 0.1413,
      "step": 19740
    },
    {
      "epoch": 0.5759087461345469,
      "grad_norm": 1.0001431320504899,
      "learning_rate": 4.020175851667047e-06,
      "loss": 0.1264,
      "step": 19741
    },
    {
      "epoch": 0.5759379193651905,
      "grad_norm": 0.8337775920306253,
      "learning_rate": 4.019712581946559e-06,
      "loss": 0.1492,
      "step": 19742
    },
    {
      "epoch": 0.5759670925958341,
      "grad_norm": 0.9615227523095687,
      "learning_rate": 4.019249320977844e-06,
      "loss": 0.1351,
      "step": 19743
    },
    {
      "epoch": 0.5759962658264777,
      "grad_norm": 0.8406770901161117,
      "learning_rate": 4.018786068765037e-06,
      "loss": 0.122,
      "step": 19744
    },
    {
      "epoch": 0.5760254390571212,
      "grad_norm": 1.0603003567825613,
      "learning_rate": 4.018322825312273e-06,
      "loss": 0.1402,
      "step": 19745
    },
    {
      "epoch": 0.5760546122877648,
      "grad_norm": 0.834167142924995,
      "learning_rate": 4.017859590623688e-06,
      "loss": 0.1243,
      "step": 19746
    },
    {
      "epoch": 0.5760837855184083,
      "grad_norm": 0.8171894066121539,
      "learning_rate": 4.01739636470342e-06,
      "loss": 0.1172,
      "step": 19747
    },
    {
      "epoch": 0.5761129587490519,
      "grad_norm": 1.0158128756718992,
      "learning_rate": 4.016933147555601e-06,
      "loss": 0.1216,
      "step": 19748
    },
    {
      "epoch": 0.5761421319796954,
      "grad_norm": 0.7361794378816956,
      "learning_rate": 4.01646993918437e-06,
      "loss": 0.1103,
      "step": 19749
    },
    {
      "epoch": 0.576171305210339,
      "grad_norm": 1.0196663055710706,
      "learning_rate": 4.016006739593859e-06,
      "loss": 0.1125,
      "step": 19750
    },
    {
      "epoch": 0.5762004784409825,
      "grad_norm": 0.8065011504438061,
      "learning_rate": 4.015543548788206e-06,
      "loss": 0.1121,
      "step": 19751
    },
    {
      "epoch": 0.5762296516716261,
      "grad_norm": 1.0641140286394402,
      "learning_rate": 4.015080366771543e-06,
      "loss": 0.121,
      "step": 19752
    },
    {
      "epoch": 0.5762588249022697,
      "grad_norm": 0.82255712865244,
      "learning_rate": 4.0146171935480105e-06,
      "loss": 0.1077,
      "step": 19753
    },
    {
      "epoch": 0.5762879981329132,
      "grad_norm": 0.7435633035051162,
      "learning_rate": 4.014154029121739e-06,
      "loss": 0.1296,
      "step": 19754
    },
    {
      "epoch": 0.5763171713635568,
      "grad_norm": 0.943976291171618,
      "learning_rate": 4.013690873496864e-06,
      "loss": 0.1329,
      "step": 19755
    },
    {
      "epoch": 0.5763463445942003,
      "grad_norm": 0.8523598476593407,
      "learning_rate": 4.013227726677524e-06,
      "loss": 0.1241,
      "step": 19756
    },
    {
      "epoch": 0.576375517824844,
      "grad_norm": 0.8075599513321965,
      "learning_rate": 4.01276458866785e-06,
      "loss": 0.101,
      "step": 19757
    },
    {
      "epoch": 0.5764046910554875,
      "grad_norm": 1.1751377581481242,
      "learning_rate": 4.012301459471976e-06,
      "loss": 0.127,
      "step": 19758
    },
    {
      "epoch": 0.5764338642861311,
      "grad_norm": 1.0134788032131126,
      "learning_rate": 4.011838339094041e-06,
      "loss": 0.1468,
      "step": 19759
    },
    {
      "epoch": 0.5764630375167746,
      "grad_norm": 0.7576605016473313,
      "learning_rate": 4.011375227538176e-06,
      "loss": 0.1254,
      "step": 19760
    },
    {
      "epoch": 0.5764922107474182,
      "grad_norm": 0.8019530711713517,
      "learning_rate": 4.0109121248085196e-06,
      "loss": 0.1349,
      "step": 19761
    },
    {
      "epoch": 0.5765213839780617,
      "grad_norm": 0.8733164061042527,
      "learning_rate": 4.010449030909202e-06,
      "loss": 0.1369,
      "step": 19762
    },
    {
      "epoch": 0.5765505572087053,
      "grad_norm": 1.1058875053330164,
      "learning_rate": 4.009985945844359e-06,
      "loss": 0.14,
      "step": 19763
    },
    {
      "epoch": 0.5765797304393488,
      "grad_norm": 0.7559257276682203,
      "learning_rate": 4.009522869618124e-06,
      "loss": 0.1101,
      "step": 19764
    },
    {
      "epoch": 0.5766089036699924,
      "grad_norm": 0.7963224403194995,
      "learning_rate": 4.009059802234633e-06,
      "loss": 0.1282,
      "step": 19765
    },
    {
      "epoch": 0.576638076900636,
      "grad_norm": 1.0287969571924067,
      "learning_rate": 4.008596743698022e-06,
      "loss": 0.1228,
      "step": 19766
    },
    {
      "epoch": 0.5766672501312795,
      "grad_norm": 1.0658700001395847,
      "learning_rate": 4.00813369401242e-06,
      "loss": 0.1485,
      "step": 19767
    },
    {
      "epoch": 0.5766964233619231,
      "grad_norm": 0.9506619467707396,
      "learning_rate": 4.007670653181965e-06,
      "loss": 0.121,
      "step": 19768
    },
    {
      "epoch": 0.5767255965925666,
      "grad_norm": 1.6668110885406686,
      "learning_rate": 4.00720762121079e-06,
      "loss": 0.1381,
      "step": 19769
    },
    {
      "epoch": 0.5767547698232102,
      "grad_norm": 1.105236162752622,
      "learning_rate": 4.006744598103025e-06,
      "loss": 0.119,
      "step": 19770
    },
    {
      "epoch": 0.5767839430538538,
      "grad_norm": 1.0975762739219408,
      "learning_rate": 4.00628158386281e-06,
      "loss": 0.1288,
      "step": 19771
    },
    {
      "epoch": 0.5768131162844974,
      "grad_norm": 0.9598289749935085,
      "learning_rate": 4.005818578494275e-06,
      "loss": 0.1288,
      "step": 19772
    },
    {
      "epoch": 0.5768422895151409,
      "grad_norm": 1.2119157824993114,
      "learning_rate": 4.005355582001555e-06,
      "loss": 0.135,
      "step": 19773
    },
    {
      "epoch": 0.5768714627457845,
      "grad_norm": 1.4749088699914519,
      "learning_rate": 4.0048925943887835e-06,
      "loss": 0.1347,
      "step": 19774
    },
    {
      "epoch": 0.576900635976428,
      "grad_norm": 1.125350444889824,
      "learning_rate": 4.004429615660092e-06,
      "loss": 0.138,
      "step": 19775
    },
    {
      "epoch": 0.5769298092070716,
      "grad_norm": 0.8212775273239675,
      "learning_rate": 4.003966645819615e-06,
      "loss": 0.1323,
      "step": 19776
    },
    {
      "epoch": 0.5769589824377152,
      "grad_norm": 1.1248482628024037,
      "learning_rate": 4.003503684871486e-06,
      "loss": 0.1376,
      "step": 19777
    },
    {
      "epoch": 0.5769881556683587,
      "grad_norm": 1.460473657981357,
      "learning_rate": 4.003040732819839e-06,
      "loss": 0.1257,
      "step": 19778
    },
    {
      "epoch": 0.5770173288990023,
      "grad_norm": 0.97374372816712,
      "learning_rate": 4.002577789668807e-06,
      "loss": 0.1224,
      "step": 19779
    },
    {
      "epoch": 0.5770465021296458,
      "grad_norm": 0.728353761558451,
      "learning_rate": 4.002114855422522e-06,
      "loss": 0.1182,
      "step": 19780
    },
    {
      "epoch": 0.5770756753602894,
      "grad_norm": 0.8199051259301026,
      "learning_rate": 4.001651930085117e-06,
      "loss": 0.1288,
      "step": 19781
    },
    {
      "epoch": 0.5771048485909329,
      "grad_norm": 0.9040974601117048,
      "learning_rate": 4.0011890136607236e-06,
      "loss": 0.1167,
      "step": 19782
    },
    {
      "epoch": 0.5771340218215765,
      "grad_norm": 0.9500231380617581,
      "learning_rate": 4.000726106153479e-06,
      "loss": 0.1563,
      "step": 19783
    },
    {
      "epoch": 0.5771631950522201,
      "grad_norm": 0.9962617092536757,
      "learning_rate": 4.000263207567512e-06,
      "loss": 0.1391,
      "step": 19784
    },
    {
      "epoch": 0.5771923682828637,
      "grad_norm": 0.8647576649946964,
      "learning_rate": 3.999800317906956e-06,
      "loss": 0.1415,
      "step": 19785
    },
    {
      "epoch": 0.5772215415135072,
      "grad_norm": 0.9644638617361798,
      "learning_rate": 3.999337437175946e-06,
      "loss": 0.1261,
      "step": 19786
    },
    {
      "epoch": 0.5772507147441508,
      "grad_norm": 1.0077523994658744,
      "learning_rate": 3.998874565378611e-06,
      "loss": 0.1204,
      "step": 19787
    },
    {
      "epoch": 0.5772798879747943,
      "grad_norm": 0.9483469434896953,
      "learning_rate": 3.998411702519083e-06,
      "loss": 0.1432,
      "step": 19788
    },
    {
      "epoch": 0.5773090612054379,
      "grad_norm": 0.6702345503281127,
      "learning_rate": 3.997948848601498e-06,
      "loss": 0.1091,
      "step": 19789
    },
    {
      "epoch": 0.5773382344360815,
      "grad_norm": 0.868699841119976,
      "learning_rate": 3.997486003629987e-06,
      "loss": 0.1255,
      "step": 19790
    },
    {
      "epoch": 0.577367407666725,
      "grad_norm": 0.9848451210062822,
      "learning_rate": 3.997023167608682e-06,
      "loss": 0.1401,
      "step": 19791
    },
    {
      "epoch": 0.5773965808973686,
      "grad_norm": 0.8777929448891568,
      "learning_rate": 3.996560340541714e-06,
      "loss": 0.1117,
      "step": 19792
    },
    {
      "epoch": 0.5774257541280121,
      "grad_norm": 0.7907585752637132,
      "learning_rate": 3.996097522433216e-06,
      "loss": 0.1235,
      "step": 19793
    },
    {
      "epoch": 0.5774549273586557,
      "grad_norm": 0.9023159792247771,
      "learning_rate": 3.995634713287317e-06,
      "loss": 0.1403,
      "step": 19794
    },
    {
      "epoch": 0.5774841005892992,
      "grad_norm": 0.8529545228579224,
      "learning_rate": 3.995171913108154e-06,
      "loss": 0.1048,
      "step": 19795
    },
    {
      "epoch": 0.5775132738199428,
      "grad_norm": 0.858297678135177,
      "learning_rate": 3.994709121899858e-06,
      "loss": 0.1578,
      "step": 19796
    },
    {
      "epoch": 0.5775424470505863,
      "grad_norm": 1.002419466038017,
      "learning_rate": 3.994246339666557e-06,
      "loss": 0.14,
      "step": 19797
    },
    {
      "epoch": 0.57757162028123,
      "grad_norm": 1.0851157227082862,
      "learning_rate": 3.993783566412384e-06,
      "loss": 0.1296,
      "step": 19798
    },
    {
      "epoch": 0.5776007935118735,
      "grad_norm": 0.9576563622048736,
      "learning_rate": 3.9933208021414725e-06,
      "loss": 0.1259,
      "step": 19799
    },
    {
      "epoch": 0.5776299667425171,
      "grad_norm": 1.0849308098215573,
      "learning_rate": 3.9928580468579495e-06,
      "loss": 0.142,
      "step": 19800
    },
    {
      "epoch": 0.5776591399731607,
      "grad_norm": 0.9446703660505869,
      "learning_rate": 3.992395300565953e-06,
      "loss": 0.1231,
      "step": 19801
    },
    {
      "epoch": 0.5776883132038042,
      "grad_norm": 1.1123231866657721,
      "learning_rate": 3.991932563269609e-06,
      "loss": 0.1571,
      "step": 19802
    },
    {
      "epoch": 0.5777174864344478,
      "grad_norm": 0.7499829305532338,
      "learning_rate": 3.991469834973051e-06,
      "loss": 0.0973,
      "step": 19803
    },
    {
      "epoch": 0.5777466596650913,
      "grad_norm": 0.7121285357367448,
      "learning_rate": 3.991007115680411e-06,
      "loss": 0.1288,
      "step": 19804
    },
    {
      "epoch": 0.5777758328957349,
      "grad_norm": 1.0362350184472247,
      "learning_rate": 3.990544405395817e-06,
      "loss": 0.1182,
      "step": 19805
    },
    {
      "epoch": 0.5778050061263784,
      "grad_norm": 2.8101228898403883,
      "learning_rate": 3.9900817041234e-06,
      "loss": 0.1465,
      "step": 19806
    },
    {
      "epoch": 0.577834179357022,
      "grad_norm": 0.9996293048392291,
      "learning_rate": 3.989619011867294e-06,
      "loss": 0.1161,
      "step": 19807
    },
    {
      "epoch": 0.5778633525876655,
      "grad_norm": 0.688654226170998,
      "learning_rate": 3.989156328631629e-06,
      "loss": 0.1117,
      "step": 19808
    },
    {
      "epoch": 0.5778925258183091,
      "grad_norm": 1.0035116856754207,
      "learning_rate": 3.9886936544205354e-06,
      "loss": 0.1198,
      "step": 19809
    },
    {
      "epoch": 0.5779216990489526,
      "grad_norm": 1.1308080666264633,
      "learning_rate": 3.988230989238142e-06,
      "loss": 0.1564,
      "step": 19810
    },
    {
      "epoch": 0.5779508722795963,
      "grad_norm": 0.8816919204467307,
      "learning_rate": 3.987768333088581e-06,
      "loss": 0.1276,
      "step": 19811
    },
    {
      "epoch": 0.5779800455102398,
      "grad_norm": 0.7839434874727919,
      "learning_rate": 3.987305685975982e-06,
      "loss": 0.1115,
      "step": 19812
    },
    {
      "epoch": 0.5780092187408834,
      "grad_norm": 1.1028254583540187,
      "learning_rate": 3.9868430479044775e-06,
      "loss": 0.1545,
      "step": 19813
    },
    {
      "epoch": 0.578038391971527,
      "grad_norm": 0.8066229557752148,
      "learning_rate": 3.9863804188781965e-06,
      "loss": 0.1058,
      "step": 19814
    },
    {
      "epoch": 0.5780675652021705,
      "grad_norm": 1.069983565154359,
      "learning_rate": 3.985917798901268e-06,
      "loss": 0.1703,
      "step": 19815
    },
    {
      "epoch": 0.5780967384328141,
      "grad_norm": 0.9194799344167955,
      "learning_rate": 3.985455187977825e-06,
      "loss": 0.1419,
      "step": 19816
    },
    {
      "epoch": 0.5781259116634576,
      "grad_norm": 0.6317555119245367,
      "learning_rate": 3.984992586111995e-06,
      "loss": 0.0971,
      "step": 19817
    },
    {
      "epoch": 0.5781550848941012,
      "grad_norm": 0.7721207344944646,
      "learning_rate": 3.984529993307907e-06,
      "loss": 0.1067,
      "step": 19818
    },
    {
      "epoch": 0.5781842581247447,
      "grad_norm": 2.79177121614945,
      "learning_rate": 3.984067409569694e-06,
      "loss": 0.1371,
      "step": 19819
    },
    {
      "epoch": 0.5782134313553883,
      "grad_norm": 1.0237265107365645,
      "learning_rate": 3.983604834901485e-06,
      "loss": 0.1339,
      "step": 19820
    },
    {
      "epoch": 0.5782426045860318,
      "grad_norm": 0.8966166671272845,
      "learning_rate": 3.983142269307411e-06,
      "loss": 0.1154,
      "step": 19821
    },
    {
      "epoch": 0.5782717778166754,
      "grad_norm": 0.8076250089700812,
      "learning_rate": 3.982679712791599e-06,
      "loss": 0.1393,
      "step": 19822
    },
    {
      "epoch": 0.5783009510473189,
      "grad_norm": 0.8761378344490751,
      "learning_rate": 3.982217165358179e-06,
      "loss": 0.1224,
      "step": 19823
    },
    {
      "epoch": 0.5783301242779625,
      "grad_norm": 1.0008965283306264,
      "learning_rate": 3.98175462701128e-06,
      "loss": 0.105,
      "step": 19824
    },
    {
      "epoch": 0.5783592975086062,
      "grad_norm": 0.7722542211462903,
      "learning_rate": 3.981292097755034e-06,
      "loss": 0.1454,
      "step": 19825
    },
    {
      "epoch": 0.5783884707392497,
      "grad_norm": 0.653741040135133,
      "learning_rate": 3.98082957759357e-06,
      "loss": 0.1124,
      "step": 19826
    },
    {
      "epoch": 0.5784176439698933,
      "grad_norm": 0.8022381275982864,
      "learning_rate": 3.980367066531015e-06,
      "loss": 0.1221,
      "step": 19827
    },
    {
      "epoch": 0.5784468172005368,
      "grad_norm": 0.7649152233724401,
      "learning_rate": 3.9799045645715e-06,
      "loss": 0.1011,
      "step": 19828
    },
    {
      "epoch": 0.5784759904311804,
      "grad_norm": 0.7485276808645908,
      "learning_rate": 3.979442071719154e-06,
      "loss": 0.126,
      "step": 19829
    },
    {
      "epoch": 0.5785051636618239,
      "grad_norm": 1.19238550015347,
      "learning_rate": 3.978979587978102e-06,
      "loss": 0.1185,
      "step": 19830
    },
    {
      "epoch": 0.5785343368924675,
      "grad_norm": 0.8311169837843856,
      "learning_rate": 3.978517113352481e-06,
      "loss": 0.1173,
      "step": 19831
    },
    {
      "epoch": 0.578563510123111,
      "grad_norm": 0.9373734781385317,
      "learning_rate": 3.978054647846413e-06,
      "loss": 0.1265,
      "step": 19832
    },
    {
      "epoch": 0.5785926833537546,
      "grad_norm": 0.7231410845994454,
      "learning_rate": 3.97759219146403e-06,
      "loss": 0.1215,
      "step": 19833
    },
    {
      "epoch": 0.5786218565843981,
      "grad_norm": 0.720753605115125,
      "learning_rate": 3.977129744209461e-06,
      "loss": 0.1323,
      "step": 19834
    },
    {
      "epoch": 0.5786510298150417,
      "grad_norm": 0.7621961911100231,
      "learning_rate": 3.976667306086831e-06,
      "loss": 0.123,
      "step": 19835
    },
    {
      "epoch": 0.5786802030456852,
      "grad_norm": 0.940698061770366,
      "learning_rate": 3.976204877100272e-06,
      "loss": 0.1346,
      "step": 19836
    },
    {
      "epoch": 0.5787093762763288,
      "grad_norm": 0.693390098201076,
      "learning_rate": 3.975742457253911e-06,
      "loss": 0.1208,
      "step": 19837
    },
    {
      "epoch": 0.5787385495069725,
      "grad_norm": 0.7374938419856453,
      "learning_rate": 3.975280046551877e-06,
      "loss": 0.1317,
      "step": 19838
    },
    {
      "epoch": 0.578767722737616,
      "grad_norm": 0.8774612806914714,
      "learning_rate": 3.9748176449983e-06,
      "loss": 0.1503,
      "step": 19839
    },
    {
      "epoch": 0.5787968959682596,
      "grad_norm": 0.734475246427361,
      "learning_rate": 3.974355252597304e-06,
      "loss": 0.1126,
      "step": 19840
    },
    {
      "epoch": 0.5788260691989031,
      "grad_norm": 0.886860910766383,
      "learning_rate": 3.973892869353021e-06,
      "loss": 0.1195,
      "step": 19841
    },
    {
      "epoch": 0.5788552424295467,
      "grad_norm": 1.041479930540893,
      "learning_rate": 3.973430495269576e-06,
      "loss": 0.131,
      "step": 19842
    },
    {
      "epoch": 0.5788844156601902,
      "grad_norm": 0.7043167579534947,
      "learning_rate": 3.9729681303510995e-06,
      "loss": 0.1039,
      "step": 19843
    },
    {
      "epoch": 0.5789135888908338,
      "grad_norm": 0.7283009365961199,
      "learning_rate": 3.972505774601718e-06,
      "loss": 0.1043,
      "step": 19844
    },
    {
      "epoch": 0.5789427621214773,
      "grad_norm": 1.1139644686340868,
      "learning_rate": 3.97204342802556e-06,
      "loss": 0.1422,
      "step": 19845
    },
    {
      "epoch": 0.5789719353521209,
      "grad_norm": 0.8158675132918699,
      "learning_rate": 3.971581090626754e-06,
      "loss": 0.1109,
      "step": 19846
    },
    {
      "epoch": 0.5790011085827644,
      "grad_norm": 0.939665905198642,
      "learning_rate": 3.971118762409425e-06,
      "loss": 0.1268,
      "step": 19847
    },
    {
      "epoch": 0.579030281813408,
      "grad_norm": 0.8274508180598745,
      "learning_rate": 3.970656443377701e-06,
      "loss": 0.1298,
      "step": 19848
    },
    {
      "epoch": 0.5790594550440515,
      "grad_norm": 1.0217875282158446,
      "learning_rate": 3.970194133535712e-06,
      "loss": 0.1423,
      "step": 19849
    },
    {
      "epoch": 0.5790886282746951,
      "grad_norm": 0.8823670079489304,
      "learning_rate": 3.9697318328875835e-06,
      "loss": 0.1324,
      "step": 19850
    },
    {
      "epoch": 0.5791178015053386,
      "grad_norm": 0.808542118756704,
      "learning_rate": 3.969269541437444e-06,
      "loss": 0.1325,
      "step": 19851
    },
    {
      "epoch": 0.5791469747359823,
      "grad_norm": 0.8857303744791257,
      "learning_rate": 3.96880725918942e-06,
      "loss": 0.1634,
      "step": 19852
    },
    {
      "epoch": 0.5791761479666259,
      "grad_norm": 0.6887971717287216,
      "learning_rate": 3.968344986147637e-06,
      "loss": 0.1265,
      "step": 19853
    },
    {
      "epoch": 0.5792053211972694,
      "grad_norm": 0.9153616050952632,
      "learning_rate": 3.967882722316224e-06,
      "loss": 0.1317,
      "step": 19854
    },
    {
      "epoch": 0.579234494427913,
      "grad_norm": 0.884300541454304,
      "learning_rate": 3.967420467699309e-06,
      "loss": 0.1263,
      "step": 19855
    },
    {
      "epoch": 0.5792636676585565,
      "grad_norm": 0.7977231072198944,
      "learning_rate": 3.9669582223010175e-06,
      "loss": 0.1207,
      "step": 19856
    },
    {
      "epoch": 0.5792928408892001,
      "grad_norm": 0.7568274865395707,
      "learning_rate": 3.966495986125476e-06,
      "loss": 0.1203,
      "step": 19857
    },
    {
      "epoch": 0.5793220141198436,
      "grad_norm": 0.7097346775430343,
      "learning_rate": 3.966033759176811e-06,
      "loss": 0.1226,
      "step": 19858
    },
    {
      "epoch": 0.5793511873504872,
      "grad_norm": 0.9743664825150998,
      "learning_rate": 3.965571541459153e-06,
      "loss": 0.1163,
      "step": 19859
    },
    {
      "epoch": 0.5793803605811307,
      "grad_norm": 0.7941952176074787,
      "learning_rate": 3.96510933297662e-06,
      "loss": 0.124,
      "step": 19860
    },
    {
      "epoch": 0.5794095338117743,
      "grad_norm": 1.0758974343281034,
      "learning_rate": 3.964647133733347e-06,
      "loss": 0.1318,
      "step": 19861
    },
    {
      "epoch": 0.5794387070424178,
      "grad_norm": 0.9089754466127158,
      "learning_rate": 3.964184943733457e-06,
      "loss": 0.1609,
      "step": 19862
    },
    {
      "epoch": 0.5794678802730614,
      "grad_norm": 0.8361578672702864,
      "learning_rate": 3.963722762981076e-06,
      "loss": 0.1447,
      "step": 19863
    },
    {
      "epoch": 0.579497053503705,
      "grad_norm": 0.7861331954177537,
      "learning_rate": 3.963260591480332e-06,
      "loss": 0.1047,
      "step": 19864
    },
    {
      "epoch": 0.5795262267343486,
      "grad_norm": 0.8176470971516183,
      "learning_rate": 3.962798429235349e-06,
      "loss": 0.1518,
      "step": 19865
    },
    {
      "epoch": 0.5795553999649922,
      "grad_norm": 0.7700184030086775,
      "learning_rate": 3.9623362762502525e-06,
      "loss": 0.1481,
      "step": 19866
    },
    {
      "epoch": 0.5795845731956357,
      "grad_norm": 0.8709540450684102,
      "learning_rate": 3.961874132529172e-06,
      "loss": 0.1272,
      "step": 19867
    },
    {
      "epoch": 0.5796137464262793,
      "grad_norm": 0.8419206805683189,
      "learning_rate": 3.961411998076231e-06,
      "loss": 0.1287,
      "step": 19868
    },
    {
      "epoch": 0.5796429196569228,
      "grad_norm": 0.7653976739221687,
      "learning_rate": 3.960949872895556e-06,
      "loss": 0.1256,
      "step": 19869
    },
    {
      "epoch": 0.5796720928875664,
      "grad_norm": 0.8034744332979992,
      "learning_rate": 3.960487756991272e-06,
      "loss": 0.12,
      "step": 19870
    },
    {
      "epoch": 0.5797012661182099,
      "grad_norm": 0.8200857262003038,
      "learning_rate": 3.9600256503675054e-06,
      "loss": 0.147,
      "step": 19871
    },
    {
      "epoch": 0.5797304393488535,
      "grad_norm": 0.7411616313533512,
      "learning_rate": 3.95956355302838e-06,
      "loss": 0.1353,
      "step": 19872
    },
    {
      "epoch": 0.579759612579497,
      "grad_norm": 0.6780823267716775,
      "learning_rate": 3.959101464978026e-06,
      "loss": 0.1358,
      "step": 19873
    },
    {
      "epoch": 0.5797887858101406,
      "grad_norm": 0.9806931616582027,
      "learning_rate": 3.958639386220564e-06,
      "loss": 0.1378,
      "step": 19874
    },
    {
      "epoch": 0.5798179590407841,
      "grad_norm": 0.8598138366783364,
      "learning_rate": 3.9581773167601205e-06,
      "loss": 0.1326,
      "step": 19875
    },
    {
      "epoch": 0.5798471322714277,
      "grad_norm": 0.7139813517210786,
      "learning_rate": 3.957715256600822e-06,
      "loss": 0.142,
      "step": 19876
    },
    {
      "epoch": 0.5798763055020713,
      "grad_norm": 0.7566984482264986,
      "learning_rate": 3.957253205746793e-06,
      "loss": 0.143,
      "step": 19877
    },
    {
      "epoch": 0.5799054787327148,
      "grad_norm": 0.7622060336518932,
      "learning_rate": 3.956791164202158e-06,
      "loss": 0.116,
      "step": 19878
    },
    {
      "epoch": 0.5799346519633585,
      "grad_norm": 0.7702675669435379,
      "learning_rate": 3.9563291319710416e-06,
      "loss": 0.1187,
      "step": 19879
    },
    {
      "epoch": 0.579963825194002,
      "grad_norm": 0.7526465653630454,
      "learning_rate": 3.95586710905757e-06,
      "loss": 0.1215,
      "step": 19880
    },
    {
      "epoch": 0.5799929984246456,
      "grad_norm": 0.7270363122337156,
      "learning_rate": 3.955405095465869e-06,
      "loss": 0.1308,
      "step": 19881
    },
    {
      "epoch": 0.5800221716552891,
      "grad_norm": 0.7641247848398072,
      "learning_rate": 3.9549430912000605e-06,
      "loss": 0.1318,
      "step": 19882
    },
    {
      "epoch": 0.5800513448859327,
      "grad_norm": 0.9114894132623551,
      "learning_rate": 3.954481096264272e-06,
      "loss": 0.1183,
      "step": 19883
    },
    {
      "epoch": 0.5800805181165762,
      "grad_norm": 0.9135341903234376,
      "learning_rate": 3.954019110662624e-06,
      "loss": 0.1168,
      "step": 19884
    },
    {
      "epoch": 0.5801096913472198,
      "grad_norm": 0.7507602960813876,
      "learning_rate": 3.953557134399245e-06,
      "loss": 0.108,
      "step": 19885
    },
    {
      "epoch": 0.5801388645778633,
      "grad_norm": 0.755358982570575,
      "learning_rate": 3.95309516747826e-06,
      "loss": 0.1369,
      "step": 19886
    },
    {
      "epoch": 0.5801680378085069,
      "grad_norm": 0.7412791489904513,
      "learning_rate": 3.95263320990379e-06,
      "loss": 0.1288,
      "step": 19887
    },
    {
      "epoch": 0.5801972110391505,
      "grad_norm": 0.9177539700374737,
      "learning_rate": 3.95217126167996e-06,
      "loss": 0.1324,
      "step": 19888
    },
    {
      "epoch": 0.580226384269794,
      "grad_norm": 0.721199355245318,
      "learning_rate": 3.951709322810896e-06,
      "loss": 0.108,
      "step": 19889
    },
    {
      "epoch": 0.5802555575004376,
      "grad_norm": 0.9920530278610725,
      "learning_rate": 3.9512473933007185e-06,
      "loss": 0.1269,
      "step": 19890
    },
    {
      "epoch": 0.5802847307310811,
      "grad_norm": 0.9430818114541443,
      "learning_rate": 3.950785473153557e-06,
      "loss": 0.1365,
      "step": 19891
    },
    {
      "epoch": 0.5803139039617248,
      "grad_norm": 0.8576843877655337,
      "learning_rate": 3.950323562373531e-06,
      "loss": 0.1185,
      "step": 19892
    },
    {
      "epoch": 0.5803430771923683,
      "grad_norm": 0.7132914824219657,
      "learning_rate": 3.949861660964766e-06,
      "loss": 0.1158,
      "step": 19893
    },
    {
      "epoch": 0.5803722504230119,
      "grad_norm": 0.9576143100258052,
      "learning_rate": 3.949399768931386e-06,
      "loss": 0.1146,
      "step": 19894
    },
    {
      "epoch": 0.5804014236536554,
      "grad_norm": 0.7590347442726993,
      "learning_rate": 3.948937886277511e-06,
      "loss": 0.1212,
      "step": 19895
    },
    {
      "epoch": 0.580430596884299,
      "grad_norm": 0.7955627615021786,
      "learning_rate": 3.948476013007271e-06,
      "loss": 0.1384,
      "step": 19896
    },
    {
      "epoch": 0.5804597701149425,
      "grad_norm": 0.794198650050654,
      "learning_rate": 3.948014149124785e-06,
      "loss": 0.1275,
      "step": 19897
    },
    {
      "epoch": 0.5804889433455861,
      "grad_norm": 0.8803440218187136,
      "learning_rate": 3.947552294634177e-06,
      "loss": 0.1427,
      "step": 19898
    },
    {
      "epoch": 0.5805181165762296,
      "grad_norm": 1.1473174799434247,
      "learning_rate": 3.947090449539573e-06,
      "loss": 0.1209,
      "step": 19899
    },
    {
      "epoch": 0.5805472898068732,
      "grad_norm": 0.7649924331445636,
      "learning_rate": 3.946628613845092e-06,
      "loss": 0.1184,
      "step": 19900
    },
    {
      "epoch": 0.5805764630375168,
      "grad_norm": 0.8568457914856835,
      "learning_rate": 3.9461667875548594e-06,
      "loss": 0.1421,
      "step": 19901
    },
    {
      "epoch": 0.5806056362681603,
      "grad_norm": 0.9987021030237287,
      "learning_rate": 3.945704970672998e-06,
      "loss": 0.1465,
      "step": 19902
    },
    {
      "epoch": 0.5806348094988039,
      "grad_norm": 0.7834915474880942,
      "learning_rate": 3.9452431632036326e-06,
      "loss": 0.1352,
      "step": 19903
    },
    {
      "epoch": 0.5806639827294474,
      "grad_norm": 0.9804178230128983,
      "learning_rate": 3.944781365150883e-06,
      "loss": 0.1137,
      "step": 19904
    },
    {
      "epoch": 0.580693155960091,
      "grad_norm": 0.7419456224991688,
      "learning_rate": 3.944319576518874e-06,
      "loss": 0.1191,
      "step": 19905
    },
    {
      "epoch": 0.5807223291907346,
      "grad_norm": 0.8857004541264865,
      "learning_rate": 3.943857797311729e-06,
      "loss": 0.149,
      "step": 19906
    },
    {
      "epoch": 0.5807515024213782,
      "grad_norm": 0.8853359666038874,
      "learning_rate": 3.943396027533566e-06,
      "loss": 0.1426,
      "step": 19907
    },
    {
      "epoch": 0.5807806756520217,
      "grad_norm": 0.7817563314982279,
      "learning_rate": 3.942934267188514e-06,
      "loss": 0.1339,
      "step": 19908
    },
    {
      "epoch": 0.5808098488826653,
      "grad_norm": 1.0440011384952868,
      "learning_rate": 3.942472516280691e-06,
      "loss": 0.1477,
      "step": 19909
    },
    {
      "epoch": 0.5808390221133088,
      "grad_norm": 0.7198382432564843,
      "learning_rate": 3.942010774814222e-06,
      "loss": 0.1496,
      "step": 19910
    },
    {
      "epoch": 0.5808681953439524,
      "grad_norm": 0.9719886487899805,
      "learning_rate": 3.941549042793229e-06,
      "loss": 0.1397,
      "step": 19911
    },
    {
      "epoch": 0.580897368574596,
      "grad_norm": 1.0668822065325398,
      "learning_rate": 3.941087320221832e-06,
      "loss": 0.1228,
      "step": 19912
    },
    {
      "epoch": 0.5809265418052395,
      "grad_norm": 0.8799220268546974,
      "learning_rate": 3.940625607104154e-06,
      "loss": 0.1179,
      "step": 19913
    },
    {
      "epoch": 0.5809557150358831,
      "grad_norm": 1.131297759725469,
      "learning_rate": 3.940163903444319e-06,
      "loss": 0.1092,
      "step": 19914
    },
    {
      "epoch": 0.5809848882665266,
      "grad_norm": 0.9415100809001666,
      "learning_rate": 3.939702209246446e-06,
      "loss": 0.1262,
      "step": 19915
    },
    {
      "epoch": 0.5810140614971702,
      "grad_norm": 1.1233275530287412,
      "learning_rate": 3.939240524514662e-06,
      "loss": 0.1227,
      "step": 19916
    },
    {
      "epoch": 0.5810432347278137,
      "grad_norm": 0.7489122847542726,
      "learning_rate": 3.9387788492530826e-06,
      "loss": 0.1117,
      "step": 19917
    },
    {
      "epoch": 0.5810724079584573,
      "grad_norm": 0.6935638746989583,
      "learning_rate": 3.938317183465833e-06,
      "loss": 0.088,
      "step": 19918
    },
    {
      "epoch": 0.5811015811891009,
      "grad_norm": 1.0277083187623963,
      "learning_rate": 3.937855527157033e-06,
      "loss": 0.1526,
      "step": 19919
    },
    {
      "epoch": 0.5811307544197445,
      "grad_norm": 1.0545003707528808,
      "learning_rate": 3.937393880330806e-06,
      "loss": 0.1373,
      "step": 19920
    },
    {
      "epoch": 0.581159927650388,
      "grad_norm": 0.7525256930274601,
      "learning_rate": 3.9369322429912736e-06,
      "loss": 0.1257,
      "step": 19921
    },
    {
      "epoch": 0.5811891008810316,
      "grad_norm": 0.7158008025312903,
      "learning_rate": 3.936470615142557e-06,
      "loss": 0.1226,
      "step": 19922
    },
    {
      "epoch": 0.5812182741116751,
      "grad_norm": 0.8203859749882335,
      "learning_rate": 3.936008996788775e-06,
      "loss": 0.1379,
      "step": 19923
    },
    {
      "epoch": 0.5812474473423187,
      "grad_norm": 1.0118663819334237,
      "learning_rate": 3.935547387934052e-06,
      "loss": 0.1094,
      "step": 19924
    },
    {
      "epoch": 0.5812766205729623,
      "grad_norm": 0.8037094045417538,
      "learning_rate": 3.935085788582506e-06,
      "loss": 0.1192,
      "step": 19925
    },
    {
      "epoch": 0.5813057938036058,
      "grad_norm": 0.7028004467414166,
      "learning_rate": 3.9346241987382615e-06,
      "loss": 0.1217,
      "step": 19926
    },
    {
      "epoch": 0.5813349670342494,
      "grad_norm": 0.7599614576893323,
      "learning_rate": 3.9341626184054375e-06,
      "loss": 0.1378,
      "step": 19927
    },
    {
      "epoch": 0.5813641402648929,
      "grad_norm": 1.0005815611395625,
      "learning_rate": 3.9337010475881545e-06,
      "loss": 0.1408,
      "step": 19928
    },
    {
      "epoch": 0.5813933134955365,
      "grad_norm": 0.7408872546255835,
      "learning_rate": 3.933239486290536e-06,
      "loss": 0.1369,
      "step": 19929
    },
    {
      "epoch": 0.58142248672618,
      "grad_norm": 0.8496162025289706,
      "learning_rate": 3.932777934516699e-06,
      "loss": 0.1431,
      "step": 19930
    },
    {
      "epoch": 0.5814516599568236,
      "grad_norm": 0.7909946780810766,
      "learning_rate": 3.932316392270765e-06,
      "loss": 0.1363,
      "step": 19931
    },
    {
      "epoch": 0.5814808331874671,
      "grad_norm": 0.8744586560589046,
      "learning_rate": 3.931854859556857e-06,
      "loss": 0.1303,
      "step": 19932
    },
    {
      "epoch": 0.5815100064181108,
      "grad_norm": 0.7510692926728375,
      "learning_rate": 3.931393336379094e-06,
      "loss": 0.1408,
      "step": 19933
    },
    {
      "epoch": 0.5815391796487543,
      "grad_norm": 0.6833111703741482,
      "learning_rate": 3.930931822741596e-06,
      "loss": 0.1371,
      "step": 19934
    },
    {
      "epoch": 0.5815683528793979,
      "grad_norm": 0.9106610941026719,
      "learning_rate": 3.9304703186484825e-06,
      "loss": 0.1399,
      "step": 19935
    },
    {
      "epoch": 0.5815975261100415,
      "grad_norm": 0.7618252838441384,
      "learning_rate": 3.930008824103876e-06,
      "loss": 0.1341,
      "step": 19936
    },
    {
      "epoch": 0.581626699340685,
      "grad_norm": 0.893523887261781,
      "learning_rate": 3.929547339111892e-06,
      "loss": 0.1183,
      "step": 19937
    },
    {
      "epoch": 0.5816558725713286,
      "grad_norm": 0.8366860430902492,
      "learning_rate": 3.9290858636766585e-06,
      "loss": 0.1307,
      "step": 19938
    },
    {
      "epoch": 0.5816850458019721,
      "grad_norm": 0.7336512921920025,
      "learning_rate": 3.928624397802288e-06,
      "loss": 0.1271,
      "step": 19939
    },
    {
      "epoch": 0.5817142190326157,
      "grad_norm": 0.7648675122049943,
      "learning_rate": 3.928162941492904e-06,
      "loss": 0.1225,
      "step": 19940
    },
    {
      "epoch": 0.5817433922632592,
      "grad_norm": 0.7354919159711172,
      "learning_rate": 3.927701494752626e-06,
      "loss": 0.1223,
      "step": 19941
    },
    {
      "epoch": 0.5817725654939028,
      "grad_norm": 0.6273991668830573,
      "learning_rate": 3.927240057585573e-06,
      "loss": 0.1085,
      "step": 19942
    },
    {
      "epoch": 0.5818017387245463,
      "grad_norm": 0.6459800492029086,
      "learning_rate": 3.926778629995862e-06,
      "loss": 0.1314,
      "step": 19943
    },
    {
      "epoch": 0.5818309119551899,
      "grad_norm": 0.7482739992861783,
      "learning_rate": 3.9263172119876166e-06,
      "loss": 0.1223,
      "step": 19944
    },
    {
      "epoch": 0.5818600851858334,
      "grad_norm": 0.8085054685461269,
      "learning_rate": 3.9258558035649556e-06,
      "loss": 0.1402,
      "step": 19945
    },
    {
      "epoch": 0.5818892584164771,
      "grad_norm": 0.6800301775320966,
      "learning_rate": 3.925394404731998e-06,
      "loss": 0.122,
      "step": 19946
    },
    {
      "epoch": 0.5819184316471206,
      "grad_norm": 0.7043598093254362,
      "learning_rate": 3.9249330154928625e-06,
      "loss": 0.1174,
      "step": 19947
    },
    {
      "epoch": 0.5819476048777642,
      "grad_norm": 0.7787487097948912,
      "learning_rate": 3.924471635851667e-06,
      "loss": 0.1071,
      "step": 19948
    },
    {
      "epoch": 0.5819767781084078,
      "grad_norm": 0.7955993349512235,
      "learning_rate": 3.924010265812532e-06,
      "loss": 0.1036,
      "step": 19949
    },
    {
      "epoch": 0.5820059513390513,
      "grad_norm": 0.8019184844104764,
      "learning_rate": 3.923548905379577e-06,
      "loss": 0.1464,
      "step": 19950
    },
    {
      "epoch": 0.5820351245696949,
      "grad_norm": 0.9414282098573326,
      "learning_rate": 3.923087554556922e-06,
      "loss": 0.1165,
      "step": 19951
    },
    {
      "epoch": 0.5820642978003384,
      "grad_norm": 0.7954506286146843,
      "learning_rate": 3.9226262133486824e-06,
      "loss": 0.107,
      "step": 19952
    },
    {
      "epoch": 0.582093471030982,
      "grad_norm": 0.7379893339804852,
      "learning_rate": 3.922164881758979e-06,
      "loss": 0.1326,
      "step": 19953
    },
    {
      "epoch": 0.5821226442616255,
      "grad_norm": 0.929430262482613,
      "learning_rate": 3.921703559791932e-06,
      "loss": 0.1403,
      "step": 19954
    },
    {
      "epoch": 0.5821518174922691,
      "grad_norm": 0.943504225127908,
      "learning_rate": 3.921242247451654e-06,
      "loss": 0.1331,
      "step": 19955
    },
    {
      "epoch": 0.5821809907229126,
      "grad_norm": 0.7250821337711371,
      "learning_rate": 3.920780944742272e-06,
      "loss": 0.1208,
      "step": 19956
    },
    {
      "epoch": 0.5822101639535562,
      "grad_norm": 0.8756263308111629,
      "learning_rate": 3.920319651667898e-06,
      "loss": 0.1127,
      "step": 19957
    },
    {
      "epoch": 0.5822393371841997,
      "grad_norm": 0.7321426688668755,
      "learning_rate": 3.919858368232653e-06,
      "loss": 0.1482,
      "step": 19958
    },
    {
      "epoch": 0.5822685104148433,
      "grad_norm": 0.7992597580466819,
      "learning_rate": 3.919397094440655e-06,
      "loss": 0.1094,
      "step": 19959
    },
    {
      "epoch": 0.582297683645487,
      "grad_norm": 0.9162254415320166,
      "learning_rate": 3.9189358302960215e-06,
      "loss": 0.1248,
      "step": 19960
    },
    {
      "epoch": 0.5823268568761305,
      "grad_norm": 0.7029670268063505,
      "learning_rate": 3.91847457580287e-06,
      "loss": 0.1121,
      "step": 19961
    },
    {
      "epoch": 0.5823560301067741,
      "grad_norm": 0.8335174589105804,
      "learning_rate": 3.91801333096532e-06,
      "loss": 0.136,
      "step": 19962
    },
    {
      "epoch": 0.5823852033374176,
      "grad_norm": 0.9310187708467518,
      "learning_rate": 3.917552095787489e-06,
      "loss": 0.1252,
      "step": 19963
    },
    {
      "epoch": 0.5824143765680612,
      "grad_norm": 0.7710969883012689,
      "learning_rate": 3.9170908702734945e-06,
      "loss": 0.1256,
      "step": 19964
    },
    {
      "epoch": 0.5824435497987047,
      "grad_norm": 0.9074498699170812,
      "learning_rate": 3.916629654427454e-06,
      "loss": 0.1324,
      "step": 19965
    },
    {
      "epoch": 0.5824727230293483,
      "grad_norm": 1.0343579747381952,
      "learning_rate": 3.916168448253485e-06,
      "loss": 0.1456,
      "step": 19966
    },
    {
      "epoch": 0.5825018962599918,
      "grad_norm": 0.9765773529326656,
      "learning_rate": 3.915707251755704e-06,
      "loss": 0.129,
      "step": 19967
    },
    {
      "epoch": 0.5825310694906354,
      "grad_norm": 0.8459793182395423,
      "learning_rate": 3.915246064938233e-06,
      "loss": 0.1153,
      "step": 19968
    },
    {
      "epoch": 0.5825602427212789,
      "grad_norm": 0.7714183250959891,
      "learning_rate": 3.9147848878051845e-06,
      "loss": 0.1139,
      "step": 19969
    },
    {
      "epoch": 0.5825894159519225,
      "grad_norm": 0.970932464693246,
      "learning_rate": 3.914323720360677e-06,
      "loss": 0.1081,
      "step": 19970
    },
    {
      "epoch": 0.582618589182566,
      "grad_norm": 1.3982589816857298,
      "learning_rate": 3.91386256260883e-06,
      "loss": 0.1436,
      "step": 19971
    },
    {
      "epoch": 0.5826477624132096,
      "grad_norm": 0.8520444859948432,
      "learning_rate": 3.913401414553757e-06,
      "loss": 0.1153,
      "step": 19972
    },
    {
      "epoch": 0.5826769356438533,
      "grad_norm": 0.8259940838057555,
      "learning_rate": 3.9129402761995765e-06,
      "loss": 0.1334,
      "step": 19973
    },
    {
      "epoch": 0.5827061088744968,
      "grad_norm": 0.9727286286622902,
      "learning_rate": 3.912479147550406e-06,
      "loss": 0.1415,
      "step": 19974
    },
    {
      "epoch": 0.5827352821051404,
      "grad_norm": 0.998387208620422,
      "learning_rate": 3.912018028610362e-06,
      "loss": 0.12,
      "step": 19975
    },
    {
      "epoch": 0.5827644553357839,
      "grad_norm": 0.8039708771766947,
      "learning_rate": 3.911556919383563e-06,
      "loss": 0.1429,
      "step": 19976
    },
    {
      "epoch": 0.5827936285664275,
      "grad_norm": 0.7643786701800359,
      "learning_rate": 3.911095819874123e-06,
      "loss": 0.146,
      "step": 19977
    },
    {
      "epoch": 0.582822801797071,
      "grad_norm": 1.0885862466447291,
      "learning_rate": 3.910634730086159e-06,
      "loss": 0.1229,
      "step": 19978
    },
    {
      "epoch": 0.5828519750277146,
      "grad_norm": 0.8779849257664843,
      "learning_rate": 3.910173650023787e-06,
      "loss": 0.1146,
      "step": 19979
    },
    {
      "epoch": 0.5828811482583581,
      "grad_norm": 0.9798843494074476,
      "learning_rate": 3.909712579691126e-06,
      "loss": 0.1373,
      "step": 19980
    },
    {
      "epoch": 0.5829103214890017,
      "grad_norm": 0.9199995043051882,
      "learning_rate": 3.909251519092292e-06,
      "loss": 0.1625,
      "step": 19981
    },
    {
      "epoch": 0.5829394947196452,
      "grad_norm": 0.9244682248536849,
      "learning_rate": 3.908790468231398e-06,
      "loss": 0.1235,
      "step": 19982
    },
    {
      "epoch": 0.5829686679502888,
      "grad_norm": 0.6955839158370702,
      "learning_rate": 3.9083294271125635e-06,
      "loss": 0.127,
      "step": 19983
    },
    {
      "epoch": 0.5829978411809323,
      "grad_norm": 0.8609939111882231,
      "learning_rate": 3.907868395739904e-06,
      "loss": 0.1421,
      "step": 19984
    },
    {
      "epoch": 0.5830270144115759,
      "grad_norm": 1.751689791296209,
      "learning_rate": 3.907407374117531e-06,
      "loss": 0.1224,
      "step": 19985
    },
    {
      "epoch": 0.5830561876422194,
      "grad_norm": 0.7213866183718433,
      "learning_rate": 3.906946362249567e-06,
      "loss": 0.1072,
      "step": 19986
    },
    {
      "epoch": 0.5830853608728631,
      "grad_norm": 0.6711112849067568,
      "learning_rate": 3.9064853601401255e-06,
      "loss": 0.125,
      "step": 19987
    },
    {
      "epoch": 0.5831145341035067,
      "grad_norm": 0.8294815817653309,
      "learning_rate": 3.90602436779332e-06,
      "loss": 0.1147,
      "step": 19988
    },
    {
      "epoch": 0.5831437073341502,
      "grad_norm": 1.0764632780837011,
      "learning_rate": 3.90556338521327e-06,
      "loss": 0.0988,
      "step": 19989
    },
    {
      "epoch": 0.5831728805647938,
      "grad_norm": 0.7551939346681211,
      "learning_rate": 3.905102412404087e-06,
      "loss": 0.1312,
      "step": 19990
    },
    {
      "epoch": 0.5832020537954373,
      "grad_norm": 0.6482365702037921,
      "learning_rate": 3.904641449369887e-06,
      "loss": 0.1206,
      "step": 19991
    },
    {
      "epoch": 0.5832312270260809,
      "grad_norm": 0.8071205374283903,
      "learning_rate": 3.904180496114789e-06,
      "loss": 0.1345,
      "step": 19992
    },
    {
      "epoch": 0.5832604002567244,
      "grad_norm": 0.7723152671545592,
      "learning_rate": 3.903719552642906e-06,
      "loss": 0.1,
      "step": 19993
    },
    {
      "epoch": 0.583289573487368,
      "grad_norm": 0.9115210649858577,
      "learning_rate": 3.9032586189583525e-06,
      "loss": 0.1317,
      "step": 19994
    },
    {
      "epoch": 0.5833187467180115,
      "grad_norm": 0.7362211880106496,
      "learning_rate": 3.902797695065244e-06,
      "loss": 0.1253,
      "step": 19995
    },
    {
      "epoch": 0.5833479199486551,
      "grad_norm": 0.8636060424256389,
      "learning_rate": 3.902336780967697e-06,
      "loss": 0.1338,
      "step": 19996
    },
    {
      "epoch": 0.5833770931792986,
      "grad_norm": 0.7829876879955818,
      "learning_rate": 3.901875876669822e-06,
      "loss": 0.1127,
      "step": 19997
    },
    {
      "epoch": 0.5834062664099422,
      "grad_norm": 0.7388444336577626,
      "learning_rate": 3.90141498217574e-06,
      "loss": 0.1361,
      "step": 19998
    },
    {
      "epoch": 0.5834354396405858,
      "grad_norm": 0.8067589149653539,
      "learning_rate": 3.900954097489562e-06,
      "loss": 0.1207,
      "step": 19999
    },
    {
      "epoch": 0.5834646128712293,
      "grad_norm": 0.7230169779803474,
      "learning_rate": 3.900493222615403e-06,
      "loss": 0.1104,
      "step": 20000
    },
    {
      "epoch": 0.583493786101873,
      "grad_norm": 0.877173890972481,
      "learning_rate": 3.900032357557379e-06,
      "loss": 0.1243,
      "step": 20001
    },
    {
      "epoch": 0.5835229593325165,
      "grad_norm": 0.9922213644489261,
      "learning_rate": 3.899571502319603e-06,
      "loss": 0.1247,
      "step": 20002
    },
    {
      "epoch": 0.5835521325631601,
      "grad_norm": 0.9334965449708817,
      "learning_rate": 3.899110656906189e-06,
      "loss": 0.1386,
      "step": 20003
    },
    {
      "epoch": 0.5835813057938036,
      "grad_norm": 1.0450135546821684,
      "learning_rate": 3.898649821321253e-06,
      "loss": 0.126,
      "step": 20004
    },
    {
      "epoch": 0.5836104790244472,
      "grad_norm": 1.0209668161040246,
      "learning_rate": 3.898188995568908e-06,
      "loss": 0.1247,
      "step": 20005
    },
    {
      "epoch": 0.5836396522550907,
      "grad_norm": 1.007674244420397,
      "learning_rate": 3.8977281796532706e-06,
      "loss": 0.1202,
      "step": 20006
    },
    {
      "epoch": 0.5836688254857343,
      "grad_norm": 0.9367676131305241,
      "learning_rate": 3.8972673735784516e-06,
      "loss": 0.12,
      "step": 20007
    },
    {
      "epoch": 0.5836979987163778,
      "grad_norm": 1.0124107800044146,
      "learning_rate": 3.896806577348566e-06,
      "loss": 0.1151,
      "step": 20008
    },
    {
      "epoch": 0.5837271719470214,
      "grad_norm": 0.9649067586170705,
      "learning_rate": 3.896345790967726e-06,
      "loss": 0.1217,
      "step": 20009
    },
    {
      "epoch": 0.583756345177665,
      "grad_norm": 0.7424227106621085,
      "learning_rate": 3.89588501444005e-06,
      "loss": 0.1095,
      "step": 20010
    },
    {
      "epoch": 0.5837855184083085,
      "grad_norm": 0.8772521777826231,
      "learning_rate": 3.895424247769649e-06,
      "loss": 0.1238,
      "step": 20011
    },
    {
      "epoch": 0.583814691638952,
      "grad_norm": 0.8659507469262135,
      "learning_rate": 3.8949634909606365e-06,
      "loss": 0.1365,
      "step": 20012
    },
    {
      "epoch": 0.5838438648695956,
      "grad_norm": 1.0415473076502995,
      "learning_rate": 3.894502744017126e-06,
      "loss": 0.1318,
      "step": 20013
    },
    {
      "epoch": 0.5838730381002393,
      "grad_norm": 0.7313353263417107,
      "learning_rate": 3.894042006943231e-06,
      "loss": 0.1189,
      "step": 20014
    },
    {
      "epoch": 0.5839022113308828,
      "grad_norm": 0.9722540423042687,
      "learning_rate": 3.893581279743064e-06,
      "loss": 0.1349,
      "step": 20015
    },
    {
      "epoch": 0.5839313845615264,
      "grad_norm": 0.850733822255877,
      "learning_rate": 3.89312056242074e-06,
      "loss": 0.1171,
      "step": 20016
    },
    {
      "epoch": 0.5839605577921699,
      "grad_norm": 1.5434614652881848,
      "learning_rate": 3.892659854980371e-06,
      "loss": 0.1322,
      "step": 20017
    },
    {
      "epoch": 0.5839897310228135,
      "grad_norm": 1.5228709237166698,
      "learning_rate": 3.892199157426071e-06,
      "loss": 0.1516,
      "step": 20018
    },
    {
      "epoch": 0.584018904253457,
      "grad_norm": 1.2995402203326651,
      "learning_rate": 3.891738469761953e-06,
      "loss": 0.1332,
      "step": 20019
    },
    {
      "epoch": 0.5840480774841006,
      "grad_norm": 1.4594074075054007,
      "learning_rate": 3.891277791992129e-06,
      "loss": 0.1363,
      "step": 20020
    },
    {
      "epoch": 0.5840772507147441,
      "grad_norm": 0.6573844072446806,
      "learning_rate": 3.890817124120711e-06,
      "loss": 0.1178,
      "step": 20021
    },
    {
      "epoch": 0.5841064239453877,
      "grad_norm": 0.8124837233294876,
      "learning_rate": 3.890356466151813e-06,
      "loss": 0.1315,
      "step": 20022
    },
    {
      "epoch": 0.5841355971760313,
      "grad_norm": 0.9847557728491084,
      "learning_rate": 3.889895818089549e-06,
      "loss": 0.1227,
      "step": 20023
    },
    {
      "epoch": 0.5841647704066748,
      "grad_norm": 1.2453805876828943,
      "learning_rate": 3.889435179938029e-06,
      "loss": 0.1255,
      "step": 20024
    },
    {
      "epoch": 0.5841939436373184,
      "grad_norm": 0.7276105532760212,
      "learning_rate": 3.888974551701368e-06,
      "loss": 0.1197,
      "step": 20025
    },
    {
      "epoch": 0.5842231168679619,
      "grad_norm": 0.6886842346855506,
      "learning_rate": 3.888513933383676e-06,
      "loss": 0.1254,
      "step": 20026
    },
    {
      "epoch": 0.5842522900986055,
      "grad_norm": 1.0024426718891248,
      "learning_rate": 3.888053324989065e-06,
      "loss": 0.1412,
      "step": 20027
    },
    {
      "epoch": 0.5842814633292491,
      "grad_norm": 0.8664652350890629,
      "learning_rate": 3.88759272652165e-06,
      "loss": 0.1127,
      "step": 20028
    },
    {
      "epoch": 0.5843106365598927,
      "grad_norm": 0.6224970984708146,
      "learning_rate": 3.887132137985542e-06,
      "loss": 0.1213,
      "step": 20029
    },
    {
      "epoch": 0.5843398097905362,
      "grad_norm": 0.6913691028308873,
      "learning_rate": 3.886671559384851e-06,
      "loss": 0.1414,
      "step": 20030
    },
    {
      "epoch": 0.5843689830211798,
      "grad_norm": 0.8798904964005513,
      "learning_rate": 3.8862109907236935e-06,
      "loss": 0.1183,
      "step": 20031
    },
    {
      "epoch": 0.5843981562518233,
      "grad_norm": 0.9806387333938295,
      "learning_rate": 3.8857504320061765e-06,
      "loss": 0.1095,
      "step": 20032
    },
    {
      "epoch": 0.5844273294824669,
      "grad_norm": 0.8494375148451374,
      "learning_rate": 3.8852898832364125e-06,
      "loss": 0.135,
      "step": 20033
    },
    {
      "epoch": 0.5844565027131104,
      "grad_norm": 0.7349521172006768,
      "learning_rate": 3.884829344418515e-06,
      "loss": 0.127,
      "step": 20034
    },
    {
      "epoch": 0.584485675943754,
      "grad_norm": 1.0602459365016692,
      "learning_rate": 3.884368815556595e-06,
      "loss": 0.1218,
      "step": 20035
    },
    {
      "epoch": 0.5845148491743976,
      "grad_norm": 1.1043666239761065,
      "learning_rate": 3.883908296654766e-06,
      "loss": 0.1204,
      "step": 20036
    },
    {
      "epoch": 0.5845440224050411,
      "grad_norm": 0.7194028575697635,
      "learning_rate": 3.883447787717134e-06,
      "loss": 0.1221,
      "step": 20037
    },
    {
      "epoch": 0.5845731956356847,
      "grad_norm": 1.0477697364098046,
      "learning_rate": 3.882987288747816e-06,
      "loss": 0.1359,
      "step": 20038
    },
    {
      "epoch": 0.5846023688663282,
      "grad_norm": 1.2162683390570899,
      "learning_rate": 3.8825267997509184e-06,
      "loss": 0.1384,
      "step": 20039
    },
    {
      "epoch": 0.5846315420969718,
      "grad_norm": 0.9315426408156291,
      "learning_rate": 3.882066320730556e-06,
      "loss": 0.1326,
      "step": 20040
    },
    {
      "epoch": 0.5846607153276154,
      "grad_norm": 0.7159074593237017,
      "learning_rate": 3.88160585169084e-06,
      "loss": 0.1297,
      "step": 20041
    },
    {
      "epoch": 0.584689888558259,
      "grad_norm": 1.2336832947245413,
      "learning_rate": 3.881145392635879e-06,
      "loss": 0.1199,
      "step": 20042
    },
    {
      "epoch": 0.5847190617889025,
      "grad_norm": 1.0889179166800855,
      "learning_rate": 3.880684943569785e-06,
      "loss": 0.1216,
      "step": 20043
    },
    {
      "epoch": 0.5847482350195461,
      "grad_norm": 0.8037398815818094,
      "learning_rate": 3.880224504496669e-06,
      "loss": 0.1291,
      "step": 20044
    },
    {
      "epoch": 0.5847774082501896,
      "grad_norm": 0.8383273262411762,
      "learning_rate": 3.87976407542064e-06,
      "loss": 0.1423,
      "step": 20045
    },
    {
      "epoch": 0.5848065814808332,
      "grad_norm": 0.9656367754583141,
      "learning_rate": 3.87930365634581e-06,
      "loss": 0.122,
      "step": 20046
    },
    {
      "epoch": 0.5848357547114768,
      "grad_norm": 0.7820117746304088,
      "learning_rate": 3.87884324727629e-06,
      "loss": 0.1492,
      "step": 20047
    },
    {
      "epoch": 0.5848649279421203,
      "grad_norm": 0.9874843732922406,
      "learning_rate": 3.87838284821619e-06,
      "loss": 0.1233,
      "step": 20048
    },
    {
      "epoch": 0.5848941011727639,
      "grad_norm": 0.7807569775875209,
      "learning_rate": 3.877922459169621e-06,
      "loss": 0.1324,
      "step": 20049
    },
    {
      "epoch": 0.5849232744034074,
      "grad_norm": 0.908533675249824,
      "learning_rate": 3.877462080140691e-06,
      "loss": 0.138,
      "step": 20050
    },
    {
      "epoch": 0.584952447634051,
      "grad_norm": 0.9268027110355939,
      "learning_rate": 3.877001711133511e-06,
      "loss": 0.1443,
      "step": 20051
    },
    {
      "epoch": 0.5849816208646945,
      "grad_norm": 1.1052806011889693,
      "learning_rate": 3.8765413521521925e-06,
      "loss": 0.1353,
      "step": 20052
    },
    {
      "epoch": 0.5850107940953381,
      "grad_norm": 0.7436349984137494,
      "learning_rate": 3.876081003200846e-06,
      "loss": 0.1137,
      "step": 20053
    },
    {
      "epoch": 0.5850399673259816,
      "grad_norm": 0.7905624130436862,
      "learning_rate": 3.875620664283578e-06,
      "loss": 0.1256,
      "step": 20054
    },
    {
      "epoch": 0.5850691405566253,
      "grad_norm": 0.7727231524808063,
      "learning_rate": 3.875160335404502e-06,
      "loss": 0.1109,
      "step": 20055
    },
    {
      "epoch": 0.5850983137872688,
      "grad_norm": 1.1785316990071035,
      "learning_rate": 3.874700016567726e-06,
      "loss": 0.1388,
      "step": 20056
    },
    {
      "epoch": 0.5851274870179124,
      "grad_norm": 0.8946554632140041,
      "learning_rate": 3.874239707777356e-06,
      "loss": 0.1067,
      "step": 20057
    },
    {
      "epoch": 0.585156660248556,
      "grad_norm": 0.8262004130191329,
      "learning_rate": 3.873779409037509e-06,
      "loss": 0.1297,
      "step": 20058
    },
    {
      "epoch": 0.5851858334791995,
      "grad_norm": 0.7765629391008582,
      "learning_rate": 3.873319120352289e-06,
      "loss": 0.1197,
      "step": 20059
    },
    {
      "epoch": 0.585215006709843,
      "grad_norm": 0.8418836592297371,
      "learning_rate": 3.872858841725808e-06,
      "loss": 0.1304,
      "step": 20060
    },
    {
      "epoch": 0.5852441799404866,
      "grad_norm": 0.7772474308397072,
      "learning_rate": 3.872398573162174e-06,
      "loss": 0.0986,
      "step": 20061
    },
    {
      "epoch": 0.5852733531711302,
      "grad_norm": 0.7997763202848225,
      "learning_rate": 3.871938314665496e-06,
      "loss": 0.1454,
      "step": 20062
    },
    {
      "epoch": 0.5853025264017737,
      "grad_norm": 0.7620179197154986,
      "learning_rate": 3.871478066239882e-06,
      "loss": 0.1159,
      "step": 20063
    },
    {
      "epoch": 0.5853316996324173,
      "grad_norm": 0.7601146672815218,
      "learning_rate": 3.871017827889444e-06,
      "loss": 0.1409,
      "step": 20064
    },
    {
      "epoch": 0.5853608728630608,
      "grad_norm": 0.9971389348944414,
      "learning_rate": 3.870557599618289e-06,
      "loss": 0.1192,
      "step": 20065
    },
    {
      "epoch": 0.5853900460937044,
      "grad_norm": 0.7898776749792827,
      "learning_rate": 3.8700973814305275e-06,
      "loss": 0.1396,
      "step": 20066
    },
    {
      "epoch": 0.5854192193243479,
      "grad_norm": 0.7245572167616001,
      "learning_rate": 3.869637173330265e-06,
      "loss": 0.1278,
      "step": 20067
    },
    {
      "epoch": 0.5854483925549916,
      "grad_norm": 0.81545613368151,
      "learning_rate": 3.869176975321613e-06,
      "loss": 0.1119,
      "step": 20068
    },
    {
      "epoch": 0.5854775657856351,
      "grad_norm": 0.8334207358753285,
      "learning_rate": 3.868716787408677e-06,
      "loss": 0.1344,
      "step": 20069
    },
    {
      "epoch": 0.5855067390162787,
      "grad_norm": 0.7354282710283488,
      "learning_rate": 3.8682566095955695e-06,
      "loss": 0.129,
      "step": 20070
    },
    {
      "epoch": 0.5855359122469223,
      "grad_norm": 0.7748654416289245,
      "learning_rate": 3.867796441886397e-06,
      "loss": 0.1007,
      "step": 20071
    },
    {
      "epoch": 0.5855650854775658,
      "grad_norm": 0.9861834514467656,
      "learning_rate": 3.867336284285267e-06,
      "loss": 0.1262,
      "step": 20072
    },
    {
      "epoch": 0.5855942587082094,
      "grad_norm": 0.8149178327824725,
      "learning_rate": 3.866876136796288e-06,
      "loss": 0.1329,
      "step": 20073
    },
    {
      "epoch": 0.5856234319388529,
      "grad_norm": 0.8383302629967043,
      "learning_rate": 3.86641599942357e-06,
      "loss": 0.1189,
      "step": 20074
    },
    {
      "epoch": 0.5856526051694965,
      "grad_norm": 1.0343052504712773,
      "learning_rate": 3.865955872171217e-06,
      "loss": 0.1247,
      "step": 20075
    },
    {
      "epoch": 0.58568177840014,
      "grad_norm": 0.7977024950722141,
      "learning_rate": 3.865495755043339e-06,
      "loss": 0.1204,
      "step": 20076
    },
    {
      "epoch": 0.5857109516307836,
      "grad_norm": 1.2251561082309965,
      "learning_rate": 3.865035648044046e-06,
      "loss": 0.1416,
      "step": 20077
    },
    {
      "epoch": 0.5857401248614271,
      "grad_norm": 0.8435546083933906,
      "learning_rate": 3.864575551177443e-06,
      "loss": 0.1272,
      "step": 20078
    },
    {
      "epoch": 0.5857692980920707,
      "grad_norm": 0.7883942391601387,
      "learning_rate": 3.864115464447639e-06,
      "loss": 0.1247,
      "step": 20079
    },
    {
      "epoch": 0.5857984713227142,
      "grad_norm": 0.8557367358849178,
      "learning_rate": 3.86365538785874e-06,
      "loss": 0.1291,
      "step": 20080
    },
    {
      "epoch": 0.5858276445533578,
      "grad_norm": 0.9048262984258508,
      "learning_rate": 3.863195321414855e-06,
      "loss": 0.1155,
      "step": 20081
    },
    {
      "epoch": 0.5858568177840014,
      "grad_norm": 0.7213752903120728,
      "learning_rate": 3.86273526512009e-06,
      "loss": 0.1472,
      "step": 20082
    },
    {
      "epoch": 0.585885991014645,
      "grad_norm": 0.7683928580871993,
      "learning_rate": 3.862275218978554e-06,
      "loss": 0.1478,
      "step": 20083
    },
    {
      "epoch": 0.5859151642452886,
      "grad_norm": 0.9852625062479899,
      "learning_rate": 3.861815182994353e-06,
      "loss": 0.1472,
      "step": 20084
    },
    {
      "epoch": 0.5859443374759321,
      "grad_norm": 0.9549895964272472,
      "learning_rate": 3.861355157171594e-06,
      "loss": 0.1159,
      "step": 20085
    },
    {
      "epoch": 0.5859735107065757,
      "grad_norm": 0.7935749292525924,
      "learning_rate": 3.860895141514384e-06,
      "loss": 0.1007,
      "step": 20086
    },
    {
      "epoch": 0.5860026839372192,
      "grad_norm": 0.8734585237837397,
      "learning_rate": 3.860435136026831e-06,
      "loss": 0.1254,
      "step": 20087
    },
    {
      "epoch": 0.5860318571678628,
      "grad_norm": 0.9321607175072781,
      "learning_rate": 3.859975140713042e-06,
      "loss": 0.1262,
      "step": 20088
    },
    {
      "epoch": 0.5860610303985063,
      "grad_norm": 0.8740670292774305,
      "learning_rate": 3.859515155577122e-06,
      "loss": 0.1186,
      "step": 20089
    },
    {
      "epoch": 0.5860902036291499,
      "grad_norm": 0.885447387603564,
      "learning_rate": 3.859055180623178e-06,
      "loss": 0.1444,
      "step": 20090
    },
    {
      "epoch": 0.5861193768597934,
      "grad_norm": 1.1201388672989014,
      "learning_rate": 3.858595215855318e-06,
      "loss": 0.1217,
      "step": 20091
    },
    {
      "epoch": 0.586148550090437,
      "grad_norm": 0.8479948512544705,
      "learning_rate": 3.858135261277645e-06,
      "loss": 0.1169,
      "step": 20092
    },
    {
      "epoch": 0.5861777233210805,
      "grad_norm": 0.8185512498549288,
      "learning_rate": 3.85767531689427e-06,
      "loss": 0.1064,
      "step": 20093
    },
    {
      "epoch": 0.5862068965517241,
      "grad_norm": 0.9016533709889478,
      "learning_rate": 3.857215382709296e-06,
      "loss": 0.1252,
      "step": 20094
    },
    {
      "epoch": 0.5862360697823678,
      "grad_norm": 0.9268200388301004,
      "learning_rate": 3.856755458726831e-06,
      "loss": 0.1119,
      "step": 20095
    },
    {
      "epoch": 0.5862652430130113,
      "grad_norm": 0.8720738752910677,
      "learning_rate": 3.8562955449509814e-06,
      "loss": 0.1365,
      "step": 20096
    },
    {
      "epoch": 0.5862944162436549,
      "grad_norm": 0.9482182903925894,
      "learning_rate": 3.85583564138585e-06,
      "loss": 0.1183,
      "step": 20097
    },
    {
      "epoch": 0.5863235894742984,
      "grad_norm": 0.8620664503063633,
      "learning_rate": 3.855375748035545e-06,
      "loss": 0.1304,
      "step": 20098
    },
    {
      "epoch": 0.586352762704942,
      "grad_norm": 0.9452053302308003,
      "learning_rate": 3.854915864904173e-06,
      "loss": 0.15,
      "step": 20099
    },
    {
      "epoch": 0.5863819359355855,
      "grad_norm": 1.0589915214326235,
      "learning_rate": 3.854455991995838e-06,
      "loss": 0.1411,
      "step": 20100
    },
    {
      "epoch": 0.5864111091662291,
      "grad_norm": 0.857230424040825,
      "learning_rate": 3.853996129314649e-06,
      "loss": 0.1176,
      "step": 20101
    },
    {
      "epoch": 0.5864402823968726,
      "grad_norm": 0.9205323384255356,
      "learning_rate": 3.853536276864707e-06,
      "loss": 0.1379,
      "step": 20102
    },
    {
      "epoch": 0.5864694556275162,
      "grad_norm": 1.048169501315199,
      "learning_rate": 3.853076434650119e-06,
      "loss": 0.133,
      "step": 20103
    },
    {
      "epoch": 0.5864986288581597,
      "grad_norm": 1.0178603010415677,
      "learning_rate": 3.8526166026749904e-06,
      "loss": 0.1203,
      "step": 20104
    },
    {
      "epoch": 0.5865278020888033,
      "grad_norm": 1.096479765021856,
      "learning_rate": 3.852156780943428e-06,
      "loss": 0.132,
      "step": 20105
    },
    {
      "epoch": 0.5865569753194468,
      "grad_norm": 1.0845604859818005,
      "learning_rate": 3.851696969459536e-06,
      "loss": 0.1407,
      "step": 20106
    },
    {
      "epoch": 0.5865861485500904,
      "grad_norm": 0.872584285654691,
      "learning_rate": 3.851237168227419e-06,
      "loss": 0.1172,
      "step": 20107
    },
    {
      "epoch": 0.586615321780734,
      "grad_norm": 1.036295087112203,
      "learning_rate": 3.850777377251183e-06,
      "loss": 0.1551,
      "step": 20108
    },
    {
      "epoch": 0.5866444950113776,
      "grad_norm": 1.1641155943704615,
      "learning_rate": 3.850317596534932e-06,
      "loss": 0.1396,
      "step": 20109
    },
    {
      "epoch": 0.5866736682420212,
      "grad_norm": 1.0923985167320642,
      "learning_rate": 3.849857826082769e-06,
      "loss": 0.1247,
      "step": 20110
    },
    {
      "epoch": 0.5867028414726647,
      "grad_norm": 0.987160959863191,
      "learning_rate": 3.849398065898802e-06,
      "loss": 0.131,
      "step": 20111
    },
    {
      "epoch": 0.5867320147033083,
      "grad_norm": 1.253080785508952,
      "learning_rate": 3.848938315987135e-06,
      "loss": 0.129,
      "step": 20112
    },
    {
      "epoch": 0.5867611879339518,
      "grad_norm": 1.1538753717837746,
      "learning_rate": 3.848478576351873e-06,
      "loss": 0.1245,
      "step": 20113
    },
    {
      "epoch": 0.5867903611645954,
      "grad_norm": 1.0333923564407248,
      "learning_rate": 3.848018846997117e-06,
      "loss": 0.1422,
      "step": 20114
    },
    {
      "epoch": 0.5868195343952389,
      "grad_norm": 0.6461112413466147,
      "learning_rate": 3.847559127926975e-06,
      "loss": 0.1214,
      "step": 20115
    },
    {
      "epoch": 0.5868487076258825,
      "grad_norm": 0.8272750820515089,
      "learning_rate": 3.847099419145549e-06,
      "loss": 0.1309,
      "step": 20116
    },
    {
      "epoch": 0.586877880856526,
      "grad_norm": 0.9896185719377043,
      "learning_rate": 3.846639720656944e-06,
      "loss": 0.1581,
      "step": 20117
    },
    {
      "epoch": 0.5869070540871696,
      "grad_norm": 0.7564900045154057,
      "learning_rate": 3.846180032465267e-06,
      "loss": 0.1131,
      "step": 20118
    },
    {
      "epoch": 0.5869362273178131,
      "grad_norm": 0.8645903721445637,
      "learning_rate": 3.845720354574617e-06,
      "loss": 0.1087,
      "step": 20119
    },
    {
      "epoch": 0.5869654005484567,
      "grad_norm": 0.8617417665144481,
      "learning_rate": 3.845260686989101e-06,
      "loss": 0.1454,
      "step": 20120
    },
    {
      "epoch": 0.5869945737791002,
      "grad_norm": 0.6203208205541115,
      "learning_rate": 3.844801029712822e-06,
      "loss": 0.1068,
      "step": 20121
    },
    {
      "epoch": 0.5870237470097439,
      "grad_norm": 0.7631944690684509,
      "learning_rate": 3.844341382749881e-06,
      "loss": 0.1434,
      "step": 20122
    },
    {
      "epoch": 0.5870529202403875,
      "grad_norm": 0.8825347793520131,
      "learning_rate": 3.843881746104387e-06,
      "loss": 0.1086,
      "step": 20123
    },
    {
      "epoch": 0.587082093471031,
      "grad_norm": 1.6912087255054722,
      "learning_rate": 3.84342211978044e-06,
      "loss": 0.1271,
      "step": 20124
    },
    {
      "epoch": 0.5871112667016746,
      "grad_norm": 0.8616899578604179,
      "learning_rate": 3.842962503782145e-06,
      "loss": 0.123,
      "step": 20125
    },
    {
      "epoch": 0.5871404399323181,
      "grad_norm": 0.6871094352328266,
      "learning_rate": 3.842502898113604e-06,
      "loss": 0.1384,
      "step": 20126
    },
    {
      "epoch": 0.5871696131629617,
      "grad_norm": 0.9791209284560194,
      "learning_rate": 3.842043302778921e-06,
      "loss": 0.1303,
      "step": 20127
    },
    {
      "epoch": 0.5871987863936052,
      "grad_norm": 0.6875342564927575,
      "learning_rate": 3.8415837177821976e-06,
      "loss": 0.1083,
      "step": 20128
    },
    {
      "epoch": 0.5872279596242488,
      "grad_norm": 0.8012390508740657,
      "learning_rate": 3.841124143127539e-06,
      "loss": 0.1147,
      "step": 20129
    },
    {
      "epoch": 0.5872571328548923,
      "grad_norm": 0.8787926292352068,
      "learning_rate": 3.840664578819047e-06,
      "loss": 0.1489,
      "step": 20130
    },
    {
      "epoch": 0.5872863060855359,
      "grad_norm": 0.8967703054374653,
      "learning_rate": 3.8402050248608266e-06,
      "loss": 0.1322,
      "step": 20131
    },
    {
      "epoch": 0.5873154793161794,
      "grad_norm": 0.8685471794163647,
      "learning_rate": 3.839745481256979e-06,
      "loss": 0.1346,
      "step": 20132
    },
    {
      "epoch": 0.587344652546823,
      "grad_norm": 0.8787694749575461,
      "learning_rate": 3.839285948011605e-06,
      "loss": 0.126,
      "step": 20133
    },
    {
      "epoch": 0.5873738257774666,
      "grad_norm": 0.8927126506899811,
      "learning_rate": 3.838826425128809e-06,
      "loss": 0.1205,
      "step": 20134
    },
    {
      "epoch": 0.5874029990081101,
      "grad_norm": 0.8486994366293353,
      "learning_rate": 3.838366912612694e-06,
      "loss": 0.1326,
      "step": 20135
    },
    {
      "epoch": 0.5874321722387538,
      "grad_norm": 0.7935926414817781,
      "learning_rate": 3.837907410467363e-06,
      "loss": 0.1266,
      "step": 20136
    },
    {
      "epoch": 0.5874613454693973,
      "grad_norm": 0.7376191128550542,
      "learning_rate": 3.837447918696915e-06,
      "loss": 0.1424,
      "step": 20137
    },
    {
      "epoch": 0.5874905187000409,
      "grad_norm": 1.0005943673066164,
      "learning_rate": 3.836988437305457e-06,
      "loss": 0.1177,
      "step": 20138
    },
    {
      "epoch": 0.5875196919306844,
      "grad_norm": 0.9030270167580045,
      "learning_rate": 3.836528966297087e-06,
      "loss": 0.1331,
      "step": 20139
    },
    {
      "epoch": 0.587548865161328,
      "grad_norm": 0.6526625704100362,
      "learning_rate": 3.836069505675909e-06,
      "loss": 0.1193,
      "step": 20140
    },
    {
      "epoch": 0.5875780383919715,
      "grad_norm": 1.019786180214768,
      "learning_rate": 3.835610055446024e-06,
      "loss": 0.1357,
      "step": 20141
    },
    {
      "epoch": 0.5876072116226151,
      "grad_norm": 1.0235832849047515,
      "learning_rate": 3.835150615611535e-06,
      "loss": 0.1444,
      "step": 20142
    },
    {
      "epoch": 0.5876363848532586,
      "grad_norm": 0.6907795557456458,
      "learning_rate": 3.8346911861765444e-06,
      "loss": 0.114,
      "step": 20143
    },
    {
      "epoch": 0.5876655580839022,
      "grad_norm": 0.6595963428949111,
      "learning_rate": 3.83423176714515e-06,
      "loss": 0.1151,
      "step": 20144
    },
    {
      "epoch": 0.5876947313145457,
      "grad_norm": 0.9794914247080527,
      "learning_rate": 3.833772358521458e-06,
      "loss": 0.1326,
      "step": 20145
    },
    {
      "epoch": 0.5877239045451893,
      "grad_norm": 0.7917853262638629,
      "learning_rate": 3.833312960309567e-06,
      "loss": 0.1344,
      "step": 20146
    },
    {
      "epoch": 0.5877530777758329,
      "grad_norm": 0.7057080667836103,
      "learning_rate": 3.83285357251358e-06,
      "loss": 0.122,
      "step": 20147
    },
    {
      "epoch": 0.5877822510064764,
      "grad_norm": 0.927868335098306,
      "learning_rate": 3.832394195137599e-06,
      "loss": 0.1672,
      "step": 20148
    },
    {
      "epoch": 0.5878114242371201,
      "grad_norm": 0.7746056396313836,
      "learning_rate": 3.8319348281857215e-06,
      "loss": 0.1141,
      "step": 20149
    },
    {
      "epoch": 0.5878405974677636,
      "grad_norm": 0.827359989904886,
      "learning_rate": 3.831475471662052e-06,
      "loss": 0.0981,
      "step": 20150
    },
    {
      "epoch": 0.5878697706984072,
      "grad_norm": 0.8608432342331177,
      "learning_rate": 3.831016125570692e-06,
      "loss": 0.1292,
      "step": 20151
    },
    {
      "epoch": 0.5878989439290507,
      "grad_norm": 0.7438997762371768,
      "learning_rate": 3.830556789915737e-06,
      "loss": 0.1526,
      "step": 20152
    },
    {
      "epoch": 0.5879281171596943,
      "grad_norm": 0.9969349527665727,
      "learning_rate": 3.830097464701296e-06,
      "loss": 0.1371,
      "step": 20153
    },
    {
      "epoch": 0.5879572903903378,
      "grad_norm": 0.7159327366780542,
      "learning_rate": 3.829638149931464e-06,
      "loss": 0.1135,
      "step": 20154
    },
    {
      "epoch": 0.5879864636209814,
      "grad_norm": 1.1501666243945665,
      "learning_rate": 3.829178845610343e-06,
      "loss": 0.1361,
      "step": 20155
    },
    {
      "epoch": 0.588015636851625,
      "grad_norm": 1.1771798621100007,
      "learning_rate": 3.8287195517420345e-06,
      "loss": 0.1157,
      "step": 20156
    },
    {
      "epoch": 0.5880448100822685,
      "grad_norm": 0.7381653491865249,
      "learning_rate": 3.828260268330638e-06,
      "loss": 0.1118,
      "step": 20157
    },
    {
      "epoch": 0.588073983312912,
      "grad_norm": 0.763061630725222,
      "learning_rate": 3.827800995380252e-06,
      "loss": 0.1444,
      "step": 20158
    },
    {
      "epoch": 0.5881031565435556,
      "grad_norm": 1.3101333418024994,
      "learning_rate": 3.827341732894981e-06,
      "loss": 0.1232,
      "step": 20159
    },
    {
      "epoch": 0.5881323297741992,
      "grad_norm": 0.93697661865873,
      "learning_rate": 3.826882480878923e-06,
      "loss": 0.0984,
      "step": 20160
    },
    {
      "epoch": 0.5881615030048427,
      "grad_norm": 0.8497757827537434,
      "learning_rate": 3.8264232393361785e-06,
      "loss": 0.1178,
      "step": 20161
    },
    {
      "epoch": 0.5881906762354863,
      "grad_norm": 1.1924786070328814,
      "learning_rate": 3.825964008270847e-06,
      "loss": 0.1347,
      "step": 20162
    },
    {
      "epoch": 0.5882198494661299,
      "grad_norm": 1.0225547594735644,
      "learning_rate": 3.825504787687027e-06,
      "loss": 0.0987,
      "step": 20163
    },
    {
      "epoch": 0.5882490226967735,
      "grad_norm": 1.0026292756298758,
      "learning_rate": 3.82504557758882e-06,
      "loss": 0.1282,
      "step": 20164
    },
    {
      "epoch": 0.588278195927417,
      "grad_norm": 0.8335342189841055,
      "learning_rate": 3.824586377980328e-06,
      "loss": 0.1291,
      "step": 20165
    },
    {
      "epoch": 0.5883073691580606,
      "grad_norm": 1.302804691078596,
      "learning_rate": 3.824127188865647e-06,
      "loss": 0.1572,
      "step": 20166
    },
    {
      "epoch": 0.5883365423887041,
      "grad_norm": 0.985718268969789,
      "learning_rate": 3.823668010248877e-06,
      "loss": 0.1274,
      "step": 20167
    },
    {
      "epoch": 0.5883657156193477,
      "grad_norm": 1.597761621760707,
      "learning_rate": 3.82320884213412e-06,
      "loss": 0.1257,
      "step": 20168
    },
    {
      "epoch": 0.5883948888499912,
      "grad_norm": 0.6882805232081349,
      "learning_rate": 3.822749684525472e-06,
      "loss": 0.1345,
      "step": 20169
    },
    {
      "epoch": 0.5884240620806348,
      "grad_norm": 0.8339342206647082,
      "learning_rate": 3.822290537427033e-06,
      "loss": 0.1423,
      "step": 20170
    },
    {
      "epoch": 0.5884532353112784,
      "grad_norm": 1.2302623407076574,
      "learning_rate": 3.8218314008429045e-06,
      "loss": 0.1219,
      "step": 20171
    },
    {
      "epoch": 0.5884824085419219,
      "grad_norm": 1.0556585914346928,
      "learning_rate": 3.821372274777183e-06,
      "loss": 0.1431,
      "step": 20172
    },
    {
      "epoch": 0.5885115817725655,
      "grad_norm": 0.8827344637910692,
      "learning_rate": 3.82091315923397e-06,
      "loss": 0.1443,
      "step": 20173
    },
    {
      "epoch": 0.588540755003209,
      "grad_norm": 0.674349674596774,
      "learning_rate": 3.820454054217362e-06,
      "loss": 0.1351,
      "step": 20174
    },
    {
      "epoch": 0.5885699282338526,
      "grad_norm": 0.8529289725548811,
      "learning_rate": 3.8199949597314586e-06,
      "loss": 0.1523,
      "step": 20175
    },
    {
      "epoch": 0.5885991014644962,
      "grad_norm": 0.9926984196468841,
      "learning_rate": 3.819535875780357e-06,
      "loss": 0.1385,
      "step": 20176
    },
    {
      "epoch": 0.5886282746951398,
      "grad_norm": 0.6518237954321938,
      "learning_rate": 3.8190768023681585e-06,
      "loss": 0.1275,
      "step": 20177
    },
    {
      "epoch": 0.5886574479257833,
      "grad_norm": 0.6130187369350059,
      "learning_rate": 3.818617739498962e-06,
      "loss": 0.1051,
      "step": 20178
    },
    {
      "epoch": 0.5886866211564269,
      "grad_norm": 0.8051082249111425,
      "learning_rate": 3.818158687176862e-06,
      "loss": 0.1431,
      "step": 20179
    },
    {
      "epoch": 0.5887157943870704,
      "grad_norm": 0.6799712927962633,
      "learning_rate": 3.81769964540596e-06,
      "loss": 0.1361,
      "step": 20180
    },
    {
      "epoch": 0.588744967617714,
      "grad_norm": 0.6967539028836852,
      "learning_rate": 3.817240614190354e-06,
      "loss": 0.1174,
      "step": 20181
    },
    {
      "epoch": 0.5887741408483576,
      "grad_norm": 0.9381637487702315,
      "learning_rate": 3.816781593534139e-06,
      "loss": 0.1125,
      "step": 20182
    },
    {
      "epoch": 0.5888033140790011,
      "grad_norm": 0.6395438147681456,
      "learning_rate": 3.816322583441419e-06,
      "loss": 0.1104,
      "step": 20183
    },
    {
      "epoch": 0.5888324873096447,
      "grad_norm": 0.6855004873407058,
      "learning_rate": 3.815863583916286e-06,
      "loss": 0.1201,
      "step": 20184
    },
    {
      "epoch": 0.5888616605402882,
      "grad_norm": 0.9913520040827561,
      "learning_rate": 3.815404594962841e-06,
      "loss": 0.1391,
      "step": 20185
    },
    {
      "epoch": 0.5888908337709318,
      "grad_norm": 0.9338284679029946,
      "learning_rate": 3.814945616585182e-06,
      "loss": 0.1347,
      "step": 20186
    },
    {
      "epoch": 0.5889200070015753,
      "grad_norm": 0.8037720846213434,
      "learning_rate": 3.8144866487874043e-06,
      "loss": 0.1076,
      "step": 20187
    },
    {
      "epoch": 0.5889491802322189,
      "grad_norm": 0.9015038126636395,
      "learning_rate": 3.8140276915736056e-06,
      "loss": 0.1379,
      "step": 20188
    },
    {
      "epoch": 0.5889783534628624,
      "grad_norm": 1.079556128494006,
      "learning_rate": 3.8135687449478865e-06,
      "loss": 0.1487,
      "step": 20189
    },
    {
      "epoch": 0.5890075266935061,
      "grad_norm": 0.9209749360880155,
      "learning_rate": 3.8131098089143415e-06,
      "loss": 0.14,
      "step": 20190
    },
    {
      "epoch": 0.5890366999241496,
      "grad_norm": 0.8223976212634202,
      "learning_rate": 3.8126508834770703e-06,
      "loss": 0.1298,
      "step": 20191
    },
    {
      "epoch": 0.5890658731547932,
      "grad_norm": 0.8623040227317921,
      "learning_rate": 3.812191968640167e-06,
      "loss": 0.1412,
      "step": 20192
    },
    {
      "epoch": 0.5890950463854367,
      "grad_norm": 0.8455280296843426,
      "learning_rate": 3.811733064407731e-06,
      "loss": 0.1179,
      "step": 20193
    },
    {
      "epoch": 0.5891242196160803,
      "grad_norm": 1.0134298386784872,
      "learning_rate": 3.811274170783857e-06,
      "loss": 0.1312,
      "step": 20194
    },
    {
      "epoch": 0.5891533928467239,
      "grad_norm": 0.8541302059261543,
      "learning_rate": 3.8108152877726457e-06,
      "loss": 0.13,
      "step": 20195
    },
    {
      "epoch": 0.5891825660773674,
      "grad_norm": 0.7675668199312625,
      "learning_rate": 3.8103564153781904e-06,
      "loss": 0.105,
      "step": 20196
    },
    {
      "epoch": 0.589211739308011,
      "grad_norm": 1.0805143242330255,
      "learning_rate": 3.809897553604589e-06,
      "loss": 0.1329,
      "step": 20197
    },
    {
      "epoch": 0.5892409125386545,
      "grad_norm": 0.9027691343169723,
      "learning_rate": 3.80943870245594e-06,
      "loss": 0.1416,
      "step": 20198
    },
    {
      "epoch": 0.5892700857692981,
      "grad_norm": 1.0002746479320108,
      "learning_rate": 3.808979861936336e-06,
      "loss": 0.1142,
      "step": 20199
    },
    {
      "epoch": 0.5892992589999416,
      "grad_norm": 0.8930213473446856,
      "learning_rate": 3.808521032049875e-06,
      "loss": 0.1113,
      "step": 20200
    },
    {
      "epoch": 0.5893284322305852,
      "grad_norm": 0.9342242631978366,
      "learning_rate": 3.8080622128006547e-06,
      "loss": 0.1224,
      "step": 20201
    },
    {
      "epoch": 0.5893576054612287,
      "grad_norm": 1.0747903739228073,
      "learning_rate": 3.80760340419277e-06,
      "loss": 0.1172,
      "step": 20202
    },
    {
      "epoch": 0.5893867786918724,
      "grad_norm": 1.1361561748983318,
      "learning_rate": 3.807144606230319e-06,
      "loss": 0.1368,
      "step": 20203
    },
    {
      "epoch": 0.589415951922516,
      "grad_norm": 0.8532972712206477,
      "learning_rate": 3.806685818917395e-06,
      "loss": 0.1186,
      "step": 20204
    },
    {
      "epoch": 0.5894451251531595,
      "grad_norm": 1.0131116558195778,
      "learning_rate": 3.8062270422580953e-06,
      "loss": 0.1144,
      "step": 20205
    },
    {
      "epoch": 0.589474298383803,
      "grad_norm": 0.7055321851541815,
      "learning_rate": 3.805768276256514e-06,
      "loss": 0.1386,
      "step": 20206
    },
    {
      "epoch": 0.5895034716144466,
      "grad_norm": 1.0382676058333964,
      "learning_rate": 3.80530952091675e-06,
      "loss": 0.1033,
      "step": 20207
    },
    {
      "epoch": 0.5895326448450902,
      "grad_norm": 0.7754527877530077,
      "learning_rate": 3.804850776242899e-06,
      "loss": 0.1192,
      "step": 20208
    },
    {
      "epoch": 0.5895618180757337,
      "grad_norm": 0.8098385858799838,
      "learning_rate": 3.8043920422390527e-06,
      "loss": 0.1183,
      "step": 20209
    },
    {
      "epoch": 0.5895909913063773,
      "grad_norm": 1.0213690853970159,
      "learning_rate": 3.80393331890931e-06,
      "loss": 0.1437,
      "step": 20210
    },
    {
      "epoch": 0.5896201645370208,
      "grad_norm": 0.8024946265682966,
      "learning_rate": 3.8034746062577653e-06,
      "loss": 0.1345,
      "step": 20211
    },
    {
      "epoch": 0.5896493377676644,
      "grad_norm": 0.7137791715372281,
      "learning_rate": 3.803015904288511e-06,
      "loss": 0.108,
      "step": 20212
    },
    {
      "epoch": 0.5896785109983079,
      "grad_norm": 0.9698080163434466,
      "learning_rate": 3.8025572130056475e-06,
      "loss": 0.1161,
      "step": 20213
    },
    {
      "epoch": 0.5897076842289515,
      "grad_norm": 0.7361753419335797,
      "learning_rate": 3.8020985324132663e-06,
      "loss": 0.1239,
      "step": 20214
    },
    {
      "epoch": 0.589736857459595,
      "grad_norm": 0.9412898368112532,
      "learning_rate": 3.801639862515464e-06,
      "loss": 0.1362,
      "step": 20215
    },
    {
      "epoch": 0.5897660306902386,
      "grad_norm": 0.6458254491982539,
      "learning_rate": 3.8011812033163365e-06,
      "loss": 0.1246,
      "step": 20216
    },
    {
      "epoch": 0.5897952039208822,
      "grad_norm": 0.8289219559663683,
      "learning_rate": 3.800722554819975e-06,
      "loss": 0.1179,
      "step": 20217
    },
    {
      "epoch": 0.5898243771515258,
      "grad_norm": 1.0031912845215238,
      "learning_rate": 3.8002639170304755e-06,
      "loss": 0.1178,
      "step": 20218
    },
    {
      "epoch": 0.5898535503821694,
      "grad_norm": 0.6587167737029508,
      "learning_rate": 3.7998052899519346e-06,
      "loss": 0.1119,
      "step": 20219
    },
    {
      "epoch": 0.5898827236128129,
      "grad_norm": 0.7147696398102091,
      "learning_rate": 3.7993466735884456e-06,
      "loss": 0.124,
      "step": 20220
    },
    {
      "epoch": 0.5899118968434565,
      "grad_norm": 0.8562683931200171,
      "learning_rate": 3.798888067944103e-06,
      "loss": 0.1329,
      "step": 20221
    },
    {
      "epoch": 0.5899410700741,
      "grad_norm": 0.8384721329675718,
      "learning_rate": 3.7984294730230008e-06,
      "loss": 0.133,
      "step": 20222
    },
    {
      "epoch": 0.5899702433047436,
      "grad_norm": 0.7354471706626688,
      "learning_rate": 3.797970888829233e-06,
      "loss": 0.1045,
      "step": 20223
    },
    {
      "epoch": 0.5899994165353871,
      "grad_norm": 0.7309064317536617,
      "learning_rate": 3.7975123153668935e-06,
      "loss": 0.1199,
      "step": 20224
    },
    {
      "epoch": 0.5900285897660307,
      "grad_norm": 0.9422277741528954,
      "learning_rate": 3.797053752640079e-06,
      "loss": 0.117,
      "step": 20225
    },
    {
      "epoch": 0.5900577629966742,
      "grad_norm": 1.1053133172270788,
      "learning_rate": 3.7965952006528805e-06,
      "loss": 0.1219,
      "step": 20226
    },
    {
      "epoch": 0.5900869362273178,
      "grad_norm": 0.8682861779230187,
      "learning_rate": 3.796136659409393e-06,
      "loss": 0.1234,
      "step": 20227
    },
    {
      "epoch": 0.5901161094579613,
      "grad_norm": 0.8578679187059338,
      "learning_rate": 3.7956781289137103e-06,
      "loss": 0.1213,
      "step": 20228
    },
    {
      "epoch": 0.5901452826886049,
      "grad_norm": 0.6627094752494258,
      "learning_rate": 3.795219609169925e-06,
      "loss": 0.1202,
      "step": 20229
    },
    {
      "epoch": 0.5901744559192486,
      "grad_norm": 1.0334651298328659,
      "learning_rate": 3.7947611001821307e-06,
      "loss": 0.126,
      "step": 20230
    },
    {
      "epoch": 0.5902036291498921,
      "grad_norm": 1.0303606584877127,
      "learning_rate": 3.7943026019544226e-06,
      "loss": 0.1172,
      "step": 20231
    },
    {
      "epoch": 0.5902328023805357,
      "grad_norm": 1.1767317178537413,
      "learning_rate": 3.7938441144908926e-06,
      "loss": 0.1068,
      "step": 20232
    },
    {
      "epoch": 0.5902619756111792,
      "grad_norm": 0.9603041475692659,
      "learning_rate": 3.7933856377956357e-06,
      "loss": 0.1325,
      "step": 20233
    },
    {
      "epoch": 0.5902911488418228,
      "grad_norm": 0.9127724785613851,
      "learning_rate": 3.7929271718727426e-06,
      "loss": 0.1272,
      "step": 20234
    },
    {
      "epoch": 0.5903203220724663,
      "grad_norm": 0.8904540092492622,
      "learning_rate": 3.792468716726308e-06,
      "loss": 0.1143,
      "step": 20235
    },
    {
      "epoch": 0.5903494953031099,
      "grad_norm": 1.1342747403471634,
      "learning_rate": 3.792010272360423e-06,
      "loss": 0.1251,
      "step": 20236
    },
    {
      "epoch": 0.5903786685337534,
      "grad_norm": 0.9276026984423652,
      "learning_rate": 3.7915518387791833e-06,
      "loss": 0.1208,
      "step": 20237
    },
    {
      "epoch": 0.590407841764397,
      "grad_norm": 0.8160098687053883,
      "learning_rate": 3.7910934159866807e-06,
      "loss": 0.1347,
      "step": 20238
    },
    {
      "epoch": 0.5904370149950405,
      "grad_norm": 1.1614221942977292,
      "learning_rate": 3.790635003987007e-06,
      "loss": 0.1322,
      "step": 20239
    },
    {
      "epoch": 0.5904661882256841,
      "grad_norm": 1.071437658293513,
      "learning_rate": 3.7901766027842553e-06,
      "loss": 0.1377,
      "step": 20240
    },
    {
      "epoch": 0.5904953614563276,
      "grad_norm": 0.8739567106959935,
      "learning_rate": 3.7897182123825196e-06,
      "loss": 0.1337,
      "step": 20241
    },
    {
      "epoch": 0.5905245346869712,
      "grad_norm": 0.9832205175421125,
      "learning_rate": 3.7892598327858863e-06,
      "loss": 0.1408,
      "step": 20242
    },
    {
      "epoch": 0.5905537079176147,
      "grad_norm": 1.1821731010630248,
      "learning_rate": 3.788801463998456e-06,
      "loss": 0.1191,
      "step": 20243
    },
    {
      "epoch": 0.5905828811482584,
      "grad_norm": 0.896523533953455,
      "learning_rate": 3.7883431060243163e-06,
      "loss": 0.1076,
      "step": 20244
    },
    {
      "epoch": 0.590612054378902,
      "grad_norm": 0.7510175788776267,
      "learning_rate": 3.78788475886756e-06,
      "loss": 0.1536,
      "step": 20245
    },
    {
      "epoch": 0.5906412276095455,
      "grad_norm": 1.1033535775488357,
      "learning_rate": 3.78742642253228e-06,
      "loss": 0.1198,
      "step": 20246
    },
    {
      "epoch": 0.5906704008401891,
      "grad_norm": 0.8632607001897823,
      "learning_rate": 3.7869680970225663e-06,
      "loss": 0.1434,
      "step": 20247
    },
    {
      "epoch": 0.5906995740708326,
      "grad_norm": 0.7615522216793309,
      "learning_rate": 3.786509782342511e-06,
      "loss": 0.1354,
      "step": 20248
    },
    {
      "epoch": 0.5907287473014762,
      "grad_norm": 0.8663061346334333,
      "learning_rate": 3.7860514784962084e-06,
      "loss": 0.1346,
      "step": 20249
    },
    {
      "epoch": 0.5907579205321197,
      "grad_norm": 0.7234468548634531,
      "learning_rate": 3.7855931854877474e-06,
      "loss": 0.1108,
      "step": 20250
    },
    {
      "epoch": 0.5907870937627633,
      "grad_norm": 1.1542971298041533,
      "learning_rate": 3.785134903321222e-06,
      "loss": 0.1193,
      "step": 20251
    },
    {
      "epoch": 0.5908162669934068,
      "grad_norm": 0.720546737061622,
      "learning_rate": 3.784676632000721e-06,
      "loss": 0.127,
      "step": 20252
    },
    {
      "epoch": 0.5908454402240504,
      "grad_norm": 1.0360990966121526,
      "learning_rate": 3.784218371530337e-06,
      "loss": 0.1263,
      "step": 20253
    },
    {
      "epoch": 0.5908746134546939,
      "grad_norm": 0.7628493944623506,
      "learning_rate": 3.7837601219141605e-06,
      "loss": 0.1075,
      "step": 20254
    },
    {
      "epoch": 0.5909037866853375,
      "grad_norm": 0.7982711688955447,
      "learning_rate": 3.783301883156285e-06,
      "loss": 0.1101,
      "step": 20255
    },
    {
      "epoch": 0.590932959915981,
      "grad_norm": 0.7200743169359279,
      "learning_rate": 3.782843655260799e-06,
      "loss": 0.128,
      "step": 20256
    },
    {
      "epoch": 0.5909621331466246,
      "grad_norm": 0.825527860327397,
      "learning_rate": 3.782385438231794e-06,
      "loss": 0.1177,
      "step": 20257
    },
    {
      "epoch": 0.5909913063772683,
      "grad_norm": 1.1544435995553002,
      "learning_rate": 3.7819272320733626e-06,
      "loss": 0.134,
      "step": 20258
    },
    {
      "epoch": 0.5910204796079118,
      "grad_norm": 0.797619386105925,
      "learning_rate": 3.7814690367895923e-06,
      "loss": 0.1252,
      "step": 20259
    },
    {
      "epoch": 0.5910496528385554,
      "grad_norm": 1.0506363430902557,
      "learning_rate": 3.7810108523845744e-06,
      "loss": 0.1334,
      "step": 20260
    },
    {
      "epoch": 0.5910788260691989,
      "grad_norm": 0.8632340134026724,
      "learning_rate": 3.7805526788624027e-06,
      "loss": 0.1578,
      "step": 20261
    },
    {
      "epoch": 0.5911079992998425,
      "grad_norm": 1.0704684836764167,
      "learning_rate": 3.780094516227165e-06,
      "loss": 0.1328,
      "step": 20262
    },
    {
      "epoch": 0.591137172530486,
      "grad_norm": 0.92312155686962,
      "learning_rate": 3.779636364482953e-06,
      "loss": 0.1267,
      "step": 20263
    },
    {
      "epoch": 0.5911663457611296,
      "grad_norm": 0.7941396087589555,
      "learning_rate": 3.779178223633856e-06,
      "loss": 0.111,
      "step": 20264
    },
    {
      "epoch": 0.5911955189917731,
      "grad_norm": 1.0645081561050787,
      "learning_rate": 3.778720093683964e-06,
      "loss": 0.1471,
      "step": 20265
    },
    {
      "epoch": 0.5912246922224167,
      "grad_norm": 0.9816676630504501,
      "learning_rate": 3.7782619746373663e-06,
      "loss": 0.1235,
      "step": 20266
    },
    {
      "epoch": 0.5912538654530602,
      "grad_norm": 0.777847199885689,
      "learning_rate": 3.777803866498155e-06,
      "loss": 0.1236,
      "step": 20267
    },
    {
      "epoch": 0.5912830386837038,
      "grad_norm": 0.9568779142795496,
      "learning_rate": 3.77734576927042e-06,
      "loss": 0.1009,
      "step": 20268
    },
    {
      "epoch": 0.5913122119143474,
      "grad_norm": 0.775840409936639,
      "learning_rate": 3.776887682958249e-06,
      "loss": 0.1359,
      "step": 20269
    },
    {
      "epoch": 0.5913413851449909,
      "grad_norm": 0.9228157617525361,
      "learning_rate": 3.776429607565733e-06,
      "loss": 0.1215,
      "step": 20270
    },
    {
      "epoch": 0.5913705583756346,
      "grad_norm": 0.8301160033415184,
      "learning_rate": 3.775971543096963e-06,
      "loss": 0.097,
      "step": 20271
    },
    {
      "epoch": 0.5913997316062781,
      "grad_norm": 1.0905985737349984,
      "learning_rate": 3.775513489556023e-06,
      "loss": 0.1402,
      "step": 20272
    },
    {
      "epoch": 0.5914289048369217,
      "grad_norm": 0.9260101498628446,
      "learning_rate": 3.775055446947009e-06,
      "loss": 0.1274,
      "step": 20273
    },
    {
      "epoch": 0.5914580780675652,
      "grad_norm": 0.8318914638435035,
      "learning_rate": 3.7745974152740074e-06,
      "loss": 0.1186,
      "step": 20274
    },
    {
      "epoch": 0.5914872512982088,
      "grad_norm": 1.0085084473662826,
      "learning_rate": 3.7741393945411075e-06,
      "loss": 0.1046,
      "step": 20275
    },
    {
      "epoch": 0.5915164245288523,
      "grad_norm": 0.9674246822945007,
      "learning_rate": 3.773681384752399e-06,
      "loss": 0.1372,
      "step": 20276
    },
    {
      "epoch": 0.5915455977594959,
      "grad_norm": 0.8177735756142149,
      "learning_rate": 3.773223385911969e-06,
      "loss": 0.1512,
      "step": 20277
    },
    {
      "epoch": 0.5915747709901394,
      "grad_norm": 0.8951644185692315,
      "learning_rate": 3.7727653980239077e-06,
      "loss": 0.1027,
      "step": 20278
    },
    {
      "epoch": 0.591603944220783,
      "grad_norm": 0.7730806463621648,
      "learning_rate": 3.7723074210923046e-06,
      "loss": 0.11,
      "step": 20279
    },
    {
      "epoch": 0.5916331174514265,
      "grad_norm": 1.4916102140596397,
      "learning_rate": 3.7718494551212477e-06,
      "loss": 0.1219,
      "step": 20280
    },
    {
      "epoch": 0.5916622906820701,
      "grad_norm": 1.2629613847233272,
      "learning_rate": 3.7713915001148264e-06,
      "loss": 0.1394,
      "step": 20281
    },
    {
      "epoch": 0.5916914639127137,
      "grad_norm": 0.8404674415570916,
      "learning_rate": 3.770933556077128e-06,
      "loss": 0.131,
      "step": 20282
    },
    {
      "epoch": 0.5917206371433572,
      "grad_norm": 0.8222340325811418,
      "learning_rate": 3.7704756230122404e-06,
      "loss": 0.1198,
      "step": 20283
    },
    {
      "epoch": 0.5917498103740008,
      "grad_norm": 1.2953859987497278,
      "learning_rate": 3.7700177009242533e-06,
      "loss": 0.1464,
      "step": 20284
    },
    {
      "epoch": 0.5917789836046444,
      "grad_norm": 0.7463448766649863,
      "learning_rate": 3.769559789817256e-06,
      "loss": 0.1227,
      "step": 20285
    },
    {
      "epoch": 0.591808156835288,
      "grad_norm": 0.9141728419939459,
      "learning_rate": 3.769101889695334e-06,
      "loss": 0.1088,
      "step": 20286
    },
    {
      "epoch": 0.5918373300659315,
      "grad_norm": 0.7300132060866088,
      "learning_rate": 3.768644000562577e-06,
      "loss": 0.1378,
      "step": 20287
    },
    {
      "epoch": 0.5918665032965751,
      "grad_norm": 0.9292285150265088,
      "learning_rate": 3.768186122423073e-06,
      "loss": 0.1101,
      "step": 20288
    },
    {
      "epoch": 0.5918956765272186,
      "grad_norm": 0.8716206320290674,
      "learning_rate": 3.767728255280906e-06,
      "loss": 0.1297,
      "step": 20289
    },
    {
      "epoch": 0.5919248497578622,
      "grad_norm": 0.8383145328399962,
      "learning_rate": 3.7672703991401706e-06,
      "loss": 0.1149,
      "step": 20290
    },
    {
      "epoch": 0.5919540229885057,
      "grad_norm": 0.7973474333886468,
      "learning_rate": 3.7668125540049493e-06,
      "loss": 0.1439,
      "step": 20291
    },
    {
      "epoch": 0.5919831962191493,
      "grad_norm": 0.7862309807261225,
      "learning_rate": 3.766354719879331e-06,
      "loss": 0.124,
      "step": 20292
    },
    {
      "epoch": 0.5920123694497929,
      "grad_norm": 0.8835775439221308,
      "learning_rate": 3.7658968967674046e-06,
      "loss": 0.1385,
      "step": 20293
    },
    {
      "epoch": 0.5920415426804364,
      "grad_norm": 0.8011348997365383,
      "learning_rate": 3.765439084673255e-06,
      "loss": 0.1302,
      "step": 20294
    },
    {
      "epoch": 0.59207071591108,
      "grad_norm": 1.027977766405424,
      "learning_rate": 3.76498128360097e-06,
      "loss": 0.1491,
      "step": 20295
    },
    {
      "epoch": 0.5920998891417235,
      "grad_norm": 0.8544333249902301,
      "learning_rate": 3.7645234935546377e-06,
      "loss": 0.1159,
      "step": 20296
    },
    {
      "epoch": 0.5921290623723671,
      "grad_norm": 0.8805906780075904,
      "learning_rate": 3.7640657145383445e-06,
      "loss": 0.1137,
      "step": 20297
    },
    {
      "epoch": 0.5921582356030107,
      "grad_norm": 0.7795037875714507,
      "learning_rate": 3.7636079465561793e-06,
      "loss": 0.1275,
      "step": 20298
    },
    {
      "epoch": 0.5921874088336543,
      "grad_norm": 0.8167336207242685,
      "learning_rate": 3.763150189612226e-06,
      "loss": 0.1294,
      "step": 20299
    },
    {
      "epoch": 0.5922165820642978,
      "grad_norm": 0.6971774326622011,
      "learning_rate": 3.7626924437105723e-06,
      "loss": 0.1266,
      "step": 20300
    },
    {
      "epoch": 0.5922457552949414,
      "grad_norm": 0.6563811155516861,
      "learning_rate": 3.762234708855304e-06,
      "loss": 0.1172,
      "step": 20301
    },
    {
      "epoch": 0.5922749285255849,
      "grad_norm": 0.7529173622208494,
      "learning_rate": 3.76177698505051e-06,
      "loss": 0.1161,
      "step": 20302
    },
    {
      "epoch": 0.5923041017562285,
      "grad_norm": 0.8713120732052216,
      "learning_rate": 3.761319272300276e-06,
      "loss": 0.1219,
      "step": 20303
    },
    {
      "epoch": 0.592333274986872,
      "grad_norm": 0.7023739943710798,
      "learning_rate": 3.7608615706086876e-06,
      "loss": 0.1213,
      "step": 20304
    },
    {
      "epoch": 0.5923624482175156,
      "grad_norm": 1.068413761264487,
      "learning_rate": 3.760403879979831e-06,
      "loss": 0.1267,
      "step": 20305
    },
    {
      "epoch": 0.5923916214481592,
      "grad_norm": 1.0622620883514793,
      "learning_rate": 3.759946200417793e-06,
      "loss": 0.1373,
      "step": 20306
    },
    {
      "epoch": 0.5924207946788027,
      "grad_norm": 0.8729555445934811,
      "learning_rate": 3.759488531926657e-06,
      "loss": 0.1254,
      "step": 20307
    },
    {
      "epoch": 0.5924499679094463,
      "grad_norm": 0.8807063874024929,
      "learning_rate": 3.7590308745105143e-06,
      "loss": 0.1369,
      "step": 20308
    },
    {
      "epoch": 0.5924791411400898,
      "grad_norm": 0.7730701181240264,
      "learning_rate": 3.7585732281734467e-06,
      "loss": 0.1226,
      "step": 20309
    },
    {
      "epoch": 0.5925083143707334,
      "grad_norm": 0.6801049083834891,
      "learning_rate": 3.7581155929195405e-06,
      "loss": 0.1325,
      "step": 20310
    },
    {
      "epoch": 0.5925374876013769,
      "grad_norm": 0.9171360209832026,
      "learning_rate": 3.7576579687528836e-06,
      "loss": 0.1416,
      "step": 20311
    },
    {
      "epoch": 0.5925666608320206,
      "grad_norm": 0.9015598603877577,
      "learning_rate": 3.757200355677558e-06,
      "loss": 0.1365,
      "step": 20312
    },
    {
      "epoch": 0.5925958340626641,
      "grad_norm": 0.9509602034197917,
      "learning_rate": 3.75674275369765e-06,
      "loss": 0.1193,
      "step": 20313
    },
    {
      "epoch": 0.5926250072933077,
      "grad_norm": 0.8274027554620362,
      "learning_rate": 3.7562851628172476e-06,
      "loss": 0.109,
      "step": 20314
    },
    {
      "epoch": 0.5926541805239512,
      "grad_norm": 2.385593261944973,
      "learning_rate": 3.755827583040435e-06,
      "loss": 0.1096,
      "step": 20315
    },
    {
      "epoch": 0.5926833537545948,
      "grad_norm": 0.8304798709331231,
      "learning_rate": 3.7553700143712956e-06,
      "loss": 0.1445,
      "step": 20316
    },
    {
      "epoch": 0.5927125269852384,
      "grad_norm": 0.9432083259294172,
      "learning_rate": 3.7549124568139158e-06,
      "loss": 0.1224,
      "step": 20317
    },
    {
      "epoch": 0.5927417002158819,
      "grad_norm": 0.9228415072757776,
      "learning_rate": 3.754454910372381e-06,
      "loss": 0.1417,
      "step": 20318
    },
    {
      "epoch": 0.5927708734465255,
      "grad_norm": 0.8394270709799782,
      "learning_rate": 3.7539973750507723e-06,
      "loss": 0.1356,
      "step": 20319
    },
    {
      "epoch": 0.592800046677169,
      "grad_norm": 1.0490922762574508,
      "learning_rate": 3.753539850853181e-06,
      "loss": 0.1182,
      "step": 20320
    },
    {
      "epoch": 0.5928292199078126,
      "grad_norm": 0.9272445567929332,
      "learning_rate": 3.753082337783688e-06,
      "loss": 0.0953,
      "step": 20321
    },
    {
      "epoch": 0.5928583931384561,
      "grad_norm": 0.7105217279422499,
      "learning_rate": 3.7526248358463768e-06,
      "loss": 0.1458,
      "step": 20322
    },
    {
      "epoch": 0.5928875663690997,
      "grad_norm": 0.9120023752836961,
      "learning_rate": 3.7521673450453356e-06,
      "loss": 0.1424,
      "step": 20323
    },
    {
      "epoch": 0.5929167395997432,
      "grad_norm": 1.257870197846429,
      "learning_rate": 3.7517098653846446e-06,
      "loss": 0.1058,
      "step": 20324
    },
    {
      "epoch": 0.5929459128303869,
      "grad_norm": 0.8575419280848129,
      "learning_rate": 3.751252396868389e-06,
      "loss": 0.118,
      "step": 20325
    },
    {
      "epoch": 0.5929750860610304,
      "grad_norm": 0.7128499351579052,
      "learning_rate": 3.750794939500655e-06,
      "loss": 0.1314,
      "step": 20326
    },
    {
      "epoch": 0.593004259291674,
      "grad_norm": 0.8097750720264704,
      "learning_rate": 3.7503374932855258e-06,
      "loss": 0.1234,
      "step": 20327
    },
    {
      "epoch": 0.5930334325223175,
      "grad_norm": 0.9747084859732462,
      "learning_rate": 3.7498800582270863e-06,
      "loss": 0.1073,
      "step": 20328
    },
    {
      "epoch": 0.5930626057529611,
      "grad_norm": 0.7648578301821678,
      "learning_rate": 3.7494226343294177e-06,
      "loss": 0.1,
      "step": 20329
    },
    {
      "epoch": 0.5930917789836047,
      "grad_norm": 0.7565341007173764,
      "learning_rate": 3.7489652215966055e-06,
      "loss": 0.1393,
      "step": 20330
    },
    {
      "epoch": 0.5931209522142482,
      "grad_norm": 0.7887501873350983,
      "learning_rate": 3.7485078200327317e-06,
      "loss": 0.1316,
      "step": 20331
    },
    {
      "epoch": 0.5931501254448918,
      "grad_norm": 0.8692053319094313,
      "learning_rate": 3.748050429641883e-06,
      "loss": 0.1152,
      "step": 20332
    },
    {
      "epoch": 0.5931792986755353,
      "grad_norm": 0.7751738676907003,
      "learning_rate": 3.747593050428142e-06,
      "loss": 0.1413,
      "step": 20333
    },
    {
      "epoch": 0.5932084719061789,
      "grad_norm": 0.7288789974986442,
      "learning_rate": 3.7471356823955908e-06,
      "loss": 0.1356,
      "step": 20334
    },
    {
      "epoch": 0.5932376451368224,
      "grad_norm": 0.8715625909154425,
      "learning_rate": 3.7466783255483125e-06,
      "loss": 0.1503,
      "step": 20335
    },
    {
      "epoch": 0.593266818367466,
      "grad_norm": 0.8948276633084514,
      "learning_rate": 3.746220979890392e-06,
      "loss": 0.1303,
      "step": 20336
    },
    {
      "epoch": 0.5932959915981095,
      "grad_norm": 0.7820814106919346,
      "learning_rate": 3.7457636454259084e-06,
      "loss": 0.1221,
      "step": 20337
    },
    {
      "epoch": 0.5933251648287531,
      "grad_norm": 0.9603909685091399,
      "learning_rate": 3.74530632215895e-06,
      "loss": 0.1262,
      "step": 20338
    },
    {
      "epoch": 0.5933543380593967,
      "grad_norm": 0.9199513407352496,
      "learning_rate": 3.744849010093597e-06,
      "loss": 0.1262,
      "step": 20339
    },
    {
      "epoch": 0.5933835112900403,
      "grad_norm": 0.9135260477282455,
      "learning_rate": 3.7443917092339323e-06,
      "loss": 0.1241,
      "step": 20340
    },
    {
      "epoch": 0.5934126845206839,
      "grad_norm": 0.9318084696620351,
      "learning_rate": 3.7439344195840393e-06,
      "loss": 0.1205,
      "step": 20341
    },
    {
      "epoch": 0.5934418577513274,
      "grad_norm": 0.8841853727203788,
      "learning_rate": 3.7434771411479993e-06,
      "loss": 0.1069,
      "step": 20342
    },
    {
      "epoch": 0.593471030981971,
      "grad_norm": 1.23356435704127,
      "learning_rate": 3.743019873929894e-06,
      "loss": 0.138,
      "step": 20343
    },
    {
      "epoch": 0.5935002042126145,
      "grad_norm": 1.300083953889085,
      "learning_rate": 3.7425626179338087e-06,
      "loss": 0.1369,
      "step": 20344
    },
    {
      "epoch": 0.5935293774432581,
      "grad_norm": 0.9076827024539956,
      "learning_rate": 3.7421053731638247e-06,
      "loss": 0.1724,
      "step": 20345
    },
    {
      "epoch": 0.5935585506739016,
      "grad_norm": 0.9152453874334994,
      "learning_rate": 3.7416481396240233e-06,
      "loss": 0.1179,
      "step": 20346
    },
    {
      "epoch": 0.5935877239045452,
      "grad_norm": 0.7506870356234908,
      "learning_rate": 3.7411909173184863e-06,
      "loss": 0.1167,
      "step": 20347
    },
    {
      "epoch": 0.5936168971351887,
      "grad_norm": 0.8966013397136234,
      "learning_rate": 3.740733706251298e-06,
      "loss": 0.1177,
      "step": 20348
    },
    {
      "epoch": 0.5936460703658323,
      "grad_norm": 0.8199959588363075,
      "learning_rate": 3.7402765064265346e-06,
      "loss": 0.1291,
      "step": 20349
    },
    {
      "epoch": 0.5936752435964758,
      "grad_norm": 0.9856671762568906,
      "learning_rate": 3.7398193178482855e-06,
      "loss": 0.1545,
      "step": 20350
    },
    {
      "epoch": 0.5937044168271194,
      "grad_norm": 0.7912230412079806,
      "learning_rate": 3.739362140520627e-06,
      "loss": 0.1406,
      "step": 20351
    },
    {
      "epoch": 0.593733590057763,
      "grad_norm": 0.8040817049542729,
      "learning_rate": 3.7389049744476437e-06,
      "loss": 0.1257,
      "step": 20352
    },
    {
      "epoch": 0.5937627632884066,
      "grad_norm": 0.7683089581911615,
      "learning_rate": 3.738447819633415e-06,
      "loss": 0.1199,
      "step": 20353
    },
    {
      "epoch": 0.5937919365190502,
      "grad_norm": 0.709091046344398,
      "learning_rate": 3.7379906760820234e-06,
      "loss": 0.107,
      "step": 20354
    },
    {
      "epoch": 0.5938211097496937,
      "grad_norm": 0.7930700175595372,
      "learning_rate": 3.737533543797548e-06,
      "loss": 0.132,
      "step": 20355
    },
    {
      "epoch": 0.5938502829803373,
      "grad_norm": 0.829812465390992,
      "learning_rate": 3.7370764227840734e-06,
      "loss": 0.1611,
      "step": 20356
    },
    {
      "epoch": 0.5938794562109808,
      "grad_norm": 0.7824138393830351,
      "learning_rate": 3.7366193130456784e-06,
      "loss": 0.1019,
      "step": 20357
    },
    {
      "epoch": 0.5939086294416244,
      "grad_norm": 0.8153222821715773,
      "learning_rate": 3.736162214586446e-06,
      "loss": 0.1364,
      "step": 20358
    },
    {
      "epoch": 0.5939378026722679,
      "grad_norm": 0.8966206591182314,
      "learning_rate": 3.7357051274104545e-06,
      "loss": 0.1107,
      "step": 20359
    },
    {
      "epoch": 0.5939669759029115,
      "grad_norm": 0.8034161455187806,
      "learning_rate": 3.735248051521786e-06,
      "loss": 0.1104,
      "step": 20360
    },
    {
      "epoch": 0.593996149133555,
      "grad_norm": 0.7245629777622508,
      "learning_rate": 3.734790986924519e-06,
      "loss": 0.1009,
      "step": 20361
    },
    {
      "epoch": 0.5940253223641986,
      "grad_norm": 0.8347839313674297,
      "learning_rate": 3.734333933622738e-06,
      "loss": 0.1052,
      "step": 20362
    },
    {
      "epoch": 0.5940544955948421,
      "grad_norm": 0.8366940053896308,
      "learning_rate": 3.7338768916205224e-06,
      "loss": 0.1295,
      "step": 20363
    },
    {
      "epoch": 0.5940836688254857,
      "grad_norm": 0.7912055817415566,
      "learning_rate": 3.7334198609219506e-06,
      "loss": 0.1364,
      "step": 20364
    },
    {
      "epoch": 0.5941128420561292,
      "grad_norm": 1.3096844057058885,
      "learning_rate": 3.7329628415311043e-06,
      "loss": 0.1215,
      "step": 20365
    },
    {
      "epoch": 0.5941420152867729,
      "grad_norm": 0.8284737979793699,
      "learning_rate": 3.7325058334520637e-06,
      "loss": 0.1393,
      "step": 20366
    },
    {
      "epoch": 0.5941711885174165,
      "grad_norm": 1.3668484993926355,
      "learning_rate": 3.7320488366889064e-06,
      "loss": 0.113,
      "step": 20367
    },
    {
      "epoch": 0.59420036174806,
      "grad_norm": 0.8733517250331496,
      "learning_rate": 3.731591851245716e-06,
      "loss": 0.1456,
      "step": 20368
    },
    {
      "epoch": 0.5942295349787036,
      "grad_norm": 0.7996803319961718,
      "learning_rate": 3.73113487712657e-06,
      "loss": 0.1246,
      "step": 20369
    },
    {
      "epoch": 0.5942587082093471,
      "grad_norm": 0.7079043012081149,
      "learning_rate": 3.73067791433555e-06,
      "loss": 0.135,
      "step": 20370
    },
    {
      "epoch": 0.5942878814399907,
      "grad_norm": 0.7480051648978058,
      "learning_rate": 3.7302209628767345e-06,
      "loss": 0.133,
      "step": 20371
    },
    {
      "epoch": 0.5943170546706342,
      "grad_norm": 0.7366026184962297,
      "learning_rate": 3.7297640227542024e-06,
      "loss": 0.1169,
      "step": 20372
    },
    {
      "epoch": 0.5943462279012778,
      "grad_norm": 2.1814451346277037,
      "learning_rate": 3.7293070939720332e-06,
      "loss": 0.1251,
      "step": 20373
    },
    {
      "epoch": 0.5943754011319213,
      "grad_norm": 0.959928786799977,
      "learning_rate": 3.7288501765343076e-06,
      "loss": 0.1413,
      "step": 20374
    },
    {
      "epoch": 0.5944045743625649,
      "grad_norm": 0.8054432663471339,
      "learning_rate": 3.7283932704451053e-06,
      "loss": 0.1348,
      "step": 20375
    },
    {
      "epoch": 0.5944337475932084,
      "grad_norm": 0.6711341243653186,
      "learning_rate": 3.727936375708503e-06,
      "loss": 0.1143,
      "step": 20376
    },
    {
      "epoch": 0.594462920823852,
      "grad_norm": 0.8048865207066975,
      "learning_rate": 3.727479492328582e-06,
      "loss": 0.1355,
      "step": 20377
    },
    {
      "epoch": 0.5944920940544955,
      "grad_norm": 0.8756069044306218,
      "learning_rate": 3.7270226203094207e-06,
      "loss": 0.1065,
      "step": 20378
    },
    {
      "epoch": 0.5945212672851392,
      "grad_norm": 0.7645233073994955,
      "learning_rate": 3.726565759655094e-06,
      "loss": 0.1356,
      "step": 20379
    },
    {
      "epoch": 0.5945504405157828,
      "grad_norm": 0.8286671678293144,
      "learning_rate": 3.726108910369688e-06,
      "loss": 0.1275,
      "step": 20380
    },
    {
      "epoch": 0.5945796137464263,
      "grad_norm": 0.7099918736793572,
      "learning_rate": 3.7256520724572766e-06,
      "loss": 0.1474,
      "step": 20381
    },
    {
      "epoch": 0.5946087869770699,
      "grad_norm": 0.8368279277975266,
      "learning_rate": 3.7251952459219385e-06,
      "loss": 0.0914,
      "step": 20382
    },
    {
      "epoch": 0.5946379602077134,
      "grad_norm": 0.6257342084471068,
      "learning_rate": 3.724738430767755e-06,
      "loss": 0.1382,
      "step": 20383
    },
    {
      "epoch": 0.594667133438357,
      "grad_norm": 0.7840624078300055,
      "learning_rate": 3.7242816269988e-06,
      "loss": 0.1343,
      "step": 20384
    },
    {
      "epoch": 0.5946963066690005,
      "grad_norm": 0.8047747085774777,
      "learning_rate": 3.7238248346191543e-06,
      "loss": 0.1164,
      "step": 20385
    },
    {
      "epoch": 0.5947254798996441,
      "grad_norm": 1.0750330604736558,
      "learning_rate": 3.7233680536328965e-06,
      "loss": 0.1133,
      "step": 20386
    },
    {
      "epoch": 0.5947546531302876,
      "grad_norm": 0.8052074439890694,
      "learning_rate": 3.7229112840441036e-06,
      "loss": 0.1426,
      "step": 20387
    },
    {
      "epoch": 0.5947838263609312,
      "grad_norm": 1.0046242506305367,
      "learning_rate": 3.722454525856855e-06,
      "loss": 0.1271,
      "step": 20388
    },
    {
      "epoch": 0.5948129995915747,
      "grad_norm": 0.920190064571862,
      "learning_rate": 3.7219977790752265e-06,
      "loss": 0.1186,
      "step": 20389
    },
    {
      "epoch": 0.5948421728222183,
      "grad_norm": 0.9141840259196852,
      "learning_rate": 3.721541043703297e-06,
      "loss": 0.131,
      "step": 20390
    },
    {
      "epoch": 0.5948713460528618,
      "grad_norm": 0.8071523413765483,
      "learning_rate": 3.7210843197451423e-06,
      "loss": 0.1568,
      "step": 20391
    },
    {
      "epoch": 0.5949005192835054,
      "grad_norm": 0.9220774603287075,
      "learning_rate": 3.720627607204843e-06,
      "loss": 0.1442,
      "step": 20392
    },
    {
      "epoch": 0.5949296925141491,
      "grad_norm": 0.9029026910952078,
      "learning_rate": 3.720170906086476e-06,
      "loss": 0.0972,
      "step": 20393
    },
    {
      "epoch": 0.5949588657447926,
      "grad_norm": 0.8324401036230402,
      "learning_rate": 3.719714216394117e-06,
      "loss": 0.1127,
      "step": 20394
    },
    {
      "epoch": 0.5949880389754362,
      "grad_norm": 0.9037690580022587,
      "learning_rate": 3.719257538131843e-06,
      "loss": 0.1183,
      "step": 20395
    },
    {
      "epoch": 0.5950172122060797,
      "grad_norm": 0.8559684972197111,
      "learning_rate": 3.718800871303733e-06,
      "loss": 0.1096,
      "step": 20396
    },
    {
      "epoch": 0.5950463854367233,
      "grad_norm": 1.1706300170802248,
      "learning_rate": 3.7183442159138618e-06,
      "loss": 0.1319,
      "step": 20397
    },
    {
      "epoch": 0.5950755586673668,
      "grad_norm": 0.8423446476329747,
      "learning_rate": 3.717887571966308e-06,
      "loss": 0.1399,
      "step": 20398
    },
    {
      "epoch": 0.5951047318980104,
      "grad_norm": 0.957491801776679,
      "learning_rate": 3.7174309394651476e-06,
      "loss": 0.1199,
      "step": 20399
    },
    {
      "epoch": 0.5951339051286539,
      "grad_norm": 0.9615955937963768,
      "learning_rate": 3.716974318414458e-06,
      "loss": 0.1058,
      "step": 20400
    },
    {
      "epoch": 0.5951630783592975,
      "grad_norm": 0.7055027976814059,
      "learning_rate": 3.7165177088183158e-06,
      "loss": 0.1325,
      "step": 20401
    },
    {
      "epoch": 0.595192251589941,
      "grad_norm": 0.8306744452301431,
      "learning_rate": 3.716061110680797e-06,
      "loss": 0.1214,
      "step": 20402
    },
    {
      "epoch": 0.5952214248205846,
      "grad_norm": 0.8514220181183535,
      "learning_rate": 3.7156045240059766e-06,
      "loss": 0.1371,
      "step": 20403
    },
    {
      "epoch": 0.5952505980512282,
      "grad_norm": 1.0421853182694367,
      "learning_rate": 3.7151479487979335e-06,
      "loss": 0.1287,
      "step": 20404
    },
    {
      "epoch": 0.5952797712818717,
      "grad_norm": 0.7951703747004137,
      "learning_rate": 3.7146913850607435e-06,
      "loss": 0.1174,
      "step": 20405
    },
    {
      "epoch": 0.5953089445125154,
      "grad_norm": 0.9022842613203131,
      "learning_rate": 3.714234832798481e-06,
      "loss": 0.1211,
      "step": 20406
    },
    {
      "epoch": 0.5953381177431589,
      "grad_norm": 0.8525782163889262,
      "learning_rate": 3.7137782920152237e-06,
      "loss": 0.125,
      "step": 20407
    },
    {
      "epoch": 0.5953672909738025,
      "grad_norm": 0.8668321566287852,
      "learning_rate": 3.7133217627150475e-06,
      "loss": 0.1264,
      "step": 20408
    },
    {
      "epoch": 0.595396464204446,
      "grad_norm": 0.8136735881886328,
      "learning_rate": 3.712865244902024e-06,
      "loss": 0.1459,
      "step": 20409
    },
    {
      "epoch": 0.5954256374350896,
      "grad_norm": 0.832175816541259,
      "learning_rate": 3.7124087385802353e-06,
      "loss": 0.1179,
      "step": 20410
    },
    {
      "epoch": 0.5954548106657331,
      "grad_norm": 0.8724492284315477,
      "learning_rate": 3.7119522437537537e-06,
      "loss": 0.1288,
      "step": 20411
    },
    {
      "epoch": 0.5954839838963767,
      "grad_norm": 1.0214453828923034,
      "learning_rate": 3.7114957604266546e-06,
      "loss": 0.133,
      "step": 20412
    },
    {
      "epoch": 0.5955131571270202,
      "grad_norm": 0.8232700476169499,
      "learning_rate": 3.7110392886030145e-06,
      "loss": 0.1303,
      "step": 20413
    },
    {
      "epoch": 0.5955423303576638,
      "grad_norm": 0.799274066246224,
      "learning_rate": 3.710582828286907e-06,
      "loss": 0.1184,
      "step": 20414
    },
    {
      "epoch": 0.5955715035883073,
      "grad_norm": 0.8350031645601355,
      "learning_rate": 3.7101263794824072e-06,
      "loss": 0.1084,
      "step": 20415
    },
    {
      "epoch": 0.5956006768189509,
      "grad_norm": 0.8646161730243356,
      "learning_rate": 3.7096699421935926e-06,
      "loss": 0.1508,
      "step": 20416
    },
    {
      "epoch": 0.5956298500495945,
      "grad_norm": 0.8859231506998803,
      "learning_rate": 3.709213516424537e-06,
      "loss": 0.1399,
      "step": 20417
    },
    {
      "epoch": 0.595659023280238,
      "grad_norm": 0.8511175372845571,
      "learning_rate": 3.708757102179315e-06,
      "loss": 0.1257,
      "step": 20418
    },
    {
      "epoch": 0.5956881965108816,
      "grad_norm": 0.9045200428975965,
      "learning_rate": 3.708300699462001e-06,
      "loss": 0.1426,
      "step": 20419
    },
    {
      "epoch": 0.5957173697415252,
      "grad_norm": 0.8359579178889355,
      "learning_rate": 3.7078443082766694e-06,
      "loss": 0.1264,
      "step": 20420
    },
    {
      "epoch": 0.5957465429721688,
      "grad_norm": 0.8293232457778126,
      "learning_rate": 3.707387928627395e-06,
      "loss": 0.1243,
      "step": 20421
    },
    {
      "epoch": 0.5957757162028123,
      "grad_norm": 0.8543332688610659,
      "learning_rate": 3.706931560518253e-06,
      "loss": 0.1369,
      "step": 20422
    },
    {
      "epoch": 0.5958048894334559,
      "grad_norm": 0.8685431899547782,
      "learning_rate": 3.706475203953319e-06,
      "loss": 0.1125,
      "step": 20423
    },
    {
      "epoch": 0.5958340626640994,
      "grad_norm": 0.8912930805152341,
      "learning_rate": 3.706018858936664e-06,
      "loss": 0.1213,
      "step": 20424
    },
    {
      "epoch": 0.595863235894743,
      "grad_norm": 0.9540886112928165,
      "learning_rate": 3.7055625254723645e-06,
      "loss": 0.1108,
      "step": 20425
    },
    {
      "epoch": 0.5958924091253865,
      "grad_norm": 0.6292913574709981,
      "learning_rate": 3.705106203564494e-06,
      "loss": 0.1246,
      "step": 20426
    },
    {
      "epoch": 0.5959215823560301,
      "grad_norm": 0.866460610219473,
      "learning_rate": 3.7046498932171247e-06,
      "loss": 0.1202,
      "step": 20427
    },
    {
      "epoch": 0.5959507555866737,
      "grad_norm": 0.9358369037365718,
      "learning_rate": 3.7041935944343325e-06,
      "loss": 0.1225,
      "step": 20428
    },
    {
      "epoch": 0.5959799288173172,
      "grad_norm": 0.8857018143086642,
      "learning_rate": 3.703737307220191e-06,
      "loss": 0.1124,
      "step": 20429
    },
    {
      "epoch": 0.5960091020479608,
      "grad_norm": 0.8307551087365714,
      "learning_rate": 3.7032810315787726e-06,
      "loss": 0.1405,
      "step": 20430
    },
    {
      "epoch": 0.5960382752786043,
      "grad_norm": 0.8077905351829049,
      "learning_rate": 3.7028247675141538e-06,
      "loss": 0.1342,
      "step": 20431
    },
    {
      "epoch": 0.5960674485092479,
      "grad_norm": 0.768252473178892,
      "learning_rate": 3.702368515030404e-06,
      "loss": 0.0898,
      "step": 20432
    },
    {
      "epoch": 0.5960966217398915,
      "grad_norm": 1.1228798295974916,
      "learning_rate": 3.701912274131597e-06,
      "loss": 0.1441,
      "step": 20433
    },
    {
      "epoch": 0.5961257949705351,
      "grad_norm": 0.653153642098277,
      "learning_rate": 3.7014560448218094e-06,
      "loss": 0.1148,
      "step": 20434
    },
    {
      "epoch": 0.5961549682011786,
      "grad_norm": 0.7372247471430156,
      "learning_rate": 3.7009998271051127e-06,
      "loss": 0.1184,
      "step": 20435
    },
    {
      "epoch": 0.5961841414318222,
      "grad_norm": 0.8465129941024829,
      "learning_rate": 3.700543620985578e-06,
      "loss": 0.114,
      "step": 20436
    },
    {
      "epoch": 0.5962133146624657,
      "grad_norm": 0.8628217830651215,
      "learning_rate": 3.7000874264672804e-06,
      "loss": 0.1313,
      "step": 20437
    },
    {
      "epoch": 0.5962424878931093,
      "grad_norm": 0.738228192358053,
      "learning_rate": 3.6996312435542925e-06,
      "loss": 0.127,
      "step": 20438
    },
    {
      "epoch": 0.5962716611237528,
      "grad_norm": 1.0597878518786803,
      "learning_rate": 3.6991750722506835e-06,
      "loss": 0.1202,
      "step": 20439
    },
    {
      "epoch": 0.5963008343543964,
      "grad_norm": 0.810686714995001,
      "learning_rate": 3.6987189125605315e-06,
      "loss": 0.1302,
      "step": 20440
    },
    {
      "epoch": 0.59633000758504,
      "grad_norm": 0.8079389368908894,
      "learning_rate": 3.6982627644879065e-06,
      "loss": 0.1522,
      "step": 20441
    },
    {
      "epoch": 0.5963591808156835,
      "grad_norm": 0.8620911802258356,
      "learning_rate": 3.6978066280368797e-06,
      "loss": 0.1086,
      "step": 20442
    },
    {
      "epoch": 0.5963883540463271,
      "grad_norm": 0.8940846739395645,
      "learning_rate": 3.6973505032115262e-06,
      "loss": 0.135,
      "step": 20443
    },
    {
      "epoch": 0.5964175272769706,
      "grad_norm": 0.7280652859703205,
      "learning_rate": 3.696894390015915e-06,
      "loss": 0.1066,
      "step": 20444
    },
    {
      "epoch": 0.5964467005076142,
      "grad_norm": 0.8332426661569426,
      "learning_rate": 3.6964382884541188e-06,
      "loss": 0.1127,
      "step": 20445
    },
    {
      "epoch": 0.5964758737382577,
      "grad_norm": 0.8961981703523535,
      "learning_rate": 3.695982198530211e-06,
      "loss": 0.132,
      "step": 20446
    },
    {
      "epoch": 0.5965050469689014,
      "grad_norm": 0.6293797067487202,
      "learning_rate": 3.695526120248264e-06,
      "loss": 0.118,
      "step": 20447
    },
    {
      "epoch": 0.5965342201995449,
      "grad_norm": 0.9985226237685431,
      "learning_rate": 3.6950700536123486e-06,
      "loss": 0.1149,
      "step": 20448
    },
    {
      "epoch": 0.5965633934301885,
      "grad_norm": 0.8470849790946957,
      "learning_rate": 3.694613998626535e-06,
      "loss": 0.1281,
      "step": 20449
    },
    {
      "epoch": 0.596592566660832,
      "grad_norm": 0.7890509438732369,
      "learning_rate": 3.694157955294896e-06,
      "loss": 0.1316,
      "step": 20450
    },
    {
      "epoch": 0.5966217398914756,
      "grad_norm": 0.9085179037130472,
      "learning_rate": 3.693701923621502e-06,
      "loss": 0.1322,
      "step": 20451
    },
    {
      "epoch": 0.5966509131221192,
      "grad_norm": 0.8370162621157942,
      "learning_rate": 3.6932459036104272e-06,
      "loss": 0.1048,
      "step": 20452
    },
    {
      "epoch": 0.5966800863527627,
      "grad_norm": 1.0677576295377924,
      "learning_rate": 3.6927898952657417e-06,
      "loss": 0.1245,
      "step": 20453
    },
    {
      "epoch": 0.5967092595834063,
      "grad_norm": 0.8045087334684989,
      "learning_rate": 3.6923338985915146e-06,
      "loss": 0.1382,
      "step": 20454
    },
    {
      "epoch": 0.5967384328140498,
      "grad_norm": 0.9289775206330065,
      "learning_rate": 3.691877913591818e-06,
      "loss": 0.1314,
      "step": 20455
    },
    {
      "epoch": 0.5967676060446934,
      "grad_norm": 0.7931801109386174,
      "learning_rate": 3.691421940270725e-06,
      "loss": 0.1128,
      "step": 20456
    },
    {
      "epoch": 0.5967967792753369,
      "grad_norm": 0.7868190286090775,
      "learning_rate": 3.6909659786323016e-06,
      "loss": 0.1203,
      "step": 20457
    },
    {
      "epoch": 0.5968259525059805,
      "grad_norm": 0.9195037410372153,
      "learning_rate": 3.6905100286806228e-06,
      "loss": 0.1307,
      "step": 20458
    },
    {
      "epoch": 0.596855125736624,
      "grad_norm": 0.9351941867488012,
      "learning_rate": 3.6900540904197583e-06,
      "loss": 0.1211,
      "step": 20459
    },
    {
      "epoch": 0.5968842989672677,
      "grad_norm": 0.7102082820615029,
      "learning_rate": 3.689598163853779e-06,
      "loss": 0.1374,
      "step": 20460
    },
    {
      "epoch": 0.5969134721979112,
      "grad_norm": 1.232348357686735,
      "learning_rate": 3.6891422489867535e-06,
      "loss": 0.1262,
      "step": 20461
    },
    {
      "epoch": 0.5969426454285548,
      "grad_norm": 1.6203371497122976,
      "learning_rate": 3.688686345822753e-06,
      "loss": 0.1398,
      "step": 20462
    },
    {
      "epoch": 0.5969718186591983,
      "grad_norm": 1.0860417899502368,
      "learning_rate": 3.6882304543658465e-06,
      "loss": 0.1697,
      "step": 20463
    },
    {
      "epoch": 0.5970009918898419,
      "grad_norm": 0.8871256628348626,
      "learning_rate": 3.6877745746201064e-06,
      "loss": 0.1386,
      "step": 20464
    },
    {
      "epoch": 0.5970301651204855,
      "grad_norm": 0.7836428040188675,
      "learning_rate": 3.6873187065896033e-06,
      "loss": 0.1205,
      "step": 20465
    },
    {
      "epoch": 0.597059338351129,
      "grad_norm": 0.9965053353182319,
      "learning_rate": 3.686862850278403e-06,
      "loss": 0.1118,
      "step": 20466
    },
    {
      "epoch": 0.5970885115817726,
      "grad_norm": 0.9808097879999046,
      "learning_rate": 3.6864070056905786e-06,
      "loss": 0.1331,
      "step": 20467
    },
    {
      "epoch": 0.5971176848124161,
      "grad_norm": 0.909625164618019,
      "learning_rate": 3.6859511728302006e-06,
      "loss": 0.1262,
      "step": 20468
    },
    {
      "epoch": 0.5971468580430597,
      "grad_norm": 0.7804522237800623,
      "learning_rate": 3.6854953517013326e-06,
      "loss": 0.1348,
      "step": 20469
    },
    {
      "epoch": 0.5971760312737032,
      "grad_norm": 0.9893818890694739,
      "learning_rate": 3.685039542308052e-06,
      "loss": 0.1477,
      "step": 20470
    },
    {
      "epoch": 0.5972052045043468,
      "grad_norm": 0.8875647411801023,
      "learning_rate": 3.684583744654423e-06,
      "loss": 0.1088,
      "step": 20471
    },
    {
      "epoch": 0.5972343777349903,
      "grad_norm": 0.7961423523856973,
      "learning_rate": 3.6841279587445165e-06,
      "loss": 0.1369,
      "step": 20472
    },
    {
      "epoch": 0.5972635509656339,
      "grad_norm": 1.0075475240222815,
      "learning_rate": 3.6836721845824032e-06,
      "loss": 0.1452,
      "step": 20473
    },
    {
      "epoch": 0.5972927241962775,
      "grad_norm": 0.9706868154006772,
      "learning_rate": 3.6832164221721465e-06,
      "loss": 0.1286,
      "step": 20474
    },
    {
      "epoch": 0.5973218974269211,
      "grad_norm": 0.6751922044837719,
      "learning_rate": 3.682760671517823e-06,
      "loss": 0.1046,
      "step": 20475
    },
    {
      "epoch": 0.5973510706575647,
      "grad_norm": 0.9000148407740457,
      "learning_rate": 3.6823049326234963e-06,
      "loss": 0.1278,
      "step": 20476
    },
    {
      "epoch": 0.5973802438882082,
      "grad_norm": 1.144890886810307,
      "learning_rate": 3.6818492054932363e-06,
      "loss": 0.1329,
      "step": 20477
    },
    {
      "epoch": 0.5974094171188518,
      "grad_norm": 1.039175940067144,
      "learning_rate": 3.6813934901311134e-06,
      "loss": 0.1077,
      "step": 20478
    },
    {
      "epoch": 0.5974385903494953,
      "grad_norm": 0.7314679239854645,
      "learning_rate": 3.6809377865411933e-06,
      "loss": 0.1235,
      "step": 20479
    },
    {
      "epoch": 0.5974677635801389,
      "grad_norm": 0.8729613152525328,
      "learning_rate": 3.6804820947275444e-06,
      "loss": 0.1319,
      "step": 20480
    },
    {
      "epoch": 0.5974969368107824,
      "grad_norm": 0.785173835529341,
      "learning_rate": 3.680026414694238e-06,
      "loss": 0.1206,
      "step": 20481
    },
    {
      "epoch": 0.597526110041426,
      "grad_norm": 1.184177483031644,
      "learning_rate": 3.679570746445341e-06,
      "loss": 0.122,
      "step": 20482
    },
    {
      "epoch": 0.5975552832720695,
      "grad_norm": 1.1185632815041635,
      "learning_rate": 3.6791150899849215e-06,
      "loss": 0.1345,
      "step": 20483
    },
    {
      "epoch": 0.5975844565027131,
      "grad_norm": 0.9808900238407227,
      "learning_rate": 3.6786594453170467e-06,
      "loss": 0.1472,
      "step": 20484
    },
    {
      "epoch": 0.5976136297333566,
      "grad_norm": 0.8160310435880171,
      "learning_rate": 3.678203812445784e-06,
      "loss": 0.1449,
      "step": 20485
    },
    {
      "epoch": 0.5976428029640002,
      "grad_norm": 0.8488171059555756,
      "learning_rate": 3.677748191375202e-06,
      "loss": 0.1224,
      "step": 20486
    },
    {
      "epoch": 0.5976719761946439,
      "grad_norm": 0.7615093566664713,
      "learning_rate": 3.67729258210937e-06,
      "loss": 0.1262,
      "step": 20487
    },
    {
      "epoch": 0.5977011494252874,
      "grad_norm": 1.0336629755885731,
      "learning_rate": 3.6768369846523534e-06,
      "loss": 0.1148,
      "step": 20488
    },
    {
      "epoch": 0.597730322655931,
      "grad_norm": 0.9502916717067837,
      "learning_rate": 3.6763813990082205e-06,
      "loss": 0.1562,
      "step": 20489
    },
    {
      "epoch": 0.5977594958865745,
      "grad_norm": 0.9356259514588079,
      "learning_rate": 3.675925825181039e-06,
      "loss": 0.1532,
      "step": 20490
    },
    {
      "epoch": 0.5977886691172181,
      "grad_norm": 1.0665564011160675,
      "learning_rate": 3.675470263174875e-06,
      "loss": 0.1394,
      "step": 20491
    },
    {
      "epoch": 0.5978178423478616,
      "grad_norm": 0.8358045200038936,
      "learning_rate": 3.6750147129937954e-06,
      "loss": 0.1385,
      "step": 20492
    },
    {
      "epoch": 0.5978470155785052,
      "grad_norm": 0.6542279822197059,
      "learning_rate": 3.6745591746418687e-06,
      "loss": 0.1317,
      "step": 20493
    },
    {
      "epoch": 0.5978761888091487,
      "grad_norm": 0.7857079650083013,
      "learning_rate": 3.6741036481231618e-06,
      "loss": 0.1396,
      "step": 20494
    },
    {
      "epoch": 0.5979053620397923,
      "grad_norm": 0.9921570766625235,
      "learning_rate": 3.673648133441742e-06,
      "loss": 0.1661,
      "step": 20495
    },
    {
      "epoch": 0.5979345352704358,
      "grad_norm": 0.7417332212474258,
      "learning_rate": 3.673192630601673e-06,
      "loss": 0.1136,
      "step": 20496
    },
    {
      "epoch": 0.5979637085010794,
      "grad_norm": 0.8751843957608968,
      "learning_rate": 3.672737139607024e-06,
      "loss": 0.1314,
      "step": 20497
    },
    {
      "epoch": 0.5979928817317229,
      "grad_norm": 0.9085993574172754,
      "learning_rate": 3.6722816604618603e-06,
      "loss": 0.1469,
      "step": 20498
    },
    {
      "epoch": 0.5980220549623665,
      "grad_norm": 1.0359512202969643,
      "learning_rate": 3.6718261931702504e-06,
      "loss": 0.1243,
      "step": 20499
    },
    {
      "epoch": 0.59805122819301,
      "grad_norm": 0.95895574487634,
      "learning_rate": 3.6713707377362594e-06,
      "loss": 0.1032,
      "step": 20500
    },
    {
      "epoch": 0.5980804014236537,
      "grad_norm": 1.2815863745803195,
      "learning_rate": 3.6709152941639526e-06,
      "loss": 0.1241,
      "step": 20501
    },
    {
      "epoch": 0.5981095746542973,
      "grad_norm": 0.8772051140004502,
      "learning_rate": 3.6704598624573967e-06,
      "loss": 0.1327,
      "step": 20502
    },
    {
      "epoch": 0.5981387478849408,
      "grad_norm": 0.8344933951998865,
      "learning_rate": 3.670004442620659e-06,
      "loss": 0.1216,
      "step": 20503
    },
    {
      "epoch": 0.5981679211155844,
      "grad_norm": 0.9100034234769037,
      "learning_rate": 3.6695490346578007e-06,
      "loss": 0.1233,
      "step": 20504
    },
    {
      "epoch": 0.5981970943462279,
      "grad_norm": 0.7762673031814327,
      "learning_rate": 3.6690936385728943e-06,
      "loss": 0.102,
      "step": 20505
    },
    {
      "epoch": 0.5982262675768715,
      "grad_norm": 0.7129530037666477,
      "learning_rate": 3.668638254370001e-06,
      "loss": 0.1178,
      "step": 20506
    },
    {
      "epoch": 0.598255440807515,
      "grad_norm": 1.052894058234126,
      "learning_rate": 3.668182882053188e-06,
      "loss": 0.1162,
      "step": 20507
    },
    {
      "epoch": 0.5982846140381586,
      "grad_norm": 0.7718272057497901,
      "learning_rate": 3.667727521626521e-06,
      "loss": 0.1546,
      "step": 20508
    },
    {
      "epoch": 0.5983137872688021,
      "grad_norm": 0.821713021135635,
      "learning_rate": 3.667272173094063e-06,
      "loss": 0.1322,
      "step": 20509
    },
    {
      "epoch": 0.5983429604994457,
      "grad_norm": 0.8207207203971407,
      "learning_rate": 3.666816836459881e-06,
      "loss": 0.1062,
      "step": 20510
    },
    {
      "epoch": 0.5983721337300892,
      "grad_norm": 0.8450759428460285,
      "learning_rate": 3.6663615117280405e-06,
      "loss": 0.1352,
      "step": 20511
    },
    {
      "epoch": 0.5984013069607328,
      "grad_norm": 0.7236166763696078,
      "learning_rate": 3.6659061989026057e-06,
      "loss": 0.1138,
      "step": 20512
    },
    {
      "epoch": 0.5984304801913763,
      "grad_norm": 0.708566754879243,
      "learning_rate": 3.6654508979876433e-06,
      "loss": 0.1234,
      "step": 20513
    },
    {
      "epoch": 0.5984596534220199,
      "grad_norm": 0.8334590853831855,
      "learning_rate": 3.6649956089872163e-06,
      "loss": 0.1172,
      "step": 20514
    },
    {
      "epoch": 0.5984888266526636,
      "grad_norm": 0.9573498502570469,
      "learning_rate": 3.6645403319053885e-06,
      "loss": 0.1459,
      "step": 20515
    },
    {
      "epoch": 0.5985179998833071,
      "grad_norm": 0.9034634695812083,
      "learning_rate": 3.664085066746226e-06,
      "loss": 0.1428,
      "step": 20516
    },
    {
      "epoch": 0.5985471731139507,
      "grad_norm": 1.005677962838466,
      "learning_rate": 3.6636298135137945e-06,
      "loss": 0.1189,
      "step": 20517
    },
    {
      "epoch": 0.5985763463445942,
      "grad_norm": 0.8234564040175449,
      "learning_rate": 3.663174572212156e-06,
      "loss": 0.1008,
      "step": 20518
    },
    {
      "epoch": 0.5986055195752378,
      "grad_norm": 0.8627707595548206,
      "learning_rate": 3.6627193428453755e-06,
      "loss": 0.1269,
      "step": 20519
    },
    {
      "epoch": 0.5986346928058813,
      "grad_norm": 0.9130325309868798,
      "learning_rate": 3.6622641254175193e-06,
      "loss": 0.1239,
      "step": 20520
    },
    {
      "epoch": 0.5986638660365249,
      "grad_norm": 1.0175108021003532,
      "learning_rate": 3.6618089199326477e-06,
      "loss": 0.1027,
      "step": 20521
    },
    {
      "epoch": 0.5986930392671684,
      "grad_norm": 1.0057138131731775,
      "learning_rate": 3.661353726394826e-06,
      "loss": 0.122,
      "step": 20522
    },
    {
      "epoch": 0.598722212497812,
      "grad_norm": 0.9933031439251812,
      "learning_rate": 3.6608985448081204e-06,
      "loss": 0.1498,
      "step": 20523
    },
    {
      "epoch": 0.5987513857284555,
      "grad_norm": 0.8647899552852462,
      "learning_rate": 3.660443375176592e-06,
      "loss": 0.112,
      "step": 20524
    },
    {
      "epoch": 0.5987805589590991,
      "grad_norm": 0.7840596306832815,
      "learning_rate": 3.6599882175043074e-06,
      "loss": 0.1216,
      "step": 20525
    },
    {
      "epoch": 0.5988097321897426,
      "grad_norm": 0.8525807806613944,
      "learning_rate": 3.659533071795326e-06,
      "loss": 0.1341,
      "step": 20526
    },
    {
      "epoch": 0.5988389054203862,
      "grad_norm": 0.8781698906984328,
      "learning_rate": 3.659077938053714e-06,
      "loss": 0.1418,
      "step": 20527
    },
    {
      "epoch": 0.5988680786510299,
      "grad_norm": 0.9022492130214876,
      "learning_rate": 3.6586228162835326e-06,
      "loss": 0.1437,
      "step": 20528
    },
    {
      "epoch": 0.5988972518816734,
      "grad_norm": 0.6684622309029933,
      "learning_rate": 3.6581677064888476e-06,
      "loss": 0.1105,
      "step": 20529
    },
    {
      "epoch": 0.598926425112317,
      "grad_norm": 0.8036002649106786,
      "learning_rate": 3.6577126086737225e-06,
      "loss": 0.1027,
      "step": 20530
    },
    {
      "epoch": 0.5989555983429605,
      "grad_norm": 1.061772593087311,
      "learning_rate": 3.657257522842217e-06,
      "loss": 0.1421,
      "step": 20531
    },
    {
      "epoch": 0.5989847715736041,
      "grad_norm": 0.9983704893430387,
      "learning_rate": 3.6568024489983967e-06,
      "loss": 0.1146,
      "step": 20532
    },
    {
      "epoch": 0.5990139448042476,
      "grad_norm": 0.9157293895801865,
      "learning_rate": 3.6563473871463238e-06,
      "loss": 0.113,
      "step": 20533
    },
    {
      "epoch": 0.5990431180348912,
      "grad_norm": 0.8104014835120488,
      "learning_rate": 3.655892337290058e-06,
      "loss": 0.1291,
      "step": 20534
    },
    {
      "epoch": 0.5990722912655347,
      "grad_norm": 0.7866470717924987,
      "learning_rate": 3.6554372994336674e-06,
      "loss": 0.117,
      "step": 20535
    },
    {
      "epoch": 0.5991014644961783,
      "grad_norm": 0.7352142384835508,
      "learning_rate": 3.65498227358121e-06,
      "loss": 0.0957,
      "step": 20536
    },
    {
      "epoch": 0.5991306377268218,
      "grad_norm": 0.8191234449972371,
      "learning_rate": 3.6545272597367507e-06,
      "loss": 0.1209,
      "step": 20537
    },
    {
      "epoch": 0.5991598109574654,
      "grad_norm": 0.9268517631101401,
      "learning_rate": 3.654072257904352e-06,
      "loss": 0.146,
      "step": 20538
    },
    {
      "epoch": 0.599188984188109,
      "grad_norm": 0.8001956900343967,
      "learning_rate": 3.6536172680880732e-06,
      "loss": 0.1335,
      "step": 20539
    },
    {
      "epoch": 0.5992181574187525,
      "grad_norm": 0.7775463995139918,
      "learning_rate": 3.653162290291977e-06,
      "loss": 0.1082,
      "step": 20540
    },
    {
      "epoch": 0.5992473306493961,
      "grad_norm": 0.7418387145631792,
      "learning_rate": 3.652707324520127e-06,
      "loss": 0.0965,
      "step": 20541
    },
    {
      "epoch": 0.5992765038800397,
      "grad_norm": 0.8001930971560922,
      "learning_rate": 3.6522523707765856e-06,
      "loss": 0.1274,
      "step": 20542
    },
    {
      "epoch": 0.5993056771106833,
      "grad_norm": 0.7296196533427095,
      "learning_rate": 3.6517974290654136e-06,
      "loss": 0.122,
      "step": 20543
    },
    {
      "epoch": 0.5993348503413268,
      "grad_norm": 0.8544467595532104,
      "learning_rate": 3.6513424993906717e-06,
      "loss": 0.1318,
      "step": 20544
    },
    {
      "epoch": 0.5993640235719704,
      "grad_norm": 0.9430329215346632,
      "learning_rate": 3.6508875817564214e-06,
      "loss": 0.146,
      "step": 20545
    },
    {
      "epoch": 0.5993931968026139,
      "grad_norm": 0.6930544020153473,
      "learning_rate": 3.6504326761667242e-06,
      "loss": 0.1126,
      "step": 20546
    },
    {
      "epoch": 0.5994223700332575,
      "grad_norm": 0.881379971953356,
      "learning_rate": 3.6499777826256434e-06,
      "loss": 0.1334,
      "step": 20547
    },
    {
      "epoch": 0.599451543263901,
      "grad_norm": 1.0946395253591987,
      "learning_rate": 3.649522901137238e-06,
      "loss": 0.1444,
      "step": 20548
    },
    {
      "epoch": 0.5994807164945446,
      "grad_norm": 0.9284954408650453,
      "learning_rate": 3.64906803170557e-06,
      "loss": 0.1341,
      "step": 20549
    },
    {
      "epoch": 0.5995098897251881,
      "grad_norm": 0.6966842165615734,
      "learning_rate": 3.6486131743347007e-06,
      "loss": 0.114,
      "step": 20550
    },
    {
      "epoch": 0.5995390629558317,
      "grad_norm": 0.8204866663581304,
      "learning_rate": 3.6481583290286894e-06,
      "loss": 0.1153,
      "step": 20551
    },
    {
      "epoch": 0.5995682361864753,
      "grad_norm": 1.21413720074367,
      "learning_rate": 3.647703495791597e-06,
      "loss": 0.1387,
      "step": 20552
    },
    {
      "epoch": 0.5995974094171188,
      "grad_norm": 0.8178389200924865,
      "learning_rate": 3.647248674627486e-06,
      "loss": 0.1109,
      "step": 20553
    },
    {
      "epoch": 0.5996265826477624,
      "grad_norm": 0.7348694448942786,
      "learning_rate": 3.6467938655404155e-06,
      "loss": 0.1168,
      "step": 20554
    },
    {
      "epoch": 0.599655755878406,
      "grad_norm": 0.7679002210842868,
      "learning_rate": 3.646339068534448e-06,
      "loss": 0.1152,
      "step": 20555
    },
    {
      "epoch": 0.5996849291090496,
      "grad_norm": 1.3187101764265343,
      "learning_rate": 3.645884283613641e-06,
      "loss": 0.122,
      "step": 20556
    },
    {
      "epoch": 0.5997141023396931,
      "grad_norm": 0.9132659570266782,
      "learning_rate": 3.6454295107820557e-06,
      "loss": 0.1006,
      "step": 20557
    },
    {
      "epoch": 0.5997432755703367,
      "grad_norm": 0.7024967893104602,
      "learning_rate": 3.6449747500437517e-06,
      "loss": 0.1206,
      "step": 20558
    },
    {
      "epoch": 0.5997724488009802,
      "grad_norm": 0.7835023542497151,
      "learning_rate": 3.64452000140279e-06,
      "loss": 0.1084,
      "step": 20559
    },
    {
      "epoch": 0.5998016220316238,
      "grad_norm": 0.891525483631285,
      "learning_rate": 3.6440652648632314e-06,
      "loss": 0.1001,
      "step": 20560
    },
    {
      "epoch": 0.5998307952622673,
      "grad_norm": 1.001478933931435,
      "learning_rate": 3.6436105404291334e-06,
      "loss": 0.1267,
      "step": 20561
    },
    {
      "epoch": 0.5998599684929109,
      "grad_norm": 0.8148258392322149,
      "learning_rate": 3.643155828104557e-06,
      "loss": 0.1179,
      "step": 20562
    },
    {
      "epoch": 0.5998891417235545,
      "grad_norm": 0.9157434882122585,
      "learning_rate": 3.642701127893562e-06,
      "loss": 0.1093,
      "step": 20563
    },
    {
      "epoch": 0.599918314954198,
      "grad_norm": 0.8773445146079626,
      "learning_rate": 3.6422464398002044e-06,
      "loss": 0.1163,
      "step": 20564
    },
    {
      "epoch": 0.5999474881848416,
      "grad_norm": 0.8994101692166602,
      "learning_rate": 3.6417917638285497e-06,
      "loss": 0.1216,
      "step": 20565
    },
    {
      "epoch": 0.5999766614154851,
      "grad_norm": 0.816099564080936,
      "learning_rate": 3.641337099982653e-06,
      "loss": 0.1395,
      "step": 20566
    },
    {
      "epoch": 0.6000058346461287,
      "grad_norm": 0.8183404442908826,
      "learning_rate": 3.6408824482665744e-06,
      "loss": 0.1194,
      "step": 20567
    },
    {
      "epoch": 0.6000350078767722,
      "grad_norm": 1.1391397282549864,
      "learning_rate": 3.640427808684374e-06,
      "loss": 0.1339,
      "step": 20568
    },
    {
      "epoch": 0.6000641811074159,
      "grad_norm": 0.829170338175808,
      "learning_rate": 3.639973181240108e-06,
      "loss": 0.1085,
      "step": 20569
    },
    {
      "epoch": 0.6000933543380594,
      "grad_norm": 0.7001107993042314,
      "learning_rate": 3.6395185659378357e-06,
      "loss": 0.1108,
      "step": 20570
    },
    {
      "epoch": 0.600122527568703,
      "grad_norm": 0.9894333483873435,
      "learning_rate": 3.6390639627816182e-06,
      "loss": 0.1252,
      "step": 20571
    },
    {
      "epoch": 0.6001517007993465,
      "grad_norm": 0.6801080708384538,
      "learning_rate": 3.638609371775512e-06,
      "loss": 0.1192,
      "step": 20572
    },
    {
      "epoch": 0.6001808740299901,
      "grad_norm": 0.8665406353231355,
      "learning_rate": 3.638154792923578e-06,
      "loss": 0.1328,
      "step": 20573
    },
    {
      "epoch": 0.6002100472606337,
      "grad_norm": 0.7497171878377388,
      "learning_rate": 3.6377002262298726e-06,
      "loss": 0.1121,
      "step": 20574
    },
    {
      "epoch": 0.6002392204912772,
      "grad_norm": 0.8700510828837275,
      "learning_rate": 3.637245671698454e-06,
      "loss": 0.1441,
      "step": 20575
    },
    {
      "epoch": 0.6002683937219208,
      "grad_norm": 0.8163394810681037,
      "learning_rate": 3.636791129333379e-06,
      "loss": 0.135,
      "step": 20576
    },
    {
      "epoch": 0.6002975669525643,
      "grad_norm": 0.8931486550626689,
      "learning_rate": 3.6363365991387102e-06,
      "loss": 0.1371,
      "step": 20577
    },
    {
      "epoch": 0.6003267401832079,
      "grad_norm": 0.8836632685717212,
      "learning_rate": 3.6358820811185015e-06,
      "loss": 0.1207,
      "step": 20578
    },
    {
      "epoch": 0.6003559134138514,
      "grad_norm": 0.6778682316577336,
      "learning_rate": 3.6354275752768114e-06,
      "loss": 0.1073,
      "step": 20579
    },
    {
      "epoch": 0.600385086644495,
      "grad_norm": 0.7449842815807373,
      "learning_rate": 3.6349730816176996e-06,
      "loss": 0.1326,
      "step": 20580
    },
    {
      "epoch": 0.6004142598751385,
      "grad_norm": 0.9601133893833005,
      "learning_rate": 3.6345186001452215e-06,
      "loss": 0.1359,
      "step": 20581
    },
    {
      "epoch": 0.6004434331057822,
      "grad_norm": 0.7274108088868823,
      "learning_rate": 3.634064130863434e-06,
      "loss": 0.1064,
      "step": 20582
    },
    {
      "epoch": 0.6004726063364257,
      "grad_norm": 0.7576625059967385,
      "learning_rate": 3.6336096737763964e-06,
      "loss": 0.1441,
      "step": 20583
    },
    {
      "epoch": 0.6005017795670693,
      "grad_norm": 0.9506553708409862,
      "learning_rate": 3.633155228888166e-06,
      "loss": 0.1605,
      "step": 20584
    },
    {
      "epoch": 0.6005309527977128,
      "grad_norm": 1.045133038241358,
      "learning_rate": 3.6327007962028003e-06,
      "loss": 0.1532,
      "step": 20585
    },
    {
      "epoch": 0.6005601260283564,
      "grad_norm": 0.8065056763539316,
      "learning_rate": 3.6322463757243554e-06,
      "loss": 0.1353,
      "step": 20586
    },
    {
      "epoch": 0.600589299259,
      "grad_norm": 0.7090430509265934,
      "learning_rate": 3.631791967456887e-06,
      "loss": 0.1425,
      "step": 20587
    },
    {
      "epoch": 0.6006184724896435,
      "grad_norm": 0.9132580952968826,
      "learning_rate": 3.631337571404453e-06,
      "loss": 0.1194,
      "step": 20588
    },
    {
      "epoch": 0.6006476457202871,
      "grad_norm": 0.9152231027887684,
      "learning_rate": 3.6308831875711115e-06,
      "loss": 0.1243,
      "step": 20589
    },
    {
      "epoch": 0.6006768189509306,
      "grad_norm": 1.219974233729985,
      "learning_rate": 3.6304288159609187e-06,
      "loss": 0.1263,
      "step": 20590
    },
    {
      "epoch": 0.6007059921815742,
      "grad_norm": 0.7423868034816976,
      "learning_rate": 3.6299744565779294e-06,
      "loss": 0.1174,
      "step": 20591
    },
    {
      "epoch": 0.6007351654122177,
      "grad_norm": 0.9292507963557238,
      "learning_rate": 3.6295201094262013e-06,
      "loss": 0.1253,
      "step": 20592
    },
    {
      "epoch": 0.6007643386428613,
      "grad_norm": 1.0896664292755547,
      "learning_rate": 3.6290657745097917e-06,
      "loss": 0.1371,
      "step": 20593
    },
    {
      "epoch": 0.6007935118735048,
      "grad_norm": 0.808847616529553,
      "learning_rate": 3.628611451832752e-06,
      "loss": 0.1339,
      "step": 20594
    },
    {
      "epoch": 0.6008226851041484,
      "grad_norm": 0.7392848472867324,
      "learning_rate": 3.6281571413991458e-06,
      "loss": 0.1236,
      "step": 20595
    },
    {
      "epoch": 0.600851858334792,
      "grad_norm": 0.9715254412626044,
      "learning_rate": 3.6277028432130235e-06,
      "loss": 0.111,
      "step": 20596
    },
    {
      "epoch": 0.6008810315654356,
      "grad_norm": 0.9161313724467532,
      "learning_rate": 3.6272485572784426e-06,
      "loss": 0.1095,
      "step": 20597
    },
    {
      "epoch": 0.6009102047960792,
      "grad_norm": 1.1380091667837064,
      "learning_rate": 3.6267942835994607e-06,
      "loss": 0.1086,
      "step": 20598
    },
    {
      "epoch": 0.6009393780267227,
      "grad_norm": 0.7336744295507781,
      "learning_rate": 3.6263400221801292e-06,
      "loss": 0.1217,
      "step": 20599
    },
    {
      "epoch": 0.6009685512573663,
      "grad_norm": 0.7615605502921807,
      "learning_rate": 3.625885773024506e-06,
      "loss": 0.1268,
      "step": 20600
    },
    {
      "epoch": 0.6009977244880098,
      "grad_norm": 0.7294107060867674,
      "learning_rate": 3.6254315361366477e-06,
      "loss": 0.1173,
      "step": 20601
    },
    {
      "epoch": 0.6010268977186534,
      "grad_norm": 0.6880247447368646,
      "learning_rate": 3.6249773115206085e-06,
      "loss": 0.1162,
      "step": 20602
    },
    {
      "epoch": 0.6010560709492969,
      "grad_norm": 0.8659064709018958,
      "learning_rate": 3.624523099180444e-06,
      "loss": 0.11,
      "step": 20603
    },
    {
      "epoch": 0.6010852441799405,
      "grad_norm": 0.9030016504160433,
      "learning_rate": 3.6240688991202085e-06,
      "loss": 0.1214,
      "step": 20604
    },
    {
      "epoch": 0.601114417410584,
      "grad_norm": 0.6965923949101781,
      "learning_rate": 3.623614711343957e-06,
      "loss": 0.1261,
      "step": 20605
    },
    {
      "epoch": 0.6011435906412276,
      "grad_norm": 0.8626239518223667,
      "learning_rate": 3.6231605358557442e-06,
      "loss": 0.1231,
      "step": 20606
    },
    {
      "epoch": 0.6011727638718711,
      "grad_norm": 1.1416879445210433,
      "learning_rate": 3.622706372659627e-06,
      "loss": 0.1196,
      "step": 20607
    },
    {
      "epoch": 0.6012019371025147,
      "grad_norm": 0.7884215630810347,
      "learning_rate": 3.622252221759658e-06,
      "loss": 0.1346,
      "step": 20608
    },
    {
      "epoch": 0.6012311103331583,
      "grad_norm": 0.794585350162656,
      "learning_rate": 3.621798083159892e-06,
      "loss": 0.1181,
      "step": 20609
    },
    {
      "epoch": 0.6012602835638019,
      "grad_norm": 0.9790855397077731,
      "learning_rate": 3.621343956864385e-06,
      "loss": 0.1111,
      "step": 20610
    },
    {
      "epoch": 0.6012894567944455,
      "grad_norm": 0.7805984597896618,
      "learning_rate": 3.6208898428771887e-06,
      "loss": 0.1342,
      "step": 20611
    },
    {
      "epoch": 0.601318630025089,
      "grad_norm": 1.0142421179477181,
      "learning_rate": 3.620435741202357e-06,
      "loss": 0.1415,
      "step": 20612
    },
    {
      "epoch": 0.6013478032557326,
      "grad_norm": 1.05410386310876,
      "learning_rate": 3.6199816518439477e-06,
      "loss": 0.1566,
      "step": 20613
    },
    {
      "epoch": 0.6013769764863761,
      "grad_norm": 1.1189031920338837,
      "learning_rate": 3.6195275748060125e-06,
      "loss": 0.1252,
      "step": 20614
    },
    {
      "epoch": 0.6014061497170197,
      "grad_norm": 1.0135170322610298,
      "learning_rate": 3.6190735100926066e-06,
      "loss": 0.1434,
      "step": 20615
    },
    {
      "epoch": 0.6014353229476632,
      "grad_norm": 1.0868049012424301,
      "learning_rate": 3.6186194577077817e-06,
      "loss": 0.1402,
      "step": 20616
    },
    {
      "epoch": 0.6014644961783068,
      "grad_norm": 0.8598214788839132,
      "learning_rate": 3.6181654176555927e-06,
      "loss": 0.1119,
      "step": 20617
    },
    {
      "epoch": 0.6014936694089503,
      "grad_norm": 0.7153551213611583,
      "learning_rate": 3.6177113899400916e-06,
      "loss": 0.1075,
      "step": 20618
    },
    {
      "epoch": 0.6015228426395939,
      "grad_norm": 0.7031827095581438,
      "learning_rate": 3.617257374565335e-06,
      "loss": 0.1181,
      "step": 20619
    },
    {
      "epoch": 0.6015520158702374,
      "grad_norm": 1.2676887347710046,
      "learning_rate": 3.6168033715353747e-06,
      "loss": 0.1282,
      "step": 20620
    },
    {
      "epoch": 0.601581189100881,
      "grad_norm": 0.9495079511916384,
      "learning_rate": 3.6163493808542628e-06,
      "loss": 0.1189,
      "step": 20621
    },
    {
      "epoch": 0.6016103623315245,
      "grad_norm": 0.7168617426368551,
      "learning_rate": 3.6158954025260532e-06,
      "loss": 0.1209,
      "step": 20622
    },
    {
      "epoch": 0.6016395355621682,
      "grad_norm": 0.6744299998705775,
      "learning_rate": 3.6154414365548008e-06,
      "loss": 0.1355,
      "step": 20623
    },
    {
      "epoch": 0.6016687087928118,
      "grad_norm": 1.3458883116510916,
      "learning_rate": 3.614987482944553e-06,
      "loss": 0.1359,
      "step": 20624
    },
    {
      "epoch": 0.6016978820234553,
      "grad_norm": 1.1403406960405245,
      "learning_rate": 3.61453354169937e-06,
      "loss": 0.13,
      "step": 20625
    },
    {
      "epoch": 0.6017270552540989,
      "grad_norm": 0.8954812012347784,
      "learning_rate": 3.614079612823299e-06,
      "loss": 0.1182,
      "step": 20626
    },
    {
      "epoch": 0.6017562284847424,
      "grad_norm": 0.755592231970031,
      "learning_rate": 3.613625696320394e-06,
      "loss": 0.14,
      "step": 20627
    },
    {
      "epoch": 0.601785401715386,
      "grad_norm": 0.6368978214706259,
      "learning_rate": 3.61317179219471e-06,
      "loss": 0.1177,
      "step": 20628
    },
    {
      "epoch": 0.6018145749460295,
      "grad_norm": 0.8581482083463947,
      "learning_rate": 3.6127179004502953e-06,
      "loss": 0.161,
      "step": 20629
    },
    {
      "epoch": 0.6018437481766731,
      "grad_norm": 1.0527574505587558,
      "learning_rate": 3.6122640210912042e-06,
      "loss": 0.141,
      "step": 20630
    },
    {
      "epoch": 0.6018729214073166,
      "grad_norm": 0.9176333557272351,
      "learning_rate": 3.6118101541214887e-06,
      "loss": 0.1406,
      "step": 20631
    },
    {
      "epoch": 0.6019020946379602,
      "grad_norm": 0.8515930027778424,
      "learning_rate": 3.611356299545201e-06,
      "loss": 0.109,
      "step": 20632
    },
    {
      "epoch": 0.6019312678686037,
      "grad_norm": 0.7663183498739945,
      "learning_rate": 3.6109024573663938e-06,
      "loss": 0.1287,
      "step": 20633
    },
    {
      "epoch": 0.6019604410992473,
      "grad_norm": 0.7971329176308535,
      "learning_rate": 3.6104486275891166e-06,
      "loss": 0.1398,
      "step": 20634
    },
    {
      "epoch": 0.6019896143298908,
      "grad_norm": 0.847177107999471,
      "learning_rate": 3.609994810217422e-06,
      "loss": 0.127,
      "step": 20635
    },
    {
      "epoch": 0.6020187875605345,
      "grad_norm": 0.8145235229946574,
      "learning_rate": 3.6095410052553613e-06,
      "loss": 0.1334,
      "step": 20636
    },
    {
      "epoch": 0.6020479607911781,
      "grad_norm": 0.9353061638939351,
      "learning_rate": 3.609087212706989e-06,
      "loss": 0.142,
      "step": 20637
    },
    {
      "epoch": 0.6020771340218216,
      "grad_norm": 0.8241346742955289,
      "learning_rate": 3.6086334325763528e-06,
      "loss": 0.1181,
      "step": 20638
    },
    {
      "epoch": 0.6021063072524652,
      "grad_norm": 1.517520638230361,
      "learning_rate": 3.608179664867505e-06,
      "loss": 0.1112,
      "step": 20639
    },
    {
      "epoch": 0.6021354804831087,
      "grad_norm": 0.8251323222826907,
      "learning_rate": 3.607725909584498e-06,
      "loss": 0.1191,
      "step": 20640
    },
    {
      "epoch": 0.6021646537137523,
      "grad_norm": 0.8794529914077927,
      "learning_rate": 3.6072721667313806e-06,
      "loss": 0.1348,
      "step": 20641
    },
    {
      "epoch": 0.6021938269443958,
      "grad_norm": 0.8467028458750899,
      "learning_rate": 3.606818436312204e-06,
      "loss": 0.1301,
      "step": 20642
    },
    {
      "epoch": 0.6022230001750394,
      "grad_norm": 0.8023789917132966,
      "learning_rate": 3.606364718331021e-06,
      "loss": 0.1255,
      "step": 20643
    },
    {
      "epoch": 0.6022521734056829,
      "grad_norm": 0.9670951006226213,
      "learning_rate": 3.6059110127918807e-06,
      "loss": 0.1549,
      "step": 20644
    },
    {
      "epoch": 0.6022813466363265,
      "grad_norm": 0.8424118195840142,
      "learning_rate": 3.605457319698835e-06,
      "loss": 0.1594,
      "step": 20645
    },
    {
      "epoch": 0.60231051986697,
      "grad_norm": 0.830922376239889,
      "learning_rate": 3.605003639055933e-06,
      "loss": 0.1099,
      "step": 20646
    },
    {
      "epoch": 0.6023396930976136,
      "grad_norm": 0.7870205949639671,
      "learning_rate": 3.604549970867225e-06,
      "loss": 0.1034,
      "step": 20647
    },
    {
      "epoch": 0.6023688663282571,
      "grad_norm": 0.9793539706991589,
      "learning_rate": 3.604096315136761e-06,
      "loss": 0.1329,
      "step": 20648
    },
    {
      "epoch": 0.6023980395589007,
      "grad_norm": 0.7572672185471363,
      "learning_rate": 3.6036426718685925e-06,
      "loss": 0.1444,
      "step": 20649
    },
    {
      "epoch": 0.6024272127895444,
      "grad_norm": 0.7233964453635535,
      "learning_rate": 3.6031890410667704e-06,
      "loss": 0.116,
      "step": 20650
    },
    {
      "epoch": 0.6024563860201879,
      "grad_norm": 1.1149264391767628,
      "learning_rate": 3.6027354227353417e-06,
      "loss": 0.1433,
      "step": 20651
    },
    {
      "epoch": 0.6024855592508315,
      "grad_norm": 1.0212343659979026,
      "learning_rate": 3.602281816878358e-06,
      "loss": 0.1209,
      "step": 20652
    },
    {
      "epoch": 0.602514732481475,
      "grad_norm": 1.2270577799485092,
      "learning_rate": 3.6018282234998693e-06,
      "loss": 0.1236,
      "step": 20653
    },
    {
      "epoch": 0.6025439057121186,
      "grad_norm": 0.8697683972920066,
      "learning_rate": 3.601374642603921e-06,
      "loss": 0.1274,
      "step": 20654
    },
    {
      "epoch": 0.6025730789427621,
      "grad_norm": 0.932449916295231,
      "learning_rate": 3.60092107419457e-06,
      "loss": 0.1233,
      "step": 20655
    },
    {
      "epoch": 0.6026022521734057,
      "grad_norm": 1.1722804030568308,
      "learning_rate": 3.6004675182758598e-06,
      "loss": 0.1262,
      "step": 20656
    },
    {
      "epoch": 0.6026314254040492,
      "grad_norm": 1.1566340777759807,
      "learning_rate": 3.600013974851842e-06,
      "loss": 0.109,
      "step": 20657
    },
    {
      "epoch": 0.6026605986346928,
      "grad_norm": 0.8125755871923773,
      "learning_rate": 3.5995604439265664e-06,
      "loss": 0.1371,
      "step": 20658
    },
    {
      "epoch": 0.6026897718653363,
      "grad_norm": 1.351873782568918,
      "learning_rate": 3.599106925504079e-06,
      "loss": 0.1079,
      "step": 20659
    },
    {
      "epoch": 0.6027189450959799,
      "grad_norm": 1.0619854616517461,
      "learning_rate": 3.5986534195884305e-06,
      "loss": 0.1273,
      "step": 20660
    },
    {
      "epoch": 0.6027481183266234,
      "grad_norm": 1.3283689024383327,
      "learning_rate": 3.59819992618367e-06,
      "loss": 0.1256,
      "step": 20661
    },
    {
      "epoch": 0.602777291557267,
      "grad_norm": 0.8898156437098708,
      "learning_rate": 3.597746445293846e-06,
      "loss": 0.1245,
      "step": 20662
    },
    {
      "epoch": 0.6028064647879107,
      "grad_norm": 1.1695808649292871,
      "learning_rate": 3.597292976923008e-06,
      "loss": 0.1288,
      "step": 20663
    },
    {
      "epoch": 0.6028356380185542,
      "grad_norm": 0.8282942467785808,
      "learning_rate": 3.5968395210752027e-06,
      "loss": 0.1078,
      "step": 20664
    },
    {
      "epoch": 0.6028648112491978,
      "grad_norm": 0.9183036143669286,
      "learning_rate": 3.5963860777544796e-06,
      "loss": 0.1576,
      "step": 20665
    },
    {
      "epoch": 0.6028939844798413,
      "grad_norm": 1.0451332960441726,
      "learning_rate": 3.5959326469648847e-06,
      "loss": 0.1096,
      "step": 20666
    },
    {
      "epoch": 0.6029231577104849,
      "grad_norm": 0.7834724585353079,
      "learning_rate": 3.5954792287104707e-06,
      "loss": 0.1352,
      "step": 20667
    },
    {
      "epoch": 0.6029523309411284,
      "grad_norm": 0.7969454085436715,
      "learning_rate": 3.5950258229952817e-06,
      "loss": 0.121,
      "step": 20668
    },
    {
      "epoch": 0.602981504171772,
      "grad_norm": 1.2529372948763293,
      "learning_rate": 3.5945724298233665e-06,
      "loss": 0.1425,
      "step": 20669
    },
    {
      "epoch": 0.6030106774024155,
      "grad_norm": 1.192656878933679,
      "learning_rate": 3.5941190491987745e-06,
      "loss": 0.1222,
      "step": 20670
    },
    {
      "epoch": 0.6030398506330591,
      "grad_norm": 0.6172278253839465,
      "learning_rate": 3.5936656811255484e-06,
      "loss": 0.0992,
      "step": 20671
    },
    {
      "epoch": 0.6030690238637026,
      "grad_norm": 0.9249651754572804,
      "learning_rate": 3.593212325607742e-06,
      "loss": 0.1312,
      "step": 20672
    },
    {
      "epoch": 0.6030981970943462,
      "grad_norm": 1.2429406023273408,
      "learning_rate": 3.5927589826494005e-06,
      "loss": 0.1326,
      "step": 20673
    },
    {
      "epoch": 0.6031273703249898,
      "grad_norm": 1.107180675293284,
      "learning_rate": 3.5923056522545703e-06,
      "loss": 0.1209,
      "step": 20674
    },
    {
      "epoch": 0.6031565435556333,
      "grad_norm": 0.9657972443333933,
      "learning_rate": 3.5918523344272997e-06,
      "loss": 0.1687,
      "step": 20675
    },
    {
      "epoch": 0.6031857167862769,
      "grad_norm": 1.3548453125253543,
      "learning_rate": 3.591399029171635e-06,
      "loss": 0.1263,
      "step": 20676
    },
    {
      "epoch": 0.6032148900169205,
      "grad_norm": 1.0409323225286617,
      "learning_rate": 3.5909457364916223e-06,
      "loss": 0.1313,
      "step": 20677
    },
    {
      "epoch": 0.6032440632475641,
      "grad_norm": 0.8592680051229966,
      "learning_rate": 3.59049245639131e-06,
      "loss": 0.1442,
      "step": 20678
    },
    {
      "epoch": 0.6032732364782076,
      "grad_norm": 1.1466818044190623,
      "learning_rate": 3.5900391888747455e-06,
      "loss": 0.1239,
      "step": 20679
    },
    {
      "epoch": 0.6033024097088512,
      "grad_norm": 1.1531244828518774,
      "learning_rate": 3.5895859339459753e-06,
      "loss": 0.1162,
      "step": 20680
    },
    {
      "epoch": 0.6033315829394947,
      "grad_norm": 0.9372895602273493,
      "learning_rate": 3.589132691609044e-06,
      "loss": 0.1061,
      "step": 20681
    },
    {
      "epoch": 0.6033607561701383,
      "grad_norm": 0.8773942916931872,
      "learning_rate": 3.588679461868e-06,
      "loss": 0.1228,
      "step": 20682
    },
    {
      "epoch": 0.6033899294007818,
      "grad_norm": 0.8371494965905516,
      "learning_rate": 3.5882262447268865e-06,
      "loss": 0.1364,
      "step": 20683
    },
    {
      "epoch": 0.6034191026314254,
      "grad_norm": 1.1130096353059873,
      "learning_rate": 3.587773040189754e-06,
      "loss": 0.1158,
      "step": 20684
    },
    {
      "epoch": 0.603448275862069,
      "grad_norm": 0.8665302231987151,
      "learning_rate": 3.5873198482606477e-06,
      "loss": 0.1057,
      "step": 20685
    },
    {
      "epoch": 0.6034774490927125,
      "grad_norm": 0.7844004690780885,
      "learning_rate": 3.586866668943611e-06,
      "loss": 0.1166,
      "step": 20686
    },
    {
      "epoch": 0.6035066223233561,
      "grad_norm": 1.2263462758872938,
      "learning_rate": 3.5864135022426916e-06,
      "loss": 0.1347,
      "step": 20687
    },
    {
      "epoch": 0.6035357955539996,
      "grad_norm": 1.0266241126485882,
      "learning_rate": 3.585960348161936e-06,
      "loss": 0.1182,
      "step": 20688
    },
    {
      "epoch": 0.6035649687846432,
      "grad_norm": 0.8003543521389126,
      "learning_rate": 3.585507206705386e-06,
      "loss": 0.1274,
      "step": 20689
    },
    {
      "epoch": 0.6035941420152868,
      "grad_norm": 0.948863701856177,
      "learning_rate": 3.5850540778770924e-06,
      "loss": 0.1154,
      "step": 20690
    },
    {
      "epoch": 0.6036233152459304,
      "grad_norm": 0.7486068958601275,
      "learning_rate": 3.5846009616810983e-06,
      "loss": 0.131,
      "step": 20691
    },
    {
      "epoch": 0.6036524884765739,
      "grad_norm": 0.7833005345713987,
      "learning_rate": 3.5841478581214483e-06,
      "loss": 0.1153,
      "step": 20692
    },
    {
      "epoch": 0.6036816617072175,
      "grad_norm": 0.6709724665651245,
      "learning_rate": 3.583694767202189e-06,
      "loss": 0.097,
      "step": 20693
    },
    {
      "epoch": 0.603710834937861,
      "grad_norm": 0.8574087692321917,
      "learning_rate": 3.583241688927364e-06,
      "loss": 0.1284,
      "step": 20694
    },
    {
      "epoch": 0.6037400081685046,
      "grad_norm": 1.0161858743358603,
      "learning_rate": 3.582788623301018e-06,
      "loss": 0.132,
      "step": 20695
    },
    {
      "epoch": 0.6037691813991481,
      "grad_norm": 1.0026121346936068,
      "learning_rate": 3.582335570327198e-06,
      "loss": 0.1242,
      "step": 20696
    },
    {
      "epoch": 0.6037983546297917,
      "grad_norm": 0.7721155276758169,
      "learning_rate": 3.581882530009948e-06,
      "loss": 0.14,
      "step": 20697
    },
    {
      "epoch": 0.6038275278604353,
      "grad_norm": 0.7625042643757383,
      "learning_rate": 3.581429502353312e-06,
      "loss": 0.1484,
      "step": 20698
    },
    {
      "epoch": 0.6038567010910788,
      "grad_norm": 0.8184795323443229,
      "learning_rate": 3.580976487361334e-06,
      "loss": 0.1267,
      "step": 20699
    },
    {
      "epoch": 0.6038858743217224,
      "grad_norm": 0.7152635396360302,
      "learning_rate": 3.580523485038061e-06,
      "loss": 0.1052,
      "step": 20700
    },
    {
      "epoch": 0.6039150475523659,
      "grad_norm": 0.7071465680886245,
      "learning_rate": 3.580070495387532e-06,
      "loss": 0.1208,
      "step": 20701
    },
    {
      "epoch": 0.6039442207830095,
      "grad_norm": 1.0458616600763395,
      "learning_rate": 3.579617518413798e-06,
      "loss": 0.1493,
      "step": 20702
    },
    {
      "epoch": 0.603973394013653,
      "grad_norm": 0.7926599590148983,
      "learning_rate": 3.579164554120898e-06,
      "loss": 0.1176,
      "step": 20703
    },
    {
      "epoch": 0.6040025672442967,
      "grad_norm": 0.7180877823910206,
      "learning_rate": 3.578711602512878e-06,
      "loss": 0.1346,
      "step": 20704
    },
    {
      "epoch": 0.6040317404749402,
      "grad_norm": 0.731005048302328,
      "learning_rate": 3.5782586635937834e-06,
      "loss": 0.142,
      "step": 20705
    },
    {
      "epoch": 0.6040609137055838,
      "grad_norm": 0.9452837656329977,
      "learning_rate": 3.577805737367654e-06,
      "loss": 0.1396,
      "step": 20706
    },
    {
      "epoch": 0.6040900869362273,
      "grad_norm": 0.6820508516041044,
      "learning_rate": 3.5773528238385346e-06,
      "loss": 0.102,
      "step": 20707
    },
    {
      "epoch": 0.6041192601668709,
      "grad_norm": 0.80347364286241,
      "learning_rate": 3.5768999230104704e-06,
      "loss": 0.1087,
      "step": 20708
    },
    {
      "epoch": 0.6041484333975145,
      "grad_norm": 0.9430438813368315,
      "learning_rate": 3.5764470348875045e-06,
      "loss": 0.1391,
      "step": 20709
    },
    {
      "epoch": 0.604177606628158,
      "grad_norm": 0.926409377536277,
      "learning_rate": 3.57599415947368e-06,
      "loss": 0.1539,
      "step": 20710
    },
    {
      "epoch": 0.6042067798588016,
      "grad_norm": 0.7496252817836285,
      "learning_rate": 3.5755412967730397e-06,
      "loss": 0.1193,
      "step": 20711
    },
    {
      "epoch": 0.6042359530894451,
      "grad_norm": 1.0412874650331714,
      "learning_rate": 3.5750884467896262e-06,
      "loss": 0.1699,
      "step": 20712
    },
    {
      "epoch": 0.6042651263200887,
      "grad_norm": 1.0497410722901828,
      "learning_rate": 3.5746356095274817e-06,
      "loss": 0.1343,
      "step": 20713
    },
    {
      "epoch": 0.6042942995507322,
      "grad_norm": 0.9605813323763668,
      "learning_rate": 3.5741827849906514e-06,
      "loss": 0.11,
      "step": 20714
    },
    {
      "epoch": 0.6043234727813758,
      "grad_norm": 1.075380391109725,
      "learning_rate": 3.5737299731831776e-06,
      "loss": 0.1755,
      "step": 20715
    },
    {
      "epoch": 0.6043526460120193,
      "grad_norm": 0.815662823665873,
      "learning_rate": 3.5732771741091014e-06,
      "loss": 0.1267,
      "step": 20716
    },
    {
      "epoch": 0.604381819242663,
      "grad_norm": 1.0608945222955084,
      "learning_rate": 3.572824387772466e-06,
      "loss": 0.1316,
      "step": 20717
    },
    {
      "epoch": 0.6044109924733065,
      "grad_norm": 1.3495815301820906,
      "learning_rate": 3.5723716141773145e-06,
      "loss": 0.1109,
      "step": 20718
    },
    {
      "epoch": 0.6044401657039501,
      "grad_norm": 0.7443500837028708,
      "learning_rate": 3.5719188533276854e-06,
      "loss": 0.1258,
      "step": 20719
    },
    {
      "epoch": 0.6044693389345936,
      "grad_norm": 1.032691015903389,
      "learning_rate": 3.571466105227627e-06,
      "loss": 0.1204,
      "step": 20720
    },
    {
      "epoch": 0.6044985121652372,
      "grad_norm": 0.6986654255922213,
      "learning_rate": 3.5710133698811776e-06,
      "loss": 0.1076,
      "step": 20721
    },
    {
      "epoch": 0.6045276853958808,
      "grad_norm": 0.8930889899932719,
      "learning_rate": 3.570560647292379e-06,
      "loss": 0.1329,
      "step": 20722
    },
    {
      "epoch": 0.6045568586265243,
      "grad_norm": 0.8177811896482285,
      "learning_rate": 3.570107937465276e-06,
      "loss": 0.1199,
      "step": 20723
    },
    {
      "epoch": 0.6045860318571679,
      "grad_norm": 0.9547362573044593,
      "learning_rate": 3.5696552404039053e-06,
      "loss": 0.1166,
      "step": 20724
    },
    {
      "epoch": 0.6046152050878114,
      "grad_norm": 0.6963679729761292,
      "learning_rate": 3.569202556112311e-06,
      "loss": 0.1024,
      "step": 20725
    },
    {
      "epoch": 0.604644378318455,
      "grad_norm": 1.3464318442173022,
      "learning_rate": 3.5687498845945357e-06,
      "loss": 0.178,
      "step": 20726
    },
    {
      "epoch": 0.6046735515490985,
      "grad_norm": 0.740158816183777,
      "learning_rate": 3.5682972258546213e-06,
      "loss": 0.1164,
      "step": 20727
    },
    {
      "epoch": 0.6047027247797421,
      "grad_norm": 0.957911953830772,
      "learning_rate": 3.5678445798966055e-06,
      "loss": 0.1188,
      "step": 20728
    },
    {
      "epoch": 0.6047318980103856,
      "grad_norm": 1.2051437086304853,
      "learning_rate": 3.567391946724532e-06,
      "loss": 0.1162,
      "step": 20729
    },
    {
      "epoch": 0.6047610712410292,
      "grad_norm": 0.9796458679730979,
      "learning_rate": 3.5669393263424417e-06,
      "loss": 0.1317,
      "step": 20730
    },
    {
      "epoch": 0.6047902444716728,
      "grad_norm": 0.8184650639055017,
      "learning_rate": 3.566486718754372e-06,
      "loss": 0.1133,
      "step": 20731
    },
    {
      "epoch": 0.6048194177023164,
      "grad_norm": 0.6924939754626013,
      "learning_rate": 3.5660341239643703e-06,
      "loss": 0.1043,
      "step": 20732
    },
    {
      "epoch": 0.60484859093296,
      "grad_norm": 0.7927609320462828,
      "learning_rate": 3.5655815419764724e-06,
      "loss": 0.1324,
      "step": 20733
    },
    {
      "epoch": 0.6048777641636035,
      "grad_norm": 0.8222092239598024,
      "learning_rate": 3.56512897279472e-06,
      "loss": 0.1478,
      "step": 20734
    },
    {
      "epoch": 0.6049069373942471,
      "grad_norm": 0.8377589663943582,
      "learning_rate": 3.564676416423154e-06,
      "loss": 0.1253,
      "step": 20735
    },
    {
      "epoch": 0.6049361106248906,
      "grad_norm": 0.877026665903272,
      "learning_rate": 3.564223872865814e-06,
      "loss": 0.1367,
      "step": 20736
    },
    {
      "epoch": 0.6049652838555342,
      "grad_norm": 0.774475699720562,
      "learning_rate": 3.5637713421267395e-06,
      "loss": 0.1366,
      "step": 20737
    },
    {
      "epoch": 0.6049944570861777,
      "grad_norm": 0.9174580895118168,
      "learning_rate": 3.5633188242099726e-06,
      "loss": 0.1591,
      "step": 20738
    },
    {
      "epoch": 0.6050236303168213,
      "grad_norm": 0.9322027571763905,
      "learning_rate": 3.5628663191195525e-06,
      "loss": 0.1127,
      "step": 20739
    },
    {
      "epoch": 0.6050528035474648,
      "grad_norm": 0.6558769605593633,
      "learning_rate": 3.5624138268595186e-06,
      "loss": 0.1198,
      "step": 20740
    },
    {
      "epoch": 0.6050819767781084,
      "grad_norm": 0.8040920378513927,
      "learning_rate": 3.561961347433911e-06,
      "loss": 0.1484,
      "step": 20741
    },
    {
      "epoch": 0.6051111500087519,
      "grad_norm": 0.8918910970278644,
      "learning_rate": 3.5615088808467692e-06,
      "loss": 0.1291,
      "step": 20742
    },
    {
      "epoch": 0.6051403232393955,
      "grad_norm": 0.861320704932758,
      "learning_rate": 3.5610564271021315e-06,
      "loss": 0.1029,
      "step": 20743
    },
    {
      "epoch": 0.605169496470039,
      "grad_norm": 0.8195312237738975,
      "learning_rate": 3.5606039862040398e-06,
      "loss": 0.1459,
      "step": 20744
    },
    {
      "epoch": 0.6051986697006827,
      "grad_norm": 0.691934009545335,
      "learning_rate": 3.5601515581565326e-06,
      "loss": 0.1178,
      "step": 20745
    },
    {
      "epoch": 0.6052278429313263,
      "grad_norm": 0.8664220212506307,
      "learning_rate": 3.5596991429636474e-06,
      "loss": 0.1155,
      "step": 20746
    },
    {
      "epoch": 0.6052570161619698,
      "grad_norm": 0.769249132744937,
      "learning_rate": 3.559246740629425e-06,
      "loss": 0.1319,
      "step": 20747
    },
    {
      "epoch": 0.6052861893926134,
      "grad_norm": 0.7613625846478836,
      "learning_rate": 3.558794351157905e-06,
      "loss": 0.1316,
      "step": 20748
    },
    {
      "epoch": 0.6053153626232569,
      "grad_norm": 0.8746338487270799,
      "learning_rate": 3.558341974553122e-06,
      "loss": 0.1208,
      "step": 20749
    },
    {
      "epoch": 0.6053445358539005,
      "grad_norm": 0.7576457626868381,
      "learning_rate": 3.5578896108191195e-06,
      "loss": 0.138,
      "step": 20750
    },
    {
      "epoch": 0.605373709084544,
      "grad_norm": 0.827542553441471,
      "learning_rate": 3.5574372599599337e-06,
      "loss": 0.1222,
      "step": 20751
    },
    {
      "epoch": 0.6054028823151876,
      "grad_norm": 0.923101206094693,
      "learning_rate": 3.5569849219796044e-06,
      "loss": 0.1272,
      "step": 20752
    },
    {
      "epoch": 0.6054320555458311,
      "grad_norm": 0.8517156793731273,
      "learning_rate": 3.5565325968821694e-06,
      "loss": 0.1253,
      "step": 20753
    },
    {
      "epoch": 0.6054612287764747,
      "grad_norm": 0.7968882326285671,
      "learning_rate": 3.556080284671667e-06,
      "loss": 0.1288,
      "step": 20754
    },
    {
      "epoch": 0.6054904020071182,
      "grad_norm": 0.8297991682726783,
      "learning_rate": 3.555627985352133e-06,
      "loss": 0.1359,
      "step": 20755
    },
    {
      "epoch": 0.6055195752377618,
      "grad_norm": 0.9329933523109677,
      "learning_rate": 3.5551756989276087e-06,
      "loss": 0.1321,
      "step": 20756
    },
    {
      "epoch": 0.6055487484684053,
      "grad_norm": 0.7371539989534648,
      "learning_rate": 3.5547234254021325e-06,
      "loss": 0.1034,
      "step": 20757
    },
    {
      "epoch": 0.605577921699049,
      "grad_norm": 0.808178212639286,
      "learning_rate": 3.554271164779739e-06,
      "loss": 0.117,
      "step": 20758
    },
    {
      "epoch": 0.6056070949296926,
      "grad_norm": 0.6366159947828574,
      "learning_rate": 3.5538189170644678e-06,
      "loss": 0.1154,
      "step": 20759
    },
    {
      "epoch": 0.6056362681603361,
      "grad_norm": 1.2799814059715604,
      "learning_rate": 3.553366682260356e-06,
      "loss": 0.1327,
      "step": 20760
    },
    {
      "epoch": 0.6056654413909797,
      "grad_norm": 0.8034490254175609,
      "learning_rate": 3.5529144603714395e-06,
      "loss": 0.1227,
      "step": 20761
    },
    {
      "epoch": 0.6056946146216232,
      "grad_norm": 1.1062572770653447,
      "learning_rate": 3.55246225140176e-06,
      "loss": 0.1252,
      "step": 20762
    },
    {
      "epoch": 0.6057237878522668,
      "grad_norm": 1.0330797722000389,
      "learning_rate": 3.5520100553553504e-06,
      "loss": 0.1564,
      "step": 20763
    },
    {
      "epoch": 0.6057529610829103,
      "grad_norm": 0.8166753943911534,
      "learning_rate": 3.55155787223625e-06,
      "loss": 0.1281,
      "step": 20764
    },
    {
      "epoch": 0.6057821343135539,
      "grad_norm": 0.8070122984817666,
      "learning_rate": 3.551105702048495e-06,
      "loss": 0.1292,
      "step": 20765
    },
    {
      "epoch": 0.6058113075441974,
      "grad_norm": 1.1250501921628877,
      "learning_rate": 3.5506535447961227e-06,
      "loss": 0.1332,
      "step": 20766
    },
    {
      "epoch": 0.605840480774841,
      "grad_norm": 1.0708299412142712,
      "learning_rate": 3.5502014004831674e-06,
      "loss": 0.1257,
      "step": 20767
    },
    {
      "epoch": 0.6058696540054845,
      "grad_norm": 0.9902104897837148,
      "learning_rate": 3.5497492691136705e-06,
      "loss": 0.1424,
      "step": 20768
    },
    {
      "epoch": 0.6058988272361281,
      "grad_norm": 0.7946503233411324,
      "learning_rate": 3.5492971506916647e-06,
      "loss": 0.1489,
      "step": 20769
    },
    {
      "epoch": 0.6059280004667716,
      "grad_norm": 1.1545968112596225,
      "learning_rate": 3.5488450452211887e-06,
      "loss": 0.1281,
      "step": 20770
    },
    {
      "epoch": 0.6059571736974152,
      "grad_norm": 0.998017859659975,
      "learning_rate": 3.5483929527062764e-06,
      "loss": 0.1209,
      "step": 20771
    },
    {
      "epoch": 0.6059863469280589,
      "grad_norm": 0.7082787407444177,
      "learning_rate": 3.547940873150966e-06,
      "loss": 0.1006,
      "step": 20772
    },
    {
      "epoch": 0.6060155201587024,
      "grad_norm": 0.8808205347203875,
      "learning_rate": 3.547488806559292e-06,
      "loss": 0.1373,
      "step": 20773
    },
    {
      "epoch": 0.606044693389346,
      "grad_norm": 1.0237477128960173,
      "learning_rate": 3.5470367529352917e-06,
      "loss": 0.1378,
      "step": 20774
    },
    {
      "epoch": 0.6060738666199895,
      "grad_norm": 0.6965407799322243,
      "learning_rate": 3.5465847122830014e-06,
      "loss": 0.0925,
      "step": 20775
    },
    {
      "epoch": 0.6061030398506331,
      "grad_norm": 0.5832944443817607,
      "learning_rate": 3.5461326846064555e-06,
      "loss": 0.1142,
      "step": 20776
    },
    {
      "epoch": 0.6061322130812766,
      "grad_norm": 1.0577146828928368,
      "learning_rate": 3.545680669909689e-06,
      "loss": 0.1118,
      "step": 20777
    },
    {
      "epoch": 0.6061613863119202,
      "grad_norm": 1.1101115172841742,
      "learning_rate": 3.5452286681967397e-06,
      "loss": 0.1411,
      "step": 20778
    },
    {
      "epoch": 0.6061905595425637,
      "grad_norm": 0.7237761676523827,
      "learning_rate": 3.54477667947164e-06,
      "loss": 0.1271,
      "step": 20779
    },
    {
      "epoch": 0.6062197327732073,
      "grad_norm": 1.1404929236029993,
      "learning_rate": 3.5443247037384273e-06,
      "loss": 0.1319,
      "step": 20780
    },
    {
      "epoch": 0.6062489060038508,
      "grad_norm": 1.585405341464,
      "learning_rate": 3.543872741001136e-06,
      "loss": 0.1384,
      "step": 20781
    },
    {
      "epoch": 0.6062780792344944,
      "grad_norm": 1.1413178559235155,
      "learning_rate": 3.543420791263801e-06,
      "loss": 0.1094,
      "step": 20782
    },
    {
      "epoch": 0.606307252465138,
      "grad_norm": 0.805556354799846,
      "learning_rate": 3.5429688545304596e-06,
      "loss": 0.1302,
      "step": 20783
    },
    {
      "epoch": 0.6063364256957815,
      "grad_norm": 0.8586873033020317,
      "learning_rate": 3.5425169308051423e-06,
      "loss": 0.1168,
      "step": 20784
    },
    {
      "epoch": 0.6063655989264252,
      "grad_norm": 1.1159385045306318,
      "learning_rate": 3.5420650200918854e-06,
      "loss": 0.1182,
      "step": 20785
    },
    {
      "epoch": 0.6063947721570687,
      "grad_norm": 1.1160109972322403,
      "learning_rate": 3.5416131223947246e-06,
      "loss": 0.1018,
      "step": 20786
    },
    {
      "epoch": 0.6064239453877123,
      "grad_norm": 0.8517934459793963,
      "learning_rate": 3.5411612377176952e-06,
      "loss": 0.1398,
      "step": 20787
    },
    {
      "epoch": 0.6064531186183558,
      "grad_norm": 1.007116534568464,
      "learning_rate": 3.540709366064829e-06,
      "loss": 0.1161,
      "step": 20788
    },
    {
      "epoch": 0.6064822918489994,
      "grad_norm": 1.106611411847511,
      "learning_rate": 3.5402575074401614e-06,
      "loss": 0.121,
      "step": 20789
    },
    {
      "epoch": 0.6065114650796429,
      "grad_norm": 1.1247601950427408,
      "learning_rate": 3.5398056618477267e-06,
      "loss": 0.117,
      "step": 20790
    },
    {
      "epoch": 0.6065406383102865,
      "grad_norm": 0.8765751259200038,
      "learning_rate": 3.539353829291555e-06,
      "loss": 0.1522,
      "step": 20791
    },
    {
      "epoch": 0.60656981154093,
      "grad_norm": 0.848885329789495,
      "learning_rate": 3.5389020097756875e-06,
      "loss": 0.1423,
      "step": 20792
    },
    {
      "epoch": 0.6065989847715736,
      "grad_norm": 1.3298916316619849,
      "learning_rate": 3.5384502033041534e-06,
      "loss": 0.1238,
      "step": 20793
    },
    {
      "epoch": 0.6066281580022171,
      "grad_norm": 0.9793552854737595,
      "learning_rate": 3.537998409880986e-06,
      "loss": 0.1404,
      "step": 20794
    },
    {
      "epoch": 0.6066573312328607,
      "grad_norm": 0.9251143141179438,
      "learning_rate": 3.537546629510222e-06,
      "loss": 0.1465,
      "step": 20795
    },
    {
      "epoch": 0.6066865044635043,
      "grad_norm": 0.7619181328684479,
      "learning_rate": 3.5370948621958905e-06,
      "loss": 0.1183,
      "step": 20796
    },
    {
      "epoch": 0.6067156776941478,
      "grad_norm": 0.9043037680986421,
      "learning_rate": 3.536643107942026e-06,
      "loss": 0.1291,
      "step": 20797
    },
    {
      "epoch": 0.6067448509247914,
      "grad_norm": 0.7967554360175508,
      "learning_rate": 3.5361913667526637e-06,
      "loss": 0.1225,
      "step": 20798
    },
    {
      "epoch": 0.606774024155435,
      "grad_norm": 1.128864099514495,
      "learning_rate": 3.5357396386318356e-06,
      "loss": 0.1066,
      "step": 20799
    },
    {
      "epoch": 0.6068031973860786,
      "grad_norm": 0.9666030322325735,
      "learning_rate": 3.535287923583576e-06,
      "loss": 0.1013,
      "step": 20800
    },
    {
      "epoch": 0.6068323706167221,
      "grad_norm": 0.6596696692633949,
      "learning_rate": 3.5348362216119136e-06,
      "loss": 0.1278,
      "step": 20801
    },
    {
      "epoch": 0.6068615438473657,
      "grad_norm": 0.9430316041094062,
      "learning_rate": 3.534384532720885e-06,
      "loss": 0.1392,
      "step": 20802
    },
    {
      "epoch": 0.6068907170780092,
      "grad_norm": 0.8621980563535409,
      "learning_rate": 3.53393285691452e-06,
      "loss": 0.1262,
      "step": 20803
    },
    {
      "epoch": 0.6069198903086528,
      "grad_norm": 0.7425572221417459,
      "learning_rate": 3.5334811941968533e-06,
      "loss": 0.1064,
      "step": 20804
    },
    {
      "epoch": 0.6069490635392963,
      "grad_norm": 0.7104699576240053,
      "learning_rate": 3.5330295445719174e-06,
      "loss": 0.126,
      "step": 20805
    },
    {
      "epoch": 0.6069782367699399,
      "grad_norm": 0.7137393887368758,
      "learning_rate": 3.5325779080437427e-06,
      "loss": 0.0955,
      "step": 20806
    },
    {
      "epoch": 0.6070074100005834,
      "grad_norm": 0.836191153618072,
      "learning_rate": 3.532126284616362e-06,
      "loss": 0.1368,
      "step": 20807
    },
    {
      "epoch": 0.607036583231227,
      "grad_norm": 0.8013247250862227,
      "learning_rate": 3.531674674293809e-06,
      "loss": 0.1177,
      "step": 20808
    },
    {
      "epoch": 0.6070657564618706,
      "grad_norm": 0.8837431030362821,
      "learning_rate": 3.5312230770801115e-06,
      "loss": 0.1246,
      "step": 20809
    },
    {
      "epoch": 0.6070949296925141,
      "grad_norm": 0.7869883262216052,
      "learning_rate": 3.530771492979305e-06,
      "loss": 0.1373,
      "step": 20810
    },
    {
      "epoch": 0.6071241029231577,
      "grad_norm": 0.8483986506473256,
      "learning_rate": 3.5303199219954188e-06,
      "loss": 0.1183,
      "step": 20811
    },
    {
      "epoch": 0.6071532761538013,
      "grad_norm": 0.6440159824701732,
      "learning_rate": 3.5298683641324864e-06,
      "loss": 0.1147,
      "step": 20812
    },
    {
      "epoch": 0.6071824493844449,
      "grad_norm": 0.8186681196906564,
      "learning_rate": 3.5294168193945392e-06,
      "loss": 0.1363,
      "step": 20813
    },
    {
      "epoch": 0.6072116226150884,
      "grad_norm": 0.7340619231161224,
      "learning_rate": 3.528965287785607e-06,
      "loss": 0.0933,
      "step": 20814
    },
    {
      "epoch": 0.607240795845732,
      "grad_norm": 0.7199545601433122,
      "learning_rate": 3.5285137693097198e-06,
      "loss": 0.1366,
      "step": 20815
    },
    {
      "epoch": 0.6072699690763755,
      "grad_norm": 0.6875301094139717,
      "learning_rate": 3.5280622639709117e-06,
      "loss": 0.1199,
      "step": 20816
    },
    {
      "epoch": 0.6072991423070191,
      "grad_norm": 0.8598583189444979,
      "learning_rate": 3.5276107717732133e-06,
      "loss": 0.1261,
      "step": 20817
    },
    {
      "epoch": 0.6073283155376626,
      "grad_norm": 0.749982161543977,
      "learning_rate": 3.527159292720654e-06,
      "loss": 0.1302,
      "step": 20818
    },
    {
      "epoch": 0.6073574887683062,
      "grad_norm": 0.9103036841326585,
      "learning_rate": 3.526707826817264e-06,
      "loss": 0.1311,
      "step": 20819
    },
    {
      "epoch": 0.6073866619989498,
      "grad_norm": 0.746632233792318,
      "learning_rate": 3.5262563740670765e-06,
      "loss": 0.1189,
      "step": 20820
    },
    {
      "epoch": 0.6074158352295933,
      "grad_norm": 0.90928843892397,
      "learning_rate": 3.525804934474117e-06,
      "loss": 0.1469,
      "step": 20821
    },
    {
      "epoch": 0.6074450084602369,
      "grad_norm": 0.742644949864802,
      "learning_rate": 3.5253535080424224e-06,
      "loss": 0.1174,
      "step": 20822
    },
    {
      "epoch": 0.6074741816908804,
      "grad_norm": 0.7847711759910857,
      "learning_rate": 3.5249020947760182e-06,
      "loss": 0.1507,
      "step": 20823
    },
    {
      "epoch": 0.607503354921524,
      "grad_norm": 0.7114351635012685,
      "learning_rate": 3.524450694678936e-06,
      "loss": 0.1354,
      "step": 20824
    },
    {
      "epoch": 0.6075325281521675,
      "grad_norm": 0.9068679796758986,
      "learning_rate": 3.523999307755207e-06,
      "loss": 0.1387,
      "step": 20825
    },
    {
      "epoch": 0.6075617013828112,
      "grad_norm": 0.7231457922906789,
      "learning_rate": 3.523547934008859e-06,
      "loss": 0.1443,
      "step": 20826
    },
    {
      "epoch": 0.6075908746134547,
      "grad_norm": 0.8640421220288766,
      "learning_rate": 3.5230965734439214e-06,
      "loss": 0.1104,
      "step": 20827
    },
    {
      "epoch": 0.6076200478440983,
      "grad_norm": 0.7545538548010573,
      "learning_rate": 3.522645226064426e-06,
      "loss": 0.1171,
      "step": 20828
    },
    {
      "epoch": 0.6076492210747418,
      "grad_norm": 0.8720536517182633,
      "learning_rate": 3.5221938918744013e-06,
      "loss": 0.1275,
      "step": 20829
    },
    {
      "epoch": 0.6076783943053854,
      "grad_norm": 0.7062483667290207,
      "learning_rate": 3.5217425708778774e-06,
      "loss": 0.0852,
      "step": 20830
    },
    {
      "epoch": 0.607707567536029,
      "grad_norm": 0.9420998347925931,
      "learning_rate": 3.5212912630788827e-06,
      "loss": 0.1322,
      "step": 20831
    },
    {
      "epoch": 0.6077367407666725,
      "grad_norm": 0.7892344052736006,
      "learning_rate": 3.5208399684814463e-06,
      "loss": 0.124,
      "step": 20832
    },
    {
      "epoch": 0.607765913997316,
      "grad_norm": 0.8214551947674077,
      "learning_rate": 3.5203886870895965e-06,
      "loss": 0.1176,
      "step": 20833
    },
    {
      "epoch": 0.6077950872279596,
      "grad_norm": 0.8876504109379849,
      "learning_rate": 3.519937418907364e-06,
      "loss": 0.135,
      "step": 20834
    },
    {
      "epoch": 0.6078242604586032,
      "grad_norm": 0.939069544891645,
      "learning_rate": 3.5194861639387783e-06,
      "loss": 0.1228,
      "step": 20835
    },
    {
      "epoch": 0.6078534336892467,
      "grad_norm": 0.6827677451020275,
      "learning_rate": 3.5190349221878655e-06,
      "loss": 0.1229,
      "step": 20836
    },
    {
      "epoch": 0.6078826069198903,
      "grad_norm": 0.8175353329013016,
      "learning_rate": 3.518583693658656e-06,
      "loss": 0.1354,
      "step": 20837
    },
    {
      "epoch": 0.6079117801505338,
      "grad_norm": 0.8746298979621124,
      "learning_rate": 3.518132478355178e-06,
      "loss": 0.1394,
      "step": 20838
    },
    {
      "epoch": 0.6079409533811775,
      "grad_norm": 0.7487423416580942,
      "learning_rate": 3.5176812762814572e-06,
      "loss": 0.1097,
      "step": 20839
    },
    {
      "epoch": 0.607970126611821,
      "grad_norm": 0.7421659489455351,
      "learning_rate": 3.5172300874415256e-06,
      "loss": 0.1359,
      "step": 20840
    },
    {
      "epoch": 0.6079992998424646,
      "grad_norm": 0.928068727451947,
      "learning_rate": 3.51677891183941e-06,
      "loss": 0.1221,
      "step": 20841
    },
    {
      "epoch": 0.6080284730731081,
      "grad_norm": 0.8362138577853714,
      "learning_rate": 3.516327749479139e-06,
      "loss": 0.1323,
      "step": 20842
    },
    {
      "epoch": 0.6080576463037517,
      "grad_norm": 0.8282463313254467,
      "learning_rate": 3.5158766003647382e-06,
      "loss": 0.1245,
      "step": 20843
    },
    {
      "epoch": 0.6080868195343953,
      "grad_norm": 0.7602479263944608,
      "learning_rate": 3.515425464500237e-06,
      "loss": 0.144,
      "step": 20844
    },
    {
      "epoch": 0.6081159927650388,
      "grad_norm": 1.075393671920079,
      "learning_rate": 3.5149743418896622e-06,
      "loss": 0.1132,
      "step": 20845
    },
    {
      "epoch": 0.6081451659956824,
      "grad_norm": 0.8795152460951055,
      "learning_rate": 3.5145232325370426e-06,
      "loss": 0.1463,
      "step": 20846
    },
    {
      "epoch": 0.6081743392263259,
      "grad_norm": 0.8786622884723323,
      "learning_rate": 3.5140721364464068e-06,
      "loss": 0.1128,
      "step": 20847
    },
    {
      "epoch": 0.6082035124569695,
      "grad_norm": 0.7586587947296383,
      "learning_rate": 3.5136210536217787e-06,
      "loss": 0.1132,
      "step": 20848
    },
    {
      "epoch": 0.608232685687613,
      "grad_norm": 0.9380004096608355,
      "learning_rate": 3.5131699840671867e-06,
      "loss": 0.1291,
      "step": 20849
    },
    {
      "epoch": 0.6082618589182566,
      "grad_norm": 0.7444580516213217,
      "learning_rate": 3.51271892778666e-06,
      "loss": 0.1279,
      "step": 20850
    },
    {
      "epoch": 0.6082910321489001,
      "grad_norm": 0.8131382801497327,
      "learning_rate": 3.5122678847842197e-06,
      "loss": 0.1145,
      "step": 20851
    },
    {
      "epoch": 0.6083202053795437,
      "grad_norm": 0.7383238104561087,
      "learning_rate": 3.5118168550639e-06,
      "loss": 0.1259,
      "step": 20852
    },
    {
      "epoch": 0.6083493786101873,
      "grad_norm": 0.8365176261664504,
      "learning_rate": 3.5113658386297227e-06,
      "loss": 0.142,
      "step": 20853
    },
    {
      "epoch": 0.6083785518408309,
      "grad_norm": 0.748472365231926,
      "learning_rate": 3.5109148354857165e-06,
      "loss": 0.1447,
      "step": 20854
    },
    {
      "epoch": 0.6084077250714744,
      "grad_norm": 0.7115854890993898,
      "learning_rate": 3.510463845635908e-06,
      "loss": 0.1057,
      "step": 20855
    },
    {
      "epoch": 0.608436898302118,
      "grad_norm": 0.8930583837912623,
      "learning_rate": 3.5100128690843215e-06,
      "loss": 0.126,
      "step": 20856
    },
    {
      "epoch": 0.6084660715327616,
      "grad_norm": 0.7522305806269778,
      "learning_rate": 3.509561905834984e-06,
      "loss": 0.1225,
      "step": 20857
    },
    {
      "epoch": 0.6084952447634051,
      "grad_norm": 0.7656887352671015,
      "learning_rate": 3.5091109558919223e-06,
      "loss": 0.1097,
      "step": 20858
    },
    {
      "epoch": 0.6085244179940487,
      "grad_norm": 0.8535693426859395,
      "learning_rate": 3.5086600192591623e-06,
      "loss": 0.1293,
      "step": 20859
    },
    {
      "epoch": 0.6085535912246922,
      "grad_norm": 0.8635018599260486,
      "learning_rate": 3.5082090959407307e-06,
      "loss": 0.1157,
      "step": 20860
    },
    {
      "epoch": 0.6085827644553358,
      "grad_norm": 0.7333457926948656,
      "learning_rate": 3.5077581859406508e-06,
      "loss": 0.1188,
      "step": 20861
    },
    {
      "epoch": 0.6086119376859793,
      "grad_norm": 0.8042113886541649,
      "learning_rate": 3.507307289262949e-06,
      "loss": 0.1416,
      "step": 20862
    },
    {
      "epoch": 0.6086411109166229,
      "grad_norm": 0.7903422032931964,
      "learning_rate": 3.5068564059116522e-06,
      "loss": 0.1144,
      "step": 20863
    },
    {
      "epoch": 0.6086702841472664,
      "grad_norm": 0.7913155719387774,
      "learning_rate": 3.5064055358907854e-06,
      "loss": 0.0999,
      "step": 20864
    },
    {
      "epoch": 0.60869945737791,
      "grad_norm": 1.0452286762992344,
      "learning_rate": 3.5059546792043742e-06,
      "loss": 0.1217,
      "step": 20865
    },
    {
      "epoch": 0.6087286306085536,
      "grad_norm": 0.8504592104242178,
      "learning_rate": 3.505503835856442e-06,
      "loss": 0.1313,
      "step": 20866
    },
    {
      "epoch": 0.6087578038391972,
      "grad_norm": 0.9016377475640407,
      "learning_rate": 3.5050530058510146e-06,
      "loss": 0.1391,
      "step": 20867
    },
    {
      "epoch": 0.6087869770698408,
      "grad_norm": 1.3474493280915316,
      "learning_rate": 3.5046021891921156e-06,
      "loss": 0.1419,
      "step": 20868
    },
    {
      "epoch": 0.6088161503004843,
      "grad_norm": 1.0027879261808696,
      "learning_rate": 3.504151385883774e-06,
      "loss": 0.1173,
      "step": 20869
    },
    {
      "epoch": 0.6088453235311279,
      "grad_norm": 0.8442227729097376,
      "learning_rate": 3.5037005959300106e-06,
      "loss": 0.1265,
      "step": 20870
    },
    {
      "epoch": 0.6088744967617714,
      "grad_norm": 1.5925643195253445,
      "learning_rate": 3.503249819334851e-06,
      "loss": 0.1343,
      "step": 20871
    },
    {
      "epoch": 0.608903669992415,
      "grad_norm": 1.3341999200780448,
      "learning_rate": 3.5027990561023204e-06,
      "loss": 0.1328,
      "step": 20872
    },
    {
      "epoch": 0.6089328432230585,
      "grad_norm": 0.8395569746982348,
      "learning_rate": 3.502348306236442e-06,
      "loss": 0.1348,
      "step": 20873
    },
    {
      "epoch": 0.6089620164537021,
      "grad_norm": 1.1910055822634305,
      "learning_rate": 3.5018975697412392e-06,
      "loss": 0.1515,
      "step": 20874
    },
    {
      "epoch": 0.6089911896843456,
      "grad_norm": 1.4586076054610828,
      "learning_rate": 3.5014468466207387e-06,
      "loss": 0.1205,
      "step": 20875
    },
    {
      "epoch": 0.6090203629149892,
      "grad_norm": 0.8727719842501428,
      "learning_rate": 3.5009961368789623e-06,
      "loss": 0.114,
      "step": 20876
    },
    {
      "epoch": 0.6090495361456327,
      "grad_norm": 0.8727754863794717,
      "learning_rate": 3.5005454405199358e-06,
      "loss": 0.1166,
      "step": 20877
    },
    {
      "epoch": 0.6090787093762763,
      "grad_norm": 0.8788419478958428,
      "learning_rate": 3.5000947575476806e-06,
      "loss": 0.1029,
      "step": 20878
    },
    {
      "epoch": 0.6091078826069198,
      "grad_norm": 1.1375574796335826,
      "learning_rate": 3.4996440879662218e-06,
      "loss": 0.1335,
      "step": 20879
    },
    {
      "epoch": 0.6091370558375635,
      "grad_norm": 1.170482436033285,
      "learning_rate": 3.4991934317795806e-06,
      "loss": 0.1295,
      "step": 20880
    },
    {
      "epoch": 0.609166229068207,
      "grad_norm": 0.7969557444024356,
      "learning_rate": 3.4987427889917835e-06,
      "loss": 0.1288,
      "step": 20881
    },
    {
      "epoch": 0.6091954022988506,
      "grad_norm": 0.9061287474697424,
      "learning_rate": 3.4982921596068543e-06,
      "loss": 0.119,
      "step": 20882
    },
    {
      "epoch": 0.6092245755294942,
      "grad_norm": 0.841542942014196,
      "learning_rate": 3.4978415436288117e-06,
      "loss": 0.1126,
      "step": 20883
    },
    {
      "epoch": 0.6092537487601377,
      "grad_norm": 0.8243967094503337,
      "learning_rate": 3.4973909410616825e-06,
      "loss": 0.1144,
      "step": 20884
    },
    {
      "epoch": 0.6092829219907813,
      "grad_norm": 0.7724631396965665,
      "learning_rate": 3.4969403519094884e-06,
      "loss": 0.1247,
      "step": 20885
    },
    {
      "epoch": 0.6093120952214248,
      "grad_norm": 0.8084179720613038,
      "learning_rate": 3.4964897761762494e-06,
      "loss": 0.1458,
      "step": 20886
    },
    {
      "epoch": 0.6093412684520684,
      "grad_norm": 0.7925687170356461,
      "learning_rate": 3.4960392138659937e-06,
      "loss": 0.1159,
      "step": 20887
    },
    {
      "epoch": 0.6093704416827119,
      "grad_norm": 0.9906277379096061,
      "learning_rate": 3.49558866498274e-06,
      "loss": 0.127,
      "step": 20888
    },
    {
      "epoch": 0.6093996149133555,
      "grad_norm": 0.8723356293846019,
      "learning_rate": 3.495138129530511e-06,
      "loss": 0.0994,
      "step": 20889
    },
    {
      "epoch": 0.609428788143999,
      "grad_norm": 0.8502232317404022,
      "learning_rate": 3.4946876075133314e-06,
      "loss": 0.1389,
      "step": 20890
    },
    {
      "epoch": 0.6094579613746426,
      "grad_norm": 0.7269993560244731,
      "learning_rate": 3.4942370989352197e-06,
      "loss": 0.13,
      "step": 20891
    },
    {
      "epoch": 0.6094871346052861,
      "grad_norm": 0.7966318687625942,
      "learning_rate": 3.493786603800199e-06,
      "loss": 0.1203,
      "step": 20892
    },
    {
      "epoch": 0.6095163078359298,
      "grad_norm": 0.8267682902728425,
      "learning_rate": 3.493336122112293e-06,
      "loss": 0.1314,
      "step": 20893
    },
    {
      "epoch": 0.6095454810665734,
      "grad_norm": 0.8030176606329594,
      "learning_rate": 3.492885653875523e-06,
      "loss": 0.1412,
      "step": 20894
    },
    {
      "epoch": 0.6095746542972169,
      "grad_norm": 0.9935218848228359,
      "learning_rate": 3.4924351990939102e-06,
      "loss": 0.1076,
      "step": 20895
    },
    {
      "epoch": 0.6096038275278605,
      "grad_norm": 0.8916645742970674,
      "learning_rate": 3.4919847577714753e-06,
      "loss": 0.0969,
      "step": 20896
    },
    {
      "epoch": 0.609633000758504,
      "grad_norm": 0.8236760374407862,
      "learning_rate": 3.4915343299122408e-06,
      "loss": 0.1284,
      "step": 20897
    },
    {
      "epoch": 0.6096621739891476,
      "grad_norm": 0.919804114272897,
      "learning_rate": 3.491083915520227e-06,
      "loss": 0.1516,
      "step": 20898
    },
    {
      "epoch": 0.6096913472197911,
      "grad_norm": 0.9565564281413478,
      "learning_rate": 3.490633514599457e-06,
      "loss": 0.1143,
      "step": 20899
    },
    {
      "epoch": 0.6097205204504347,
      "grad_norm": 0.7096049569596213,
      "learning_rate": 3.49018312715395e-06,
      "loss": 0.1382,
      "step": 20900
    },
    {
      "epoch": 0.6097496936810782,
      "grad_norm": 0.9871400070174613,
      "learning_rate": 3.489732753187728e-06,
      "loss": 0.1388,
      "step": 20901
    },
    {
      "epoch": 0.6097788669117218,
      "grad_norm": 1.131942789357873,
      "learning_rate": 3.4892823927048113e-06,
      "loss": 0.1249,
      "step": 20902
    },
    {
      "epoch": 0.6098080401423653,
      "grad_norm": 0.7074766836221436,
      "learning_rate": 3.4888320457092207e-06,
      "loss": 0.1315,
      "step": 20903
    },
    {
      "epoch": 0.6098372133730089,
      "grad_norm": 1.6197528286078506,
      "learning_rate": 3.4883817122049757e-06,
      "loss": 0.1334,
      "step": 20904
    },
    {
      "epoch": 0.6098663866036524,
      "grad_norm": 1.0630169660883857,
      "learning_rate": 3.4879313921960988e-06,
      "loss": 0.146,
      "step": 20905
    },
    {
      "epoch": 0.609895559834296,
      "grad_norm": 1.3073245857038782,
      "learning_rate": 3.48748108568661e-06,
      "loss": 0.1199,
      "step": 20906
    },
    {
      "epoch": 0.6099247330649397,
      "grad_norm": 0.7861543012982191,
      "learning_rate": 3.4870307926805293e-06,
      "loss": 0.1196,
      "step": 20907
    },
    {
      "epoch": 0.6099539062955832,
      "grad_norm": 1.178482787113756,
      "learning_rate": 3.486580513181876e-06,
      "loss": 0.1264,
      "step": 20908
    },
    {
      "epoch": 0.6099830795262268,
      "grad_norm": 1.3058702213214082,
      "learning_rate": 3.4861302471946703e-06,
      "loss": 0.1353,
      "step": 20909
    },
    {
      "epoch": 0.6100122527568703,
      "grad_norm": 0.8620394268813247,
      "learning_rate": 3.4856799947229316e-06,
      "loss": 0.1239,
      "step": 20910
    },
    {
      "epoch": 0.6100414259875139,
      "grad_norm": 0.794154012305866,
      "learning_rate": 3.4852297557706803e-06,
      "loss": 0.1024,
      "step": 20911
    },
    {
      "epoch": 0.6100705992181574,
      "grad_norm": 0.9086135890438978,
      "learning_rate": 3.4847795303419385e-06,
      "loss": 0.142,
      "step": 20912
    },
    {
      "epoch": 0.610099772448801,
      "grad_norm": 1.1621642144116857,
      "learning_rate": 3.4843293184407223e-06,
      "loss": 0.1233,
      "step": 20913
    },
    {
      "epoch": 0.6101289456794445,
      "grad_norm": 0.8985573165668729,
      "learning_rate": 3.4838791200710515e-06,
      "loss": 0.1321,
      "step": 20914
    },
    {
      "epoch": 0.6101581189100881,
      "grad_norm": 0.8056488716815887,
      "learning_rate": 3.4834289352369477e-06,
      "loss": 0.1333,
      "step": 20915
    },
    {
      "epoch": 0.6101872921407316,
      "grad_norm": 0.8015228922543629,
      "learning_rate": 3.4829787639424238e-06,
      "loss": 0.1212,
      "step": 20916
    },
    {
      "epoch": 0.6102164653713752,
      "grad_norm": 1.038916445368362,
      "learning_rate": 3.482528606191508e-06,
      "loss": 0.1395,
      "step": 20917
    },
    {
      "epoch": 0.6102456386020187,
      "grad_norm": 0.9483160722802674,
      "learning_rate": 3.482078461988213e-06,
      "loss": 0.1042,
      "step": 20918
    },
    {
      "epoch": 0.6102748118326623,
      "grad_norm": 0.7376460552091912,
      "learning_rate": 3.481628331336559e-06,
      "loss": 0.1246,
      "step": 20919
    },
    {
      "epoch": 0.610303985063306,
      "grad_norm": 0.8470370483299596,
      "learning_rate": 3.481178214240566e-06,
      "loss": 0.1087,
      "step": 20920
    },
    {
      "epoch": 0.6103331582939495,
      "grad_norm": 0.9497577318465791,
      "learning_rate": 3.48072811070425e-06,
      "loss": 0.1286,
      "step": 20921
    },
    {
      "epoch": 0.6103623315245931,
      "grad_norm": 0.8930144418107888,
      "learning_rate": 3.48027802073163e-06,
      "loss": 0.1097,
      "step": 20922
    },
    {
      "epoch": 0.6103915047552366,
      "grad_norm": 0.842364621518752,
      "learning_rate": 3.479827944326726e-06,
      "loss": 0.1272,
      "step": 20923
    },
    {
      "epoch": 0.6104206779858802,
      "grad_norm": 0.747239643573268,
      "learning_rate": 3.4793778814935553e-06,
      "loss": 0.1231,
      "step": 20924
    },
    {
      "epoch": 0.6104498512165237,
      "grad_norm": 0.8841884267214734,
      "learning_rate": 3.478927832236137e-06,
      "loss": 0.1132,
      "step": 20925
    },
    {
      "epoch": 0.6104790244471673,
      "grad_norm": 0.8602181308889852,
      "learning_rate": 3.478477796558487e-06,
      "loss": 0.1351,
      "step": 20926
    },
    {
      "epoch": 0.6105081976778108,
      "grad_norm": 0.9963921043130824,
      "learning_rate": 3.4780277744646236e-06,
      "loss": 0.1272,
      "step": 20927
    },
    {
      "epoch": 0.6105373709084544,
      "grad_norm": 0.8268281964396148,
      "learning_rate": 3.477577765958564e-06,
      "loss": 0.1381,
      "step": 20928
    },
    {
      "epoch": 0.6105665441390979,
      "grad_norm": 0.7129514428666248,
      "learning_rate": 3.4771277710443284e-06,
      "loss": 0.1109,
      "step": 20929
    },
    {
      "epoch": 0.6105957173697415,
      "grad_norm": 0.8057698220128542,
      "learning_rate": 3.4766777897259317e-06,
      "loss": 0.1395,
      "step": 20930
    },
    {
      "epoch": 0.610624890600385,
      "grad_norm": 1.0915883936241786,
      "learning_rate": 3.4762278220073927e-06,
      "loss": 0.1416,
      "step": 20931
    },
    {
      "epoch": 0.6106540638310286,
      "grad_norm": 0.8465931890384214,
      "learning_rate": 3.475777867892728e-06,
      "loss": 0.1237,
      "step": 20932
    },
    {
      "epoch": 0.6106832370616722,
      "grad_norm": 0.9360312890669172,
      "learning_rate": 3.475327927385954e-06,
      "loss": 0.1195,
      "step": 20933
    },
    {
      "epoch": 0.6107124102923158,
      "grad_norm": 0.7891785178727896,
      "learning_rate": 3.4748780004910875e-06,
      "loss": 0.1175,
      "step": 20934
    },
    {
      "epoch": 0.6107415835229594,
      "grad_norm": 0.7896108617273909,
      "learning_rate": 3.474428087212147e-06,
      "loss": 0.1024,
      "step": 20935
    },
    {
      "epoch": 0.6107707567536029,
      "grad_norm": 0.9645337464520171,
      "learning_rate": 3.473978187553149e-06,
      "loss": 0.1134,
      "step": 20936
    },
    {
      "epoch": 0.6107999299842465,
      "grad_norm": 1.036323117317088,
      "learning_rate": 3.4735283015181092e-06,
      "loss": 0.148,
      "step": 20937
    },
    {
      "epoch": 0.61082910321489,
      "grad_norm": 1.020705629172368,
      "learning_rate": 3.473078429111044e-06,
      "loss": 0.142,
      "step": 20938
    },
    {
      "epoch": 0.6108582764455336,
      "grad_norm": 0.9300278752439212,
      "learning_rate": 3.4726285703359698e-06,
      "loss": 0.1102,
      "step": 20939
    },
    {
      "epoch": 0.6108874496761771,
      "grad_norm": 0.7062035862868438,
      "learning_rate": 3.4721787251969023e-06,
      "loss": 0.1332,
      "step": 20940
    },
    {
      "epoch": 0.6109166229068207,
      "grad_norm": 0.9170001169958084,
      "learning_rate": 3.47172889369786e-06,
      "loss": 0.1292,
      "step": 20941
    },
    {
      "epoch": 0.6109457961374642,
      "grad_norm": 1.1138343930381036,
      "learning_rate": 3.471279075842857e-06,
      "loss": 0.1292,
      "step": 20942
    },
    {
      "epoch": 0.6109749693681078,
      "grad_norm": 0.8864499926165303,
      "learning_rate": 3.4708292716359094e-06,
      "loss": 0.1507,
      "step": 20943
    },
    {
      "epoch": 0.6110041425987514,
      "grad_norm": 0.80922507010581,
      "learning_rate": 3.4703794810810334e-06,
      "loss": 0.1303,
      "step": 20944
    },
    {
      "epoch": 0.6110333158293949,
      "grad_norm": 0.7618441864480844,
      "learning_rate": 3.4699297041822444e-06,
      "loss": 0.1246,
      "step": 20945
    },
    {
      "epoch": 0.6110624890600385,
      "grad_norm": 1.1477619314815288,
      "learning_rate": 3.469479940943555e-06,
      "loss": 0.1192,
      "step": 20946
    },
    {
      "epoch": 0.6110916622906821,
      "grad_norm": 0.8051245272157639,
      "learning_rate": 3.4690301913689863e-06,
      "loss": 0.106,
      "step": 20947
    },
    {
      "epoch": 0.6111208355213257,
      "grad_norm": 0.9034847528658548,
      "learning_rate": 3.4685804554625495e-06,
      "loss": 0.1174,
      "step": 20948
    },
    {
      "epoch": 0.6111500087519692,
      "grad_norm": 0.921643093258146,
      "learning_rate": 3.468130733228261e-06,
      "loss": 0.134,
      "step": 20949
    },
    {
      "epoch": 0.6111791819826128,
      "grad_norm": 1.070957243678224,
      "learning_rate": 3.4676810246701365e-06,
      "loss": 0.1194,
      "step": 20950
    },
    {
      "epoch": 0.6112083552132563,
      "grad_norm": 1.166313172374092,
      "learning_rate": 3.467231329792189e-06,
      "loss": 0.156,
      "step": 20951
    },
    {
      "epoch": 0.6112375284438999,
      "grad_norm": 0.7777529780148577,
      "learning_rate": 3.4667816485984334e-06,
      "loss": 0.135,
      "step": 20952
    },
    {
      "epoch": 0.6112667016745434,
      "grad_norm": 0.7953839416045292,
      "learning_rate": 3.4663319810928865e-06,
      "loss": 0.1207,
      "step": 20953
    },
    {
      "epoch": 0.611295874905187,
      "grad_norm": 0.9927925875520035,
      "learning_rate": 3.465882327279561e-06,
      "loss": 0.1361,
      "step": 20954
    },
    {
      "epoch": 0.6113250481358306,
      "grad_norm": 0.9540477595798074,
      "learning_rate": 3.465432687162473e-06,
      "loss": 0.1155,
      "step": 20955
    },
    {
      "epoch": 0.6113542213664741,
      "grad_norm": 1.0290227431209418,
      "learning_rate": 3.464983060745635e-06,
      "loss": 0.1434,
      "step": 20956
    },
    {
      "epoch": 0.6113833945971177,
      "grad_norm": 0.8991033544133925,
      "learning_rate": 3.4645334480330616e-06,
      "loss": 0.1339,
      "step": 20957
    },
    {
      "epoch": 0.6114125678277612,
      "grad_norm": 0.7888454291008832,
      "learning_rate": 3.464083849028766e-06,
      "loss": 0.1575,
      "step": 20958
    },
    {
      "epoch": 0.6114417410584048,
      "grad_norm": 0.9488993324605315,
      "learning_rate": 3.463634263736765e-06,
      "loss": 0.1171,
      "step": 20959
    },
    {
      "epoch": 0.6114709142890483,
      "grad_norm": 0.8432693116187181,
      "learning_rate": 3.46318469216107e-06,
      "loss": 0.1466,
      "step": 20960
    },
    {
      "epoch": 0.611500087519692,
      "grad_norm": 0.6769327721821427,
      "learning_rate": 3.4627351343056947e-06,
      "loss": 0.1115,
      "step": 20961
    },
    {
      "epoch": 0.6115292607503355,
      "grad_norm": 0.8731764480185785,
      "learning_rate": 3.4622855901746543e-06,
      "loss": 0.1363,
      "step": 20962
    },
    {
      "epoch": 0.6115584339809791,
      "grad_norm": 0.953362235882418,
      "learning_rate": 3.46183605977196e-06,
      "loss": 0.1498,
      "step": 20963
    },
    {
      "epoch": 0.6115876072116226,
      "grad_norm": 1.0649145980754482,
      "learning_rate": 3.4613865431016253e-06,
      "loss": 0.1363,
      "step": 20964
    },
    {
      "epoch": 0.6116167804422662,
      "grad_norm": 0.9672501895694245,
      "learning_rate": 3.460937040167665e-06,
      "loss": 0.125,
      "step": 20965
    },
    {
      "epoch": 0.6116459536729097,
      "grad_norm": 0.9080402169163565,
      "learning_rate": 3.4604875509740922e-06,
      "loss": 0.1217,
      "step": 20966
    },
    {
      "epoch": 0.6116751269035533,
      "grad_norm": 1.1121807349407231,
      "learning_rate": 3.460038075524919e-06,
      "loss": 0.1251,
      "step": 20967
    },
    {
      "epoch": 0.6117043001341969,
      "grad_norm": 1.0001426277027219,
      "learning_rate": 3.4595886138241575e-06,
      "loss": 0.1473,
      "step": 20968
    },
    {
      "epoch": 0.6117334733648404,
      "grad_norm": 0.8792572688182623,
      "learning_rate": 3.459139165875821e-06,
      "loss": 0.1309,
      "step": 20969
    },
    {
      "epoch": 0.611762646595484,
      "grad_norm": 0.7173610154422235,
      "learning_rate": 3.4586897316839217e-06,
      "loss": 0.1387,
      "step": 20970
    },
    {
      "epoch": 0.6117918198261275,
      "grad_norm": 1.098478615007473,
      "learning_rate": 3.458240311252473e-06,
      "loss": 0.1293,
      "step": 20971
    },
    {
      "epoch": 0.6118209930567711,
      "grad_norm": 0.7366822015674209,
      "learning_rate": 3.4577909045854884e-06,
      "loss": 0.1167,
      "step": 20972
    },
    {
      "epoch": 0.6118501662874146,
      "grad_norm": 0.9817394618978629,
      "learning_rate": 3.4573415116869774e-06,
      "loss": 0.1205,
      "step": 20973
    },
    {
      "epoch": 0.6118793395180583,
      "grad_norm": 0.8977344426497099,
      "learning_rate": 3.456892132560953e-06,
      "loss": 0.1575,
      "step": 20974
    },
    {
      "epoch": 0.6119085127487018,
      "grad_norm": 0.8035672913976277,
      "learning_rate": 3.456442767211428e-06,
      "loss": 0.1278,
      "step": 20975
    },
    {
      "epoch": 0.6119376859793454,
      "grad_norm": 0.6974203347194811,
      "learning_rate": 3.45599341564241e-06,
      "loss": 0.1244,
      "step": 20976
    },
    {
      "epoch": 0.611966859209989,
      "grad_norm": 0.8611962303220687,
      "learning_rate": 3.4555440778579185e-06,
      "loss": 0.1056,
      "step": 20977
    },
    {
      "epoch": 0.6119960324406325,
      "grad_norm": 0.7342171979455759,
      "learning_rate": 3.455094753861959e-06,
      "loss": 0.119,
      "step": 20978
    },
    {
      "epoch": 0.612025205671276,
      "grad_norm": 0.6687892399548173,
      "learning_rate": 3.4546454436585454e-06,
      "loss": 0.1088,
      "step": 20979
    },
    {
      "epoch": 0.6120543789019196,
      "grad_norm": 0.9301980438692424,
      "learning_rate": 3.4541961472516882e-06,
      "loss": 0.1277,
      "step": 20980
    },
    {
      "epoch": 0.6120835521325632,
      "grad_norm": 0.7632162987039959,
      "learning_rate": 3.4537468646453987e-06,
      "loss": 0.1122,
      "step": 20981
    },
    {
      "epoch": 0.6121127253632067,
      "grad_norm": 0.7659636098045769,
      "learning_rate": 3.4532975958436866e-06,
      "loss": 0.1149,
      "step": 20982
    },
    {
      "epoch": 0.6121418985938503,
      "grad_norm": 0.8543931345825652,
      "learning_rate": 3.4528483408505653e-06,
      "loss": 0.1089,
      "step": 20983
    },
    {
      "epoch": 0.6121710718244938,
      "grad_norm": 1.0042344270139514,
      "learning_rate": 3.452399099670045e-06,
      "loss": 0.1227,
      "step": 20984
    },
    {
      "epoch": 0.6122002450551374,
      "grad_norm": 0.8658228461930776,
      "learning_rate": 3.451949872306137e-06,
      "loss": 0.1161,
      "step": 20985
    },
    {
      "epoch": 0.6122294182857809,
      "grad_norm": 0.8556424588367897,
      "learning_rate": 3.4515006587628497e-06,
      "loss": 0.1236,
      "step": 20986
    },
    {
      "epoch": 0.6122585915164245,
      "grad_norm": 0.8755333393449263,
      "learning_rate": 3.4510514590441957e-06,
      "loss": 0.1323,
      "step": 20987
    },
    {
      "epoch": 0.6122877647470681,
      "grad_norm": 1.079093149162543,
      "learning_rate": 3.4506022731541826e-06,
      "loss": 0.1398,
      "step": 20988
    },
    {
      "epoch": 0.6123169379777117,
      "grad_norm": 1.16111636361247,
      "learning_rate": 3.450153101096825e-06,
      "loss": 0.1099,
      "step": 20989
    },
    {
      "epoch": 0.6123461112083552,
      "grad_norm": 0.779681191738122,
      "learning_rate": 3.4497039428761293e-06,
      "loss": 0.1142,
      "step": 20990
    },
    {
      "epoch": 0.6123752844389988,
      "grad_norm": 0.9628872504343245,
      "learning_rate": 3.4492547984961067e-06,
      "loss": 0.1303,
      "step": 20991
    },
    {
      "epoch": 0.6124044576696424,
      "grad_norm": 1.051397096908327,
      "learning_rate": 3.4488056679607685e-06,
      "loss": 0.1168,
      "step": 20992
    },
    {
      "epoch": 0.6124336309002859,
      "grad_norm": 0.9578684732032886,
      "learning_rate": 3.4483565512741214e-06,
      "loss": 0.1236,
      "step": 20993
    },
    {
      "epoch": 0.6124628041309295,
      "grad_norm": 0.869847829921037,
      "learning_rate": 3.4479074484401763e-06,
      "loss": 0.1325,
      "step": 20994
    },
    {
      "epoch": 0.612491977361573,
      "grad_norm": 1.402606131990117,
      "learning_rate": 3.4474583594629436e-06,
      "loss": 0.1406,
      "step": 20995
    },
    {
      "epoch": 0.6125211505922166,
      "grad_norm": 1.0505145326623786,
      "learning_rate": 3.447009284346432e-06,
      "loss": 0.1255,
      "step": 20996
    },
    {
      "epoch": 0.6125503238228601,
      "grad_norm": 0.8443454167133138,
      "learning_rate": 3.4465602230946517e-06,
      "loss": 0.1312,
      "step": 20997
    },
    {
      "epoch": 0.6125794970535037,
      "grad_norm": 1.530233319482489,
      "learning_rate": 3.44611117571161e-06,
      "loss": 0.1327,
      "step": 20998
    },
    {
      "epoch": 0.6126086702841472,
      "grad_norm": 1.204036597985255,
      "learning_rate": 3.445662142201317e-06,
      "loss": 0.1275,
      "step": 20999
    },
    {
      "epoch": 0.6126378435147908,
      "grad_norm": 0.8904733020868539,
      "learning_rate": 3.4452131225677798e-06,
      "loss": 0.1215,
      "step": 21000
    },
    {
      "epoch": 0.6126670167454343,
      "grad_norm": 1.2092441567946983,
      "learning_rate": 3.4447641168150103e-06,
      "loss": 0.1449,
      "step": 21001
    },
    {
      "epoch": 0.612696189976078,
      "grad_norm": 1.298882785905577,
      "learning_rate": 3.4443151249470163e-06,
      "loss": 0.1352,
      "step": 21002
    },
    {
      "epoch": 0.6127253632067216,
      "grad_norm": 1.1317444753596781,
      "learning_rate": 3.443866146967804e-06,
      "loss": 0.1317,
      "step": 21003
    },
    {
      "epoch": 0.6127545364373651,
      "grad_norm": 0.770033170431705,
      "learning_rate": 3.4434171828813833e-06,
      "loss": 0.1353,
      "step": 21004
    },
    {
      "epoch": 0.6127837096680087,
      "grad_norm": 0.7785240795060671,
      "learning_rate": 3.4429682326917645e-06,
      "loss": 0.1136,
      "step": 21005
    },
    {
      "epoch": 0.6128128828986522,
      "grad_norm": 1.0235229812936215,
      "learning_rate": 3.44251929640295e-06,
      "loss": 0.1378,
      "step": 21006
    },
    {
      "epoch": 0.6128420561292958,
      "grad_norm": 0.9164818800568367,
      "learning_rate": 3.4420703740189544e-06,
      "loss": 0.1312,
      "step": 21007
    },
    {
      "epoch": 0.6128712293599393,
      "grad_norm": 0.9996494893264714,
      "learning_rate": 3.441621465543781e-06,
      "loss": 0.1621,
      "step": 21008
    },
    {
      "epoch": 0.6129004025905829,
      "grad_norm": 0.7748913711001848,
      "learning_rate": 3.4411725709814397e-06,
      "loss": 0.1247,
      "step": 21009
    },
    {
      "epoch": 0.6129295758212264,
      "grad_norm": 0.9984979162441634,
      "learning_rate": 3.4407236903359385e-06,
      "loss": 0.1258,
      "step": 21010
    },
    {
      "epoch": 0.61295874905187,
      "grad_norm": 0.9676661351595874,
      "learning_rate": 3.4402748236112827e-06,
      "loss": 0.1379,
      "step": 21011
    },
    {
      "epoch": 0.6129879222825135,
      "grad_norm": 0.8025287907469718,
      "learning_rate": 3.43982597081148e-06,
      "loss": 0.1277,
      "step": 21012
    },
    {
      "epoch": 0.6130170955131571,
      "grad_norm": 0.9375310565927569,
      "learning_rate": 3.43937713194054e-06,
      "loss": 0.1508,
      "step": 21013
    },
    {
      "epoch": 0.6130462687438006,
      "grad_norm": 0.7262399909554743,
      "learning_rate": 3.4389283070024684e-06,
      "loss": 0.1002,
      "step": 21014
    },
    {
      "epoch": 0.6130754419744443,
      "grad_norm": 0.7272098888733515,
      "learning_rate": 3.4384794960012734e-06,
      "loss": 0.1143,
      "step": 21015
    },
    {
      "epoch": 0.6131046152050879,
      "grad_norm": 0.8919304754962747,
      "learning_rate": 3.438030698940959e-06,
      "loss": 0.1176,
      "step": 21016
    },
    {
      "epoch": 0.6131337884357314,
      "grad_norm": 0.813250615691482,
      "learning_rate": 3.437581915825534e-06,
      "loss": 0.1408,
      "step": 21017
    },
    {
      "epoch": 0.613162961666375,
      "grad_norm": 0.7351002698829738,
      "learning_rate": 3.4371331466590038e-06,
      "loss": 0.1049,
      "step": 21018
    },
    {
      "epoch": 0.6131921348970185,
      "grad_norm": 0.876096414795378,
      "learning_rate": 3.4366843914453774e-06,
      "loss": 0.1352,
      "step": 21019
    },
    {
      "epoch": 0.6132213081276621,
      "grad_norm": 0.8369573559671274,
      "learning_rate": 3.436235650188659e-06,
      "loss": 0.1604,
      "step": 21020
    },
    {
      "epoch": 0.6132504813583056,
      "grad_norm": 0.710377594669661,
      "learning_rate": 3.4357869228928553e-06,
      "loss": 0.105,
      "step": 21021
    },
    {
      "epoch": 0.6132796545889492,
      "grad_norm": 0.7724533961278669,
      "learning_rate": 3.4353382095619737e-06,
      "loss": 0.1229,
      "step": 21022
    },
    {
      "epoch": 0.6133088278195927,
      "grad_norm": 0.9649071735315539,
      "learning_rate": 3.4348895102000173e-06,
      "loss": 0.1367,
      "step": 21023
    },
    {
      "epoch": 0.6133380010502363,
      "grad_norm": 0.9883881679299172,
      "learning_rate": 3.4344408248109933e-06,
      "loss": 0.12,
      "step": 21024
    },
    {
      "epoch": 0.6133671742808798,
      "grad_norm": 0.7613585357562115,
      "learning_rate": 3.4339921533989083e-06,
      "loss": 0.1059,
      "step": 21025
    },
    {
      "epoch": 0.6133963475115234,
      "grad_norm": 0.6464953118660361,
      "learning_rate": 3.4335434959677683e-06,
      "loss": 0.1124,
      "step": 21026
    },
    {
      "epoch": 0.6134255207421669,
      "grad_norm": 0.767443712319199,
      "learning_rate": 3.433094852521579e-06,
      "loss": 0.1146,
      "step": 21027
    },
    {
      "epoch": 0.6134546939728105,
      "grad_norm": 1.2296710589919633,
      "learning_rate": 3.4326462230643436e-06,
      "loss": 0.1302,
      "step": 21028
    },
    {
      "epoch": 0.6134838672034542,
      "grad_norm": 0.7219022973250488,
      "learning_rate": 3.4321976076000685e-06,
      "loss": 0.1252,
      "step": 21029
    },
    {
      "epoch": 0.6135130404340977,
      "grad_norm": 0.837349692783926,
      "learning_rate": 3.431749006132758e-06,
      "loss": 0.111,
      "step": 21030
    },
    {
      "epoch": 0.6135422136647413,
      "grad_norm": 0.7466805912168603,
      "learning_rate": 3.431300418666419e-06,
      "loss": 0.1386,
      "step": 21031
    },
    {
      "epoch": 0.6135713868953848,
      "grad_norm": 0.8062874915350087,
      "learning_rate": 3.4308518452050567e-06,
      "loss": 0.1016,
      "step": 21032
    },
    {
      "epoch": 0.6136005601260284,
      "grad_norm": 0.8428077027826459,
      "learning_rate": 3.4304032857526724e-06,
      "loss": 0.1344,
      "step": 21033
    },
    {
      "epoch": 0.6136297333566719,
      "grad_norm": 1.1384267038198153,
      "learning_rate": 3.4299547403132738e-06,
      "loss": 0.137,
      "step": 21034
    },
    {
      "epoch": 0.6136589065873155,
      "grad_norm": 0.7902375493699365,
      "learning_rate": 3.4295062088908652e-06,
      "loss": 0.1286,
      "step": 21035
    },
    {
      "epoch": 0.613688079817959,
      "grad_norm": 0.9446412109261622,
      "learning_rate": 3.4290576914894473e-06,
      "loss": 0.1219,
      "step": 21036
    },
    {
      "epoch": 0.6137172530486026,
      "grad_norm": 0.9453470235633491,
      "learning_rate": 3.4286091881130306e-06,
      "loss": 0.1283,
      "step": 21037
    },
    {
      "epoch": 0.6137464262792461,
      "grad_norm": 0.9393398829366166,
      "learning_rate": 3.4281606987656145e-06,
      "loss": 0.1274,
      "step": 21038
    },
    {
      "epoch": 0.6137755995098897,
      "grad_norm": 0.8114576726343591,
      "learning_rate": 3.427712223451205e-06,
      "loss": 0.0999,
      "step": 21039
    },
    {
      "epoch": 0.6138047727405332,
      "grad_norm": 0.8286629047477375,
      "learning_rate": 3.427263762173806e-06,
      "loss": 0.1214,
      "step": 21040
    },
    {
      "epoch": 0.6138339459711768,
      "grad_norm": 0.9669587631486285,
      "learning_rate": 3.4268153149374196e-06,
      "loss": 0.1456,
      "step": 21041
    },
    {
      "epoch": 0.6138631192018205,
      "grad_norm": 1.2406251188809492,
      "learning_rate": 3.42636688174605e-06,
      "loss": 0.1197,
      "step": 21042
    },
    {
      "epoch": 0.613892292432464,
      "grad_norm": 0.8051232750190032,
      "learning_rate": 3.425918462603702e-06,
      "loss": 0.1032,
      "step": 21043
    },
    {
      "epoch": 0.6139214656631076,
      "grad_norm": 0.7013276307148666,
      "learning_rate": 3.425470057514378e-06,
      "loss": 0.1232,
      "step": 21044
    },
    {
      "epoch": 0.6139506388937511,
      "grad_norm": 0.8151813991754139,
      "learning_rate": 3.4250216664820823e-06,
      "loss": 0.1196,
      "step": 21045
    },
    {
      "epoch": 0.6139798121243947,
      "grad_norm": 1.1766845593849624,
      "learning_rate": 3.424573289510817e-06,
      "loss": 0.1222,
      "step": 21046
    },
    {
      "epoch": 0.6140089853550382,
      "grad_norm": 0.8595289919133213,
      "learning_rate": 3.4241249266045846e-06,
      "loss": 0.1063,
      "step": 21047
    },
    {
      "epoch": 0.6140381585856818,
      "grad_norm": 0.977795390304621,
      "learning_rate": 3.4236765777673877e-06,
      "loss": 0.1065,
      "step": 21048
    },
    {
      "epoch": 0.6140673318163253,
      "grad_norm": 0.9509711323827088,
      "learning_rate": 3.4232282430032325e-06,
      "loss": 0.1311,
      "step": 21049
    },
    {
      "epoch": 0.6140965050469689,
      "grad_norm": 0.8516215564018293,
      "learning_rate": 3.4227799223161172e-06,
      "loss": 0.1038,
      "step": 21050
    },
    {
      "epoch": 0.6141256782776124,
      "grad_norm": 1.0373232414161326,
      "learning_rate": 3.4223316157100472e-06,
      "loss": 0.1203,
      "step": 21051
    },
    {
      "epoch": 0.614154851508256,
      "grad_norm": 1.1369227402149664,
      "learning_rate": 3.4218833231890247e-06,
      "loss": 0.1134,
      "step": 21052
    },
    {
      "epoch": 0.6141840247388995,
      "grad_norm": 0.9984749318565033,
      "learning_rate": 3.4214350447570497e-06,
      "loss": 0.1372,
      "step": 21053
    },
    {
      "epoch": 0.6142131979695431,
      "grad_norm": 0.8746272182503635,
      "learning_rate": 3.420986780418125e-06,
      "loss": 0.1223,
      "step": 21054
    },
    {
      "epoch": 0.6142423712001867,
      "grad_norm": 0.9208209942617852,
      "learning_rate": 3.420538530176255e-06,
      "loss": 0.1359,
      "step": 21055
    },
    {
      "epoch": 0.6142715444308303,
      "grad_norm": 0.8486047135830159,
      "learning_rate": 3.4200902940354393e-06,
      "loss": 0.1173,
      "step": 21056
    },
    {
      "epoch": 0.6143007176614739,
      "grad_norm": 0.9784875212204225,
      "learning_rate": 3.4196420719996815e-06,
      "loss": 0.1354,
      "step": 21057
    },
    {
      "epoch": 0.6143298908921174,
      "grad_norm": 0.9037462086589715,
      "learning_rate": 3.4191938640729804e-06,
      "loss": 0.13,
      "step": 21058
    },
    {
      "epoch": 0.614359064122761,
      "grad_norm": 0.836418206364951,
      "learning_rate": 3.4187456702593393e-06,
      "loss": 0.1164,
      "step": 21059
    },
    {
      "epoch": 0.6143882373534045,
      "grad_norm": 0.7692119225633084,
      "learning_rate": 3.4182974905627597e-06,
      "loss": 0.0974,
      "step": 21060
    },
    {
      "epoch": 0.6144174105840481,
      "grad_norm": 0.8262778150302602,
      "learning_rate": 3.4178493249872426e-06,
      "loss": 0.1179,
      "step": 21061
    },
    {
      "epoch": 0.6144465838146916,
      "grad_norm": 1.0896507636235115,
      "learning_rate": 3.4174011735367898e-06,
      "loss": 0.1482,
      "step": 21062
    },
    {
      "epoch": 0.6144757570453352,
      "grad_norm": 0.849059463558778,
      "learning_rate": 3.416953036215401e-06,
      "loss": 0.1557,
      "step": 21063
    },
    {
      "epoch": 0.6145049302759787,
      "grad_norm": 0.7333087844744282,
      "learning_rate": 3.416504913027077e-06,
      "loss": 0.11,
      "step": 21064
    },
    {
      "epoch": 0.6145341035066223,
      "grad_norm": 0.7265565962359889,
      "learning_rate": 3.416056803975818e-06,
      "loss": 0.1361,
      "step": 21065
    },
    {
      "epoch": 0.6145632767372659,
      "grad_norm": 1.0893694892523167,
      "learning_rate": 3.4156087090656274e-06,
      "loss": 0.1254,
      "step": 21066
    },
    {
      "epoch": 0.6145924499679094,
      "grad_norm": 0.8376286525754213,
      "learning_rate": 3.415160628300505e-06,
      "loss": 0.1272,
      "step": 21067
    },
    {
      "epoch": 0.614621623198553,
      "grad_norm": 0.7529455272292838,
      "learning_rate": 3.414712561684449e-06,
      "loss": 0.1017,
      "step": 21068
    },
    {
      "epoch": 0.6146507964291966,
      "grad_norm": 0.8563188556651878,
      "learning_rate": 3.414264509221461e-06,
      "loss": 0.1184,
      "step": 21069
    },
    {
      "epoch": 0.6146799696598402,
      "grad_norm": 0.9759476152761085,
      "learning_rate": 3.4138164709155415e-06,
      "loss": 0.1409,
      "step": 21070
    },
    {
      "epoch": 0.6147091428904837,
      "grad_norm": 0.9775050495130322,
      "learning_rate": 3.4133684467706872e-06,
      "loss": 0.1374,
      "step": 21071
    },
    {
      "epoch": 0.6147383161211273,
      "grad_norm": 0.6244597394948174,
      "learning_rate": 3.412920436790903e-06,
      "loss": 0.1086,
      "step": 21072
    },
    {
      "epoch": 0.6147674893517708,
      "grad_norm": 0.7340720042898693,
      "learning_rate": 3.4124724409801864e-06,
      "loss": 0.133,
      "step": 21073
    },
    {
      "epoch": 0.6147966625824144,
      "grad_norm": 0.9434362957191156,
      "learning_rate": 3.4120244593425363e-06,
      "loss": 0.1398,
      "step": 21074
    },
    {
      "epoch": 0.6148258358130579,
      "grad_norm": 0.7282879258179913,
      "learning_rate": 3.411576491881954e-06,
      "loss": 0.1129,
      "step": 21075
    },
    {
      "epoch": 0.6148550090437015,
      "grad_norm": 0.89164869118127,
      "learning_rate": 3.4111285386024363e-06,
      "loss": 0.1272,
      "step": 21076
    },
    {
      "epoch": 0.614884182274345,
      "grad_norm": 0.9496976075366732,
      "learning_rate": 3.4106805995079824e-06,
      "loss": 0.1528,
      "step": 21077
    },
    {
      "epoch": 0.6149133555049886,
      "grad_norm": 0.9619568063580806,
      "learning_rate": 3.4102326746025938e-06,
      "loss": 0.1261,
      "step": 21078
    },
    {
      "epoch": 0.6149425287356322,
      "grad_norm": 0.7389963679323991,
      "learning_rate": 3.40978476389027e-06,
      "loss": 0.1285,
      "step": 21079
    },
    {
      "epoch": 0.6149717019662757,
      "grad_norm": 0.7873182570850406,
      "learning_rate": 3.4093368673750066e-06,
      "loss": 0.1217,
      "step": 21080
    },
    {
      "epoch": 0.6150008751969193,
      "grad_norm": 1.124881653368094,
      "learning_rate": 3.408888985060804e-06,
      "loss": 0.1507,
      "step": 21081
    },
    {
      "epoch": 0.6150300484275628,
      "grad_norm": 0.8711768419178351,
      "learning_rate": 3.4084411169516618e-06,
      "loss": 0.1153,
      "step": 21082
    },
    {
      "epoch": 0.6150592216582065,
      "grad_norm": 0.5681554111961512,
      "learning_rate": 3.4079932630515746e-06,
      "loss": 0.1141,
      "step": 21083
    },
    {
      "epoch": 0.61508839488885,
      "grad_norm": 0.8012527760871635,
      "learning_rate": 3.4075454233645466e-06,
      "loss": 0.1251,
      "step": 21084
    },
    {
      "epoch": 0.6151175681194936,
      "grad_norm": 1.1940778013542062,
      "learning_rate": 3.407097597894572e-06,
      "loss": 0.1176,
      "step": 21085
    },
    {
      "epoch": 0.6151467413501371,
      "grad_norm": 0.783546930966101,
      "learning_rate": 3.4066497866456493e-06,
      "loss": 0.1315,
      "step": 21086
    },
    {
      "epoch": 0.6151759145807807,
      "grad_norm": 0.7274787811137401,
      "learning_rate": 3.406201989621778e-06,
      "loss": 0.1268,
      "step": 21087
    },
    {
      "epoch": 0.6152050878114242,
      "grad_norm": 0.8389753465093771,
      "learning_rate": 3.405754206826954e-06,
      "loss": 0.1186,
      "step": 21088
    },
    {
      "epoch": 0.6152342610420678,
      "grad_norm": 0.8868556445561018,
      "learning_rate": 3.4053064382651748e-06,
      "loss": 0.1124,
      "step": 21089
    },
    {
      "epoch": 0.6152634342727114,
      "grad_norm": 0.7182195739752382,
      "learning_rate": 3.4048586839404394e-06,
      "loss": 0.1229,
      "step": 21090
    },
    {
      "epoch": 0.6152926075033549,
      "grad_norm": 0.8918735417370072,
      "learning_rate": 3.4044109438567463e-06,
      "loss": 0.113,
      "step": 21091
    },
    {
      "epoch": 0.6153217807339985,
      "grad_norm": 0.8436804640959246,
      "learning_rate": 3.4039632180180915e-06,
      "loss": 0.1199,
      "step": 21092
    },
    {
      "epoch": 0.615350953964642,
      "grad_norm": 0.6505094891322994,
      "learning_rate": 3.403515506428471e-06,
      "loss": 0.111,
      "step": 21093
    },
    {
      "epoch": 0.6153801271952856,
      "grad_norm": 0.7803460636998947,
      "learning_rate": 3.4030678090918833e-06,
      "loss": 0.1418,
      "step": 21094
    },
    {
      "epoch": 0.6154093004259291,
      "grad_norm": 0.9766751223909023,
      "learning_rate": 3.4026201260123237e-06,
      "loss": 0.1441,
      "step": 21095
    },
    {
      "epoch": 0.6154384736565728,
      "grad_norm": 0.9481376533175228,
      "learning_rate": 3.402172457193792e-06,
      "loss": 0.1285,
      "step": 21096
    },
    {
      "epoch": 0.6154676468872163,
      "grad_norm": 0.8126063740133177,
      "learning_rate": 3.401724802640283e-06,
      "loss": 0.1139,
      "step": 21097
    },
    {
      "epoch": 0.6154968201178599,
      "grad_norm": 0.8163672154408407,
      "learning_rate": 3.401277162355793e-06,
      "loss": 0.1264,
      "step": 21098
    },
    {
      "epoch": 0.6155259933485034,
      "grad_norm": 0.8898152787830411,
      "learning_rate": 3.400829536344319e-06,
      "loss": 0.1047,
      "step": 21099
    },
    {
      "epoch": 0.615555166579147,
      "grad_norm": 0.9128706990403679,
      "learning_rate": 3.400381924609858e-06,
      "loss": 0.0934,
      "step": 21100
    },
    {
      "epoch": 0.6155843398097905,
      "grad_norm": 0.8037016179255396,
      "learning_rate": 3.3999343271564033e-06,
      "loss": 0.1174,
      "step": 21101
    },
    {
      "epoch": 0.6156135130404341,
      "grad_norm": 0.8151627688975245,
      "learning_rate": 3.3994867439879543e-06,
      "loss": 0.1284,
      "step": 21102
    },
    {
      "epoch": 0.6156426862710777,
      "grad_norm": 0.8541182688864145,
      "learning_rate": 3.399039175108505e-06,
      "loss": 0.1376,
      "step": 21103
    },
    {
      "epoch": 0.6156718595017212,
      "grad_norm": 0.9659765166684128,
      "learning_rate": 3.3985916205220527e-06,
      "loss": 0.1396,
      "step": 21104
    },
    {
      "epoch": 0.6157010327323648,
      "grad_norm": 0.6885270996459721,
      "learning_rate": 3.3981440802325922e-06,
      "loss": 0.1396,
      "step": 21105
    },
    {
      "epoch": 0.6157302059630083,
      "grad_norm": 0.9091368934832649,
      "learning_rate": 3.397696554244118e-06,
      "loss": 0.1489,
      "step": 21106
    },
    {
      "epoch": 0.6157593791936519,
      "grad_norm": 0.8662934895472221,
      "learning_rate": 3.3972490425606258e-06,
      "loss": 0.1273,
      "step": 21107
    },
    {
      "epoch": 0.6157885524242954,
      "grad_norm": 0.8242258428719526,
      "learning_rate": 3.3968015451861124e-06,
      "loss": 0.1416,
      "step": 21108
    },
    {
      "epoch": 0.615817725654939,
      "grad_norm": 1.005713719858816,
      "learning_rate": 3.3963540621245734e-06,
      "loss": 0.1153,
      "step": 21109
    },
    {
      "epoch": 0.6158468988855826,
      "grad_norm": 1.002304810971801,
      "learning_rate": 3.395906593380001e-06,
      "loss": 0.1185,
      "step": 21110
    },
    {
      "epoch": 0.6158760721162262,
      "grad_norm": 0.9061916118785468,
      "learning_rate": 3.395459138956392e-06,
      "loss": 0.1136,
      "step": 21111
    },
    {
      "epoch": 0.6159052453468697,
      "grad_norm": 0.946255304795538,
      "learning_rate": 3.395011698857742e-06,
      "loss": 0.1315,
      "step": 21112
    },
    {
      "epoch": 0.6159344185775133,
      "grad_norm": 1.3922510314359713,
      "learning_rate": 3.39456427308804e-06,
      "loss": 0.1151,
      "step": 21113
    },
    {
      "epoch": 0.6159635918081569,
      "grad_norm": 0.7777176414925873,
      "learning_rate": 3.39411686165129e-06,
      "loss": 0.1091,
      "step": 21114
    },
    {
      "epoch": 0.6159927650388004,
      "grad_norm": 0.7181694510957686,
      "learning_rate": 3.393669464551479e-06,
      "loss": 0.1183,
      "step": 21115
    },
    {
      "epoch": 0.616021938269444,
      "grad_norm": 1.2838727664589396,
      "learning_rate": 3.393222081792603e-06,
      "loss": 0.1194,
      "step": 21116
    },
    {
      "epoch": 0.6160511115000875,
      "grad_norm": 1.0335190932026679,
      "learning_rate": 3.3927747133786593e-06,
      "loss": 0.1491,
      "step": 21117
    },
    {
      "epoch": 0.6160802847307311,
      "grad_norm": 0.921466228499897,
      "learning_rate": 3.3923273593136376e-06,
      "loss": 0.1104,
      "step": 21118
    },
    {
      "epoch": 0.6161094579613746,
      "grad_norm": 0.973417624582613,
      "learning_rate": 3.3918800196015324e-06,
      "loss": 0.1212,
      "step": 21119
    },
    {
      "epoch": 0.6161386311920182,
      "grad_norm": 1.1702767967094085,
      "learning_rate": 3.3914326942463393e-06,
      "loss": 0.1286,
      "step": 21120
    },
    {
      "epoch": 0.6161678044226617,
      "grad_norm": 0.9303754380158115,
      "learning_rate": 3.390985383252051e-06,
      "loss": 0.1277,
      "step": 21121
    },
    {
      "epoch": 0.6161969776533053,
      "grad_norm": 0.8328432298832673,
      "learning_rate": 3.3905380866226622e-06,
      "loss": 0.1362,
      "step": 21122
    },
    {
      "epoch": 0.6162261508839489,
      "grad_norm": 1.0653924386615603,
      "learning_rate": 3.3900908043621642e-06,
      "loss": 0.1226,
      "step": 21123
    },
    {
      "epoch": 0.6162553241145925,
      "grad_norm": 0.8004490174977535,
      "learning_rate": 3.3896435364745516e-06,
      "loss": 0.1521,
      "step": 21124
    },
    {
      "epoch": 0.616284497345236,
      "grad_norm": 0.7318667905252606,
      "learning_rate": 3.389196282963816e-06,
      "loss": 0.1108,
      "step": 21125
    },
    {
      "epoch": 0.6163136705758796,
      "grad_norm": 0.9010306237909933,
      "learning_rate": 3.388749043833952e-06,
      "loss": 0.1253,
      "step": 21126
    },
    {
      "epoch": 0.6163428438065232,
      "grad_norm": 0.802820715916072,
      "learning_rate": 3.3883018190889526e-06,
      "loss": 0.131,
      "step": 21127
    },
    {
      "epoch": 0.6163720170371667,
      "grad_norm": 0.8216216258840674,
      "learning_rate": 3.3878546087328096e-06,
      "loss": 0.1068,
      "step": 21128
    },
    {
      "epoch": 0.6164011902678103,
      "grad_norm": 0.8973045211638617,
      "learning_rate": 3.3874074127695156e-06,
      "loss": 0.1166,
      "step": 21129
    },
    {
      "epoch": 0.6164303634984538,
      "grad_norm": 2.5582603756955216,
      "learning_rate": 3.386960231203064e-06,
      "loss": 0.1276,
      "step": 21130
    },
    {
      "epoch": 0.6164595367290974,
      "grad_norm": 0.9130987991297592,
      "learning_rate": 3.3865130640374444e-06,
      "loss": 0.1256,
      "step": 21131
    },
    {
      "epoch": 0.6164887099597409,
      "grad_norm": 1.0208648611113489,
      "learning_rate": 3.3860659112766526e-06,
      "loss": 0.1274,
      "step": 21132
    },
    {
      "epoch": 0.6165178831903845,
      "grad_norm": 0.6996033325044707,
      "learning_rate": 3.3856187729246785e-06,
      "loss": 0.1358,
      "step": 21133
    },
    {
      "epoch": 0.616547056421028,
      "grad_norm": 0.9373999646860356,
      "learning_rate": 3.3851716489855146e-06,
      "loss": 0.1511,
      "step": 21134
    },
    {
      "epoch": 0.6165762296516716,
      "grad_norm": 0.9384190485217382,
      "learning_rate": 3.3847245394631544e-06,
      "loss": 0.1321,
      "step": 21135
    },
    {
      "epoch": 0.6166054028823151,
      "grad_norm": 0.90609768373347,
      "learning_rate": 3.384277444361586e-06,
      "loss": 0.1466,
      "step": 21136
    },
    {
      "epoch": 0.6166345761129588,
      "grad_norm": 0.7934721185413713,
      "learning_rate": 3.3838303636848022e-06,
      "loss": 0.1396,
      "step": 21137
    },
    {
      "epoch": 0.6166637493436024,
      "grad_norm": 1.1830060333479426,
      "learning_rate": 3.383383297436796e-06,
      "loss": 0.1404,
      "step": 21138
    },
    {
      "epoch": 0.6166929225742459,
      "grad_norm": 0.9254301458145205,
      "learning_rate": 3.38293624562156e-06,
      "loss": 0.1173,
      "step": 21139
    },
    {
      "epoch": 0.6167220958048895,
      "grad_norm": 0.7713612792198895,
      "learning_rate": 3.3824892082430803e-06,
      "loss": 0.1207,
      "step": 21140
    },
    {
      "epoch": 0.616751269035533,
      "grad_norm": 1.2303376297944446,
      "learning_rate": 3.382042185305352e-06,
      "loss": 0.13,
      "step": 21141
    },
    {
      "epoch": 0.6167804422661766,
      "grad_norm": 0.9861930183687242,
      "learning_rate": 3.3815951768123654e-06,
      "loss": 0.1178,
      "step": 21142
    },
    {
      "epoch": 0.6168096154968201,
      "grad_norm": 0.7440257645961308,
      "learning_rate": 3.381148182768108e-06,
      "loss": 0.1159,
      "step": 21143
    },
    {
      "epoch": 0.6168387887274637,
      "grad_norm": 1.0968413448562162,
      "learning_rate": 3.3807012031765758e-06,
      "loss": 0.1283,
      "step": 21144
    },
    {
      "epoch": 0.6168679619581072,
      "grad_norm": 1.0142731549523618,
      "learning_rate": 3.3802542380417556e-06,
      "loss": 0.1115,
      "step": 21145
    },
    {
      "epoch": 0.6168971351887508,
      "grad_norm": 0.7268069163428662,
      "learning_rate": 3.379807287367639e-06,
      "loss": 0.1229,
      "step": 21146
    },
    {
      "epoch": 0.6169263084193943,
      "grad_norm": 0.8990398893279822,
      "learning_rate": 3.3793603511582178e-06,
      "loss": 0.1052,
      "step": 21147
    },
    {
      "epoch": 0.6169554816500379,
      "grad_norm": 1.3020606188837562,
      "learning_rate": 3.378913429417479e-06,
      "loss": 0.1506,
      "step": 21148
    },
    {
      "epoch": 0.6169846548806814,
      "grad_norm": 0.9374825750232973,
      "learning_rate": 3.378466522149413e-06,
      "loss": 0.1533,
      "step": 21149
    },
    {
      "epoch": 0.6170138281113251,
      "grad_norm": 0.9108943977469091,
      "learning_rate": 3.3780196293580125e-06,
      "loss": 0.1158,
      "step": 21150
    },
    {
      "epoch": 0.6170430013419687,
      "grad_norm": 0.9500285737204432,
      "learning_rate": 3.3775727510472644e-06,
      "loss": 0.1336,
      "step": 21151
    },
    {
      "epoch": 0.6170721745726122,
      "grad_norm": 0.941945039929362,
      "learning_rate": 3.3771258872211614e-06,
      "loss": 0.1101,
      "step": 21152
    },
    {
      "epoch": 0.6171013478032558,
      "grad_norm": 0.7556585325761117,
      "learning_rate": 3.37667903788369e-06,
      "loss": 0.1,
      "step": 21153
    },
    {
      "epoch": 0.6171305210338993,
      "grad_norm": 0.8734207602001116,
      "learning_rate": 3.3762322030388407e-06,
      "loss": 0.1027,
      "step": 21154
    },
    {
      "epoch": 0.6171596942645429,
      "grad_norm": 0.8427695756956373,
      "learning_rate": 3.375785382690601e-06,
      "loss": 0.1034,
      "step": 21155
    },
    {
      "epoch": 0.6171888674951864,
      "grad_norm": 0.8173671443059403,
      "learning_rate": 3.3753385768429624e-06,
      "loss": 0.11,
      "step": 21156
    },
    {
      "epoch": 0.61721804072583,
      "grad_norm": 0.7632379967922096,
      "learning_rate": 3.3748917854999153e-06,
      "loss": 0.0903,
      "step": 21157
    },
    {
      "epoch": 0.6172472139564735,
      "grad_norm": 0.899417514099561,
      "learning_rate": 3.3744450086654444e-06,
      "loss": 0.1207,
      "step": 21158
    },
    {
      "epoch": 0.6172763871871171,
      "grad_norm": 1.0236612305183193,
      "learning_rate": 3.3739982463435417e-06,
      "loss": 0.1486,
      "step": 21159
    },
    {
      "epoch": 0.6173055604177606,
      "grad_norm": 0.802210837903345,
      "learning_rate": 3.3735514985381944e-06,
      "loss": 0.1263,
      "step": 21160
    },
    {
      "epoch": 0.6173347336484042,
      "grad_norm": 0.8378422940751181,
      "learning_rate": 3.3731047652533892e-06,
      "loss": 0.1133,
      "step": 21161
    },
    {
      "epoch": 0.6173639068790477,
      "grad_norm": 1.1110821027982585,
      "learning_rate": 3.372658046493118e-06,
      "loss": 0.1414,
      "step": 21162
    },
    {
      "epoch": 0.6173930801096913,
      "grad_norm": 1.1105431698320545,
      "learning_rate": 3.3722113422613668e-06,
      "loss": 0.1099,
      "step": 21163
    },
    {
      "epoch": 0.617422253340335,
      "grad_norm": 0.7429347405761328,
      "learning_rate": 3.371764652562124e-06,
      "loss": 0.1043,
      "step": 21164
    },
    {
      "epoch": 0.6174514265709785,
      "grad_norm": 0.9633949073256,
      "learning_rate": 3.3713179773993787e-06,
      "loss": 0.1377,
      "step": 21165
    },
    {
      "epoch": 0.6174805998016221,
      "grad_norm": 1.0267365838698848,
      "learning_rate": 3.3708713167771166e-06,
      "loss": 0.1389,
      "step": 21166
    },
    {
      "epoch": 0.6175097730322656,
      "grad_norm": 1.0615631732340258,
      "learning_rate": 3.3704246706993255e-06,
      "loss": 0.1292,
      "step": 21167
    },
    {
      "epoch": 0.6175389462629092,
      "grad_norm": 0.8260204382415454,
      "learning_rate": 3.369978039169995e-06,
      "loss": 0.1252,
      "step": 21168
    },
    {
      "epoch": 0.6175681194935527,
      "grad_norm": 0.7363162209643128,
      "learning_rate": 3.3695314221931124e-06,
      "loss": 0.1297,
      "step": 21169
    },
    {
      "epoch": 0.6175972927241963,
      "grad_norm": 1.1806912401812193,
      "learning_rate": 3.369084819772663e-06,
      "loss": 0.134,
      "step": 21170
    },
    {
      "epoch": 0.6176264659548398,
      "grad_norm": 1.1202515002285367,
      "learning_rate": 3.3686382319126353e-06,
      "loss": 0.1309,
      "step": 21171
    },
    {
      "epoch": 0.6176556391854834,
      "grad_norm": 0.8047278167034158,
      "learning_rate": 3.368191658617017e-06,
      "loss": 0.1262,
      "step": 21172
    },
    {
      "epoch": 0.6176848124161269,
      "grad_norm": 0.6263439729541208,
      "learning_rate": 3.367745099889791e-06,
      "loss": 0.1286,
      "step": 21173
    },
    {
      "epoch": 0.6177139856467705,
      "grad_norm": 0.726227833659534,
      "learning_rate": 3.36729855573495e-06,
      "loss": 0.1359,
      "step": 21174
    },
    {
      "epoch": 0.617743158877414,
      "grad_norm": 1.057684417738265,
      "learning_rate": 3.3668520261564764e-06,
      "loss": 0.1254,
      "step": 21175
    },
    {
      "epoch": 0.6177723321080576,
      "grad_norm": 1.0064517708224447,
      "learning_rate": 3.3664055111583586e-06,
      "loss": 0.1468,
      "step": 21176
    },
    {
      "epoch": 0.6178015053387013,
      "grad_norm": 0.7996593234194277,
      "learning_rate": 3.3659590107445833e-06,
      "loss": 0.1249,
      "step": 21177
    },
    {
      "epoch": 0.6178306785693448,
      "grad_norm": 0.7087731280318946,
      "learning_rate": 3.3655125249191344e-06,
      "loss": 0.1063,
      "step": 21178
    },
    {
      "epoch": 0.6178598517999884,
      "grad_norm": 0.784415737054412,
      "learning_rate": 3.365066053685999e-06,
      "loss": 0.1065,
      "step": 21179
    },
    {
      "epoch": 0.6178890250306319,
      "grad_norm": 0.9828445544343942,
      "learning_rate": 3.3646195970491645e-06,
      "loss": 0.1356,
      "step": 21180
    },
    {
      "epoch": 0.6179181982612755,
      "grad_norm": 0.8968393461343391,
      "learning_rate": 3.364173155012616e-06,
      "loss": 0.1352,
      "step": 21181
    },
    {
      "epoch": 0.617947371491919,
      "grad_norm": 0.7622712764123849,
      "learning_rate": 3.3637267275803397e-06,
      "loss": 0.1169,
      "step": 21182
    },
    {
      "epoch": 0.6179765447225626,
      "grad_norm": 0.9958838711416718,
      "learning_rate": 3.36328031475632e-06,
      "loss": 0.1516,
      "step": 21183
    },
    {
      "epoch": 0.6180057179532061,
      "grad_norm": 0.7650714280782808,
      "learning_rate": 3.3628339165445427e-06,
      "loss": 0.1392,
      "step": 21184
    },
    {
      "epoch": 0.6180348911838497,
      "grad_norm": 0.9511637695257763,
      "learning_rate": 3.3623875329489923e-06,
      "loss": 0.1399,
      "step": 21185
    },
    {
      "epoch": 0.6180640644144932,
      "grad_norm": 0.7707891357080175,
      "learning_rate": 3.3619411639736566e-06,
      "loss": 0.128,
      "step": 21186
    },
    {
      "epoch": 0.6180932376451368,
      "grad_norm": 0.8522396463338711,
      "learning_rate": 3.3614948096225193e-06,
      "loss": 0.1276,
      "step": 21187
    },
    {
      "epoch": 0.6181224108757803,
      "grad_norm": 0.7604940444213334,
      "learning_rate": 3.3610484698995647e-06,
      "loss": 0.1423,
      "step": 21188
    },
    {
      "epoch": 0.6181515841064239,
      "grad_norm": 0.7199763847857643,
      "learning_rate": 3.3606021448087778e-06,
      "loss": 0.1142,
      "step": 21189
    },
    {
      "epoch": 0.6181807573370675,
      "grad_norm": 0.6883281593330257,
      "learning_rate": 3.360155834354145e-06,
      "loss": 0.1298,
      "step": 21190
    },
    {
      "epoch": 0.6182099305677111,
      "grad_norm": 1.2775797964422437,
      "learning_rate": 3.359709538539647e-06,
      "loss": 0.1405,
      "step": 21191
    },
    {
      "epoch": 0.6182391037983547,
      "grad_norm": 0.8332967776041809,
      "learning_rate": 3.359263257369272e-06,
      "loss": 0.1392,
      "step": 21192
    },
    {
      "epoch": 0.6182682770289982,
      "grad_norm": 0.9422662887694544,
      "learning_rate": 3.3588169908470024e-06,
      "loss": 0.129,
      "step": 21193
    },
    {
      "epoch": 0.6182974502596418,
      "grad_norm": 0.8398392910025081,
      "learning_rate": 3.358370738976825e-06,
      "loss": 0.122,
      "step": 21194
    },
    {
      "epoch": 0.6183266234902853,
      "grad_norm": 0.8505175982716272,
      "learning_rate": 3.35792450176272e-06,
      "loss": 0.1229,
      "step": 21195
    },
    {
      "epoch": 0.6183557967209289,
      "grad_norm": 0.7003612590512537,
      "learning_rate": 3.3574782792086735e-06,
      "loss": 0.0949,
      "step": 21196
    },
    {
      "epoch": 0.6183849699515724,
      "grad_norm": 0.7088503878253166,
      "learning_rate": 3.357032071318667e-06,
      "loss": 0.1279,
      "step": 21197
    },
    {
      "epoch": 0.618414143182216,
      "grad_norm": 1.0418224776350187,
      "learning_rate": 3.3565858780966875e-06,
      "loss": 0.1052,
      "step": 21198
    },
    {
      "epoch": 0.6184433164128595,
      "grad_norm": 0.9797337619283316,
      "learning_rate": 3.356139699546718e-06,
      "loss": 0.1059,
      "step": 21199
    },
    {
      "epoch": 0.6184724896435031,
      "grad_norm": 0.714229762414593,
      "learning_rate": 3.35569353567274e-06,
      "loss": 0.108,
      "step": 21200
    },
    {
      "epoch": 0.6185016628741467,
      "grad_norm": 1.134266107694089,
      "learning_rate": 3.3552473864787373e-06,
      "loss": 0.1575,
      "step": 21201
    },
    {
      "epoch": 0.6185308361047902,
      "grad_norm": 1.2277527972648197,
      "learning_rate": 3.3548012519686944e-06,
      "loss": 0.1414,
      "step": 21202
    },
    {
      "epoch": 0.6185600093354338,
      "grad_norm": 0.7349958918407727,
      "learning_rate": 3.35435513214659e-06,
      "loss": 0.1154,
      "step": 21203
    },
    {
      "epoch": 0.6185891825660774,
      "grad_norm": 0.8126375420422207,
      "learning_rate": 3.3539090270164134e-06,
      "loss": 0.1494,
      "step": 21204
    },
    {
      "epoch": 0.618618355796721,
      "grad_norm": 1.1103551371168165,
      "learning_rate": 3.3534629365821424e-06,
      "loss": 0.1406,
      "step": 21205
    },
    {
      "epoch": 0.6186475290273645,
      "grad_norm": 0.8389841037147578,
      "learning_rate": 3.353016860847762e-06,
      "loss": 0.1172,
      "step": 21206
    },
    {
      "epoch": 0.6186767022580081,
      "grad_norm": 0.800865656585191,
      "learning_rate": 3.352570799817255e-06,
      "loss": 0.1133,
      "step": 21207
    },
    {
      "epoch": 0.6187058754886516,
      "grad_norm": 0.6766690715697238,
      "learning_rate": 3.352124753494601e-06,
      "loss": 0.1163,
      "step": 21208
    },
    {
      "epoch": 0.6187350487192952,
      "grad_norm": 0.7791316632308496,
      "learning_rate": 3.351678721883783e-06,
      "loss": 0.1307,
      "step": 21209
    },
    {
      "epoch": 0.6187642219499387,
      "grad_norm": 0.9211891434207212,
      "learning_rate": 3.351232704988785e-06,
      "loss": 0.1284,
      "step": 21210
    },
    {
      "epoch": 0.6187933951805823,
      "grad_norm": 0.6421911306092272,
      "learning_rate": 3.3507867028135883e-06,
      "loss": 0.1202,
      "step": 21211
    },
    {
      "epoch": 0.6188225684112258,
      "grad_norm": 0.755354661394962,
      "learning_rate": 3.3503407153621747e-06,
      "loss": 0.1116,
      "step": 21212
    },
    {
      "epoch": 0.6188517416418694,
      "grad_norm": 0.9795982324837306,
      "learning_rate": 3.349894742638524e-06,
      "loss": 0.1379,
      "step": 21213
    },
    {
      "epoch": 0.618880914872513,
      "grad_norm": 0.7906302537901844,
      "learning_rate": 3.34944878464662e-06,
      "loss": 0.1303,
      "step": 21214
    },
    {
      "epoch": 0.6189100881031565,
      "grad_norm": 0.7370506327497786,
      "learning_rate": 3.349002841390442e-06,
      "loss": 0.1066,
      "step": 21215
    },
    {
      "epoch": 0.6189392613338001,
      "grad_norm": 0.7863086011781343,
      "learning_rate": 3.3485569128739724e-06,
      "loss": 0.134,
      "step": 21216
    },
    {
      "epoch": 0.6189684345644436,
      "grad_norm": 0.7531898753356019,
      "learning_rate": 3.348110999101195e-06,
      "loss": 0.133,
      "step": 21217
    },
    {
      "epoch": 0.6189976077950873,
      "grad_norm": 0.8702064344304873,
      "learning_rate": 3.347665100076086e-06,
      "loss": 0.0964,
      "step": 21218
    },
    {
      "epoch": 0.6190267810257308,
      "grad_norm": 0.7362051539785243,
      "learning_rate": 3.3472192158026296e-06,
      "loss": 0.1498,
      "step": 21219
    },
    {
      "epoch": 0.6190559542563744,
      "grad_norm": 0.7447196134231505,
      "learning_rate": 3.3467733462848063e-06,
      "loss": 0.1188,
      "step": 21220
    },
    {
      "epoch": 0.6190851274870179,
      "grad_norm": 0.7591857929875219,
      "learning_rate": 3.3463274915265935e-06,
      "loss": 0.1132,
      "step": 21221
    },
    {
      "epoch": 0.6191143007176615,
      "grad_norm": 0.6873927769309279,
      "learning_rate": 3.3458816515319753e-06,
      "loss": 0.124,
      "step": 21222
    },
    {
      "epoch": 0.619143473948305,
      "grad_norm": 0.9018772086631418,
      "learning_rate": 3.345435826304931e-06,
      "loss": 0.1254,
      "step": 21223
    },
    {
      "epoch": 0.6191726471789486,
      "grad_norm": 0.8982515765955048,
      "learning_rate": 3.3449900158494407e-06,
      "loss": 0.1257,
      "step": 21224
    },
    {
      "epoch": 0.6192018204095922,
      "grad_norm": 0.8706524419812917,
      "learning_rate": 3.3445442201694843e-06,
      "loss": 0.1203,
      "step": 21225
    },
    {
      "epoch": 0.6192309936402357,
      "grad_norm": 0.7358970491922977,
      "learning_rate": 3.3440984392690425e-06,
      "loss": 0.1138,
      "step": 21226
    },
    {
      "epoch": 0.6192601668708793,
      "grad_norm": 1.0222237823949332,
      "learning_rate": 3.3436526731520924e-06,
      "loss": 0.1222,
      "step": 21227
    },
    {
      "epoch": 0.6192893401015228,
      "grad_norm": 0.8041844508813587,
      "learning_rate": 3.3432069218226173e-06,
      "loss": 0.1106,
      "step": 21228
    },
    {
      "epoch": 0.6193185133321664,
      "grad_norm": 0.9842227525447131,
      "learning_rate": 3.3427611852845964e-06,
      "loss": 0.1641,
      "step": 21229
    },
    {
      "epoch": 0.6193476865628099,
      "grad_norm": 0.8740283958603253,
      "learning_rate": 3.3423154635420075e-06,
      "loss": 0.1113,
      "step": 21230
    },
    {
      "epoch": 0.6193768597934536,
      "grad_norm": 0.9697856100686789,
      "learning_rate": 3.341869756598829e-06,
      "loss": 0.1402,
      "step": 21231
    },
    {
      "epoch": 0.6194060330240971,
      "grad_norm": 0.9156001557062314,
      "learning_rate": 3.3414240644590435e-06,
      "loss": 0.1236,
      "step": 21232
    },
    {
      "epoch": 0.6194352062547407,
      "grad_norm": 0.8168858686619196,
      "learning_rate": 3.340978387126625e-06,
      "loss": 0.1204,
      "step": 21233
    },
    {
      "epoch": 0.6194643794853842,
      "grad_norm": 0.8154387843613505,
      "learning_rate": 3.3405327246055584e-06,
      "loss": 0.1083,
      "step": 21234
    },
    {
      "epoch": 0.6194935527160278,
      "grad_norm": 0.9627968238583299,
      "learning_rate": 3.3400870768998185e-06,
      "loss": 0.135,
      "step": 21235
    },
    {
      "epoch": 0.6195227259466713,
      "grad_norm": 0.8056921129051398,
      "learning_rate": 3.3396414440133846e-06,
      "loss": 0.1201,
      "step": 21236
    },
    {
      "epoch": 0.6195518991773149,
      "grad_norm": 0.7033696684907796,
      "learning_rate": 3.3391958259502364e-06,
      "loss": 0.1071,
      "step": 21237
    },
    {
      "epoch": 0.6195810724079585,
      "grad_norm": 1.1705868548999783,
      "learning_rate": 3.338750222714351e-06,
      "loss": 0.131,
      "step": 21238
    },
    {
      "epoch": 0.619610245638602,
      "grad_norm": 0.8836863283166284,
      "learning_rate": 3.3383046343097057e-06,
      "loss": 0.1141,
      "step": 21239
    },
    {
      "epoch": 0.6196394188692456,
      "grad_norm": 0.866811109363582,
      "learning_rate": 3.3378590607402805e-06,
      "loss": 0.122,
      "step": 21240
    },
    {
      "epoch": 0.6196685920998891,
      "grad_norm": 1.1313283525712725,
      "learning_rate": 3.337413502010054e-06,
      "loss": 0.1204,
      "step": 21241
    },
    {
      "epoch": 0.6196977653305327,
      "grad_norm": 1.2772787645472965,
      "learning_rate": 3.336967958123003e-06,
      "loss": 0.1095,
      "step": 21242
    },
    {
      "epoch": 0.6197269385611762,
      "grad_norm": 0.8394017627605113,
      "learning_rate": 3.3365224290831046e-06,
      "loss": 0.1003,
      "step": 21243
    },
    {
      "epoch": 0.6197561117918198,
      "grad_norm": 0.8194341602790984,
      "learning_rate": 3.336076914894336e-06,
      "loss": 0.1161,
      "step": 21244
    },
    {
      "epoch": 0.6197852850224634,
      "grad_norm": 0.9484147882504607,
      "learning_rate": 3.335631415560675e-06,
      "loss": 0.1518,
      "step": 21245
    },
    {
      "epoch": 0.619814458253107,
      "grad_norm": 0.6960903804747017,
      "learning_rate": 3.3351859310861002e-06,
      "loss": 0.1106,
      "step": 21246
    },
    {
      "epoch": 0.6198436314837505,
      "grad_norm": 0.7394339730995685,
      "learning_rate": 3.3347404614745893e-06,
      "loss": 0.1057,
      "step": 21247
    },
    {
      "epoch": 0.6198728047143941,
      "grad_norm": 0.9504018079113686,
      "learning_rate": 3.3342950067301173e-06,
      "loss": 0.1243,
      "step": 21248
    },
    {
      "epoch": 0.6199019779450377,
      "grad_norm": 0.9583044532792427,
      "learning_rate": 3.3338495668566614e-06,
      "loss": 0.1422,
      "step": 21249
    },
    {
      "epoch": 0.6199311511756812,
      "grad_norm": 0.8205100638906588,
      "learning_rate": 3.3334041418581996e-06,
      "loss": 0.1055,
      "step": 21250
    },
    {
      "epoch": 0.6199603244063248,
      "grad_norm": 0.9523776220684347,
      "learning_rate": 3.332958731738706e-06,
      "loss": 0.1338,
      "step": 21251
    },
    {
      "epoch": 0.6199894976369683,
      "grad_norm": 0.7827706205399767,
      "learning_rate": 3.33251333650216e-06,
      "loss": 0.1141,
      "step": 21252
    },
    {
      "epoch": 0.6200186708676119,
      "grad_norm": 1.0623234776400148,
      "learning_rate": 3.332067956152537e-06,
      "loss": 0.1217,
      "step": 21253
    },
    {
      "epoch": 0.6200478440982554,
      "grad_norm": 0.8327085585583657,
      "learning_rate": 3.3316225906938136e-06,
      "loss": 0.1231,
      "step": 21254
    },
    {
      "epoch": 0.620077017328899,
      "grad_norm": 1.0833418036181328,
      "learning_rate": 3.3311772401299645e-06,
      "loss": 0.1515,
      "step": 21255
    },
    {
      "epoch": 0.6201061905595425,
      "grad_norm": 0.9818978491360845,
      "learning_rate": 3.3307319044649663e-06,
      "loss": 0.1241,
      "step": 21256
    },
    {
      "epoch": 0.6201353637901861,
      "grad_norm": 1.0891961690935306,
      "learning_rate": 3.3302865837027954e-06,
      "loss": 0.1163,
      "step": 21257
    },
    {
      "epoch": 0.6201645370208296,
      "grad_norm": 0.8342598441064556,
      "learning_rate": 3.3298412778474277e-06,
      "loss": 0.1102,
      "step": 21258
    },
    {
      "epoch": 0.6201937102514733,
      "grad_norm": 0.9643015121715223,
      "learning_rate": 3.329395986902839e-06,
      "loss": 0.1105,
      "step": 21259
    },
    {
      "epoch": 0.6202228834821168,
      "grad_norm": 1.1013596414539768,
      "learning_rate": 3.3289507108730033e-06,
      "loss": 0.1168,
      "step": 21260
    },
    {
      "epoch": 0.6202520567127604,
      "grad_norm": 0.9029074386464252,
      "learning_rate": 3.3285054497618974e-06,
      "loss": 0.139,
      "step": 21261
    },
    {
      "epoch": 0.620281229943404,
      "grad_norm": 0.7658185436620013,
      "learning_rate": 3.3280602035734944e-06,
      "loss": 0.106,
      "step": 21262
    },
    {
      "epoch": 0.6203104031740475,
      "grad_norm": 0.9713398445467478,
      "learning_rate": 3.327614972311771e-06,
      "loss": 0.1107,
      "step": 21263
    },
    {
      "epoch": 0.6203395764046911,
      "grad_norm": 0.8215817782423971,
      "learning_rate": 3.3271697559807042e-06,
      "loss": 0.1202,
      "step": 21264
    },
    {
      "epoch": 0.6203687496353346,
      "grad_norm": 0.8677790075313323,
      "learning_rate": 3.3267245545842653e-06,
      "loss": 0.1597,
      "step": 21265
    },
    {
      "epoch": 0.6203979228659782,
      "grad_norm": 0.8700063377829749,
      "learning_rate": 3.3262793681264293e-06,
      "loss": 0.1151,
      "step": 21266
    },
    {
      "epoch": 0.6204270960966217,
      "grad_norm": 0.7790501776389437,
      "learning_rate": 3.3258341966111728e-06,
      "loss": 0.1317,
      "step": 21267
    },
    {
      "epoch": 0.6204562693272653,
      "grad_norm": 0.94202255518554,
      "learning_rate": 3.325389040042466e-06,
      "loss": 0.1339,
      "step": 21268
    },
    {
      "epoch": 0.6204854425579088,
      "grad_norm": 0.7582691685828977,
      "learning_rate": 3.3249438984242893e-06,
      "loss": 0.1148,
      "step": 21269
    },
    {
      "epoch": 0.6205146157885524,
      "grad_norm": 0.8749013701005308,
      "learning_rate": 3.3244987717606127e-06,
      "loss": 0.1152,
      "step": 21270
    },
    {
      "epoch": 0.6205437890191959,
      "grad_norm": 0.7694512091733933,
      "learning_rate": 3.324053660055411e-06,
      "loss": 0.1208,
      "step": 21271
    },
    {
      "epoch": 0.6205729622498396,
      "grad_norm": 0.8421991445483307,
      "learning_rate": 3.3236085633126586e-06,
      "loss": 0.1371,
      "step": 21272
    },
    {
      "epoch": 0.6206021354804832,
      "grad_norm": 0.7464571484475591,
      "learning_rate": 3.323163481536328e-06,
      "loss": 0.1303,
      "step": 21273
    },
    {
      "epoch": 0.6206313087111267,
      "grad_norm": 0.8140424366889636,
      "learning_rate": 3.3227184147303928e-06,
      "loss": 0.1206,
      "step": 21274
    },
    {
      "epoch": 0.6206604819417703,
      "grad_norm": 0.9722149153286191,
      "learning_rate": 3.322273362898828e-06,
      "loss": 0.1178,
      "step": 21275
    },
    {
      "epoch": 0.6206896551724138,
      "grad_norm": 0.7490105420493265,
      "learning_rate": 3.3218283260456065e-06,
      "loss": 0.1167,
      "step": 21276
    },
    {
      "epoch": 0.6207188284030574,
      "grad_norm": 0.907949776745723,
      "learning_rate": 3.321383304174702e-06,
      "loss": 0.1364,
      "step": 21277
    },
    {
      "epoch": 0.6207480016337009,
      "grad_norm": 0.9494358160912708,
      "learning_rate": 3.320938297290085e-06,
      "loss": 0.1187,
      "step": 21278
    },
    {
      "epoch": 0.6207771748643445,
      "grad_norm": 1.0470637100780809,
      "learning_rate": 3.3204933053957312e-06,
      "loss": 0.1292,
      "step": 21279
    },
    {
      "epoch": 0.620806348094988,
      "grad_norm": 0.8368251737513249,
      "learning_rate": 3.32004832849561e-06,
      "loss": 0.1249,
      "step": 21280
    },
    {
      "epoch": 0.6208355213256316,
      "grad_norm": 1.0894381658191175,
      "learning_rate": 3.319603366593699e-06,
      "loss": 0.1316,
      "step": 21281
    },
    {
      "epoch": 0.6208646945562751,
      "grad_norm": 1.053502910510657,
      "learning_rate": 3.3191584196939664e-06,
      "loss": 0.149,
      "step": 21282
    },
    {
      "epoch": 0.6208938677869187,
      "grad_norm": 0.7907878805400254,
      "learning_rate": 3.318713487800387e-06,
      "loss": 0.139,
      "step": 21283
    },
    {
      "epoch": 0.6209230410175622,
      "grad_norm": 0.806438614256576,
      "learning_rate": 3.318268570916933e-06,
      "loss": 0.1359,
      "step": 21284
    },
    {
      "epoch": 0.6209522142482058,
      "grad_norm": 0.8779660143369934,
      "learning_rate": 3.317823669047574e-06,
      "loss": 0.114,
      "step": 21285
    },
    {
      "epoch": 0.6209813874788495,
      "grad_norm": 0.780236913047683,
      "learning_rate": 3.3173787821962835e-06,
      "loss": 0.1013,
      "step": 21286
    },
    {
      "epoch": 0.621010560709493,
      "grad_norm": 0.7286366102868524,
      "learning_rate": 3.3169339103670346e-06,
      "loss": 0.1441,
      "step": 21287
    },
    {
      "epoch": 0.6210397339401366,
      "grad_norm": 0.8642237540419939,
      "learning_rate": 3.3164890535637973e-06,
      "loss": 0.1417,
      "step": 21288
    },
    {
      "epoch": 0.6210689071707801,
      "grad_norm": 1.0161236822646509,
      "learning_rate": 3.3160442117905457e-06,
      "loss": 0.1071,
      "step": 21289
    },
    {
      "epoch": 0.6210980804014237,
      "grad_norm": 0.7800651069117587,
      "learning_rate": 3.315599385051248e-06,
      "loss": 0.1363,
      "step": 21290
    },
    {
      "epoch": 0.6211272536320672,
      "grad_norm": 0.789159023649561,
      "learning_rate": 3.315154573349877e-06,
      "loss": 0.1266,
      "step": 21291
    },
    {
      "epoch": 0.6211564268627108,
      "grad_norm": 0.7680218715126719,
      "learning_rate": 3.3147097766904023e-06,
      "loss": 0.1096,
      "step": 21292
    },
    {
      "epoch": 0.6211856000933543,
      "grad_norm": 0.8466641849015817,
      "learning_rate": 3.314264995076798e-06,
      "loss": 0.1409,
      "step": 21293
    },
    {
      "epoch": 0.6212147733239979,
      "grad_norm": 0.6803673970957486,
      "learning_rate": 3.313820228513034e-06,
      "loss": 0.1142,
      "step": 21294
    },
    {
      "epoch": 0.6212439465546414,
      "grad_norm": 1.0595812562162283,
      "learning_rate": 3.313375477003079e-06,
      "loss": 0.1339,
      "step": 21295
    },
    {
      "epoch": 0.621273119785285,
      "grad_norm": 0.8386605136996219,
      "learning_rate": 3.3129307405509058e-06,
      "loss": 0.1323,
      "step": 21296
    },
    {
      "epoch": 0.6213022930159285,
      "grad_norm": 0.8007409591229493,
      "learning_rate": 3.312486019160486e-06,
      "loss": 0.1453,
      "step": 21297
    },
    {
      "epoch": 0.6213314662465721,
      "grad_norm": 0.8800377461827983,
      "learning_rate": 3.3120413128357837e-06,
      "loss": 0.1407,
      "step": 21298
    },
    {
      "epoch": 0.6213606394772158,
      "grad_norm": 0.7508978002471144,
      "learning_rate": 3.311596621580777e-06,
      "loss": 0.1032,
      "step": 21299
    },
    {
      "epoch": 0.6213898127078593,
      "grad_norm": 0.6572251173652115,
      "learning_rate": 3.311151945399432e-06,
      "loss": 0.1106,
      "step": 21300
    },
    {
      "epoch": 0.6214189859385029,
      "grad_norm": 0.8964986549543154,
      "learning_rate": 3.3107072842957188e-06,
      "loss": 0.129,
      "step": 21301
    },
    {
      "epoch": 0.6214481591691464,
      "grad_norm": 1.0203596500690153,
      "learning_rate": 3.310262638273609e-06,
      "loss": 0.1428,
      "step": 21302
    },
    {
      "epoch": 0.62147733239979,
      "grad_norm": 0.9062871790989347,
      "learning_rate": 3.3098180073370702e-06,
      "loss": 0.1242,
      "step": 21303
    },
    {
      "epoch": 0.6215065056304335,
      "grad_norm": 0.892411289294688,
      "learning_rate": 3.309373391490072e-06,
      "loss": 0.1277,
      "step": 21304
    },
    {
      "epoch": 0.6215356788610771,
      "grad_norm": 0.8217260832629453,
      "learning_rate": 3.3089287907365848e-06,
      "loss": 0.1387,
      "step": 21305
    },
    {
      "epoch": 0.6215648520917206,
      "grad_norm": 0.9979695852189724,
      "learning_rate": 3.3084842050805778e-06,
      "loss": 0.1206,
      "step": 21306
    },
    {
      "epoch": 0.6215940253223642,
      "grad_norm": 0.9104752937405064,
      "learning_rate": 3.3080396345260213e-06,
      "loss": 0.0993,
      "step": 21307
    },
    {
      "epoch": 0.6216231985530077,
      "grad_norm": 0.7880754227526585,
      "learning_rate": 3.3075950790768817e-06,
      "loss": 0.1326,
      "step": 21308
    },
    {
      "epoch": 0.6216523717836513,
      "grad_norm": 0.8896441449057885,
      "learning_rate": 3.3071505387371294e-06,
      "loss": 0.1217,
      "step": 21309
    },
    {
      "epoch": 0.6216815450142948,
      "grad_norm": 0.9020034823921245,
      "learning_rate": 3.306706013510732e-06,
      "loss": 0.1301,
      "step": 21310
    },
    {
      "epoch": 0.6217107182449384,
      "grad_norm": 0.7829725279898587,
      "learning_rate": 3.306261503401661e-06,
      "loss": 0.1202,
      "step": 21311
    },
    {
      "epoch": 0.621739891475582,
      "grad_norm": 1.0006266652654363,
      "learning_rate": 3.3058170084138824e-06,
      "loss": 0.1325,
      "step": 21312
    },
    {
      "epoch": 0.6217690647062256,
      "grad_norm": 0.8483983545262735,
      "learning_rate": 3.305372528551365e-06,
      "loss": 0.1383,
      "step": 21313
    },
    {
      "epoch": 0.6217982379368692,
      "grad_norm": 0.7990302783664022,
      "learning_rate": 3.304928063818078e-06,
      "loss": 0.1056,
      "step": 21314
    },
    {
      "epoch": 0.6218274111675127,
      "grad_norm": 1.3404300346733664,
      "learning_rate": 3.304483614217987e-06,
      "loss": 0.1365,
      "step": 21315
    },
    {
      "epoch": 0.6218565843981563,
      "grad_norm": 0.8902200862550608,
      "learning_rate": 3.304039179755061e-06,
      "loss": 0.1272,
      "step": 21316
    },
    {
      "epoch": 0.6218857576287998,
      "grad_norm": 0.9414809876283852,
      "learning_rate": 3.3035947604332697e-06,
      "loss": 0.1177,
      "step": 21317
    },
    {
      "epoch": 0.6219149308594434,
      "grad_norm": 0.8641232348744955,
      "learning_rate": 3.3031503562565793e-06,
      "loss": 0.121,
      "step": 21318
    },
    {
      "epoch": 0.6219441040900869,
      "grad_norm": 0.8492757587837996,
      "learning_rate": 3.302705967228958e-06,
      "loss": 0.1243,
      "step": 21319
    },
    {
      "epoch": 0.6219732773207305,
      "grad_norm": 0.675696365679319,
      "learning_rate": 3.3022615933543724e-06,
      "loss": 0.1139,
      "step": 21320
    },
    {
      "epoch": 0.622002450551374,
      "grad_norm": 0.9470120382955522,
      "learning_rate": 3.3018172346367896e-06,
      "loss": 0.1551,
      "step": 21321
    },
    {
      "epoch": 0.6220316237820176,
      "grad_norm": 1.0350572461555476,
      "learning_rate": 3.3013728910801758e-06,
      "loss": 0.1141,
      "step": 21322
    },
    {
      "epoch": 0.6220607970126611,
      "grad_norm": 0.686469596550152,
      "learning_rate": 3.3009285626885002e-06,
      "loss": 0.1136,
      "step": 21323
    },
    {
      "epoch": 0.6220899702433047,
      "grad_norm": 0.7361842123082664,
      "learning_rate": 3.3004842494657304e-06,
      "loss": 0.1491,
      "step": 21324
    },
    {
      "epoch": 0.6221191434739483,
      "grad_norm": 0.9547261361657784,
      "learning_rate": 3.30003995141583e-06,
      "loss": 0.122,
      "step": 21325
    },
    {
      "epoch": 0.6221483167045919,
      "grad_norm": 0.7418964773658194,
      "learning_rate": 3.299595668542768e-06,
      "loss": 0.1406,
      "step": 21326
    },
    {
      "epoch": 0.6221774899352355,
      "grad_norm": 0.7739919750428765,
      "learning_rate": 3.29915140085051e-06,
      "loss": 0.1337,
      "step": 21327
    },
    {
      "epoch": 0.622206663165879,
      "grad_norm": 0.7858944115116288,
      "learning_rate": 3.2987071483430195e-06,
      "loss": 0.1083,
      "step": 21328
    },
    {
      "epoch": 0.6222358363965226,
      "grad_norm": 1.0061417646637059,
      "learning_rate": 3.298262911024269e-06,
      "loss": 0.1297,
      "step": 21329
    },
    {
      "epoch": 0.6222650096271661,
      "grad_norm": 0.6176665245333957,
      "learning_rate": 3.2978186888982188e-06,
      "loss": 0.1171,
      "step": 21330
    },
    {
      "epoch": 0.6222941828578097,
      "grad_norm": 0.7673620279695847,
      "learning_rate": 3.297374481968838e-06,
      "loss": 0.1376,
      "step": 21331
    },
    {
      "epoch": 0.6223233560884532,
      "grad_norm": 1.0690542554056024,
      "learning_rate": 3.2969302902400925e-06,
      "loss": 0.1146,
      "step": 21332
    },
    {
      "epoch": 0.6223525293190968,
      "grad_norm": 0.7159491735092279,
      "learning_rate": 3.2964861137159453e-06,
      "loss": 0.1087,
      "step": 21333
    },
    {
      "epoch": 0.6223817025497403,
      "grad_norm": 0.8175363148488521,
      "learning_rate": 3.296041952400363e-06,
      "loss": 0.112,
      "step": 21334
    },
    {
      "epoch": 0.6224108757803839,
      "grad_norm": 0.8295698694515653,
      "learning_rate": 3.2955978062973117e-06,
      "loss": 0.1317,
      "step": 21335
    },
    {
      "epoch": 0.6224400490110275,
      "grad_norm": 0.9168800836975299,
      "learning_rate": 3.295153675410756e-06,
      "loss": 0.1245,
      "step": 21336
    },
    {
      "epoch": 0.622469222241671,
      "grad_norm": 0.9645563310931667,
      "learning_rate": 3.294709559744663e-06,
      "loss": 0.1397,
      "step": 21337
    },
    {
      "epoch": 0.6224983954723146,
      "grad_norm": 0.8367384801046545,
      "learning_rate": 3.2942654593029957e-06,
      "loss": 0.1266,
      "step": 21338
    },
    {
      "epoch": 0.6225275687029581,
      "grad_norm": 0.7889492779134213,
      "learning_rate": 3.2938213740897173e-06,
      "loss": 0.1085,
      "step": 21339
    },
    {
      "epoch": 0.6225567419336018,
      "grad_norm": 0.8859241154679921,
      "learning_rate": 3.2933773041087945e-06,
      "loss": 0.1594,
      "step": 21340
    },
    {
      "epoch": 0.6225859151642453,
      "grad_norm": 0.8246678630982585,
      "learning_rate": 3.292933249364194e-06,
      "loss": 0.1378,
      "step": 21341
    },
    {
      "epoch": 0.6226150883948889,
      "grad_norm": 0.9921870326117046,
      "learning_rate": 3.2924892098598765e-06,
      "loss": 0.1151,
      "step": 21342
    },
    {
      "epoch": 0.6226442616255324,
      "grad_norm": 0.8903747923174103,
      "learning_rate": 3.292045185599808e-06,
      "loss": 0.1186,
      "step": 21343
    },
    {
      "epoch": 0.622673434856176,
      "grad_norm": 1.0347288106601755,
      "learning_rate": 3.291601176587953e-06,
      "loss": 0.1235,
      "step": 21344
    },
    {
      "epoch": 0.6227026080868195,
      "grad_norm": 0.9944202953824731,
      "learning_rate": 3.291157182828274e-06,
      "loss": 0.1479,
      "step": 21345
    },
    {
      "epoch": 0.6227317813174631,
      "grad_norm": 0.8423356634400826,
      "learning_rate": 3.290713204324735e-06,
      "loss": 0.1128,
      "step": 21346
    },
    {
      "epoch": 0.6227609545481066,
      "grad_norm": 0.7917008469805683,
      "learning_rate": 3.290269241081301e-06,
      "loss": 0.1097,
      "step": 21347
    },
    {
      "epoch": 0.6227901277787502,
      "grad_norm": 0.7328504510975807,
      "learning_rate": 3.2898252931019353e-06,
      "loss": 0.1379,
      "step": 21348
    },
    {
      "epoch": 0.6228193010093938,
      "grad_norm": 1.1886418462196109,
      "learning_rate": 3.289381360390602e-06,
      "loss": 0.1268,
      "step": 21349
    },
    {
      "epoch": 0.6228484742400373,
      "grad_norm": 0.9171882169636048,
      "learning_rate": 3.2889374429512625e-06,
      "loss": 0.1191,
      "step": 21350
    },
    {
      "epoch": 0.6228776474706809,
      "grad_norm": 0.8192435164398073,
      "learning_rate": 3.2884935407878815e-06,
      "loss": 0.128,
      "step": 21351
    },
    {
      "epoch": 0.6229068207013244,
      "grad_norm": 0.9002291269705648,
      "learning_rate": 3.2880496539044204e-06,
      "loss": 0.1404,
      "step": 21352
    },
    {
      "epoch": 0.6229359939319681,
      "grad_norm": 1.4768307983113431,
      "learning_rate": 3.287605782304844e-06,
      "loss": 0.1265,
      "step": 21353
    },
    {
      "epoch": 0.6229651671626116,
      "grad_norm": 0.9693148724283404,
      "learning_rate": 3.2871619259931155e-06,
      "loss": 0.1306,
      "step": 21354
    },
    {
      "epoch": 0.6229943403932552,
      "grad_norm": 1.0424101658228286,
      "learning_rate": 3.286718084973196e-06,
      "loss": 0.1393,
      "step": 21355
    },
    {
      "epoch": 0.6230235136238987,
      "grad_norm": 1.0124755595158514,
      "learning_rate": 3.286274259249048e-06,
      "loss": 0.1319,
      "step": 21356
    },
    {
      "epoch": 0.6230526868545423,
      "grad_norm": 0.9645705464887633,
      "learning_rate": 3.285830448824635e-06,
      "loss": 0.1424,
      "step": 21357
    },
    {
      "epoch": 0.6230818600851858,
      "grad_norm": 0.7622431640789379,
      "learning_rate": 3.285386653703916e-06,
      "loss": 0.1367,
      "step": 21358
    },
    {
      "epoch": 0.6231110333158294,
      "grad_norm": 0.92898231544446,
      "learning_rate": 3.2849428738908585e-06,
      "loss": 0.1443,
      "step": 21359
    },
    {
      "epoch": 0.623140206546473,
      "grad_norm": 1.1094987025385317,
      "learning_rate": 3.2844991093894205e-06,
      "loss": 0.1361,
      "step": 21360
    },
    {
      "epoch": 0.6231693797771165,
      "grad_norm": 0.9244821197402684,
      "learning_rate": 3.284055360203565e-06,
      "loss": 0.1164,
      "step": 21361
    },
    {
      "epoch": 0.6231985530077601,
      "grad_norm": 1.5233771887704943,
      "learning_rate": 3.2836116263372553e-06,
      "loss": 0.1237,
      "step": 21362
    },
    {
      "epoch": 0.6232277262384036,
      "grad_norm": 1.3060899002913862,
      "learning_rate": 3.283167907794449e-06,
      "loss": 0.1436,
      "step": 21363
    },
    {
      "epoch": 0.6232568994690472,
      "grad_norm": 0.8891857246693189,
      "learning_rate": 3.2827242045791097e-06,
      "loss": 0.1126,
      "step": 21364
    },
    {
      "epoch": 0.6232860726996907,
      "grad_norm": 0.7134619818092386,
      "learning_rate": 3.2822805166951993e-06,
      "loss": 0.1269,
      "step": 21365
    },
    {
      "epoch": 0.6233152459303343,
      "grad_norm": 0.7131956097189778,
      "learning_rate": 3.2818368441466785e-06,
      "loss": 0.1174,
      "step": 21366
    },
    {
      "epoch": 0.6233444191609779,
      "grad_norm": 0.9251043737066043,
      "learning_rate": 3.2813931869375093e-06,
      "loss": 0.1017,
      "step": 21367
    },
    {
      "epoch": 0.6233735923916215,
      "grad_norm": 0.9510310397422824,
      "learning_rate": 3.2809495450716504e-06,
      "loss": 0.1194,
      "step": 21368
    },
    {
      "epoch": 0.623402765622265,
      "grad_norm": 0.8411471375005438,
      "learning_rate": 3.280505918553064e-06,
      "loss": 0.1269,
      "step": 21369
    },
    {
      "epoch": 0.6234319388529086,
      "grad_norm": 0.7882271282585804,
      "learning_rate": 3.2800623073857086e-06,
      "loss": 0.1124,
      "step": 21370
    },
    {
      "epoch": 0.6234611120835521,
      "grad_norm": 0.9041004644139871,
      "learning_rate": 3.279618711573549e-06,
      "loss": 0.1071,
      "step": 21371
    },
    {
      "epoch": 0.6234902853141957,
      "grad_norm": 1.0976159418705274,
      "learning_rate": 3.2791751311205412e-06,
      "loss": 0.117,
      "step": 21372
    },
    {
      "epoch": 0.6235194585448393,
      "grad_norm": 0.7036863548417275,
      "learning_rate": 3.2787315660306473e-06,
      "loss": 0.1277,
      "step": 21373
    },
    {
      "epoch": 0.6235486317754828,
      "grad_norm": 0.8272929067017146,
      "learning_rate": 3.278288016307828e-06,
      "loss": 0.1192,
      "step": 21374
    },
    {
      "epoch": 0.6235778050061264,
      "grad_norm": 0.9351901240229592,
      "learning_rate": 3.277844481956042e-06,
      "loss": 0.1257,
      "step": 21375
    },
    {
      "epoch": 0.6236069782367699,
      "grad_norm": 0.9171908978621737,
      "learning_rate": 3.277400962979247e-06,
      "loss": 0.134,
      "step": 21376
    },
    {
      "epoch": 0.6236361514674135,
      "grad_norm": 0.7627911834806919,
      "learning_rate": 3.2769574593814067e-06,
      "loss": 0.134,
      "step": 21377
    },
    {
      "epoch": 0.623665324698057,
      "grad_norm": 0.8982023656165522,
      "learning_rate": 3.2765139711664795e-06,
      "loss": 0.1599,
      "step": 21378
    },
    {
      "epoch": 0.6236944979287006,
      "grad_norm": 0.7722640945898418,
      "learning_rate": 3.2760704983384237e-06,
      "loss": 0.1149,
      "step": 21379
    },
    {
      "epoch": 0.6237236711593442,
      "grad_norm": 1.1665461068577712,
      "learning_rate": 3.2756270409011993e-06,
      "loss": 0.1295,
      "step": 21380
    },
    {
      "epoch": 0.6237528443899878,
      "grad_norm": 0.9510608179510497,
      "learning_rate": 3.2751835988587644e-06,
      "loss": 0.14,
      "step": 21381
    },
    {
      "epoch": 0.6237820176206313,
      "grad_norm": 0.8044782996137715,
      "learning_rate": 3.274740172215078e-06,
      "loss": 0.1097,
      "step": 21382
    },
    {
      "epoch": 0.6238111908512749,
      "grad_norm": 0.8821719331786081,
      "learning_rate": 3.2742967609741e-06,
      "loss": 0.123,
      "step": 21383
    },
    {
      "epoch": 0.6238403640819185,
      "grad_norm": 0.8969669929447068,
      "learning_rate": 3.2738533651397895e-06,
      "loss": 0.1282,
      "step": 21384
    },
    {
      "epoch": 0.623869537312562,
      "grad_norm": 0.8431979072420812,
      "learning_rate": 3.2734099847161038e-06,
      "loss": 0.1016,
      "step": 21385
    },
    {
      "epoch": 0.6238987105432056,
      "grad_norm": 1.2478708829528349,
      "learning_rate": 3.272966619707001e-06,
      "loss": 0.1379,
      "step": 21386
    },
    {
      "epoch": 0.6239278837738491,
      "grad_norm": 0.8291457332907493,
      "learning_rate": 3.272523270116441e-06,
      "loss": 0.1407,
      "step": 21387
    },
    {
      "epoch": 0.6239570570044927,
      "grad_norm": 0.9483068769052317,
      "learning_rate": 3.272079935948378e-06,
      "loss": 0.1219,
      "step": 21388
    },
    {
      "epoch": 0.6239862302351362,
      "grad_norm": 0.7799120919355934,
      "learning_rate": 3.271636617206776e-06,
      "loss": 0.1205,
      "step": 21389
    },
    {
      "epoch": 0.6240154034657798,
      "grad_norm": 0.8938662660427801,
      "learning_rate": 3.271193313895588e-06,
      "loss": 0.1285,
      "step": 21390
    },
    {
      "epoch": 0.6240445766964233,
      "grad_norm": 0.7676948460797017,
      "learning_rate": 3.270750026018774e-06,
      "loss": 0.1091,
      "step": 21391
    },
    {
      "epoch": 0.6240737499270669,
      "grad_norm": 0.6694981629040268,
      "learning_rate": 3.270306753580292e-06,
      "loss": 0.1397,
      "step": 21392
    },
    {
      "epoch": 0.6241029231577104,
      "grad_norm": 0.8658009481411322,
      "learning_rate": 3.269863496584097e-06,
      "loss": 0.111,
      "step": 21393
    },
    {
      "epoch": 0.6241320963883541,
      "grad_norm": 1.0818055981683743,
      "learning_rate": 3.2694202550341467e-06,
      "loss": 0.1224,
      "step": 21394
    },
    {
      "epoch": 0.6241612696189976,
      "grad_norm": 0.8759576032731533,
      "learning_rate": 3.2689770289344006e-06,
      "loss": 0.1412,
      "step": 21395
    },
    {
      "epoch": 0.6241904428496412,
      "grad_norm": 0.9859698274761876,
      "learning_rate": 3.2685338182888143e-06,
      "loss": 0.1089,
      "step": 21396
    },
    {
      "epoch": 0.6242196160802848,
      "grad_norm": 0.7141952478940872,
      "learning_rate": 3.268090623101346e-06,
      "loss": 0.112,
      "step": 21397
    },
    {
      "epoch": 0.6242487893109283,
      "grad_norm": 0.9453485591901521,
      "learning_rate": 3.2676474433759498e-06,
      "loss": 0.1081,
      "step": 21398
    },
    {
      "epoch": 0.6242779625415719,
      "grad_norm": 0.921193296818913,
      "learning_rate": 3.2672042791165837e-06,
      "loss": 0.1242,
      "step": 21399
    },
    {
      "epoch": 0.6243071357722154,
      "grad_norm": 0.8739465438971922,
      "learning_rate": 3.266761130327203e-06,
      "loss": 0.1157,
      "step": 21400
    },
    {
      "epoch": 0.624336309002859,
      "grad_norm": 0.9048152619749588,
      "learning_rate": 3.2663179970117678e-06,
      "loss": 0.1399,
      "step": 21401
    },
    {
      "epoch": 0.6243654822335025,
      "grad_norm": 1.023687456115142,
      "learning_rate": 3.26587487917423e-06,
      "loss": 0.1455,
      "step": 21402
    },
    {
      "epoch": 0.6243946554641461,
      "grad_norm": 0.9609077409680912,
      "learning_rate": 3.2654317768185474e-06,
      "loss": 0.1206,
      "step": 21403
    },
    {
      "epoch": 0.6244238286947896,
      "grad_norm": 0.7574570783561895,
      "learning_rate": 3.264988689948677e-06,
      "loss": 0.1148,
      "step": 21404
    },
    {
      "epoch": 0.6244530019254332,
      "grad_norm": 0.8387826309541686,
      "learning_rate": 3.264545618568572e-06,
      "loss": 0.1156,
      "step": 21405
    },
    {
      "epoch": 0.6244821751560767,
      "grad_norm": 0.8790705048861258,
      "learning_rate": 3.264102562682189e-06,
      "loss": 0.1127,
      "step": 21406
    },
    {
      "epoch": 0.6245113483867204,
      "grad_norm": 0.8097963171134626,
      "learning_rate": 3.2636595222934843e-06,
      "loss": 0.1122,
      "step": 21407
    },
    {
      "epoch": 0.624540521617364,
      "grad_norm": 0.7712599598560937,
      "learning_rate": 3.2632164974064136e-06,
      "loss": 0.1315,
      "step": 21408
    },
    {
      "epoch": 0.6245696948480075,
      "grad_norm": 0.7878562866613266,
      "learning_rate": 3.262773488024932e-06,
      "loss": 0.1347,
      "step": 21409
    },
    {
      "epoch": 0.6245988680786511,
      "grad_norm": 0.7682216592352683,
      "learning_rate": 3.262330494152993e-06,
      "loss": 0.1227,
      "step": 21410
    },
    {
      "epoch": 0.6246280413092946,
      "grad_norm": 0.8909676263064586,
      "learning_rate": 3.2618875157945527e-06,
      "loss": 0.1085,
      "step": 21411
    },
    {
      "epoch": 0.6246572145399382,
      "grad_norm": 0.893901912628838,
      "learning_rate": 3.2614445529535643e-06,
      "loss": 0.1297,
      "step": 21412
    },
    {
      "epoch": 0.6246863877705817,
      "grad_norm": 0.8418011611485549,
      "learning_rate": 3.2610016056339855e-06,
      "loss": 0.1211,
      "step": 21413
    },
    {
      "epoch": 0.6247155610012253,
      "grad_norm": 0.8337426922324811,
      "learning_rate": 3.2605586738397697e-06,
      "loss": 0.1148,
      "step": 21414
    },
    {
      "epoch": 0.6247447342318688,
      "grad_norm": 0.8394602552448956,
      "learning_rate": 3.26011575757487e-06,
      "loss": 0.1356,
      "step": 21415
    },
    {
      "epoch": 0.6247739074625124,
      "grad_norm": 0.7867755359073282,
      "learning_rate": 3.2596728568432417e-06,
      "loss": 0.1688,
      "step": 21416
    },
    {
      "epoch": 0.6248030806931559,
      "grad_norm": 0.7488016430772473,
      "learning_rate": 3.2592299716488396e-06,
      "loss": 0.1353,
      "step": 21417
    },
    {
      "epoch": 0.6248322539237995,
      "grad_norm": 0.8252871185907982,
      "learning_rate": 3.2587871019956137e-06,
      "loss": 0.1291,
      "step": 21418
    },
    {
      "epoch": 0.624861427154443,
      "grad_norm": 0.6719926057896809,
      "learning_rate": 3.258344247887524e-06,
      "loss": 0.1394,
      "step": 21419
    },
    {
      "epoch": 0.6248906003850866,
      "grad_norm": 0.6376576206577068,
      "learning_rate": 3.25790140932852e-06,
      "loss": 0.1238,
      "step": 21420
    },
    {
      "epoch": 0.6249197736157303,
      "grad_norm": 0.8584609128994657,
      "learning_rate": 3.257458586322556e-06,
      "loss": 0.1254,
      "step": 21421
    },
    {
      "epoch": 0.6249489468463738,
      "grad_norm": 0.6927924290628801,
      "learning_rate": 3.257015778873587e-06,
      "loss": 0.1244,
      "step": 21422
    },
    {
      "epoch": 0.6249781200770174,
      "grad_norm": 0.6575024255520201,
      "learning_rate": 3.2565729869855643e-06,
      "loss": 0.1217,
      "step": 21423
    },
    {
      "epoch": 0.6250072933076609,
      "grad_norm": 0.7516460135581234,
      "learning_rate": 3.2561302106624405e-06,
      "loss": 0.1145,
      "step": 21424
    },
    {
      "epoch": 0.6250364665383045,
      "grad_norm": 0.9149866479396442,
      "learning_rate": 3.2556874499081715e-06,
      "loss": 0.1167,
      "step": 21425
    },
    {
      "epoch": 0.625065639768948,
      "grad_norm": 0.6900964557226459,
      "learning_rate": 3.255244704726708e-06,
      "loss": 0.1362,
      "step": 21426
    },
    {
      "epoch": 0.6250948129995916,
      "grad_norm": 0.7534044221475009,
      "learning_rate": 3.254801975122004e-06,
      "loss": 0.1245,
      "step": 21427
    },
    {
      "epoch": 0.6251239862302351,
      "grad_norm": 1.0243240109542218,
      "learning_rate": 3.2543592610980107e-06,
      "loss": 0.1232,
      "step": 21428
    },
    {
      "epoch": 0.6251531594608787,
      "grad_norm": 0.8209915087198103,
      "learning_rate": 3.2539165626586812e-06,
      "loss": 0.1122,
      "step": 21429
    },
    {
      "epoch": 0.6251823326915222,
      "grad_norm": 0.7725251185715415,
      "learning_rate": 3.253473879807967e-06,
      "loss": 0.1144,
      "step": 21430
    },
    {
      "epoch": 0.6252115059221658,
      "grad_norm": 0.787825801021517,
      "learning_rate": 3.2530312125498224e-06,
      "loss": 0.1143,
      "step": 21431
    },
    {
      "epoch": 0.6252406791528093,
      "grad_norm": 0.9066304345049514,
      "learning_rate": 3.252588560888198e-06,
      "loss": 0.1308,
      "step": 21432
    },
    {
      "epoch": 0.6252698523834529,
      "grad_norm": 0.8198773791558235,
      "learning_rate": 3.252145924827045e-06,
      "loss": 0.13,
      "step": 21433
    },
    {
      "epoch": 0.6252990256140966,
      "grad_norm": 1.1783804499968054,
      "learning_rate": 3.251703304370317e-06,
      "loss": 0.1235,
      "step": 21434
    },
    {
      "epoch": 0.6253281988447401,
      "grad_norm": 0.8029233077293314,
      "learning_rate": 3.251260699521964e-06,
      "loss": 0.1258,
      "step": 21435
    },
    {
      "epoch": 0.6253573720753837,
      "grad_norm": 0.847215641910633,
      "learning_rate": 3.2508181102859373e-06,
      "loss": 0.1132,
      "step": 21436
    },
    {
      "epoch": 0.6253865453060272,
      "grad_norm": 1.0726819381486028,
      "learning_rate": 3.2503755366661893e-06,
      "loss": 0.144,
      "step": 21437
    },
    {
      "epoch": 0.6254157185366708,
      "grad_norm": 0.9165456177087047,
      "learning_rate": 3.2499329786666704e-06,
      "loss": 0.1252,
      "step": 21438
    },
    {
      "epoch": 0.6254448917673143,
      "grad_norm": 1.0593579415371146,
      "learning_rate": 3.2494904362913336e-06,
      "loss": 0.1244,
      "step": 21439
    },
    {
      "epoch": 0.6254740649979579,
      "grad_norm": 0.7798101061649765,
      "learning_rate": 3.2490479095441274e-06,
      "loss": 0.1164,
      "step": 21440
    },
    {
      "epoch": 0.6255032382286014,
      "grad_norm": 1.165482104967654,
      "learning_rate": 3.248605398429004e-06,
      "loss": 0.1128,
      "step": 21441
    },
    {
      "epoch": 0.625532411459245,
      "grad_norm": 1.002490336377689,
      "learning_rate": 3.248162902949912e-06,
      "loss": 0.1132,
      "step": 21442
    },
    {
      "epoch": 0.6255615846898885,
      "grad_norm": 0.6985548173851968,
      "learning_rate": 3.247720423110804e-06,
      "loss": 0.1191,
      "step": 21443
    },
    {
      "epoch": 0.6255907579205321,
      "grad_norm": 0.9088624263014558,
      "learning_rate": 3.2472779589156313e-06,
      "loss": 0.1432,
      "step": 21444
    },
    {
      "epoch": 0.6256199311511756,
      "grad_norm": 0.91907924242179,
      "learning_rate": 3.2468355103683414e-06,
      "loss": 0.1445,
      "step": 21445
    },
    {
      "epoch": 0.6256491043818192,
      "grad_norm": 0.8977712600118576,
      "learning_rate": 3.246393077472886e-06,
      "loss": 0.1299,
      "step": 21446
    },
    {
      "epoch": 0.6256782776124628,
      "grad_norm": 0.8134447544544907,
      "learning_rate": 3.2459506602332124e-06,
      "loss": 0.1424,
      "step": 21447
    },
    {
      "epoch": 0.6257074508431064,
      "grad_norm": 0.8258625851606094,
      "learning_rate": 3.2455082586532748e-06,
      "loss": 0.1214,
      "step": 21448
    },
    {
      "epoch": 0.62573662407375,
      "grad_norm": 0.8219977428003091,
      "learning_rate": 3.245065872737021e-06,
      "loss": 0.1138,
      "step": 21449
    },
    {
      "epoch": 0.6257657973043935,
      "grad_norm": 0.8148850511239529,
      "learning_rate": 3.2446235024883998e-06,
      "loss": 0.1259,
      "step": 21450
    },
    {
      "epoch": 0.6257949705350371,
      "grad_norm": 0.9931304727284846,
      "learning_rate": 3.2441811479113606e-06,
      "loss": 0.1206,
      "step": 21451
    },
    {
      "epoch": 0.6258241437656806,
      "grad_norm": 1.130776706632609,
      "learning_rate": 3.243738809009853e-06,
      "loss": 0.1212,
      "step": 21452
    },
    {
      "epoch": 0.6258533169963242,
      "grad_norm": 0.7070222500696697,
      "learning_rate": 3.2432964857878255e-06,
      "loss": 0.111,
      "step": 21453
    },
    {
      "epoch": 0.6258824902269677,
      "grad_norm": 1.3226701988216725,
      "learning_rate": 3.242854178249228e-06,
      "loss": 0.1343,
      "step": 21454
    },
    {
      "epoch": 0.6259116634576113,
      "grad_norm": 1.1425727052245944,
      "learning_rate": 3.242411886398009e-06,
      "loss": 0.1107,
      "step": 21455
    },
    {
      "epoch": 0.6259408366882548,
      "grad_norm": 1.0726275174312958,
      "learning_rate": 3.241969610238117e-06,
      "loss": 0.1204,
      "step": 21456
    },
    {
      "epoch": 0.6259700099188984,
      "grad_norm": 0.8533176144163238,
      "learning_rate": 3.241527349773501e-06,
      "loss": 0.0913,
      "step": 21457
    },
    {
      "epoch": 0.625999183149542,
      "grad_norm": 0.8312751156994354,
      "learning_rate": 3.2410851050081093e-06,
      "loss": 0.1581,
      "step": 21458
    },
    {
      "epoch": 0.6260283563801855,
      "grad_norm": 1.3826534479492336,
      "learning_rate": 3.2406428759458886e-06,
      "loss": 0.1461,
      "step": 21459
    },
    {
      "epoch": 0.626057529610829,
      "grad_norm": 0.8075980524239934,
      "learning_rate": 3.240200662590789e-06,
      "loss": 0.119,
      "step": 21460
    },
    {
      "epoch": 0.6260867028414727,
      "grad_norm": 1.0854000076389174,
      "learning_rate": 3.2397584649467584e-06,
      "loss": 0.1475,
      "step": 21461
    },
    {
      "epoch": 0.6261158760721163,
      "grad_norm": 1.2238030639072361,
      "learning_rate": 3.239316283017744e-06,
      "loss": 0.108,
      "step": 21462
    },
    {
      "epoch": 0.6261450493027598,
      "grad_norm": 1.0729826646664034,
      "learning_rate": 3.2388741168076927e-06,
      "loss": 0.1129,
      "step": 21463
    },
    {
      "epoch": 0.6261742225334034,
      "grad_norm": 0.8640682667547323,
      "learning_rate": 3.2384319663205544e-06,
      "loss": 0.1131,
      "step": 21464
    },
    {
      "epoch": 0.6262033957640469,
      "grad_norm": 0.7957328888807539,
      "learning_rate": 3.237989831560271e-06,
      "loss": 0.1325,
      "step": 21465
    },
    {
      "epoch": 0.6262325689946905,
      "grad_norm": 0.9437612541507222,
      "learning_rate": 3.2375477125307976e-06,
      "loss": 0.1359,
      "step": 21466
    },
    {
      "epoch": 0.626261742225334,
      "grad_norm": 0.690983930350747,
      "learning_rate": 3.2371056092360764e-06,
      "loss": 0.1126,
      "step": 21467
    },
    {
      "epoch": 0.6262909154559776,
      "grad_norm": 0.8852358085702343,
      "learning_rate": 3.2366635216800556e-06,
      "loss": 0.1162,
      "step": 21468
    },
    {
      "epoch": 0.6263200886866211,
      "grad_norm": 0.6476452883828339,
      "learning_rate": 3.2362214498666826e-06,
      "loss": 0.1279,
      "step": 21469
    },
    {
      "epoch": 0.6263492619172647,
      "grad_norm": 0.7760825147597292,
      "learning_rate": 3.235779393799903e-06,
      "loss": 0.1059,
      "step": 21470
    },
    {
      "epoch": 0.6263784351479083,
      "grad_norm": 1.120546129680719,
      "learning_rate": 3.2353373534836618e-06,
      "loss": 0.112,
      "step": 21471
    },
    {
      "epoch": 0.6264076083785518,
      "grad_norm": 0.6623240211488992,
      "learning_rate": 3.23489532892191e-06,
      "loss": 0.1296,
      "step": 21472
    },
    {
      "epoch": 0.6264367816091954,
      "grad_norm": 0.9737138537624721,
      "learning_rate": 3.2344533201185903e-06,
      "loss": 0.1355,
      "step": 21473
    },
    {
      "epoch": 0.6264659548398389,
      "grad_norm": 1.0758797997196936,
      "learning_rate": 3.234011327077651e-06,
      "loss": 0.1303,
      "step": 21474
    },
    {
      "epoch": 0.6264951280704826,
      "grad_norm": 0.7697573308968146,
      "learning_rate": 3.233569349803036e-06,
      "loss": 0.0937,
      "step": 21475
    },
    {
      "epoch": 0.6265243013011261,
      "grad_norm": 0.7873452600584238,
      "learning_rate": 3.233127388298692e-06,
      "loss": 0.1574,
      "step": 21476
    },
    {
      "epoch": 0.6265534745317697,
      "grad_norm": 1.3188187568593865,
      "learning_rate": 3.232685442568564e-06,
      "loss": 0.1401,
      "step": 21477
    },
    {
      "epoch": 0.6265826477624132,
      "grad_norm": 0.8299559483035459,
      "learning_rate": 3.2322435126165998e-06,
      "loss": 0.1251,
      "step": 21478
    },
    {
      "epoch": 0.6266118209930568,
      "grad_norm": 0.7907233450056685,
      "learning_rate": 3.2318015984467444e-06,
      "loss": 0.1093,
      "step": 21479
    },
    {
      "epoch": 0.6266409942237003,
      "grad_norm": 0.7562805504276985,
      "learning_rate": 3.2313597000629405e-06,
      "loss": 0.1078,
      "step": 21480
    },
    {
      "epoch": 0.6266701674543439,
      "grad_norm": 0.7682667137237226,
      "learning_rate": 3.230917817469136e-06,
      "loss": 0.1096,
      "step": 21481
    },
    {
      "epoch": 0.6266993406849874,
      "grad_norm": 0.778745490208327,
      "learning_rate": 3.230475950669275e-06,
      "loss": 0.0987,
      "step": 21482
    },
    {
      "epoch": 0.626728513915631,
      "grad_norm": 0.7573155065175218,
      "learning_rate": 3.2300340996673007e-06,
      "loss": 0.123,
      "step": 21483
    },
    {
      "epoch": 0.6267576871462746,
      "grad_norm": 0.6423302620766055,
      "learning_rate": 3.2295922644671605e-06,
      "loss": 0.1363,
      "step": 21484
    },
    {
      "epoch": 0.6267868603769181,
      "grad_norm": 0.9531062125109175,
      "learning_rate": 3.2291504450727983e-06,
      "loss": 0.1193,
      "step": 21485
    },
    {
      "epoch": 0.6268160336075617,
      "grad_norm": 0.7858304417946237,
      "learning_rate": 3.228708641488158e-06,
      "loss": 0.1228,
      "step": 21486
    },
    {
      "epoch": 0.6268452068382052,
      "grad_norm": 0.8371163322809207,
      "learning_rate": 3.2282668537171845e-06,
      "loss": 0.1123,
      "step": 21487
    },
    {
      "epoch": 0.6268743800688488,
      "grad_norm": 0.8470091732039831,
      "learning_rate": 3.2278250817638213e-06,
      "loss": 0.1247,
      "step": 21488
    },
    {
      "epoch": 0.6269035532994924,
      "grad_norm": 0.6490071039160903,
      "learning_rate": 3.227383325632012e-06,
      "loss": 0.0973,
      "step": 21489
    },
    {
      "epoch": 0.626932726530136,
      "grad_norm": 0.7524538481288671,
      "learning_rate": 3.2269415853257015e-06,
      "loss": 0.1603,
      "step": 21490
    },
    {
      "epoch": 0.6269618997607795,
      "grad_norm": 0.8569837417623171,
      "learning_rate": 3.226499860848834e-06,
      "loss": 0.1109,
      "step": 21491
    },
    {
      "epoch": 0.6269910729914231,
      "grad_norm": 0.8558427233602092,
      "learning_rate": 3.226058152205352e-06,
      "loss": 0.1319,
      "step": 21492
    },
    {
      "epoch": 0.6270202462220666,
      "grad_norm": 0.7372753385136062,
      "learning_rate": 3.225616459399199e-06,
      "loss": 0.1222,
      "step": 21493
    },
    {
      "epoch": 0.6270494194527102,
      "grad_norm": 0.6613528140723616,
      "learning_rate": 3.22517478243432e-06,
      "loss": 0.1116,
      "step": 21494
    },
    {
      "epoch": 0.6270785926833538,
      "grad_norm": 0.8100790230582315,
      "learning_rate": 3.2247331213146537e-06,
      "loss": 0.1212,
      "step": 21495
    },
    {
      "epoch": 0.6271077659139973,
      "grad_norm": 1.0019922446123124,
      "learning_rate": 3.2242914760441492e-06,
      "loss": 0.0958,
      "step": 21496
    },
    {
      "epoch": 0.6271369391446409,
      "grad_norm": 0.8196795105674665,
      "learning_rate": 3.2238498466267452e-06,
      "loss": 0.1361,
      "step": 21497
    },
    {
      "epoch": 0.6271661123752844,
      "grad_norm": 1.0121264732408293,
      "learning_rate": 3.2234082330663862e-06,
      "loss": 0.1417,
      "step": 21498
    },
    {
      "epoch": 0.627195285605928,
      "grad_norm": 0.8623796532033311,
      "learning_rate": 3.2229666353670157e-06,
      "loss": 0.1257,
      "step": 21499
    },
    {
      "epoch": 0.6272244588365715,
      "grad_norm": 0.706500786192673,
      "learning_rate": 3.2225250535325734e-06,
      "loss": 0.0941,
      "step": 21500
    },
    {
      "epoch": 0.6272536320672151,
      "grad_norm": 1.0776435973948162,
      "learning_rate": 3.2220834875670025e-06,
      "loss": 0.1343,
      "step": 21501
    },
    {
      "epoch": 0.6272828052978587,
      "grad_norm": 0.9433531895740077,
      "learning_rate": 3.2216419374742463e-06,
      "loss": 0.1196,
      "step": 21502
    },
    {
      "epoch": 0.6273119785285023,
      "grad_norm": 0.8541372782299788,
      "learning_rate": 3.221200403258247e-06,
      "loss": 0.138,
      "step": 21503
    },
    {
      "epoch": 0.6273411517591458,
      "grad_norm": 1.0613124678525838,
      "learning_rate": 3.220758884922946e-06,
      "loss": 0.1404,
      "step": 21504
    },
    {
      "epoch": 0.6273703249897894,
      "grad_norm": 0.7149691283911175,
      "learning_rate": 3.2203173824722845e-06,
      "loss": 0.1287,
      "step": 21505
    },
    {
      "epoch": 0.627399498220433,
      "grad_norm": 0.8569712554103952,
      "learning_rate": 3.2198758959102044e-06,
      "loss": 0.1309,
      "step": 21506
    },
    {
      "epoch": 0.6274286714510765,
      "grad_norm": 0.8563520174119542,
      "learning_rate": 3.219434425240646e-06,
      "loss": 0.1076,
      "step": 21507
    },
    {
      "epoch": 0.6274578446817201,
      "grad_norm": 0.8683837521634081,
      "learning_rate": 3.218992970467554e-06,
      "loss": 0.1434,
      "step": 21508
    },
    {
      "epoch": 0.6274870179123636,
      "grad_norm": 0.7393628992403547,
      "learning_rate": 3.218551531594868e-06,
      "loss": 0.1175,
      "step": 21509
    },
    {
      "epoch": 0.6275161911430072,
      "grad_norm": 0.7744820819302393,
      "learning_rate": 3.218110108626528e-06,
      "loss": 0.1128,
      "step": 21510
    },
    {
      "epoch": 0.6275453643736507,
      "grad_norm": 0.764739802815339,
      "learning_rate": 3.2176687015664744e-06,
      "loss": 0.1169,
      "step": 21511
    },
    {
      "epoch": 0.6275745376042943,
      "grad_norm": 0.8077778593371385,
      "learning_rate": 3.217227310418651e-06,
      "loss": 0.125,
      "step": 21512
    },
    {
      "epoch": 0.6276037108349378,
      "grad_norm": 0.7929958180323915,
      "learning_rate": 3.2167859351869946e-06,
      "loss": 0.1407,
      "step": 21513
    },
    {
      "epoch": 0.6276328840655814,
      "grad_norm": 0.9001240930573178,
      "learning_rate": 3.2163445758754484e-06,
      "loss": 0.1162,
      "step": 21514
    },
    {
      "epoch": 0.6276620572962249,
      "grad_norm": 0.9234262503649429,
      "learning_rate": 3.2159032324879522e-06,
      "loss": 0.1074,
      "step": 21515
    },
    {
      "epoch": 0.6276912305268686,
      "grad_norm": 0.6901976147452621,
      "learning_rate": 3.2154619050284465e-06,
      "loss": 0.1425,
      "step": 21516
    },
    {
      "epoch": 0.6277204037575121,
      "grad_norm": 0.9370112500263487,
      "learning_rate": 3.2150205935008715e-06,
      "loss": 0.1164,
      "step": 21517
    },
    {
      "epoch": 0.6277495769881557,
      "grad_norm": 0.8671280063844882,
      "learning_rate": 3.2145792979091656e-06,
      "loss": 0.1213,
      "step": 21518
    },
    {
      "epoch": 0.6277787502187993,
      "grad_norm": 0.7151301844910141,
      "learning_rate": 3.2141380182572684e-06,
      "loss": 0.1236,
      "step": 21519
    },
    {
      "epoch": 0.6278079234494428,
      "grad_norm": 0.7907461814608361,
      "learning_rate": 3.2136967545491214e-06,
      "loss": 0.1054,
      "step": 21520
    },
    {
      "epoch": 0.6278370966800864,
      "grad_norm": 1.0311974990272104,
      "learning_rate": 3.213255506788665e-06,
      "loss": 0.13,
      "step": 21521
    },
    {
      "epoch": 0.6278662699107299,
      "grad_norm": 0.654783786397787,
      "learning_rate": 3.2128142749798357e-06,
      "loss": 0.1073,
      "step": 21522
    },
    {
      "epoch": 0.6278954431413735,
      "grad_norm": 0.7715701914786791,
      "learning_rate": 3.212373059126574e-06,
      "loss": 0.1276,
      "step": 21523
    },
    {
      "epoch": 0.627924616372017,
      "grad_norm": 0.8551968957402468,
      "learning_rate": 3.21193185923282e-06,
      "loss": 0.123,
      "step": 21524
    },
    {
      "epoch": 0.6279537896026606,
      "grad_norm": 0.9263490150613771,
      "learning_rate": 3.211490675302508e-06,
      "loss": 0.1269,
      "step": 21525
    },
    {
      "epoch": 0.6279829628333041,
      "grad_norm": 0.7033271582772593,
      "learning_rate": 3.211049507339583e-06,
      "loss": 0.1149,
      "step": 21526
    },
    {
      "epoch": 0.6280121360639477,
      "grad_norm": 0.7079676583610439,
      "learning_rate": 3.2106083553479803e-06,
      "loss": 0.1284,
      "step": 21527
    },
    {
      "epoch": 0.6280413092945912,
      "grad_norm": 0.7680249952309003,
      "learning_rate": 3.2101672193316396e-06,
      "loss": 0.1159,
      "step": 21528
    },
    {
      "epoch": 0.6280704825252349,
      "grad_norm": 0.7992028237089969,
      "learning_rate": 3.209726099294499e-06,
      "loss": 0.1269,
      "step": 21529
    },
    {
      "epoch": 0.6280996557558784,
      "grad_norm": 0.7976631278886198,
      "learning_rate": 3.2092849952404958e-06,
      "loss": 0.1264,
      "step": 21530
    },
    {
      "epoch": 0.628128828986522,
      "grad_norm": 0.8019705601305389,
      "learning_rate": 3.208843907173568e-06,
      "loss": 0.1292,
      "step": 21531
    },
    {
      "epoch": 0.6281580022171656,
      "grad_norm": 0.8028068650962843,
      "learning_rate": 3.2084028350976547e-06,
      "loss": 0.1165,
      "step": 21532
    },
    {
      "epoch": 0.6281871754478091,
      "grad_norm": 0.8200340992995329,
      "learning_rate": 3.207961779016693e-06,
      "loss": 0.1262,
      "step": 21533
    },
    {
      "epoch": 0.6282163486784527,
      "grad_norm": 0.9319842655998817,
      "learning_rate": 3.207520738934622e-06,
      "loss": 0.1202,
      "step": 21534
    },
    {
      "epoch": 0.6282455219090962,
      "grad_norm": 0.7868073065749692,
      "learning_rate": 3.207079714855377e-06,
      "loss": 0.1269,
      "step": 21535
    },
    {
      "epoch": 0.6282746951397398,
      "grad_norm": 0.7929034376400665,
      "learning_rate": 3.2066387067828964e-06,
      "loss": 0.1134,
      "step": 21536
    },
    {
      "epoch": 0.6283038683703833,
      "grad_norm": 0.9960031040117716,
      "learning_rate": 3.2061977147211167e-06,
      "loss": 0.132,
      "step": 21537
    },
    {
      "epoch": 0.6283330416010269,
      "grad_norm": 0.8034204287393533,
      "learning_rate": 3.205756738673976e-06,
      "loss": 0.1156,
      "step": 21538
    },
    {
      "epoch": 0.6283622148316704,
      "grad_norm": 0.8642989459802942,
      "learning_rate": 3.2053157786454115e-06,
      "loss": 0.1093,
      "step": 21539
    },
    {
      "epoch": 0.628391388062314,
      "grad_norm": 0.8764213622877319,
      "learning_rate": 3.2048748346393587e-06,
      "loss": 0.116,
      "step": 21540
    },
    {
      "epoch": 0.6284205612929575,
      "grad_norm": 0.9479980537473418,
      "learning_rate": 3.2044339066597554e-06,
      "loss": 0.1141,
      "step": 21541
    },
    {
      "epoch": 0.6284497345236011,
      "grad_norm": 0.8517522390729108,
      "learning_rate": 3.2039929947105373e-06,
      "loss": 0.12,
      "step": 21542
    },
    {
      "epoch": 0.6284789077542448,
      "grad_norm": 0.9185116737345116,
      "learning_rate": 3.2035520987956403e-06,
      "loss": 0.1127,
      "step": 21543
    },
    {
      "epoch": 0.6285080809848883,
      "grad_norm": 1.076201627421619,
      "learning_rate": 3.2031112189190016e-06,
      "loss": 0.1275,
      "step": 21544
    },
    {
      "epoch": 0.6285372542155319,
      "grad_norm": 0.9351418794293074,
      "learning_rate": 3.202670355084557e-06,
      "loss": 0.1495,
      "step": 21545
    },
    {
      "epoch": 0.6285664274461754,
      "grad_norm": 0.7919129578114034,
      "learning_rate": 3.202229507296242e-06,
      "loss": 0.1061,
      "step": 21546
    },
    {
      "epoch": 0.628595600676819,
      "grad_norm": 0.9598799706381894,
      "learning_rate": 3.2017886755579945e-06,
      "loss": 0.1157,
      "step": 21547
    },
    {
      "epoch": 0.6286247739074625,
      "grad_norm": 1.256863121251991,
      "learning_rate": 3.2013478598737473e-06,
      "loss": 0.1213,
      "step": 21548
    },
    {
      "epoch": 0.6286539471381061,
      "grad_norm": 1.0244973188794877,
      "learning_rate": 3.2009070602474364e-06,
      "loss": 0.1055,
      "step": 21549
    },
    {
      "epoch": 0.6286831203687496,
      "grad_norm": 0.8329326346392835,
      "learning_rate": 3.200466276682998e-06,
      "loss": 0.1197,
      "step": 21550
    },
    {
      "epoch": 0.6287122935993932,
      "grad_norm": 1.07833819645262,
      "learning_rate": 3.2000255091843685e-06,
      "loss": 0.144,
      "step": 21551
    },
    {
      "epoch": 0.6287414668300367,
      "grad_norm": 1.4706122642924964,
      "learning_rate": 3.1995847577554805e-06,
      "loss": 0.1278,
      "step": 21552
    },
    {
      "epoch": 0.6287706400606803,
      "grad_norm": 0.7771298614194058,
      "learning_rate": 3.1991440224002703e-06,
      "loss": 0.1046,
      "step": 21553
    },
    {
      "epoch": 0.6287998132913238,
      "grad_norm": 0.934078883731427,
      "learning_rate": 3.1987033031226734e-06,
      "loss": 0.1175,
      "step": 21554
    },
    {
      "epoch": 0.6288289865219674,
      "grad_norm": 0.8632368368291131,
      "learning_rate": 3.1982625999266192e-06,
      "loss": 0.1314,
      "step": 21555
    },
    {
      "epoch": 0.6288581597526111,
      "grad_norm": 0.9722694887346638,
      "learning_rate": 3.1978219128160506e-06,
      "loss": 0.14,
      "step": 21556
    },
    {
      "epoch": 0.6288873329832546,
      "grad_norm": 0.7143577061373547,
      "learning_rate": 3.197381241794897e-06,
      "loss": 0.1179,
      "step": 21557
    },
    {
      "epoch": 0.6289165062138982,
      "grad_norm": 0.7677500897266829,
      "learning_rate": 3.1969405868670923e-06,
      "loss": 0.1145,
      "step": 21558
    },
    {
      "epoch": 0.6289456794445417,
      "grad_norm": 1.192550759965728,
      "learning_rate": 3.1964999480365732e-06,
      "loss": 0.1412,
      "step": 21559
    },
    {
      "epoch": 0.6289748526751853,
      "grad_norm": 1.1865159356900845,
      "learning_rate": 3.1960593253072713e-06,
      "loss": 0.1232,
      "step": 21560
    },
    {
      "epoch": 0.6290040259058288,
      "grad_norm": 0.7064843681198242,
      "learning_rate": 3.1956187186831197e-06,
      "loss": 0.1118,
      "step": 21561
    },
    {
      "epoch": 0.6290331991364724,
      "grad_norm": 0.8028464328453727,
      "learning_rate": 3.1951781281680537e-06,
      "loss": 0.1262,
      "step": 21562
    },
    {
      "epoch": 0.6290623723671159,
      "grad_norm": 1.0914709799610633,
      "learning_rate": 3.1947375537660073e-06,
      "loss": 0.1282,
      "step": 21563
    },
    {
      "epoch": 0.6290915455977595,
      "grad_norm": 0.828817605881031,
      "learning_rate": 3.1942969954809142e-06,
      "loss": 0.1255,
      "step": 21564
    },
    {
      "epoch": 0.629120718828403,
      "grad_norm": 0.7945320374821138,
      "learning_rate": 3.193856453316706e-06,
      "loss": 0.1267,
      "step": 21565
    },
    {
      "epoch": 0.6291498920590466,
      "grad_norm": 0.8926668259099348,
      "learning_rate": 3.1934159272773153e-06,
      "loss": 0.1261,
      "step": 21566
    },
    {
      "epoch": 0.6291790652896901,
      "grad_norm": 1.019131136454555,
      "learning_rate": 3.192975417366675e-06,
      "loss": 0.1262,
      "step": 21567
    },
    {
      "epoch": 0.6292082385203337,
      "grad_norm": 1.4016807723883395,
      "learning_rate": 3.1925349235887206e-06,
      "loss": 0.1222,
      "step": 21568
    },
    {
      "epoch": 0.6292374117509772,
      "grad_norm": 0.7037588645852593,
      "learning_rate": 3.192094445947383e-06,
      "loss": 0.1351,
      "step": 21569
    },
    {
      "epoch": 0.6292665849816209,
      "grad_norm": 1.1536549875530948,
      "learning_rate": 3.1916539844465945e-06,
      "loss": 0.1325,
      "step": 21570
    },
    {
      "epoch": 0.6292957582122645,
      "grad_norm": 1.0094463591261138,
      "learning_rate": 3.1912135390902866e-06,
      "loss": 0.1162,
      "step": 21571
    },
    {
      "epoch": 0.629324931442908,
      "grad_norm": 0.7365809998674072,
      "learning_rate": 3.1907731098823934e-06,
      "loss": 0.1085,
      "step": 21572
    },
    {
      "epoch": 0.6293541046735516,
      "grad_norm": 1.0474714461770684,
      "learning_rate": 3.1903326968268445e-06,
      "loss": 0.129,
      "step": 21573
    },
    {
      "epoch": 0.6293832779041951,
      "grad_norm": 1.105126739367677,
      "learning_rate": 3.1898922999275746e-06,
      "loss": 0.1489,
      "step": 21574
    },
    {
      "epoch": 0.6294124511348387,
      "grad_norm": 1.2915971982128405,
      "learning_rate": 3.189451919188513e-06,
      "loss": 0.1301,
      "step": 21575
    },
    {
      "epoch": 0.6294416243654822,
      "grad_norm": 0.8876481332728403,
      "learning_rate": 3.1890115546135946e-06,
      "loss": 0.1258,
      "step": 21576
    },
    {
      "epoch": 0.6294707975961258,
      "grad_norm": 0.874967281714496,
      "learning_rate": 3.1885712062067474e-06,
      "loss": 0.1245,
      "step": 21577
    },
    {
      "epoch": 0.6294999708267693,
      "grad_norm": 1.031186192412197,
      "learning_rate": 3.1881308739719043e-06,
      "loss": 0.1414,
      "step": 21578
    },
    {
      "epoch": 0.6295291440574129,
      "grad_norm": 0.7004382066330048,
      "learning_rate": 3.1876905579129947e-06,
      "loss": 0.1136,
      "step": 21579
    },
    {
      "epoch": 0.6295583172880564,
      "grad_norm": 0.7906080386510678,
      "learning_rate": 3.187250258033952e-06,
      "loss": 0.1358,
      "step": 21580
    },
    {
      "epoch": 0.6295874905187,
      "grad_norm": 1.0132762957323436,
      "learning_rate": 3.186809974338708e-06,
      "loss": 0.1321,
      "step": 21581
    },
    {
      "epoch": 0.6296166637493436,
      "grad_norm": 0.8342937522349811,
      "learning_rate": 3.18636970683119e-06,
      "loss": 0.1096,
      "step": 21582
    },
    {
      "epoch": 0.6296458369799872,
      "grad_norm": 0.7805721851565051,
      "learning_rate": 3.1859294555153307e-06,
      "loss": 0.1147,
      "step": 21583
    },
    {
      "epoch": 0.6296750102106308,
      "grad_norm": 0.8848720620201574,
      "learning_rate": 3.185489220395061e-06,
      "loss": 0.1242,
      "step": 21584
    },
    {
      "epoch": 0.6297041834412743,
      "grad_norm": 0.9549712490551664,
      "learning_rate": 3.1850490014743073e-06,
      "loss": 0.1267,
      "step": 21585
    },
    {
      "epoch": 0.6297333566719179,
      "grad_norm": 0.6993995909889085,
      "learning_rate": 3.1846087987570064e-06,
      "loss": 0.1316,
      "step": 21586
    },
    {
      "epoch": 0.6297625299025614,
      "grad_norm": 0.7656935525511819,
      "learning_rate": 3.184168612247083e-06,
      "loss": 0.1434,
      "step": 21587
    },
    {
      "epoch": 0.629791703133205,
      "grad_norm": 0.6832487924100321,
      "learning_rate": 3.1837284419484692e-06,
      "loss": 0.1163,
      "step": 21588
    },
    {
      "epoch": 0.6298208763638485,
      "grad_norm": 0.6859772217800502,
      "learning_rate": 3.183288287865095e-06,
      "loss": 0.1234,
      "step": 21589
    },
    {
      "epoch": 0.6298500495944921,
      "grad_norm": 0.7820156328027904,
      "learning_rate": 3.182848150000889e-06,
      "loss": 0.1293,
      "step": 21590
    },
    {
      "epoch": 0.6298792228251356,
      "grad_norm": 1.0464432483476764,
      "learning_rate": 3.182408028359779e-06,
      "loss": 0.1476,
      "step": 21591
    },
    {
      "epoch": 0.6299083960557792,
      "grad_norm": 0.9398890972408397,
      "learning_rate": 3.181967922945698e-06,
      "loss": 0.1092,
      "step": 21592
    },
    {
      "epoch": 0.6299375692864227,
      "grad_norm": 0.6819936556417671,
      "learning_rate": 3.181527833762573e-06,
      "loss": 0.1228,
      "step": 21593
    },
    {
      "epoch": 0.6299667425170663,
      "grad_norm": 0.7882889215378267,
      "learning_rate": 3.181087760814334e-06,
      "loss": 0.1217,
      "step": 21594
    },
    {
      "epoch": 0.6299959157477099,
      "grad_norm": 0.8198202330165746,
      "learning_rate": 3.1806477041049088e-06,
      "loss": 0.1269,
      "step": 21595
    },
    {
      "epoch": 0.6300250889783534,
      "grad_norm": 0.8829989907763003,
      "learning_rate": 3.1802076636382266e-06,
      "loss": 0.1294,
      "step": 21596
    },
    {
      "epoch": 0.6300542622089971,
      "grad_norm": 0.8119415114941211,
      "learning_rate": 3.1797676394182154e-06,
      "loss": 0.146,
      "step": 21597
    },
    {
      "epoch": 0.6300834354396406,
      "grad_norm": 1.0180215103513393,
      "learning_rate": 3.1793276314488044e-06,
      "loss": 0.1354,
      "step": 21598
    },
    {
      "epoch": 0.6301126086702842,
      "grad_norm": 0.8614817739939169,
      "learning_rate": 3.178887639733923e-06,
      "loss": 0.113,
      "step": 21599
    },
    {
      "epoch": 0.6301417819009277,
      "grad_norm": 0.9822827151036375,
      "learning_rate": 3.1784476642774965e-06,
      "loss": 0.1062,
      "step": 21600
    },
    {
      "epoch": 0.6301709551315713,
      "grad_norm": 0.883722596679789,
      "learning_rate": 3.178007705083455e-06,
      "loss": 0.1336,
      "step": 21601
    },
    {
      "epoch": 0.6302001283622148,
      "grad_norm": 0.8868749676557872,
      "learning_rate": 3.1775677621557266e-06,
      "loss": 0.1601,
      "step": 21602
    },
    {
      "epoch": 0.6302293015928584,
      "grad_norm": 0.8256364246893216,
      "learning_rate": 3.177127835498236e-06,
      "loss": 0.126,
      "step": 21603
    },
    {
      "epoch": 0.630258474823502,
      "grad_norm": 0.7191852828996356,
      "learning_rate": 3.176687925114914e-06,
      "loss": 0.0992,
      "step": 21604
    },
    {
      "epoch": 0.6302876480541455,
      "grad_norm": 0.9261084337875681,
      "learning_rate": 3.1762480310096875e-06,
      "loss": 0.1197,
      "step": 21605
    },
    {
      "epoch": 0.630316821284789,
      "grad_norm": 0.7821855119917328,
      "learning_rate": 3.1758081531864836e-06,
      "loss": 0.1343,
      "step": 21606
    },
    {
      "epoch": 0.6303459945154326,
      "grad_norm": 1.2769804075875613,
      "learning_rate": 3.1753682916492283e-06,
      "loss": 0.1321,
      "step": 21607
    },
    {
      "epoch": 0.6303751677460762,
      "grad_norm": 0.6846081119655252,
      "learning_rate": 3.1749284464018493e-06,
      "loss": 0.1102,
      "step": 21608
    },
    {
      "epoch": 0.6304043409767197,
      "grad_norm": 0.803259659986723,
      "learning_rate": 3.1744886174482727e-06,
      "loss": 0.1315,
      "step": 21609
    },
    {
      "epoch": 0.6304335142073634,
      "grad_norm": 1.0148603553708146,
      "learning_rate": 3.174048804792426e-06,
      "loss": 0.1175,
      "step": 21610
    },
    {
      "epoch": 0.6304626874380069,
      "grad_norm": 0.8276113582559743,
      "learning_rate": 3.1736090084382375e-06,
      "loss": 0.1352,
      "step": 21611
    },
    {
      "epoch": 0.6304918606686505,
      "grad_norm": 0.674217183269698,
      "learning_rate": 3.173169228389631e-06,
      "loss": 0.1189,
      "step": 21612
    },
    {
      "epoch": 0.630521033899294,
      "grad_norm": 0.8854670487468641,
      "learning_rate": 3.1727294646505326e-06,
      "loss": 0.111,
      "step": 21613
    },
    {
      "epoch": 0.6305502071299376,
      "grad_norm": 0.7736500206964132,
      "learning_rate": 3.172289717224871e-06,
      "loss": 0.1251,
      "step": 21614
    },
    {
      "epoch": 0.6305793803605811,
      "grad_norm": 0.8464978375916811,
      "learning_rate": 3.1718499861165675e-06,
      "loss": 0.1306,
      "step": 21615
    },
    {
      "epoch": 0.6306085535912247,
      "grad_norm": 0.8395150571330763,
      "learning_rate": 3.1714102713295538e-06,
      "loss": 0.1349,
      "step": 21616
    },
    {
      "epoch": 0.6306377268218682,
      "grad_norm": 0.8225095010010273,
      "learning_rate": 3.1709705728677516e-06,
      "loss": 0.119,
      "step": 21617
    },
    {
      "epoch": 0.6306669000525118,
      "grad_norm": 0.7485651100930605,
      "learning_rate": 3.1705308907350874e-06,
      "loss": 0.1363,
      "step": 21618
    },
    {
      "epoch": 0.6306960732831554,
      "grad_norm": 0.8285357155703458,
      "learning_rate": 3.1700912249354876e-06,
      "loss": 0.1408,
      "step": 21619
    },
    {
      "epoch": 0.6307252465137989,
      "grad_norm": 0.725291400812458,
      "learning_rate": 3.169651575472876e-06,
      "loss": 0.1147,
      "step": 21620
    },
    {
      "epoch": 0.6307544197444425,
      "grad_norm": 0.8214455353634862,
      "learning_rate": 3.169211942351177e-06,
      "loss": 0.1304,
      "step": 21621
    },
    {
      "epoch": 0.630783592975086,
      "grad_norm": 0.8523354910578501,
      "learning_rate": 3.1687723255743175e-06,
      "loss": 0.1285,
      "step": 21622
    },
    {
      "epoch": 0.6308127662057296,
      "grad_norm": 0.8011027639649022,
      "learning_rate": 3.1683327251462214e-06,
      "loss": 0.1254,
      "step": 21623
    },
    {
      "epoch": 0.6308419394363732,
      "grad_norm": 0.8352920934123474,
      "learning_rate": 3.1678931410708147e-06,
      "loss": 0.14,
      "step": 21624
    },
    {
      "epoch": 0.6308711126670168,
      "grad_norm": 0.7782148491210552,
      "learning_rate": 3.16745357335202e-06,
      "loss": 0.1106,
      "step": 21625
    },
    {
      "epoch": 0.6309002858976603,
      "grad_norm": 0.7553737254850275,
      "learning_rate": 3.1670140219937618e-06,
      "loss": 0.1091,
      "step": 21626
    },
    {
      "epoch": 0.6309294591283039,
      "grad_norm": 0.844354724598551,
      "learning_rate": 3.166574486999964e-06,
      "loss": 0.1399,
      "step": 21627
    },
    {
      "epoch": 0.6309586323589474,
      "grad_norm": 0.84286281446668,
      "learning_rate": 3.1661349683745527e-06,
      "loss": 0.1194,
      "step": 21628
    },
    {
      "epoch": 0.630987805589591,
      "grad_norm": 0.9342965999046021,
      "learning_rate": 3.1656954661214517e-06,
      "loss": 0.1265,
      "step": 21629
    },
    {
      "epoch": 0.6310169788202346,
      "grad_norm": 0.8805631626157451,
      "learning_rate": 3.1652559802445824e-06,
      "loss": 0.1242,
      "step": 21630
    },
    {
      "epoch": 0.6310461520508781,
      "grad_norm": 0.826035691178042,
      "learning_rate": 3.16481651074787e-06,
      "loss": 0.1262,
      "step": 21631
    },
    {
      "epoch": 0.6310753252815217,
      "grad_norm": 1.0506807534626028,
      "learning_rate": 3.1643770576352385e-06,
      "loss": 0.15,
      "step": 21632
    },
    {
      "epoch": 0.6311044985121652,
      "grad_norm": 0.9071116991791308,
      "learning_rate": 3.1639376209106087e-06,
      "loss": 0.132,
      "step": 21633
    },
    {
      "epoch": 0.6311336717428088,
      "grad_norm": 0.810589991856113,
      "learning_rate": 3.1634982005779057e-06,
      "loss": 0.1183,
      "step": 21634
    },
    {
      "epoch": 0.6311628449734523,
      "grad_norm": 1.1061353599375823,
      "learning_rate": 3.163058796641053e-06,
      "loss": 0.1338,
      "step": 21635
    },
    {
      "epoch": 0.6311920182040959,
      "grad_norm": 1.2644968792372722,
      "learning_rate": 3.162619409103974e-06,
      "loss": 0.1386,
      "step": 21636
    },
    {
      "epoch": 0.6312211914347395,
      "grad_norm": 1.2277733713954433,
      "learning_rate": 3.162180037970589e-06,
      "loss": 0.1226,
      "step": 21637
    },
    {
      "epoch": 0.6312503646653831,
      "grad_norm": 0.9519430141792353,
      "learning_rate": 3.1617406832448226e-06,
      "loss": 0.1272,
      "step": 21638
    },
    {
      "epoch": 0.6312795378960266,
      "grad_norm": 1.2693948166689224,
      "learning_rate": 3.1613013449305948e-06,
      "loss": 0.1311,
      "step": 21639
    },
    {
      "epoch": 0.6313087111266702,
      "grad_norm": 0.9420657904999812,
      "learning_rate": 3.160862023031831e-06,
      "loss": 0.1352,
      "step": 21640
    },
    {
      "epoch": 0.6313378843573137,
      "grad_norm": 1.052262365752245,
      "learning_rate": 3.1604227175524527e-06,
      "loss": 0.1177,
      "step": 21641
    },
    {
      "epoch": 0.6313670575879573,
      "grad_norm": 1.098416247575714,
      "learning_rate": 3.15998342849638e-06,
      "loss": 0.1403,
      "step": 21642
    },
    {
      "epoch": 0.6313962308186009,
      "grad_norm": 0.7520614242649607,
      "learning_rate": 3.1595441558675364e-06,
      "loss": 0.1178,
      "step": 21643
    },
    {
      "epoch": 0.6314254040492444,
      "grad_norm": 1.3690315430696907,
      "learning_rate": 3.1591048996698426e-06,
      "loss": 0.1332,
      "step": 21644
    },
    {
      "epoch": 0.631454577279888,
      "grad_norm": 0.8677210659976563,
      "learning_rate": 3.1586656599072205e-06,
      "loss": 0.1458,
      "step": 21645
    },
    {
      "epoch": 0.6314837505105315,
      "grad_norm": 0.8448804808239114,
      "learning_rate": 3.1582264365835946e-06,
      "loss": 0.1169,
      "step": 21646
    },
    {
      "epoch": 0.6315129237411751,
      "grad_norm": 0.7226770601195681,
      "learning_rate": 3.1577872297028813e-06,
      "loss": 0.1143,
      "step": 21647
    },
    {
      "epoch": 0.6315420969718186,
      "grad_norm": 0.7707979322774487,
      "learning_rate": 3.157348039269004e-06,
      "loss": 0.0992,
      "step": 21648
    },
    {
      "epoch": 0.6315712702024622,
      "grad_norm": 1.046312267048633,
      "learning_rate": 3.1569088652858847e-06,
      "loss": 0.1156,
      "step": 21649
    },
    {
      "epoch": 0.6316004434331057,
      "grad_norm": 0.9281426295578425,
      "learning_rate": 3.1564697077574403e-06,
      "loss": 0.1208,
      "step": 21650
    },
    {
      "epoch": 0.6316296166637494,
      "grad_norm": 0.9278221382126396,
      "learning_rate": 3.156030566687597e-06,
      "loss": 0.1546,
      "step": 21651
    },
    {
      "epoch": 0.631658789894393,
      "grad_norm": 0.7366072250886886,
      "learning_rate": 3.155591442080271e-06,
      "loss": 0.1143,
      "step": 21652
    },
    {
      "epoch": 0.6316879631250365,
      "grad_norm": 1.1135366008466816,
      "learning_rate": 3.1551523339393855e-06,
      "loss": 0.1464,
      "step": 21653
    },
    {
      "epoch": 0.63171713635568,
      "grad_norm": 0.7537104048847073,
      "learning_rate": 3.1547132422688593e-06,
      "loss": 0.1416,
      "step": 21654
    },
    {
      "epoch": 0.6317463095863236,
      "grad_norm": 0.8406120523779976,
      "learning_rate": 3.1542741670726123e-06,
      "loss": 0.1212,
      "step": 21655
    },
    {
      "epoch": 0.6317754828169672,
      "grad_norm": 0.9879725891146476,
      "learning_rate": 3.153835108354564e-06,
      "loss": 0.1427,
      "step": 21656
    },
    {
      "epoch": 0.6318046560476107,
      "grad_norm": 0.9870917487487902,
      "learning_rate": 3.153396066118636e-06,
      "loss": 0.1104,
      "step": 21657
    },
    {
      "epoch": 0.6318338292782543,
      "grad_norm": 0.57449428922917,
      "learning_rate": 3.152957040368747e-06,
      "loss": 0.1127,
      "step": 21658
    },
    {
      "epoch": 0.6318630025088978,
      "grad_norm": 0.6881469921397672,
      "learning_rate": 3.152518031108818e-06,
      "loss": 0.1076,
      "step": 21659
    },
    {
      "epoch": 0.6318921757395414,
      "grad_norm": 0.7112782570654465,
      "learning_rate": 3.1520790383427657e-06,
      "loss": 0.0951,
      "step": 21660
    },
    {
      "epoch": 0.6319213489701849,
      "grad_norm": 0.6982666678987339,
      "learning_rate": 3.1516400620745112e-06,
      "loss": 0.117,
      "step": 21661
    },
    {
      "epoch": 0.6319505222008285,
      "grad_norm": 0.6983628511843952,
      "learning_rate": 3.1512011023079714e-06,
      "loss": 0.1204,
      "step": 21662
    },
    {
      "epoch": 0.631979695431472,
      "grad_norm": 0.8568639745493805,
      "learning_rate": 3.1507621590470692e-06,
      "loss": 0.149,
      "step": 21663
    },
    {
      "epoch": 0.6320088686621157,
      "grad_norm": 0.8962051912890376,
      "learning_rate": 3.15032323229572e-06,
      "loss": 0.1431,
      "step": 21664
    },
    {
      "epoch": 0.6320380418927593,
      "grad_norm": 1.0384683097379803,
      "learning_rate": 3.149884322057843e-06,
      "loss": 0.1375,
      "step": 21665
    },
    {
      "epoch": 0.6320672151234028,
      "grad_norm": 0.7291252888006237,
      "learning_rate": 3.1494454283373583e-06,
      "loss": 0.1366,
      "step": 21666
    },
    {
      "epoch": 0.6320963883540464,
      "grad_norm": 0.9349662375087829,
      "learning_rate": 3.1490065511381816e-06,
      "loss": 0.1428,
      "step": 21667
    },
    {
      "epoch": 0.6321255615846899,
      "grad_norm": 0.9394634596186122,
      "learning_rate": 3.1485676904642326e-06,
      "loss": 0.1158,
      "step": 21668
    },
    {
      "epoch": 0.6321547348153335,
      "grad_norm": 0.812259875545716,
      "learning_rate": 3.1481288463194295e-06,
      "loss": 0.1306,
      "step": 21669
    },
    {
      "epoch": 0.632183908045977,
      "grad_norm": 0.7069932550567836,
      "learning_rate": 3.1476900187076896e-06,
      "loss": 0.1288,
      "step": 21670
    },
    {
      "epoch": 0.6322130812766206,
      "grad_norm": 0.9154211675415918,
      "learning_rate": 3.147251207632933e-06,
      "loss": 0.1164,
      "step": 21671
    },
    {
      "epoch": 0.6322422545072641,
      "grad_norm": 1.0751263803716586,
      "learning_rate": 3.146812413099074e-06,
      "loss": 0.1401,
      "step": 21672
    },
    {
      "epoch": 0.6322714277379077,
      "grad_norm": 0.8306769690576609,
      "learning_rate": 3.1463736351100315e-06,
      "loss": 0.1309,
      "step": 21673
    },
    {
      "epoch": 0.6323006009685512,
      "grad_norm": 0.8637797866408252,
      "learning_rate": 3.1459348736697214e-06,
      "loss": 0.1242,
      "step": 21674
    },
    {
      "epoch": 0.6323297741991948,
      "grad_norm": 0.899775583907776,
      "learning_rate": 3.1454961287820627e-06,
      "loss": 0.1073,
      "step": 21675
    },
    {
      "epoch": 0.6323589474298383,
      "grad_norm": 0.9252264441433307,
      "learning_rate": 3.1450574004509737e-06,
      "loss": 0.1477,
      "step": 21676
    },
    {
      "epoch": 0.6323881206604819,
      "grad_norm": 0.8818618303142279,
      "learning_rate": 3.144618688680368e-06,
      "loss": 0.1186,
      "step": 21677
    },
    {
      "epoch": 0.6324172938911256,
      "grad_norm": 1.1302293896714315,
      "learning_rate": 3.144179993474164e-06,
      "loss": 0.1442,
      "step": 21678
    },
    {
      "epoch": 0.6324464671217691,
      "grad_norm": 0.9210576401465935,
      "learning_rate": 3.143741314836279e-06,
      "loss": 0.133,
      "step": 21679
    },
    {
      "epoch": 0.6324756403524127,
      "grad_norm": 0.6783099486101454,
      "learning_rate": 3.143302652770625e-06,
      "loss": 0.1169,
      "step": 21680
    },
    {
      "epoch": 0.6325048135830562,
      "grad_norm": 0.9228908753654181,
      "learning_rate": 3.142864007281125e-06,
      "loss": 0.1219,
      "step": 21681
    },
    {
      "epoch": 0.6325339868136998,
      "grad_norm": 0.748219865458826,
      "learning_rate": 3.142425378371691e-06,
      "loss": 0.1161,
      "step": 21682
    },
    {
      "epoch": 0.6325631600443433,
      "grad_norm": 0.7959972009416684,
      "learning_rate": 3.1419867660462393e-06,
      "loss": 0.1249,
      "step": 21683
    },
    {
      "epoch": 0.6325923332749869,
      "grad_norm": 0.766086134365,
      "learning_rate": 3.1415481703086875e-06,
      "loss": 0.1207,
      "step": 21684
    },
    {
      "epoch": 0.6326215065056304,
      "grad_norm": 0.7530772631663772,
      "learning_rate": 3.1411095911629493e-06,
      "loss": 0.1139,
      "step": 21685
    },
    {
      "epoch": 0.632650679736274,
      "grad_norm": 0.9548755474435264,
      "learning_rate": 3.1406710286129395e-06,
      "loss": 0.1403,
      "step": 21686
    },
    {
      "epoch": 0.6326798529669175,
      "grad_norm": 0.8235797774988136,
      "learning_rate": 3.1402324826625758e-06,
      "loss": 0.1414,
      "step": 21687
    },
    {
      "epoch": 0.6327090261975611,
      "grad_norm": 0.749668243905599,
      "learning_rate": 3.139793953315773e-06,
      "loss": 0.1318,
      "step": 21688
    },
    {
      "epoch": 0.6327381994282046,
      "grad_norm": 0.9450579388649207,
      "learning_rate": 3.139355440576446e-06,
      "loss": 0.1006,
      "step": 21689
    },
    {
      "epoch": 0.6327673726588482,
      "grad_norm": 0.802373093106417,
      "learning_rate": 3.1389169444485092e-06,
      "loss": 0.1392,
      "step": 21690
    },
    {
      "epoch": 0.6327965458894919,
      "grad_norm": 0.9469888424479527,
      "learning_rate": 3.138478464935877e-06,
      "loss": 0.1293,
      "step": 21691
    },
    {
      "epoch": 0.6328257191201354,
      "grad_norm": 0.922208982657413,
      "learning_rate": 3.1380400020424638e-06,
      "loss": 0.1415,
      "step": 21692
    },
    {
      "epoch": 0.632854892350779,
      "grad_norm": 0.9266849914243759,
      "learning_rate": 3.1376015557721875e-06,
      "loss": 0.1035,
      "step": 21693
    },
    {
      "epoch": 0.6328840655814225,
      "grad_norm": 0.9004134124589869,
      "learning_rate": 3.1371631261289583e-06,
      "loss": 0.1177,
      "step": 21694
    },
    {
      "epoch": 0.6329132388120661,
      "grad_norm": 0.8702502407614358,
      "learning_rate": 3.136724713116692e-06,
      "loss": 0.1164,
      "step": 21695
    },
    {
      "epoch": 0.6329424120427096,
      "grad_norm": 0.7611662273728962,
      "learning_rate": 3.136286316739304e-06,
      "loss": 0.0976,
      "step": 21696
    },
    {
      "epoch": 0.6329715852733532,
      "grad_norm": 1.0615112949234136,
      "learning_rate": 3.1358479370007067e-06,
      "loss": 0.1348,
      "step": 21697
    },
    {
      "epoch": 0.6330007585039967,
      "grad_norm": 0.8625541826782752,
      "learning_rate": 3.135409573904812e-06,
      "loss": 0.1151,
      "step": 21698
    },
    {
      "epoch": 0.6330299317346403,
      "grad_norm": 0.9753872935193014,
      "learning_rate": 3.1349712274555364e-06,
      "loss": 0.123,
      "step": 21699
    },
    {
      "epoch": 0.6330591049652838,
      "grad_norm": 0.7421668580821029,
      "learning_rate": 3.1345328976567923e-06,
      "loss": 0.1111,
      "step": 21700
    },
    {
      "epoch": 0.6330882781959274,
      "grad_norm": 0.666921239172021,
      "learning_rate": 3.1340945845124948e-06,
      "loss": 0.1144,
      "step": 21701
    },
    {
      "epoch": 0.6331174514265709,
      "grad_norm": 0.8423616308448548,
      "learning_rate": 3.133656288026554e-06,
      "loss": 0.1502,
      "step": 21702
    },
    {
      "epoch": 0.6331466246572145,
      "grad_norm": 0.9447961324947602,
      "learning_rate": 3.133218008202885e-06,
      "loss": 0.096,
      "step": 21703
    },
    {
      "epoch": 0.633175797887858,
      "grad_norm": 0.8084249031859451,
      "learning_rate": 3.1327797450453984e-06,
      "loss": 0.1131,
      "step": 21704
    },
    {
      "epoch": 0.6332049711185017,
      "grad_norm": 0.7578063947746068,
      "learning_rate": 3.1323414985580092e-06,
      "loss": 0.1187,
      "step": 21705
    },
    {
      "epoch": 0.6332341443491453,
      "grad_norm": 0.9348425084795907,
      "learning_rate": 3.131903268744631e-06,
      "loss": 0.1348,
      "step": 21706
    },
    {
      "epoch": 0.6332633175797888,
      "grad_norm": 1.0434833243171249,
      "learning_rate": 3.131465055609173e-06,
      "loss": 0.12,
      "step": 21707
    },
    {
      "epoch": 0.6332924908104324,
      "grad_norm": 0.6079233290908971,
      "learning_rate": 3.1310268591555494e-06,
      "loss": 0.1126,
      "step": 21708
    },
    {
      "epoch": 0.6333216640410759,
      "grad_norm": 0.7797421497396152,
      "learning_rate": 3.130588679387672e-06,
      "loss": 0.1135,
      "step": 21709
    },
    {
      "epoch": 0.6333508372717195,
      "grad_norm": 0.8559849384990991,
      "learning_rate": 3.13015051630945e-06,
      "loss": 0.1189,
      "step": 21710
    },
    {
      "epoch": 0.633380010502363,
      "grad_norm": 0.7625466746703312,
      "learning_rate": 3.129712369924801e-06,
      "loss": 0.1053,
      "step": 21711
    },
    {
      "epoch": 0.6334091837330066,
      "grad_norm": 0.870000530641801,
      "learning_rate": 3.129274240237633e-06,
      "loss": 0.1402,
      "step": 21712
    },
    {
      "epoch": 0.6334383569636501,
      "grad_norm": 1.1223567464906534,
      "learning_rate": 3.1288361272518575e-06,
      "loss": 0.1295,
      "step": 21713
    },
    {
      "epoch": 0.6334675301942937,
      "grad_norm": 0.9488875518791894,
      "learning_rate": 3.128398030971387e-06,
      "loss": 0.1374,
      "step": 21714
    },
    {
      "epoch": 0.6334967034249372,
      "grad_norm": 0.7958988894901167,
      "learning_rate": 3.127959951400131e-06,
      "loss": 0.1418,
      "step": 21715
    },
    {
      "epoch": 0.6335258766555808,
      "grad_norm": 1.2673980555432538,
      "learning_rate": 3.127521888542001e-06,
      "loss": 0.1388,
      "step": 21716
    },
    {
      "epoch": 0.6335550498862244,
      "grad_norm": 0.996179579160665,
      "learning_rate": 3.1270838424009097e-06,
      "loss": 0.1238,
      "step": 21717
    },
    {
      "epoch": 0.633584223116868,
      "grad_norm": 0.8176115520522832,
      "learning_rate": 3.126645812980767e-06,
      "loss": 0.1467,
      "step": 21718
    },
    {
      "epoch": 0.6336133963475116,
      "grad_norm": 0.7650193098981782,
      "learning_rate": 3.126207800285484e-06,
      "loss": 0.1244,
      "step": 21719
    },
    {
      "epoch": 0.6336425695781551,
      "grad_norm": 0.9278519089992957,
      "learning_rate": 3.1257698043189693e-06,
      "loss": 0.1235,
      "step": 21720
    },
    {
      "epoch": 0.6336717428087987,
      "grad_norm": 0.7736597741982416,
      "learning_rate": 3.1253318250851345e-06,
      "loss": 0.1126,
      "step": 21721
    },
    {
      "epoch": 0.6337009160394422,
      "grad_norm": 0.6927540006471979,
      "learning_rate": 3.124893862587889e-06,
      "loss": 0.114,
      "step": 21722
    },
    {
      "epoch": 0.6337300892700858,
      "grad_norm": 0.9132711973936171,
      "learning_rate": 3.1244559168311452e-06,
      "loss": 0.1257,
      "step": 21723
    },
    {
      "epoch": 0.6337592625007293,
      "grad_norm": 0.8161342566372455,
      "learning_rate": 3.124017987818809e-06,
      "loss": 0.1343,
      "step": 21724
    },
    {
      "epoch": 0.6337884357313729,
      "grad_norm": 0.8222380140798606,
      "learning_rate": 3.123580075554794e-06,
      "loss": 0.1135,
      "step": 21725
    },
    {
      "epoch": 0.6338176089620164,
      "grad_norm": 0.9491268803419923,
      "learning_rate": 3.1231421800430084e-06,
      "loss": 0.1088,
      "step": 21726
    },
    {
      "epoch": 0.63384678219266,
      "grad_norm": 0.9556709148887018,
      "learning_rate": 3.122704301287361e-06,
      "loss": 0.146,
      "step": 21727
    },
    {
      "epoch": 0.6338759554233035,
      "grad_norm": 0.9430692757125659,
      "learning_rate": 3.12226643929176e-06,
      "loss": 0.1389,
      "step": 21728
    },
    {
      "epoch": 0.6339051286539471,
      "grad_norm": 0.7764755199732704,
      "learning_rate": 3.1218285940601166e-06,
      "loss": 0.1235,
      "step": 21729
    },
    {
      "epoch": 0.6339343018845907,
      "grad_norm": 0.7909181206676502,
      "learning_rate": 3.1213907655963406e-06,
      "loss": 0.1428,
      "step": 21730
    },
    {
      "epoch": 0.6339634751152342,
      "grad_norm": 0.7880675764901902,
      "learning_rate": 3.120952953904339e-06,
      "loss": 0.1104,
      "step": 21731
    },
    {
      "epoch": 0.6339926483458779,
      "grad_norm": 0.739293609115454,
      "learning_rate": 3.12051515898802e-06,
      "loss": 0.1136,
      "step": 21732
    },
    {
      "epoch": 0.6340218215765214,
      "grad_norm": 0.8153702795905208,
      "learning_rate": 3.1200773808512936e-06,
      "loss": 0.1182,
      "step": 21733
    },
    {
      "epoch": 0.634050994807165,
      "grad_norm": 0.7242207422869644,
      "learning_rate": 3.119639619498066e-06,
      "loss": 0.131,
      "step": 21734
    },
    {
      "epoch": 0.6340801680378085,
      "grad_norm": 0.8791651001527339,
      "learning_rate": 3.1192018749322482e-06,
      "loss": 0.1218,
      "step": 21735
    },
    {
      "epoch": 0.6341093412684521,
      "grad_norm": 0.7925481732877339,
      "learning_rate": 3.1187641471577478e-06,
      "loss": 0.1277,
      "step": 21736
    },
    {
      "epoch": 0.6341385144990956,
      "grad_norm": 1.0229415245648514,
      "learning_rate": 3.1183264361784716e-06,
      "loss": 0.127,
      "step": 21737
    },
    {
      "epoch": 0.6341676877297392,
      "grad_norm": 1.0213441175537599,
      "learning_rate": 3.117888741998328e-06,
      "loss": 0.1278,
      "step": 21738
    },
    {
      "epoch": 0.6341968609603827,
      "grad_norm": 0.8416204239503033,
      "learning_rate": 3.1174510646212247e-06,
      "loss": 0.1324,
      "step": 21739
    },
    {
      "epoch": 0.6342260341910263,
      "grad_norm": 0.9804147924450002,
      "learning_rate": 3.117013404051066e-06,
      "loss": 0.1569,
      "step": 21740
    },
    {
      "epoch": 0.6342552074216699,
      "grad_norm": 1.604150237655962,
      "learning_rate": 3.1165757602917653e-06,
      "loss": 0.1202,
      "step": 21741
    },
    {
      "epoch": 0.6342843806523134,
      "grad_norm": 0.8373751070357379,
      "learning_rate": 3.1161381333472253e-06,
      "loss": 0.1116,
      "step": 21742
    },
    {
      "epoch": 0.634313553882957,
      "grad_norm": 0.7782899909471713,
      "learning_rate": 3.1157005232213542e-06,
      "loss": 0.1354,
      "step": 21743
    },
    {
      "epoch": 0.6343427271136005,
      "grad_norm": 0.9136252138127734,
      "learning_rate": 3.115262929918061e-06,
      "loss": 0.1233,
      "step": 21744
    },
    {
      "epoch": 0.6343719003442441,
      "grad_norm": 0.8607119451216523,
      "learning_rate": 3.114825353441249e-06,
      "loss": 0.1379,
      "step": 21745
    },
    {
      "epoch": 0.6344010735748877,
      "grad_norm": 0.911061730616713,
      "learning_rate": 3.1143877937948247e-06,
      "loss": 0.1122,
      "step": 21746
    },
    {
      "epoch": 0.6344302468055313,
      "grad_norm": 0.8118839289481564,
      "learning_rate": 3.1139502509826975e-06,
      "loss": 0.1215,
      "step": 21747
    },
    {
      "epoch": 0.6344594200361748,
      "grad_norm": 0.6488405045595612,
      "learning_rate": 3.113512725008772e-06,
      "loss": 0.1115,
      "step": 21748
    },
    {
      "epoch": 0.6344885932668184,
      "grad_norm": 0.7618996093964154,
      "learning_rate": 3.1130752158769555e-06,
      "loss": 0.1318,
      "step": 21749
    },
    {
      "epoch": 0.6345177664974619,
      "grad_norm": 0.9880605615753427,
      "learning_rate": 3.112637723591152e-06,
      "loss": 0.1144,
      "step": 21750
    },
    {
      "epoch": 0.6345469397281055,
      "grad_norm": 0.7804695926800223,
      "learning_rate": 3.112200248155269e-06,
      "loss": 0.1663,
      "step": 21751
    },
    {
      "epoch": 0.634576112958749,
      "grad_norm": 0.9445166281720175,
      "learning_rate": 3.11176278957321e-06,
      "loss": 0.112,
      "step": 21752
    },
    {
      "epoch": 0.6346052861893926,
      "grad_norm": 0.7725115968509061,
      "learning_rate": 3.111325347848884e-06,
      "loss": 0.1182,
      "step": 21753
    },
    {
      "epoch": 0.6346344594200362,
      "grad_norm": 0.655354166208351,
      "learning_rate": 3.1108879229861934e-06,
      "loss": 0.119,
      "step": 21754
    },
    {
      "epoch": 0.6346636326506797,
      "grad_norm": 0.7494193134613015,
      "learning_rate": 3.110450514989045e-06,
      "loss": 0.1402,
      "step": 21755
    },
    {
      "epoch": 0.6346928058813233,
      "grad_norm": 0.9474238399449878,
      "learning_rate": 3.110013123861344e-06,
      "loss": 0.1192,
      "step": 21756
    },
    {
      "epoch": 0.6347219791119668,
      "grad_norm": 0.8790234496562304,
      "learning_rate": 3.1095757496069934e-06,
      "loss": 0.1343,
      "step": 21757
    },
    {
      "epoch": 0.6347511523426104,
      "grad_norm": 0.7351632736785756,
      "learning_rate": 3.1091383922298982e-06,
      "loss": 0.1185,
      "step": 21758
    },
    {
      "epoch": 0.634780325573254,
      "grad_norm": 0.8324877787027928,
      "learning_rate": 3.1087010517339656e-06,
      "loss": 0.1278,
      "step": 21759
    },
    {
      "epoch": 0.6348094988038976,
      "grad_norm": 0.8039823791184235,
      "learning_rate": 3.1082637281230977e-06,
      "loss": 0.1122,
      "step": 21760
    },
    {
      "epoch": 0.6348386720345411,
      "grad_norm": 0.8083964921544764,
      "learning_rate": 3.107826421401201e-06,
      "loss": 0.107,
      "step": 21761
    },
    {
      "epoch": 0.6348678452651847,
      "grad_norm": 0.7078659341223058,
      "learning_rate": 3.107389131572178e-06,
      "loss": 0.109,
      "step": 21762
    },
    {
      "epoch": 0.6348970184958282,
      "grad_norm": 1.2444803594802591,
      "learning_rate": 3.1069518586399323e-06,
      "loss": 0.1635,
      "step": 21763
    },
    {
      "epoch": 0.6349261917264718,
      "grad_norm": 0.9175885724502149,
      "learning_rate": 3.1065146026083675e-06,
      "loss": 0.1134,
      "step": 21764
    },
    {
      "epoch": 0.6349553649571154,
      "grad_norm": 0.8921175961086458,
      "learning_rate": 3.1060773634813895e-06,
      "loss": 0.1361,
      "step": 21765
    },
    {
      "epoch": 0.6349845381877589,
      "grad_norm": 0.8195935800664395,
      "learning_rate": 3.1056401412629023e-06,
      "loss": 0.1332,
      "step": 21766
    },
    {
      "epoch": 0.6350137114184025,
      "grad_norm": 0.8485398500224487,
      "learning_rate": 3.105202935956806e-06,
      "loss": 0.1453,
      "step": 21767
    },
    {
      "epoch": 0.635042884649046,
      "grad_norm": 0.7762656619937783,
      "learning_rate": 3.104765747567005e-06,
      "loss": 0.0851,
      "step": 21768
    },
    {
      "epoch": 0.6350720578796896,
      "grad_norm": 0.7682391691572218,
      "learning_rate": 3.104328576097405e-06,
      "loss": 0.121,
      "step": 21769
    },
    {
      "epoch": 0.6351012311103331,
      "grad_norm": 0.7209647293901071,
      "learning_rate": 3.1038914215519035e-06,
      "loss": 0.1106,
      "step": 21770
    },
    {
      "epoch": 0.6351304043409767,
      "grad_norm": 0.7839635461636388,
      "learning_rate": 3.1034542839344094e-06,
      "loss": 0.0989,
      "step": 21771
    },
    {
      "epoch": 0.6351595775716202,
      "grad_norm": 0.9488156725088999,
      "learning_rate": 3.1030171632488226e-06,
      "loss": 0.1179,
      "step": 21772
    },
    {
      "epoch": 0.6351887508022639,
      "grad_norm": 0.8923147396641986,
      "learning_rate": 3.102580059499045e-06,
      "loss": 0.1145,
      "step": 21773
    },
    {
      "epoch": 0.6352179240329074,
      "grad_norm": 0.8070798670150509,
      "learning_rate": 3.1021429726889808e-06,
      "loss": 0.1352,
      "step": 21774
    },
    {
      "epoch": 0.635247097263551,
      "grad_norm": 1.0570815815348948,
      "learning_rate": 3.1017059028225303e-06,
      "loss": 0.1068,
      "step": 21775
    },
    {
      "epoch": 0.6352762704941946,
      "grad_norm": 0.8505928246591681,
      "learning_rate": 3.1012688499035955e-06,
      "loss": 0.111,
      "step": 21776
    },
    {
      "epoch": 0.6353054437248381,
      "grad_norm": 0.9204157772627148,
      "learning_rate": 3.1008318139360795e-06,
      "loss": 0.1308,
      "step": 21777
    },
    {
      "epoch": 0.6353346169554817,
      "grad_norm": 0.933776237861263,
      "learning_rate": 3.100394794923884e-06,
      "loss": 0.0989,
      "step": 21778
    },
    {
      "epoch": 0.6353637901861252,
      "grad_norm": 1.0928731214810563,
      "learning_rate": 3.0999577928709114e-06,
      "loss": 0.1189,
      "step": 21779
    },
    {
      "epoch": 0.6353929634167688,
      "grad_norm": 1.0584957960818002,
      "learning_rate": 3.0995208077810613e-06,
      "loss": 0.1253,
      "step": 21780
    },
    {
      "epoch": 0.6354221366474123,
      "grad_norm": 1.1712067444457757,
      "learning_rate": 3.0990838396582357e-06,
      "loss": 0.1262,
      "step": 21781
    },
    {
      "epoch": 0.6354513098780559,
      "grad_norm": 0.6716273021083589,
      "learning_rate": 3.0986468885063344e-06,
      "loss": 0.0927,
      "step": 21782
    },
    {
      "epoch": 0.6354804831086994,
      "grad_norm": 1.0672201278160154,
      "learning_rate": 3.098209954329262e-06,
      "loss": 0.1352,
      "step": 21783
    },
    {
      "epoch": 0.635509656339343,
      "grad_norm": 0.9193766952718748,
      "learning_rate": 3.0977730371309154e-06,
      "loss": 0.1437,
      "step": 21784
    },
    {
      "epoch": 0.6355388295699865,
      "grad_norm": 0.8749013625383029,
      "learning_rate": 3.0973361369151977e-06,
      "loss": 0.1117,
      "step": 21785
    },
    {
      "epoch": 0.6355680028006302,
      "grad_norm": 1.0707215369033931,
      "learning_rate": 3.09689925368601e-06,
      "loss": 0.122,
      "step": 21786
    },
    {
      "epoch": 0.6355971760312737,
      "grad_norm": 0.6587241875285823,
      "learning_rate": 3.0964623874472503e-06,
      "loss": 0.1101,
      "step": 21787
    },
    {
      "epoch": 0.6356263492619173,
      "grad_norm": 0.9705001267168245,
      "learning_rate": 3.0960255382028193e-06,
      "loss": 0.114,
      "step": 21788
    },
    {
      "epoch": 0.6356555224925609,
      "grad_norm": 1.2154443540220972,
      "learning_rate": 3.095588705956618e-06,
      "loss": 0.1406,
      "step": 21789
    },
    {
      "epoch": 0.6356846957232044,
      "grad_norm": 0.8496619936358655,
      "learning_rate": 3.0951518907125468e-06,
      "loss": 0.1327,
      "step": 21790
    },
    {
      "epoch": 0.635713868953848,
      "grad_norm": 0.741635813731282,
      "learning_rate": 3.094715092474505e-06,
      "loss": 0.1229,
      "step": 21791
    },
    {
      "epoch": 0.6357430421844915,
      "grad_norm": 0.8686250528099158,
      "learning_rate": 3.094278311246392e-06,
      "loss": 0.1253,
      "step": 21792
    },
    {
      "epoch": 0.6357722154151351,
      "grad_norm": 0.7785649933729758,
      "learning_rate": 3.093841547032107e-06,
      "loss": 0.1382,
      "step": 21793
    },
    {
      "epoch": 0.6358013886457786,
      "grad_norm": 0.9243323103759059,
      "learning_rate": 3.093404799835548e-06,
      "loss": 0.1135,
      "step": 21794
    },
    {
      "epoch": 0.6358305618764222,
      "grad_norm": 0.9318614880552503,
      "learning_rate": 3.092968069660618e-06,
      "loss": 0.1303,
      "step": 21795
    },
    {
      "epoch": 0.6358597351070657,
      "grad_norm": 0.8252247957126727,
      "learning_rate": 3.0925313565112135e-06,
      "loss": 0.11,
      "step": 21796
    },
    {
      "epoch": 0.6358889083377093,
      "grad_norm": 0.7766490834908291,
      "learning_rate": 3.092094660391234e-06,
      "loss": 0.1203,
      "step": 21797
    },
    {
      "epoch": 0.6359180815683528,
      "grad_norm": 0.8685011647973678,
      "learning_rate": 3.0916579813045764e-06,
      "loss": 0.1222,
      "step": 21798
    },
    {
      "epoch": 0.6359472547989964,
      "grad_norm": 0.8992556388119634,
      "learning_rate": 3.0912213192551434e-06,
      "loss": 0.0998,
      "step": 21799
    },
    {
      "epoch": 0.63597642802964,
      "grad_norm": 0.798910637193303,
      "learning_rate": 3.090784674246826e-06,
      "loss": 0.1154,
      "step": 21800
    },
    {
      "epoch": 0.6360056012602836,
      "grad_norm": 0.97939781384001,
      "learning_rate": 3.0903480462835323e-06,
      "loss": 0.1308,
      "step": 21801
    },
    {
      "epoch": 0.6360347744909272,
      "grad_norm": 1.0612139597400803,
      "learning_rate": 3.089911435369153e-06,
      "loss": 0.1358,
      "step": 21802
    },
    {
      "epoch": 0.6360639477215707,
      "grad_norm": 0.9971849600337275,
      "learning_rate": 3.0894748415075887e-06,
      "loss": 0.1333,
      "step": 21803
    },
    {
      "epoch": 0.6360931209522143,
      "grad_norm": 0.7554802910512478,
      "learning_rate": 3.0890382647027382e-06,
      "loss": 0.121,
      "step": 21804
    },
    {
      "epoch": 0.6361222941828578,
      "grad_norm": 1.1523452522450974,
      "learning_rate": 3.0886017049584963e-06,
      "loss": 0.1412,
      "step": 21805
    },
    {
      "epoch": 0.6361514674135014,
      "grad_norm": 1.0359600180640038,
      "learning_rate": 3.088165162278762e-06,
      "loss": 0.1324,
      "step": 21806
    },
    {
      "epoch": 0.6361806406441449,
      "grad_norm": 1.3569448829642885,
      "learning_rate": 3.087728636667433e-06,
      "loss": 0.1022,
      "step": 21807
    },
    {
      "epoch": 0.6362098138747885,
      "grad_norm": 1.2866344165629882,
      "learning_rate": 3.0872921281284063e-06,
      "loss": 0.1332,
      "step": 21808
    },
    {
      "epoch": 0.636238987105432,
      "grad_norm": 1.2706445821451313,
      "learning_rate": 3.08685563666558e-06,
      "loss": 0.1463,
      "step": 21809
    },
    {
      "epoch": 0.6362681603360756,
      "grad_norm": 1.354870983290892,
      "learning_rate": 3.086419162282849e-06,
      "loss": 0.1174,
      "step": 21810
    },
    {
      "epoch": 0.6362973335667191,
      "grad_norm": 1.383365738194712,
      "learning_rate": 3.0859827049841105e-06,
      "loss": 0.1216,
      "step": 21811
    },
    {
      "epoch": 0.6363265067973627,
      "grad_norm": 0.6837405199969548,
      "learning_rate": 3.0855462647732615e-06,
      "loss": 0.1301,
      "step": 21812
    },
    {
      "epoch": 0.6363556800280064,
      "grad_norm": 1.1635242670466868,
      "learning_rate": 3.085109841654199e-06,
      "loss": 0.1313,
      "step": 21813
    },
    {
      "epoch": 0.6363848532586499,
      "grad_norm": 1.405525454712235,
      "learning_rate": 3.084673435630819e-06,
      "loss": 0.1001,
      "step": 21814
    },
    {
      "epoch": 0.6364140264892935,
      "grad_norm": 1.165711951011714,
      "learning_rate": 3.084237046707017e-06,
      "loss": 0.1139,
      "step": 21815
    },
    {
      "epoch": 0.636443199719937,
      "grad_norm": 0.880725538768999,
      "learning_rate": 3.08380067488669e-06,
      "loss": 0.1213,
      "step": 21816
    },
    {
      "epoch": 0.6364723729505806,
      "grad_norm": 2.381796792180809,
      "learning_rate": 3.083364320173732e-06,
      "loss": 0.1301,
      "step": 21817
    },
    {
      "epoch": 0.6365015461812241,
      "grad_norm": 0.9165355178052541,
      "learning_rate": 3.0829279825720393e-06,
      "loss": 0.1074,
      "step": 21818
    },
    {
      "epoch": 0.6365307194118677,
      "grad_norm": 1.0651285245372524,
      "learning_rate": 3.082491662085508e-06,
      "loss": 0.1172,
      "step": 21819
    },
    {
      "epoch": 0.6365598926425112,
      "grad_norm": 0.8388221543037918,
      "learning_rate": 3.0820553587180346e-06,
      "loss": 0.1116,
      "step": 21820
    },
    {
      "epoch": 0.6365890658731548,
      "grad_norm": 0.7066232406302658,
      "learning_rate": 3.081619072473514e-06,
      "loss": 0.1284,
      "step": 21821
    },
    {
      "epoch": 0.6366182391037983,
      "grad_norm": 0.9950246610089574,
      "learning_rate": 3.0811828033558388e-06,
      "loss": 0.1267,
      "step": 21822
    },
    {
      "epoch": 0.6366474123344419,
      "grad_norm": 1.1898016694790707,
      "learning_rate": 3.080746551368906e-06,
      "loss": 0.1262,
      "step": 21823
    },
    {
      "epoch": 0.6366765855650854,
      "grad_norm": 0.8613786808996372,
      "learning_rate": 3.080310316516608e-06,
      "loss": 0.1154,
      "step": 21824
    },
    {
      "epoch": 0.636705758795729,
      "grad_norm": 0.8929237471297329,
      "learning_rate": 3.079874098802843e-06,
      "loss": 0.1239,
      "step": 21825
    },
    {
      "epoch": 0.6367349320263725,
      "grad_norm": 1.5433127283925474,
      "learning_rate": 3.0794378982315044e-06,
      "loss": 0.1454,
      "step": 21826
    },
    {
      "epoch": 0.6367641052570162,
      "grad_norm": 1.2118272981373643,
      "learning_rate": 3.0790017148064844e-06,
      "loss": 0.1218,
      "step": 21827
    },
    {
      "epoch": 0.6367932784876598,
      "grad_norm": 0.9964554054028634,
      "learning_rate": 3.0785655485316788e-06,
      "loss": 0.1148,
      "step": 21828
    },
    {
      "epoch": 0.6368224517183033,
      "grad_norm": 0.6779271565125494,
      "learning_rate": 3.0781293994109828e-06,
      "loss": 0.1307,
      "step": 21829
    },
    {
      "epoch": 0.6368516249489469,
      "grad_norm": 1.0674790488557546,
      "learning_rate": 3.077693267448285e-06,
      "loss": 0.1347,
      "step": 21830
    },
    {
      "epoch": 0.6368807981795904,
      "grad_norm": 1.316009182413062,
      "learning_rate": 3.077257152647486e-06,
      "loss": 0.13,
      "step": 21831
    },
    {
      "epoch": 0.636909971410234,
      "grad_norm": 0.9490288674966368,
      "learning_rate": 3.0768210550124757e-06,
      "loss": 0.0948,
      "step": 21832
    },
    {
      "epoch": 0.6369391446408775,
      "grad_norm": 0.6827399227485108,
      "learning_rate": 3.0763849745471475e-06,
      "loss": 0.1195,
      "step": 21833
    },
    {
      "epoch": 0.6369683178715211,
      "grad_norm": 1.268438017932574,
      "learning_rate": 3.075948911255396e-06,
      "loss": 0.1251,
      "step": 21834
    },
    {
      "epoch": 0.6369974911021646,
      "grad_norm": 1.5663374850285545,
      "learning_rate": 3.0755128651411115e-06,
      "loss": 0.1025,
      "step": 21835
    },
    {
      "epoch": 0.6370266643328082,
      "grad_norm": 1.061104852661182,
      "learning_rate": 3.0750768362081895e-06,
      "loss": 0.1092,
      "step": 21836
    },
    {
      "epoch": 0.6370558375634517,
      "grad_norm": 0.7752633644749488,
      "learning_rate": 3.074640824460522e-06,
      "loss": 0.1347,
      "step": 21837
    },
    {
      "epoch": 0.6370850107940953,
      "grad_norm": 1.2016071339398173,
      "learning_rate": 3.074204829902001e-06,
      "loss": 0.1365,
      "step": 21838
    },
    {
      "epoch": 0.6371141840247388,
      "grad_norm": 1.3821317503312849,
      "learning_rate": 3.073768852536522e-06,
      "loss": 0.1328,
      "step": 21839
    },
    {
      "epoch": 0.6371433572553825,
      "grad_norm": 0.9094382064689873,
      "learning_rate": 3.073332892367973e-06,
      "loss": 0.1144,
      "step": 21840
    },
    {
      "epoch": 0.6371725304860261,
      "grad_norm": 0.66381617402986,
      "learning_rate": 3.072896949400247e-06,
      "loss": 0.1253,
      "step": 21841
    },
    {
      "epoch": 0.6372017037166696,
      "grad_norm": 0.9821706677181078,
      "learning_rate": 3.0724610236372377e-06,
      "loss": 0.1125,
      "step": 21842
    },
    {
      "epoch": 0.6372308769473132,
      "grad_norm": 1.0959836495574657,
      "learning_rate": 3.072025115082838e-06,
      "loss": 0.0997,
      "step": 21843
    },
    {
      "epoch": 0.6372600501779567,
      "grad_norm": 1.1337204186278373,
      "learning_rate": 3.071589223740936e-06,
      "loss": 0.1119,
      "step": 21844
    },
    {
      "epoch": 0.6372892234086003,
      "grad_norm": 0.6897983155072406,
      "learning_rate": 3.0711533496154258e-06,
      "loss": 0.1149,
      "step": 21845
    },
    {
      "epoch": 0.6373183966392438,
      "grad_norm": 0.9365897347237178,
      "learning_rate": 3.070717492710199e-06,
      "loss": 0.1232,
      "step": 21846
    },
    {
      "epoch": 0.6373475698698874,
      "grad_norm": 0.9274447875877025,
      "learning_rate": 3.0702816530291425e-06,
      "loss": 0.1391,
      "step": 21847
    },
    {
      "epoch": 0.6373767431005309,
      "grad_norm": 1.0215428332432723,
      "learning_rate": 3.0698458305761538e-06,
      "loss": 0.1493,
      "step": 21848
    },
    {
      "epoch": 0.6374059163311745,
      "grad_norm": 0.9081846317617027,
      "learning_rate": 3.0694100253551195e-06,
      "loss": 0.1112,
      "step": 21849
    },
    {
      "epoch": 0.637435089561818,
      "grad_norm": 0.7751254122582037,
      "learning_rate": 3.068974237369932e-06,
      "loss": 0.1116,
      "step": 21850
    },
    {
      "epoch": 0.6374642627924616,
      "grad_norm": 0.8434729375194193,
      "learning_rate": 3.068538466624482e-06,
      "loss": 0.1376,
      "step": 21851
    },
    {
      "epoch": 0.6374934360231052,
      "grad_norm": 0.9442999885080988,
      "learning_rate": 3.068102713122659e-06,
      "loss": 0.1707,
      "step": 21852
    },
    {
      "epoch": 0.6375226092537487,
      "grad_norm": 0.9222775526189096,
      "learning_rate": 3.067666976868353e-06,
      "loss": 0.133,
      "step": 21853
    },
    {
      "epoch": 0.6375517824843924,
      "grad_norm": 0.7792483602620585,
      "learning_rate": 3.067231257865456e-06,
      "loss": 0.1364,
      "step": 21854
    },
    {
      "epoch": 0.6375809557150359,
      "grad_norm": 0.8573625377828165,
      "learning_rate": 3.0667955561178566e-06,
      "loss": 0.1257,
      "step": 21855
    },
    {
      "epoch": 0.6376101289456795,
      "grad_norm": 0.7886138594932548,
      "learning_rate": 3.066359871629446e-06,
      "loss": 0.1099,
      "step": 21856
    },
    {
      "epoch": 0.637639302176323,
      "grad_norm": 0.9117843252800424,
      "learning_rate": 3.0659242044041117e-06,
      "loss": 0.1359,
      "step": 21857
    },
    {
      "epoch": 0.6376684754069666,
      "grad_norm": 0.7730494799951317,
      "learning_rate": 3.0654885544457446e-06,
      "loss": 0.1193,
      "step": 21858
    },
    {
      "epoch": 0.6376976486376101,
      "grad_norm": 0.859282447127082,
      "learning_rate": 3.0650529217582333e-06,
      "loss": 0.1319,
      "step": 21859
    },
    {
      "epoch": 0.6377268218682537,
      "grad_norm": 1.0796400353288,
      "learning_rate": 3.0646173063454676e-06,
      "loss": 0.1279,
      "step": 21860
    },
    {
      "epoch": 0.6377559950988972,
      "grad_norm": 0.9053880327450825,
      "learning_rate": 3.0641817082113385e-06,
      "loss": 0.119,
      "step": 21861
    },
    {
      "epoch": 0.6377851683295408,
      "grad_norm": 0.727253148668691,
      "learning_rate": 3.0637461273597312e-06,
      "loss": 0.1181,
      "step": 21862
    },
    {
      "epoch": 0.6378143415601844,
      "grad_norm": 1.0231326515798984,
      "learning_rate": 3.063310563794537e-06,
      "loss": 0.1216,
      "step": 21863
    },
    {
      "epoch": 0.6378435147908279,
      "grad_norm": 0.8431087631210024,
      "learning_rate": 3.062875017519645e-06,
      "loss": 0.1049,
      "step": 21864
    },
    {
      "epoch": 0.6378726880214715,
      "grad_norm": 0.7231253542040589,
      "learning_rate": 3.0624394885389397e-06,
      "loss": 0.1382,
      "step": 21865
    },
    {
      "epoch": 0.637901861252115,
      "grad_norm": 0.8593676211645107,
      "learning_rate": 3.062003976856313e-06,
      "loss": 0.1327,
      "step": 21866
    },
    {
      "epoch": 0.6379310344827587,
      "grad_norm": 0.7948219579548143,
      "learning_rate": 3.0615684824756525e-06,
      "loss": 0.1246,
      "step": 21867
    },
    {
      "epoch": 0.6379602077134022,
      "grad_norm": 0.7230630063087355,
      "learning_rate": 3.061133005400846e-06,
      "loss": 0.1372,
      "step": 21868
    },
    {
      "epoch": 0.6379893809440458,
      "grad_norm": 0.8481834099556024,
      "learning_rate": 3.0606975456357817e-06,
      "loss": 0.1446,
      "step": 21869
    },
    {
      "epoch": 0.6380185541746893,
      "grad_norm": 0.8388905248271551,
      "learning_rate": 3.060262103184346e-06,
      "loss": 0.1242,
      "step": 21870
    },
    {
      "epoch": 0.6380477274053329,
      "grad_norm": 0.8039098227458443,
      "learning_rate": 3.0598266780504267e-06,
      "loss": 0.129,
      "step": 21871
    },
    {
      "epoch": 0.6380769006359764,
      "grad_norm": 0.6854274554351321,
      "learning_rate": 3.059391270237912e-06,
      "loss": 0.1023,
      "step": 21872
    },
    {
      "epoch": 0.63810607386662,
      "grad_norm": 0.8483208695899778,
      "learning_rate": 3.05895587975069e-06,
      "loss": 0.1288,
      "step": 21873
    },
    {
      "epoch": 0.6381352470972635,
      "grad_norm": 0.8825059011319244,
      "learning_rate": 3.0585205065926453e-06,
      "loss": 0.1131,
      "step": 21874
    },
    {
      "epoch": 0.6381644203279071,
      "grad_norm": 0.704871957973163,
      "learning_rate": 3.058085150767667e-06,
      "loss": 0.101,
      "step": 21875
    },
    {
      "epoch": 0.6381935935585507,
      "grad_norm": 0.7805779065666156,
      "learning_rate": 3.0576498122796403e-06,
      "loss": 0.1156,
      "step": 21876
    },
    {
      "epoch": 0.6382227667891942,
      "grad_norm": 0.9444300393589251,
      "learning_rate": 3.057214491132451e-06,
      "loss": 0.1326,
      "step": 21877
    },
    {
      "epoch": 0.6382519400198378,
      "grad_norm": 0.9479615537361952,
      "learning_rate": 3.056779187329989e-06,
      "loss": 0.1374,
      "step": 21878
    },
    {
      "epoch": 0.6382811132504813,
      "grad_norm": 0.8509617843592863,
      "learning_rate": 3.0563439008761377e-06,
      "loss": 0.1115,
      "step": 21879
    },
    {
      "epoch": 0.6383102864811249,
      "grad_norm": 0.7367877561446459,
      "learning_rate": 3.055908631774784e-06,
      "loss": 0.1287,
      "step": 21880
    },
    {
      "epoch": 0.6383394597117685,
      "grad_norm": 0.8504320472884936,
      "learning_rate": 3.0554733800298154e-06,
      "loss": 0.1279,
      "step": 21881
    },
    {
      "epoch": 0.6383686329424121,
      "grad_norm": 0.9429986359842464,
      "learning_rate": 3.0550381456451144e-06,
      "loss": 0.1256,
      "step": 21882
    },
    {
      "epoch": 0.6383978061730556,
      "grad_norm": 1.0032892736514707,
      "learning_rate": 3.054602928624568e-06,
      "loss": 0.1217,
      "step": 21883
    },
    {
      "epoch": 0.6384269794036992,
      "grad_norm": 0.9133680212190357,
      "learning_rate": 3.0541677289720632e-06,
      "loss": 0.1269,
      "step": 21884
    },
    {
      "epoch": 0.6384561526343427,
      "grad_norm": 0.9797065792975945,
      "learning_rate": 3.053732546691485e-06,
      "loss": 0.1046,
      "step": 21885
    },
    {
      "epoch": 0.6384853258649863,
      "grad_norm": 0.9860794712482452,
      "learning_rate": 3.0532973817867185e-06,
      "loss": 0.1342,
      "step": 21886
    },
    {
      "epoch": 0.6385144990956299,
      "grad_norm": 1.0183469029431744,
      "learning_rate": 3.0528622342616472e-06,
      "loss": 0.1214,
      "step": 21887
    },
    {
      "epoch": 0.6385436723262734,
      "grad_norm": 0.9259410328115509,
      "learning_rate": 3.052427104120157e-06,
      "loss": 0.1191,
      "step": 21888
    },
    {
      "epoch": 0.638572845556917,
      "grad_norm": 1.2581005181482376,
      "learning_rate": 3.0519919913661317e-06,
      "loss": 0.0981,
      "step": 21889
    },
    {
      "epoch": 0.6386020187875605,
      "grad_norm": 1.561430521600238,
      "learning_rate": 3.051556896003458e-06,
      "loss": 0.1356,
      "step": 21890
    },
    {
      "epoch": 0.6386311920182041,
      "grad_norm": 0.8987681531126557,
      "learning_rate": 3.05112181803602e-06,
      "loss": 0.1138,
      "step": 21891
    },
    {
      "epoch": 0.6386603652488476,
      "grad_norm": 0.9575434341929723,
      "learning_rate": 3.0506867574677007e-06,
      "loss": 0.1122,
      "step": 21892
    },
    {
      "epoch": 0.6386895384794912,
      "grad_norm": 0.9803084522916741,
      "learning_rate": 3.0502517143023846e-06,
      "loss": 0.1244,
      "step": 21893
    },
    {
      "epoch": 0.6387187117101348,
      "grad_norm": 1.1634107455634812,
      "learning_rate": 3.049816688543956e-06,
      "loss": 0.1319,
      "step": 21894
    },
    {
      "epoch": 0.6387478849407784,
      "grad_norm": 1.0048455952281852,
      "learning_rate": 3.0493816801962974e-06,
      "loss": 0.1404,
      "step": 21895
    },
    {
      "epoch": 0.6387770581714219,
      "grad_norm": 0.7597447083970655,
      "learning_rate": 3.0489466892632934e-06,
      "loss": 0.1034,
      "step": 21896
    },
    {
      "epoch": 0.6388062314020655,
      "grad_norm": 0.9691091562542609,
      "learning_rate": 3.0485117157488287e-06,
      "loss": 0.1387,
      "step": 21897
    },
    {
      "epoch": 0.638835404632709,
      "grad_norm": 0.8071088564651172,
      "learning_rate": 3.048076759656785e-06,
      "loss": 0.1211,
      "step": 21898
    },
    {
      "epoch": 0.6388645778633526,
      "grad_norm": 0.6358678297759078,
      "learning_rate": 3.0476418209910475e-06,
      "loss": 0.12,
      "step": 21899
    },
    {
      "epoch": 0.6388937510939962,
      "grad_norm": 0.7951613576571183,
      "learning_rate": 3.047206899755496e-06,
      "loss": 0.126,
      "step": 21900
    },
    {
      "epoch": 0.6389229243246397,
      "grad_norm": 1.0271889369572134,
      "learning_rate": 3.046771995954015e-06,
      "loss": 0.1086,
      "step": 21901
    },
    {
      "epoch": 0.6389520975552833,
      "grad_norm": 0.9344279582335326,
      "learning_rate": 3.046337109590488e-06,
      "loss": 0.1276,
      "step": 21902
    },
    {
      "epoch": 0.6389812707859268,
      "grad_norm": 0.7668315638447574,
      "learning_rate": 3.0459022406687977e-06,
      "loss": 0.1029,
      "step": 21903
    },
    {
      "epoch": 0.6390104440165704,
      "grad_norm": 0.8563832055480559,
      "learning_rate": 3.045467389192824e-06,
      "loss": 0.1381,
      "step": 21904
    },
    {
      "epoch": 0.6390396172472139,
      "grad_norm": 1.3599042135465556,
      "learning_rate": 3.0450325551664522e-06,
      "loss": 0.1243,
      "step": 21905
    },
    {
      "epoch": 0.6390687904778575,
      "grad_norm": 0.7885305964212832,
      "learning_rate": 3.044597738593564e-06,
      "loss": 0.1308,
      "step": 21906
    },
    {
      "epoch": 0.639097963708501,
      "grad_norm": 0.6982586480666967,
      "learning_rate": 3.044162939478037e-06,
      "loss": 0.1,
      "step": 21907
    },
    {
      "epoch": 0.6391271369391447,
      "grad_norm": 0.8285675420662989,
      "learning_rate": 3.0437281578237587e-06,
      "loss": 0.1265,
      "step": 21908
    },
    {
      "epoch": 0.6391563101697882,
      "grad_norm": 0.8283163405615398,
      "learning_rate": 3.0432933936346083e-06,
      "loss": 0.1257,
      "step": 21909
    },
    {
      "epoch": 0.6391854834004318,
      "grad_norm": 0.9649752723675649,
      "learning_rate": 3.042858646914467e-06,
      "loss": 0.1107,
      "step": 21910
    },
    {
      "epoch": 0.6392146566310754,
      "grad_norm": 0.9493165956215821,
      "learning_rate": 3.0424239176672177e-06,
      "loss": 0.0975,
      "step": 21911
    },
    {
      "epoch": 0.6392438298617189,
      "grad_norm": 1.0680619133120668,
      "learning_rate": 3.0419892058967393e-06,
      "loss": 0.1373,
      "step": 21912
    },
    {
      "epoch": 0.6392730030923625,
      "grad_norm": 1.3516981134621209,
      "learning_rate": 3.0415545116069127e-06,
      "loss": 0.109,
      "step": 21913
    },
    {
      "epoch": 0.639302176323006,
      "grad_norm": 1.0398911799811374,
      "learning_rate": 3.041119834801621e-06,
      "loss": 0.1146,
      "step": 21914
    },
    {
      "epoch": 0.6393313495536496,
      "grad_norm": 0.7618778724016294,
      "learning_rate": 3.040685175484744e-06,
      "loss": 0.1078,
      "step": 21915
    },
    {
      "epoch": 0.6393605227842931,
      "grad_norm": 0.8823225671438082,
      "learning_rate": 3.040250533660163e-06,
      "loss": 0.14,
      "step": 21916
    },
    {
      "epoch": 0.6393896960149367,
      "grad_norm": 0.9549327538169706,
      "learning_rate": 3.039815909331756e-06,
      "loss": 0.1263,
      "step": 21917
    },
    {
      "epoch": 0.6394188692455802,
      "grad_norm": 0.7160000367478118,
      "learning_rate": 3.0393813025034046e-06,
      "loss": 0.1138,
      "step": 21918
    },
    {
      "epoch": 0.6394480424762238,
      "grad_norm": 0.8889924544531044,
      "learning_rate": 3.0389467131789884e-06,
      "loss": 0.1198,
      "step": 21919
    },
    {
      "epoch": 0.6394772157068673,
      "grad_norm": 0.7604025105980199,
      "learning_rate": 3.0385121413623888e-06,
      "loss": 0.1263,
      "step": 21920
    },
    {
      "epoch": 0.639506388937511,
      "grad_norm": 0.8818718817076348,
      "learning_rate": 3.038077587057485e-06,
      "loss": 0.1126,
      "step": 21921
    },
    {
      "epoch": 0.6395355621681545,
      "grad_norm": 0.8709218495331252,
      "learning_rate": 3.0376430502681554e-06,
      "loss": 0.1251,
      "step": 21922
    },
    {
      "epoch": 0.6395647353987981,
      "grad_norm": 0.7977261760500468,
      "learning_rate": 3.03720853099828e-06,
      "loss": 0.1317,
      "step": 21923
    },
    {
      "epoch": 0.6395939086294417,
      "grad_norm": 0.9860642992008797,
      "learning_rate": 3.03677402925174e-06,
      "loss": 0.1247,
      "step": 21924
    },
    {
      "epoch": 0.6396230818600852,
      "grad_norm": 0.7759127192684441,
      "learning_rate": 3.0363395450324103e-06,
      "loss": 0.1148,
      "step": 21925
    },
    {
      "epoch": 0.6396522550907288,
      "grad_norm": 0.634868363926555,
      "learning_rate": 3.0359050783441736e-06,
      "loss": 0.1183,
      "step": 21926
    },
    {
      "epoch": 0.6396814283213723,
      "grad_norm": 0.7773985173968639,
      "learning_rate": 3.035470629190907e-06,
      "loss": 0.124,
      "step": 21927
    },
    {
      "epoch": 0.6397106015520159,
      "grad_norm": 0.8659888848175732,
      "learning_rate": 3.0350361975764907e-06,
      "loss": 0.127,
      "step": 21928
    },
    {
      "epoch": 0.6397397747826594,
      "grad_norm": 0.9017771768607322,
      "learning_rate": 3.0346017835048015e-06,
      "loss": 0.1269,
      "step": 21929
    },
    {
      "epoch": 0.639768948013303,
      "grad_norm": 0.7096728122358629,
      "learning_rate": 3.0341673869797183e-06,
      "loss": 0.111,
      "step": 21930
    },
    {
      "epoch": 0.6397981212439465,
      "grad_norm": 0.8243088859971814,
      "learning_rate": 3.0337330080051188e-06,
      "loss": 0.1141,
      "step": 21931
    },
    {
      "epoch": 0.6398272944745901,
      "grad_norm": 1.0075431142357507,
      "learning_rate": 3.0332986465848824e-06,
      "loss": 0.1346,
      "step": 21932
    },
    {
      "epoch": 0.6398564677052336,
      "grad_norm": 0.9271585951905548,
      "learning_rate": 3.0328643027228864e-06,
      "loss": 0.1114,
      "step": 21933
    },
    {
      "epoch": 0.6398856409358772,
      "grad_norm": 0.8825953290479391,
      "learning_rate": 3.032429976423008e-06,
      "loss": 0.1403,
      "step": 21934
    },
    {
      "epoch": 0.6399148141665209,
      "grad_norm": 0.6456542010582347,
      "learning_rate": 3.0319956676891253e-06,
      "loss": 0.1308,
      "step": 21935
    },
    {
      "epoch": 0.6399439873971644,
      "grad_norm": 0.9258455589709558,
      "learning_rate": 3.0315613765251164e-06,
      "loss": 0.1298,
      "step": 21936
    },
    {
      "epoch": 0.639973160627808,
      "grad_norm": 1.014659021103718,
      "learning_rate": 3.0311271029348545e-06,
      "loss": 0.1211,
      "step": 21937
    },
    {
      "epoch": 0.6400023338584515,
      "grad_norm": 1.040044223233855,
      "learning_rate": 3.0306928469222225e-06,
      "loss": 0.1402,
      "step": 21938
    },
    {
      "epoch": 0.6400315070890951,
      "grad_norm": 1.1177879221621851,
      "learning_rate": 3.0302586084910934e-06,
      "loss": 0.1388,
      "step": 21939
    },
    {
      "epoch": 0.6400606803197386,
      "grad_norm": 0.9252474808014111,
      "learning_rate": 3.0298243876453458e-06,
      "loss": 0.1178,
      "step": 21940
    },
    {
      "epoch": 0.6400898535503822,
      "grad_norm": 1.1589570442812709,
      "learning_rate": 3.0293901843888573e-06,
      "loss": 0.1315,
      "step": 21941
    },
    {
      "epoch": 0.6401190267810257,
      "grad_norm": 1.0330798901038698,
      "learning_rate": 3.0289559987255015e-06,
      "loss": 0.1342,
      "step": 21942
    },
    {
      "epoch": 0.6401482000116693,
      "grad_norm": 0.83232699843636,
      "learning_rate": 3.028521830659154e-06,
      "loss": 0.1269,
      "step": 21943
    },
    {
      "epoch": 0.6401773732423128,
      "grad_norm": 1.0162619330003844,
      "learning_rate": 3.028087680193695e-06,
      "loss": 0.1193,
      "step": 21944
    },
    {
      "epoch": 0.6402065464729564,
      "grad_norm": 0.9821735402285827,
      "learning_rate": 3.0276535473329983e-06,
      "loss": 0.1513,
      "step": 21945
    },
    {
      "epoch": 0.6402357197035999,
      "grad_norm": 0.7782382406666812,
      "learning_rate": 3.02721943208094e-06,
      "loss": 0.1204,
      "step": 21946
    },
    {
      "epoch": 0.6402648929342435,
      "grad_norm": 0.942174926867864,
      "learning_rate": 3.0267853344413956e-06,
      "loss": 0.1221,
      "step": 21947
    },
    {
      "epoch": 0.6402940661648872,
      "grad_norm": 1.2193659694124632,
      "learning_rate": 3.0263512544182407e-06,
      "loss": 0.121,
      "step": 21948
    },
    {
      "epoch": 0.6403232393955307,
      "grad_norm": 1.1613143866439641,
      "learning_rate": 3.025917192015349e-06,
      "loss": 0.1257,
      "step": 21949
    },
    {
      "epoch": 0.6403524126261743,
      "grad_norm": 1.198356382509822,
      "learning_rate": 3.025483147236599e-06,
      "loss": 0.1395,
      "step": 21950
    },
    {
      "epoch": 0.6403815858568178,
      "grad_norm": 0.8952116743310828,
      "learning_rate": 3.0250491200858643e-06,
      "loss": 0.1222,
      "step": 21951
    },
    {
      "epoch": 0.6404107590874614,
      "grad_norm": 1.2709399242472916,
      "learning_rate": 3.0246151105670197e-06,
      "loss": 0.136,
      "step": 21952
    },
    {
      "epoch": 0.6404399323181049,
      "grad_norm": 0.9343221816026714,
      "learning_rate": 3.0241811186839394e-06,
      "loss": 0.1505,
      "step": 21953
    },
    {
      "epoch": 0.6404691055487485,
      "grad_norm": 0.7738212314549767,
      "learning_rate": 3.0237471444404993e-06,
      "loss": 0.1052,
      "step": 21954
    },
    {
      "epoch": 0.640498278779392,
      "grad_norm": 1.096858823539028,
      "learning_rate": 3.023313187840571e-06,
      "loss": 0.1326,
      "step": 21955
    },
    {
      "epoch": 0.6405274520100356,
      "grad_norm": 1.0254494717705362,
      "learning_rate": 3.0228792488880315e-06,
      "loss": 0.1104,
      "step": 21956
    },
    {
      "epoch": 0.6405566252406791,
      "grad_norm": 0.7765213523737811,
      "learning_rate": 3.0224453275867544e-06,
      "loss": 0.1379,
      "step": 21957
    },
    {
      "epoch": 0.6405857984713227,
      "grad_norm": 1.0059879994998377,
      "learning_rate": 3.022011423940614e-06,
      "loss": 0.1173,
      "step": 21958
    },
    {
      "epoch": 0.6406149717019662,
      "grad_norm": 0.914641722131277,
      "learning_rate": 3.0215775379534827e-06,
      "loss": 0.1253,
      "step": 21959
    },
    {
      "epoch": 0.6406441449326098,
      "grad_norm": 0.9022035160712973,
      "learning_rate": 3.0211436696292346e-06,
      "loss": 0.1441,
      "step": 21960
    },
    {
      "epoch": 0.6406733181632533,
      "grad_norm": 0.8967770234631273,
      "learning_rate": 3.020709818971743e-06,
      "loss": 0.1271,
      "step": 21961
    },
    {
      "epoch": 0.640702491393897,
      "grad_norm": 0.9269267973477245,
      "learning_rate": 3.0202759859848818e-06,
      "loss": 0.1061,
      "step": 21962
    },
    {
      "epoch": 0.6407316646245406,
      "grad_norm": 0.8430561957964302,
      "learning_rate": 3.0198421706725257e-06,
      "loss": 0.1451,
      "step": 21963
    },
    {
      "epoch": 0.6407608378551841,
      "grad_norm": 0.8483089362372715,
      "learning_rate": 3.0194083730385443e-06,
      "loss": 0.1137,
      "step": 21964
    },
    {
      "epoch": 0.6407900110858277,
      "grad_norm": 0.7416063276970117,
      "learning_rate": 3.0189745930868127e-06,
      "loss": 0.1284,
      "step": 21965
    },
    {
      "epoch": 0.6408191843164712,
      "grad_norm": 0.6940232346523145,
      "learning_rate": 3.018540830821204e-06,
      "loss": 0.1348,
      "step": 21966
    },
    {
      "epoch": 0.6408483575471148,
      "grad_norm": 0.9563816919176291,
      "learning_rate": 3.0181070862455862e-06,
      "loss": 0.1409,
      "step": 21967
    },
    {
      "epoch": 0.6408775307777583,
      "grad_norm": 0.8849799514833532,
      "learning_rate": 3.0176733593638387e-06,
      "loss": 0.1225,
      "step": 21968
    },
    {
      "epoch": 0.6409067040084019,
      "grad_norm": 0.7482586046657074,
      "learning_rate": 3.0172396501798295e-06,
      "loss": 0.143,
      "step": 21969
    },
    {
      "epoch": 0.6409358772390454,
      "grad_norm": 0.7317275594846018,
      "learning_rate": 3.0168059586974307e-06,
      "loss": 0.1223,
      "step": 21970
    },
    {
      "epoch": 0.640965050469689,
      "grad_norm": 0.8917560050040593,
      "learning_rate": 3.0163722849205163e-06,
      "loss": 0.1132,
      "step": 21971
    },
    {
      "epoch": 0.6409942237003325,
      "grad_norm": 0.6154021358023682,
      "learning_rate": 3.0159386288529556e-06,
      "loss": 0.1044,
      "step": 21972
    },
    {
      "epoch": 0.6410233969309761,
      "grad_norm": 0.8310882793958907,
      "learning_rate": 3.01550499049862e-06,
      "loss": 0.1223,
      "step": 21973
    },
    {
      "epoch": 0.6410525701616197,
      "grad_norm": 0.8564255076719426,
      "learning_rate": 3.0150713698613833e-06,
      "loss": 0.1274,
      "step": 21974
    },
    {
      "epoch": 0.6410817433922633,
      "grad_norm": 0.8576802317833004,
      "learning_rate": 3.0146377669451154e-06,
      "loss": 0.112,
      "step": 21975
    },
    {
      "epoch": 0.6411109166229069,
      "grad_norm": 0.8924822883242373,
      "learning_rate": 3.0142041817536883e-06,
      "loss": 0.124,
      "step": 21976
    },
    {
      "epoch": 0.6411400898535504,
      "grad_norm": 0.8209841294782584,
      "learning_rate": 3.0137706142909717e-06,
      "loss": 0.1289,
      "step": 21977
    },
    {
      "epoch": 0.641169263084194,
      "grad_norm": 0.832261433301835,
      "learning_rate": 3.0133370645608372e-06,
      "loss": 0.1327,
      "step": 21978
    },
    {
      "epoch": 0.6411984363148375,
      "grad_norm": 0.9162160733691365,
      "learning_rate": 3.0129035325671534e-06,
      "loss": 0.1273,
      "step": 21979
    },
    {
      "epoch": 0.6412276095454811,
      "grad_norm": 1.0494374263771356,
      "learning_rate": 3.0124700183137938e-06,
      "loss": 0.1108,
      "step": 21980
    },
    {
      "epoch": 0.6412567827761246,
      "grad_norm": 0.8841927875348656,
      "learning_rate": 3.0120365218046287e-06,
      "loss": 0.1239,
      "step": 21981
    },
    {
      "epoch": 0.6412859560067682,
      "grad_norm": 0.8271655609304868,
      "learning_rate": 3.0116030430435254e-06,
      "loss": 0.113,
      "step": 21982
    },
    {
      "epoch": 0.6413151292374117,
      "grad_norm": 0.8725414580239986,
      "learning_rate": 3.0111695820343557e-06,
      "loss": 0.1194,
      "step": 21983
    },
    {
      "epoch": 0.6413443024680553,
      "grad_norm": 0.878717120453251,
      "learning_rate": 3.010736138780991e-06,
      "loss": 0.1217,
      "step": 21984
    },
    {
      "epoch": 0.6413734756986988,
      "grad_norm": 0.8967277763892505,
      "learning_rate": 3.010302713287296e-06,
      "loss": 0.1266,
      "step": 21985
    },
    {
      "epoch": 0.6414026489293424,
      "grad_norm": 0.9566597113111973,
      "learning_rate": 3.009869305557145e-06,
      "loss": 0.1211,
      "step": 21986
    },
    {
      "epoch": 0.641431822159986,
      "grad_norm": 0.6656255080399474,
      "learning_rate": 3.0094359155944053e-06,
      "loss": 0.1171,
      "step": 21987
    },
    {
      "epoch": 0.6414609953906295,
      "grad_norm": 0.926675776362127,
      "learning_rate": 3.009002543402948e-06,
      "loss": 0.1176,
      "step": 21988
    },
    {
      "epoch": 0.6414901686212732,
      "grad_norm": 0.9455297630585128,
      "learning_rate": 3.0085691889866396e-06,
      "loss": 0.1191,
      "step": 21989
    },
    {
      "epoch": 0.6415193418519167,
      "grad_norm": 0.8037505377903367,
      "learning_rate": 3.00813585234935e-06,
      "loss": 0.1352,
      "step": 21990
    },
    {
      "epoch": 0.6415485150825603,
      "grad_norm": 0.8411793116770895,
      "learning_rate": 3.0077025334949465e-06,
      "loss": 0.1368,
      "step": 21991
    },
    {
      "epoch": 0.6415776883132038,
      "grad_norm": 0.9225695404193294,
      "learning_rate": 3.007269232427301e-06,
      "loss": 0.1198,
      "step": 21992
    },
    {
      "epoch": 0.6416068615438474,
      "grad_norm": 0.9568972889835847,
      "learning_rate": 3.0068359491502806e-06,
      "loss": 0.1257,
      "step": 21993
    },
    {
      "epoch": 0.6416360347744909,
      "grad_norm": 0.8408048220332507,
      "learning_rate": 3.0064026836677527e-06,
      "loss": 0.1071,
      "step": 21994
    },
    {
      "epoch": 0.6416652080051345,
      "grad_norm": 1.0444506090823575,
      "learning_rate": 3.005969435983585e-06,
      "loss": 0.1565,
      "step": 21995
    },
    {
      "epoch": 0.641694381235778,
      "grad_norm": 0.8235011669755461,
      "learning_rate": 3.005536206101648e-06,
      "loss": 0.1205,
      "step": 21996
    },
    {
      "epoch": 0.6417235544664216,
      "grad_norm": 1.0100537331249817,
      "learning_rate": 3.0051029940258035e-06,
      "loss": 0.1314,
      "step": 21997
    },
    {
      "epoch": 0.6417527276970652,
      "grad_norm": 0.7175819177898534,
      "learning_rate": 3.004669799759927e-06,
      "loss": 0.1185,
      "step": 21998
    },
    {
      "epoch": 0.6417819009277087,
      "grad_norm": 0.6697634596920586,
      "learning_rate": 3.004236623307881e-06,
      "loss": 0.1176,
      "step": 21999
    },
    {
      "epoch": 0.6418110741583523,
      "grad_norm": 0.7314890809794784,
      "learning_rate": 3.003803464673534e-06,
      "loss": 0.1134,
      "step": 22000
    },
    {
      "epoch": 0.6418402473889958,
      "grad_norm": 0.8877970432339143,
      "learning_rate": 3.0033703238607544e-06,
      "loss": 0.1142,
      "step": 22001
    },
    {
      "epoch": 0.6418694206196394,
      "grad_norm": 1.0484756317865096,
      "learning_rate": 3.0029372008734065e-06,
      "loss": 0.1278,
      "step": 22002
    },
    {
      "epoch": 0.641898593850283,
      "grad_norm": 0.784807631667673,
      "learning_rate": 3.0025040957153576e-06,
      "loss": 0.1305,
      "step": 22003
    },
    {
      "epoch": 0.6419277670809266,
      "grad_norm": 0.7995695426138639,
      "learning_rate": 3.002071008390477e-06,
      "loss": 0.1022,
      "step": 22004
    },
    {
      "epoch": 0.6419569403115701,
      "grad_norm": 1.0498805837174303,
      "learning_rate": 3.0016379389026283e-06,
      "loss": 0.1178,
      "step": 22005
    },
    {
      "epoch": 0.6419861135422137,
      "grad_norm": 0.8907687355688043,
      "learning_rate": 3.001204887255681e-06,
      "loss": 0.132,
      "step": 22006
    },
    {
      "epoch": 0.6420152867728572,
      "grad_norm": 1.2388437603140898,
      "learning_rate": 3.000771853453498e-06,
      "loss": 0.1146,
      "step": 22007
    },
    {
      "epoch": 0.6420444600035008,
      "grad_norm": 0.7294235891008237,
      "learning_rate": 3.0003388374999464e-06,
      "loss": 0.1552,
      "step": 22008
    },
    {
      "epoch": 0.6420736332341443,
      "grad_norm": 0.9111170835689067,
      "learning_rate": 2.999905839398891e-06,
      "loss": 0.1453,
      "step": 22009
    },
    {
      "epoch": 0.6421028064647879,
      "grad_norm": 1.0006834918474947,
      "learning_rate": 2.9994728591542012e-06,
      "loss": 0.1386,
      "step": 22010
    },
    {
      "epoch": 0.6421319796954315,
      "grad_norm": 0.7411976676137246,
      "learning_rate": 2.99903989676974e-06,
      "loss": 0.1287,
      "step": 22011
    },
    {
      "epoch": 0.642161152926075,
      "grad_norm": 0.8374476498672586,
      "learning_rate": 2.998606952249372e-06,
      "loss": 0.1239,
      "step": 22012
    },
    {
      "epoch": 0.6421903261567186,
      "grad_norm": 1.1581749601311697,
      "learning_rate": 2.998174025596964e-06,
      "loss": 0.1205,
      "step": 22013
    },
    {
      "epoch": 0.6422194993873621,
      "grad_norm": 0.8413115633375319,
      "learning_rate": 2.9977411168163807e-06,
      "loss": 0.1247,
      "step": 22014
    },
    {
      "epoch": 0.6422486726180057,
      "grad_norm": 0.7172542444129659,
      "learning_rate": 2.997308225911485e-06,
      "loss": 0.1048,
      "step": 22015
    },
    {
      "epoch": 0.6422778458486493,
      "grad_norm": 0.7459797529607547,
      "learning_rate": 2.9968753528861443e-06,
      "loss": 0.143,
      "step": 22016
    },
    {
      "epoch": 0.6423070190792929,
      "grad_norm": 0.8838932432440447,
      "learning_rate": 2.9964424977442223e-06,
      "loss": 0.1219,
      "step": 22017
    },
    {
      "epoch": 0.6423361923099364,
      "grad_norm": 0.7892937388748272,
      "learning_rate": 2.9960096604895843e-06,
      "loss": 0.1356,
      "step": 22018
    },
    {
      "epoch": 0.64236536554058,
      "grad_norm": 0.723426527644377,
      "learning_rate": 2.9955768411260935e-06,
      "loss": 0.1041,
      "step": 22019
    },
    {
      "epoch": 0.6423945387712235,
      "grad_norm": 0.817449795560833,
      "learning_rate": 2.9951440396576128e-06,
      "loss": 0.1075,
      "step": 22020
    },
    {
      "epoch": 0.6424237120018671,
      "grad_norm": 0.7913939349359839,
      "learning_rate": 2.9947112560880076e-06,
      "loss": 0.1152,
      "step": 22021
    },
    {
      "epoch": 0.6424528852325107,
      "grad_norm": 0.766103026246493,
      "learning_rate": 2.9942784904211418e-06,
      "loss": 0.1364,
      "step": 22022
    },
    {
      "epoch": 0.6424820584631542,
      "grad_norm": 0.87075082492271,
      "learning_rate": 2.9938457426608802e-06,
      "loss": 0.1159,
      "step": 22023
    },
    {
      "epoch": 0.6425112316937978,
      "grad_norm": 0.9102059288066796,
      "learning_rate": 2.993413012811084e-06,
      "loss": 0.1258,
      "step": 22024
    },
    {
      "epoch": 0.6425404049244413,
      "grad_norm": 0.8912854960681408,
      "learning_rate": 2.9929803008756174e-06,
      "loss": 0.1038,
      "step": 22025
    },
    {
      "epoch": 0.6425695781550849,
      "grad_norm": 0.7777757062121337,
      "learning_rate": 2.992547606858345e-06,
      "loss": 0.1203,
      "step": 22026
    },
    {
      "epoch": 0.6425987513857284,
      "grad_norm": 0.9571044758716966,
      "learning_rate": 2.992114930763125e-06,
      "loss": 0.1407,
      "step": 22027
    },
    {
      "epoch": 0.642627924616372,
      "grad_norm": 1.0391279665164588,
      "learning_rate": 2.9916822725938253e-06,
      "loss": 0.1132,
      "step": 22028
    },
    {
      "epoch": 0.6426570978470155,
      "grad_norm": 0.7927344573522009,
      "learning_rate": 2.9912496323543074e-06,
      "loss": 0.0956,
      "step": 22029
    },
    {
      "epoch": 0.6426862710776592,
      "grad_norm": 0.6825566380151522,
      "learning_rate": 2.990817010048433e-06,
      "loss": 0.1274,
      "step": 22030
    },
    {
      "epoch": 0.6427154443083027,
      "grad_norm": 0.8293530431236341,
      "learning_rate": 2.9903844056800657e-06,
      "loss": 0.1324,
      "step": 22031
    },
    {
      "epoch": 0.6427446175389463,
      "grad_norm": 0.902414512926688,
      "learning_rate": 2.989951819253063e-06,
      "loss": 0.1078,
      "step": 22032
    },
    {
      "epoch": 0.6427737907695898,
      "grad_norm": 0.9077883812298531,
      "learning_rate": 2.9895192507712943e-06,
      "loss": 0.1289,
      "step": 22033
    },
    {
      "epoch": 0.6428029640002334,
      "grad_norm": 1.0576657118963588,
      "learning_rate": 2.989086700238617e-06,
      "loss": 0.1355,
      "step": 22034
    },
    {
      "epoch": 0.642832137230877,
      "grad_norm": 0.7742967496213374,
      "learning_rate": 2.988654167658893e-06,
      "loss": 0.117,
      "step": 22035
    },
    {
      "epoch": 0.6428613104615205,
      "grad_norm": 0.9320383661908841,
      "learning_rate": 2.9882216530359855e-06,
      "loss": 0.1129,
      "step": 22036
    },
    {
      "epoch": 0.6428904836921641,
      "grad_norm": 0.8551385599946922,
      "learning_rate": 2.9877891563737538e-06,
      "loss": 0.1171,
      "step": 22037
    },
    {
      "epoch": 0.6429196569228076,
      "grad_norm": 1.1244173168804574,
      "learning_rate": 2.98735667767606e-06,
      "loss": 0.1221,
      "step": 22038
    },
    {
      "epoch": 0.6429488301534512,
      "grad_norm": 0.8901108181760674,
      "learning_rate": 2.986924216946765e-06,
      "loss": 0.1115,
      "step": 22039
    },
    {
      "epoch": 0.6429780033840947,
      "grad_norm": 0.8029337076108818,
      "learning_rate": 2.986491774189731e-06,
      "loss": 0.1458,
      "step": 22040
    },
    {
      "epoch": 0.6430071766147383,
      "grad_norm": 0.8066120730382462,
      "learning_rate": 2.9860593494088187e-06,
      "loss": 0.12,
      "step": 22041
    },
    {
      "epoch": 0.6430363498453818,
      "grad_norm": 0.864314297791898,
      "learning_rate": 2.9856269426078867e-06,
      "loss": 0.1217,
      "step": 22042
    },
    {
      "epoch": 0.6430655230760255,
      "grad_norm": 0.7196071010194764,
      "learning_rate": 2.985194553790796e-06,
      "loss": 0.1192,
      "step": 22043
    },
    {
      "epoch": 0.643094696306669,
      "grad_norm": 0.8675494861147474,
      "learning_rate": 2.984762182961407e-06,
      "loss": 0.1164,
      "step": 22044
    },
    {
      "epoch": 0.6431238695373126,
      "grad_norm": 0.8661346968630518,
      "learning_rate": 2.9843298301235812e-06,
      "loss": 0.1364,
      "step": 22045
    },
    {
      "epoch": 0.6431530427679562,
      "grad_norm": 0.8267836367556943,
      "learning_rate": 2.983897495281177e-06,
      "loss": 0.1115,
      "step": 22046
    },
    {
      "epoch": 0.6431822159985997,
      "grad_norm": 0.8494980209202818,
      "learning_rate": 2.9834651784380554e-06,
      "loss": 0.1035,
      "step": 22047
    },
    {
      "epoch": 0.6432113892292433,
      "grad_norm": 0.8089928985302235,
      "learning_rate": 2.9830328795980756e-06,
      "loss": 0.1218,
      "step": 22048
    },
    {
      "epoch": 0.6432405624598868,
      "grad_norm": 0.9807952922080896,
      "learning_rate": 2.9826005987650964e-06,
      "loss": 0.1248,
      "step": 22049
    },
    {
      "epoch": 0.6432697356905304,
      "grad_norm": 0.8681631326657169,
      "learning_rate": 2.9821683359429755e-06,
      "loss": 0.1278,
      "step": 22050
    },
    {
      "epoch": 0.6432989089211739,
      "grad_norm": 2.583564011172082,
      "learning_rate": 2.981736091135575e-06,
      "loss": 0.1268,
      "step": 22051
    },
    {
      "epoch": 0.6433280821518175,
      "grad_norm": 1.0576873002634501,
      "learning_rate": 2.981303864346754e-06,
      "loss": 0.1167,
      "step": 22052
    },
    {
      "epoch": 0.643357255382461,
      "grad_norm": 0.9968347441174502,
      "learning_rate": 2.9808716555803704e-06,
      "loss": 0.1242,
      "step": 22053
    },
    {
      "epoch": 0.6433864286131046,
      "grad_norm": 0.8397921202388221,
      "learning_rate": 2.980439464840282e-06,
      "loss": 0.1306,
      "step": 22054
    },
    {
      "epoch": 0.6434156018437481,
      "grad_norm": 0.775762972254896,
      "learning_rate": 2.9800072921303474e-06,
      "loss": 0.1297,
      "step": 22055
    },
    {
      "epoch": 0.6434447750743917,
      "grad_norm": 0.8310130153168136,
      "learning_rate": 2.9795751374544244e-06,
      "loss": 0.1066,
      "step": 22056
    },
    {
      "epoch": 0.6434739483050353,
      "grad_norm": 0.7542953577879931,
      "learning_rate": 2.9791430008163743e-06,
      "loss": 0.1146,
      "step": 22057
    },
    {
      "epoch": 0.6435031215356789,
      "grad_norm": 0.7323427741304133,
      "learning_rate": 2.9787108822200535e-06,
      "loss": 0.1128,
      "step": 22058
    },
    {
      "epoch": 0.6435322947663225,
      "grad_norm": 0.907603976780851,
      "learning_rate": 2.978278781669318e-06,
      "loss": 0.1282,
      "step": 22059
    },
    {
      "epoch": 0.643561467996966,
      "grad_norm": 0.84944528132297,
      "learning_rate": 2.977846699168028e-06,
      "loss": 0.1365,
      "step": 22060
    },
    {
      "epoch": 0.6435906412276096,
      "grad_norm": 0.7779429623159441,
      "learning_rate": 2.9774146347200394e-06,
      "loss": 0.1582,
      "step": 22061
    },
    {
      "epoch": 0.6436198144582531,
      "grad_norm": 0.7783991942694073,
      "learning_rate": 2.9769825883292082e-06,
      "loss": 0.154,
      "step": 22062
    },
    {
      "epoch": 0.6436489876888967,
      "grad_norm": 1.0587142119934603,
      "learning_rate": 2.976550559999396e-06,
      "loss": 0.1439,
      "step": 22063
    },
    {
      "epoch": 0.6436781609195402,
      "grad_norm": 0.8804615046154666,
      "learning_rate": 2.976118549734457e-06,
      "loss": 0.1222,
      "step": 22064
    },
    {
      "epoch": 0.6437073341501838,
      "grad_norm": 0.8756130173308975,
      "learning_rate": 2.9756865575382475e-06,
      "loss": 0.1248,
      "step": 22065
    },
    {
      "epoch": 0.6437365073808273,
      "grad_norm": 0.7862666622013987,
      "learning_rate": 2.9752545834146275e-06,
      "loss": 0.1159,
      "step": 22066
    },
    {
      "epoch": 0.6437656806114709,
      "grad_norm": 0.9494680000328745,
      "learning_rate": 2.974822627367449e-06,
      "loss": 0.1288,
      "step": 22067
    },
    {
      "epoch": 0.6437948538421144,
      "grad_norm": 0.798034359516251,
      "learning_rate": 2.97439068940057e-06,
      "loss": 0.1132,
      "step": 22068
    },
    {
      "epoch": 0.643824027072758,
      "grad_norm": 1.1078607766031694,
      "learning_rate": 2.9739587695178485e-06,
      "loss": 0.1537,
      "step": 22069
    },
    {
      "epoch": 0.6438532003034017,
      "grad_norm": 1.2731222312197774,
      "learning_rate": 2.97352686772314e-06,
      "loss": 0.1083,
      "step": 22070
    },
    {
      "epoch": 0.6438823735340452,
      "grad_norm": 0.9063292670247907,
      "learning_rate": 2.9730949840203e-06,
      "loss": 0.1162,
      "step": 22071
    },
    {
      "epoch": 0.6439115467646888,
      "grad_norm": 0.939624863022762,
      "learning_rate": 2.9726631184131833e-06,
      "loss": 0.1585,
      "step": 22072
    },
    {
      "epoch": 0.6439407199953323,
      "grad_norm": 0.9894101783704818,
      "learning_rate": 2.9722312709056466e-06,
      "loss": 0.1278,
      "step": 22073
    },
    {
      "epoch": 0.6439698932259759,
      "grad_norm": 1.1466272745788815,
      "learning_rate": 2.971799441501544e-06,
      "loss": 0.1211,
      "step": 22074
    },
    {
      "epoch": 0.6439990664566194,
      "grad_norm": 0.7980771666472901,
      "learning_rate": 2.9713676302047335e-06,
      "loss": 0.0974,
      "step": 22075
    },
    {
      "epoch": 0.644028239687263,
      "grad_norm": 0.9849768463337183,
      "learning_rate": 2.9709358370190677e-06,
      "loss": 0.1329,
      "step": 22076
    },
    {
      "epoch": 0.6440574129179065,
      "grad_norm": 1.0127582034249096,
      "learning_rate": 2.970504061948403e-06,
      "loss": 0.1245,
      "step": 22077
    },
    {
      "epoch": 0.6440865861485501,
      "grad_norm": 1.1530073418603166,
      "learning_rate": 2.9700723049965928e-06,
      "loss": 0.1236,
      "step": 22078
    },
    {
      "epoch": 0.6441157593791936,
      "grad_norm": 0.9272447726282984,
      "learning_rate": 2.969640566167493e-06,
      "loss": 0.122,
      "step": 22079
    },
    {
      "epoch": 0.6441449326098372,
      "grad_norm": 0.917588325968273,
      "learning_rate": 2.969208845464956e-06,
      "loss": 0.126,
      "step": 22080
    },
    {
      "epoch": 0.6441741058404807,
      "grad_norm": 1.0576892000786284,
      "learning_rate": 2.968777142892839e-06,
      "loss": 0.1437,
      "step": 22081
    },
    {
      "epoch": 0.6442032790711243,
      "grad_norm": 0.6943798440503597,
      "learning_rate": 2.9683454584549943e-06,
      "loss": 0.1172,
      "step": 22082
    },
    {
      "epoch": 0.6442324523017678,
      "grad_norm": 0.985432734009756,
      "learning_rate": 2.967913792155278e-06,
      "loss": 0.1285,
      "step": 22083
    },
    {
      "epoch": 0.6442616255324115,
      "grad_norm": 0.9401943586052562,
      "learning_rate": 2.967482143997541e-06,
      "loss": 0.1253,
      "step": 22084
    },
    {
      "epoch": 0.6442907987630551,
      "grad_norm": 0.9388888997823897,
      "learning_rate": 2.9670505139856375e-06,
      "loss": 0.1139,
      "step": 22085
    },
    {
      "epoch": 0.6443199719936986,
      "grad_norm": 0.9569262205041986,
      "learning_rate": 2.9666189021234214e-06,
      "loss": 0.1053,
      "step": 22086
    },
    {
      "epoch": 0.6443491452243422,
      "grad_norm": 0.8517320319007174,
      "learning_rate": 2.9661873084147473e-06,
      "loss": 0.1084,
      "step": 22087
    },
    {
      "epoch": 0.6443783184549857,
      "grad_norm": 1.0762450535352732,
      "learning_rate": 2.9657557328634688e-06,
      "loss": 0.165,
      "step": 22088
    },
    {
      "epoch": 0.6444074916856293,
      "grad_norm": 0.9851269425213238,
      "learning_rate": 2.9653241754734363e-06,
      "loss": 0.1348,
      "step": 22089
    },
    {
      "epoch": 0.6444366649162728,
      "grad_norm": 0.8501366656248518,
      "learning_rate": 2.964892636248503e-06,
      "loss": 0.1143,
      "step": 22090
    },
    {
      "epoch": 0.6444658381469164,
      "grad_norm": 1.1994242588566304,
      "learning_rate": 2.964461115192524e-06,
      "loss": 0.1307,
      "step": 22091
    },
    {
      "epoch": 0.6444950113775599,
      "grad_norm": 0.8736033208421453,
      "learning_rate": 2.9640296123093476e-06,
      "loss": 0.1183,
      "step": 22092
    },
    {
      "epoch": 0.6445241846082035,
      "grad_norm": 0.8446397914758896,
      "learning_rate": 2.963598127602831e-06,
      "loss": 0.1307,
      "step": 22093
    },
    {
      "epoch": 0.644553357838847,
      "grad_norm": 0.7794112893337023,
      "learning_rate": 2.963166661076824e-06,
      "loss": 0.1301,
      "step": 22094
    },
    {
      "epoch": 0.6445825310694906,
      "grad_norm": 0.8520922326501634,
      "learning_rate": 2.9627352127351783e-06,
      "loss": 0.1395,
      "step": 22095
    },
    {
      "epoch": 0.6446117043001341,
      "grad_norm": 0.7382525198148784,
      "learning_rate": 2.962303782581748e-06,
      "loss": 0.1116,
      "step": 22096
    },
    {
      "epoch": 0.6446408775307778,
      "grad_norm": 1.0613390162714242,
      "learning_rate": 2.9618723706203812e-06,
      "loss": 0.1236,
      "step": 22097
    },
    {
      "epoch": 0.6446700507614214,
      "grad_norm": 0.9408842892032847,
      "learning_rate": 2.961440976854931e-06,
      "loss": 0.1264,
      "step": 22098
    },
    {
      "epoch": 0.6446992239920649,
      "grad_norm": 0.72467979429876,
      "learning_rate": 2.9610096012892496e-06,
      "loss": 0.1297,
      "step": 22099
    },
    {
      "epoch": 0.6447283972227085,
      "grad_norm": 0.737689601338275,
      "learning_rate": 2.960578243927188e-06,
      "loss": 0.1149,
      "step": 22100
    },
    {
      "epoch": 0.644757570453352,
      "grad_norm": 0.9520034245445577,
      "learning_rate": 2.960146904772598e-06,
      "loss": 0.1228,
      "step": 22101
    },
    {
      "epoch": 0.6447867436839956,
      "grad_norm": 1.136912996718269,
      "learning_rate": 2.959715583829328e-06,
      "loss": 0.1399,
      "step": 22102
    },
    {
      "epoch": 0.6448159169146391,
      "grad_norm": 0.8071727351381353,
      "learning_rate": 2.959284281101231e-06,
      "loss": 0.1399,
      "step": 22103
    },
    {
      "epoch": 0.6448450901452827,
      "grad_norm": 0.8089437736268954,
      "learning_rate": 2.958852996592155e-06,
      "loss": 0.1224,
      "step": 22104
    },
    {
      "epoch": 0.6448742633759262,
      "grad_norm": 0.947368275285795,
      "learning_rate": 2.958421730305955e-06,
      "loss": 0.1156,
      "step": 22105
    },
    {
      "epoch": 0.6449034366065698,
      "grad_norm": 1.1211907837726405,
      "learning_rate": 2.9579904822464767e-06,
      "loss": 0.1408,
      "step": 22106
    },
    {
      "epoch": 0.6449326098372133,
      "grad_norm": 0.792799923898483,
      "learning_rate": 2.9575592524175723e-06,
      "loss": 0.1132,
      "step": 22107
    },
    {
      "epoch": 0.6449617830678569,
      "grad_norm": 0.7318146421671569,
      "learning_rate": 2.9571280408230917e-06,
      "loss": 0.1305,
      "step": 22108
    },
    {
      "epoch": 0.6449909562985005,
      "grad_norm": 1.0511070170228909,
      "learning_rate": 2.9566968474668847e-06,
      "loss": 0.1203,
      "step": 22109
    },
    {
      "epoch": 0.645020129529144,
      "grad_norm": 0.8679755600181671,
      "learning_rate": 2.956265672352798e-06,
      "loss": 0.1153,
      "step": 22110
    },
    {
      "epoch": 0.6450493027597877,
      "grad_norm": 0.9430004558181917,
      "learning_rate": 2.955834515484685e-06,
      "loss": 0.1296,
      "step": 22111
    },
    {
      "epoch": 0.6450784759904312,
      "grad_norm": 0.9449628681644011,
      "learning_rate": 2.9554033768663937e-06,
      "loss": 0.1121,
      "step": 22112
    },
    {
      "epoch": 0.6451076492210748,
      "grad_norm": 0.7746785251412969,
      "learning_rate": 2.9549722565017737e-06,
      "loss": 0.1392,
      "step": 22113
    },
    {
      "epoch": 0.6451368224517183,
      "grad_norm": 0.8924165420964965,
      "learning_rate": 2.9545411543946723e-06,
      "loss": 0.1305,
      "step": 22114
    },
    {
      "epoch": 0.6451659956823619,
      "grad_norm": 0.7748486079407143,
      "learning_rate": 2.9541100705489393e-06,
      "loss": 0.1518,
      "step": 22115
    },
    {
      "epoch": 0.6451951689130054,
      "grad_norm": 0.6335176502798734,
      "learning_rate": 2.9536790049684224e-06,
      "loss": 0.1081,
      "step": 22116
    },
    {
      "epoch": 0.645224342143649,
      "grad_norm": 1.0450723518143625,
      "learning_rate": 2.9532479576569716e-06,
      "loss": 0.1275,
      "step": 22117
    },
    {
      "epoch": 0.6452535153742925,
      "grad_norm": 0.7215618244914422,
      "learning_rate": 2.9528169286184348e-06,
      "loss": 0.1052,
      "step": 22118
    },
    {
      "epoch": 0.6452826886049361,
      "grad_norm": 0.9781881820207516,
      "learning_rate": 2.9523859178566594e-06,
      "loss": 0.133,
      "step": 22119
    },
    {
      "epoch": 0.6453118618355796,
      "grad_norm": 0.707457863237948,
      "learning_rate": 2.951954925375494e-06,
      "loss": 0.1145,
      "step": 22120
    },
    {
      "epoch": 0.6453410350662232,
      "grad_norm": 0.7934944591494858,
      "learning_rate": 2.951523951178787e-06,
      "loss": 0.1044,
      "step": 22121
    },
    {
      "epoch": 0.6453702082968668,
      "grad_norm": 0.7912181486480262,
      "learning_rate": 2.9510929952703815e-06,
      "loss": 0.1289,
      "step": 22122
    },
    {
      "epoch": 0.6453993815275103,
      "grad_norm": 0.7793378273608388,
      "learning_rate": 2.950662057654132e-06,
      "loss": 0.1309,
      "step": 22123
    },
    {
      "epoch": 0.645428554758154,
      "grad_norm": 0.8729551155646463,
      "learning_rate": 2.950231138333882e-06,
      "loss": 0.1038,
      "step": 22124
    },
    {
      "epoch": 0.6454577279887975,
      "grad_norm": 0.813778001061841,
      "learning_rate": 2.949800237313478e-06,
      "loss": 0.126,
      "step": 22125
    },
    {
      "epoch": 0.6454869012194411,
      "grad_norm": 0.894484055265241,
      "learning_rate": 2.94936935459677e-06,
      "loss": 0.1372,
      "step": 22126
    },
    {
      "epoch": 0.6455160744500846,
      "grad_norm": 0.895704340897552,
      "learning_rate": 2.9489384901876016e-06,
      "loss": 0.1383,
      "step": 22127
    },
    {
      "epoch": 0.6455452476807282,
      "grad_norm": 0.8094463295701287,
      "learning_rate": 2.94850764408982e-06,
      "loss": 0.1336,
      "step": 22128
    },
    {
      "epoch": 0.6455744209113717,
      "grad_norm": 0.8192602504632791,
      "learning_rate": 2.9480768163072726e-06,
      "loss": 0.117,
      "step": 22129
    },
    {
      "epoch": 0.6456035941420153,
      "grad_norm": 0.6793074005980579,
      "learning_rate": 2.9476460068438064e-06,
      "loss": 0.1049,
      "step": 22130
    },
    {
      "epoch": 0.6456327673726588,
      "grad_norm": 0.7619057899262515,
      "learning_rate": 2.947215215703267e-06,
      "loss": 0.1251,
      "step": 22131
    },
    {
      "epoch": 0.6456619406033024,
      "grad_norm": 0.8574812210018996,
      "learning_rate": 2.9467844428894998e-06,
      "loss": 0.1229,
      "step": 22132
    },
    {
      "epoch": 0.645691113833946,
      "grad_norm": 0.7172180666131697,
      "learning_rate": 2.9463536884063505e-06,
      "loss": 0.1324,
      "step": 22133
    },
    {
      "epoch": 0.6457202870645895,
      "grad_norm": 0.7592250951735323,
      "learning_rate": 2.945922952257664e-06,
      "loss": 0.1339,
      "step": 22134
    },
    {
      "epoch": 0.6457494602952331,
      "grad_norm": 1.1939515370283684,
      "learning_rate": 2.9454922344472893e-06,
      "loss": 0.1114,
      "step": 22135
    },
    {
      "epoch": 0.6457786335258766,
      "grad_norm": 0.6845693570592758,
      "learning_rate": 2.945061534979069e-06,
      "loss": 0.0953,
      "step": 22136
    },
    {
      "epoch": 0.6458078067565202,
      "grad_norm": 0.7923648608066438,
      "learning_rate": 2.944630853856848e-06,
      "loss": 0.1186,
      "step": 22137
    },
    {
      "epoch": 0.6458369799871638,
      "grad_norm": 1.3492082174648148,
      "learning_rate": 2.944200191084473e-06,
      "loss": 0.1334,
      "step": 22138
    },
    {
      "epoch": 0.6458661532178074,
      "grad_norm": 1.0539879261958156,
      "learning_rate": 2.9437695466657877e-06,
      "loss": 0.1165,
      "step": 22139
    },
    {
      "epoch": 0.6458953264484509,
      "grad_norm": 0.6507382814474879,
      "learning_rate": 2.943338920604636e-06,
      "loss": 0.1012,
      "step": 22140
    },
    {
      "epoch": 0.6459244996790945,
      "grad_norm": 0.9651391579973144,
      "learning_rate": 2.9429083129048636e-06,
      "loss": 0.1133,
      "step": 22141
    },
    {
      "epoch": 0.645953672909738,
      "grad_norm": 1.1172918869556225,
      "learning_rate": 2.942477723570315e-06,
      "loss": 0.1211,
      "step": 22142
    },
    {
      "epoch": 0.6459828461403816,
      "grad_norm": 0.914783104135238,
      "learning_rate": 2.9420471526048356e-06,
      "loss": 0.1299,
      "step": 22143
    },
    {
      "epoch": 0.6460120193710251,
      "grad_norm": 1.1613740299798396,
      "learning_rate": 2.941616600012267e-06,
      "loss": 0.1403,
      "step": 22144
    },
    {
      "epoch": 0.6460411926016687,
      "grad_norm": 0.7755959362290784,
      "learning_rate": 2.941186065796453e-06,
      "loss": 0.139,
      "step": 22145
    },
    {
      "epoch": 0.6460703658323123,
      "grad_norm": 0.7695792323176357,
      "learning_rate": 2.9407555499612383e-06,
      "loss": 0.0885,
      "step": 22146
    },
    {
      "epoch": 0.6460995390629558,
      "grad_norm": 0.7572841970919096,
      "learning_rate": 2.9403250525104672e-06,
      "loss": 0.1054,
      "step": 22147
    },
    {
      "epoch": 0.6461287122935994,
      "grad_norm": 0.8386233756755662,
      "learning_rate": 2.939894573447983e-06,
      "loss": 0.1171,
      "step": 22148
    },
    {
      "epoch": 0.6461578855242429,
      "grad_norm": 1.1214203074447373,
      "learning_rate": 2.939464112777628e-06,
      "loss": 0.1308,
      "step": 22149
    },
    {
      "epoch": 0.6461870587548865,
      "grad_norm": 0.7332803395155896,
      "learning_rate": 2.9390336705032452e-06,
      "loss": 0.1036,
      "step": 22150
    },
    {
      "epoch": 0.6462162319855301,
      "grad_norm": 0.9084925279839042,
      "learning_rate": 2.9386032466286783e-06,
      "loss": 0.1378,
      "step": 22151
    },
    {
      "epoch": 0.6462454052161737,
      "grad_norm": 1.191419971198691,
      "learning_rate": 2.938172841157767e-06,
      "loss": 0.1477,
      "step": 22152
    },
    {
      "epoch": 0.6462745784468172,
      "grad_norm": 0.8901477574469311,
      "learning_rate": 2.9377424540943594e-06,
      "loss": 0.1216,
      "step": 22153
    },
    {
      "epoch": 0.6463037516774608,
      "grad_norm": 0.8752770002895012,
      "learning_rate": 2.937312085442294e-06,
      "loss": 0.1337,
      "step": 22154
    },
    {
      "epoch": 0.6463329249081043,
      "grad_norm": 0.7788200669650033,
      "learning_rate": 2.9368817352054137e-06,
      "loss": 0.1083,
      "step": 22155
    },
    {
      "epoch": 0.6463620981387479,
      "grad_norm": 0.9631934396815398,
      "learning_rate": 2.9364514033875614e-06,
      "loss": 0.1024,
      "step": 22156
    },
    {
      "epoch": 0.6463912713693915,
      "grad_norm": 0.7865690791547649,
      "learning_rate": 2.936021089992578e-06,
      "loss": 0.0926,
      "step": 22157
    },
    {
      "epoch": 0.646420444600035,
      "grad_norm": 0.8685141462870842,
      "learning_rate": 2.935590795024304e-06,
      "loss": 0.1306,
      "step": 22158
    },
    {
      "epoch": 0.6464496178306786,
      "grad_norm": 0.8094680277486507,
      "learning_rate": 2.935160518486584e-06,
      "loss": 0.1125,
      "step": 22159
    },
    {
      "epoch": 0.6464787910613221,
      "grad_norm": 0.9249522662181008,
      "learning_rate": 2.934730260383258e-06,
      "loss": 0.1466,
      "step": 22160
    },
    {
      "epoch": 0.6465079642919657,
      "grad_norm": 0.8095583761245791,
      "learning_rate": 2.9343000207181676e-06,
      "loss": 0.1269,
      "step": 22161
    },
    {
      "epoch": 0.6465371375226092,
      "grad_norm": 1.096617108135167,
      "learning_rate": 2.9338697994951532e-06,
      "loss": 0.1555,
      "step": 22162
    },
    {
      "epoch": 0.6465663107532528,
      "grad_norm": 0.8291858025448493,
      "learning_rate": 2.933439596718056e-06,
      "loss": 0.1289,
      "step": 22163
    },
    {
      "epoch": 0.6465954839838963,
      "grad_norm": 0.7494580678500257,
      "learning_rate": 2.933009412390715e-06,
      "loss": 0.1132,
      "step": 22164
    },
    {
      "epoch": 0.64662465721454,
      "grad_norm": 0.7315831730628344,
      "learning_rate": 2.9325792465169755e-06,
      "loss": 0.1325,
      "step": 22165
    },
    {
      "epoch": 0.6466538304451835,
      "grad_norm": 0.7560886606993555,
      "learning_rate": 2.932149099100673e-06,
      "loss": 0.1274,
      "step": 22166
    },
    {
      "epoch": 0.6466830036758271,
      "grad_norm": 0.8554363865744427,
      "learning_rate": 2.9317189701456505e-06,
      "loss": 0.0989,
      "step": 22167
    },
    {
      "epoch": 0.6467121769064706,
      "grad_norm": 0.8235054489768502,
      "learning_rate": 2.9312888596557476e-06,
      "loss": 0.1083,
      "step": 22168
    },
    {
      "epoch": 0.6467413501371142,
      "grad_norm": 0.7127127157064905,
      "learning_rate": 2.930858767634803e-06,
      "loss": 0.0995,
      "step": 22169
    },
    {
      "epoch": 0.6467705233677578,
      "grad_norm": 0.8658708281194661,
      "learning_rate": 2.930428694086657e-06,
      "loss": 0.1464,
      "step": 22170
    },
    {
      "epoch": 0.6467996965984013,
      "grad_norm": 0.8377800537767242,
      "learning_rate": 2.92999863901515e-06,
      "loss": 0.119,
      "step": 22171
    },
    {
      "epoch": 0.6468288698290449,
      "grad_norm": 0.6426346720352603,
      "learning_rate": 2.9295686024241222e-06,
      "loss": 0.1198,
      "step": 22172
    },
    {
      "epoch": 0.6468580430596884,
      "grad_norm": 0.6362227121973375,
      "learning_rate": 2.9291385843174114e-06,
      "loss": 0.121,
      "step": 22173
    },
    {
      "epoch": 0.646887216290332,
      "grad_norm": 0.8023373488833564,
      "learning_rate": 2.928708584698856e-06,
      "loss": 0.147,
      "step": 22174
    },
    {
      "epoch": 0.6469163895209755,
      "grad_norm": 0.9216522972434915,
      "learning_rate": 2.9282786035722965e-06,
      "loss": 0.1238,
      "step": 22175
    },
    {
      "epoch": 0.6469455627516191,
      "grad_norm": 0.6761602563199479,
      "learning_rate": 2.9278486409415694e-06,
      "loss": 0.1292,
      "step": 22176
    },
    {
      "epoch": 0.6469747359822626,
      "grad_norm": 0.998378761133215,
      "learning_rate": 2.9274186968105167e-06,
      "loss": 0.1237,
      "step": 22177
    },
    {
      "epoch": 0.6470039092129063,
      "grad_norm": 0.7092643212091149,
      "learning_rate": 2.9269887711829758e-06,
      "loss": 0.0997,
      "step": 22178
    },
    {
      "epoch": 0.6470330824435498,
      "grad_norm": 1.027844812565083,
      "learning_rate": 2.926558864062783e-06,
      "loss": 0.1045,
      "step": 22179
    },
    {
      "epoch": 0.6470622556741934,
      "grad_norm": 0.7285614319596585,
      "learning_rate": 2.926128975453778e-06,
      "loss": 0.1292,
      "step": 22180
    },
    {
      "epoch": 0.647091428904837,
      "grad_norm": 0.8914201346505192,
      "learning_rate": 2.9256991053597995e-06,
      "loss": 0.125,
      "step": 22181
    },
    {
      "epoch": 0.6471206021354805,
      "grad_norm": 0.9331847810236362,
      "learning_rate": 2.9252692537846807e-06,
      "loss": 0.1307,
      "step": 22182
    },
    {
      "epoch": 0.6471497753661241,
      "grad_norm": 0.7859581084706962,
      "learning_rate": 2.924839420732266e-06,
      "loss": 0.1094,
      "step": 22183
    },
    {
      "epoch": 0.6471789485967676,
      "grad_norm": 1.0851130472313715,
      "learning_rate": 2.9244096062063887e-06,
      "loss": 0.1319,
      "step": 22184
    },
    {
      "epoch": 0.6472081218274112,
      "grad_norm": 0.843565091800913,
      "learning_rate": 2.9239798102108876e-06,
      "loss": 0.1102,
      "step": 22185
    },
    {
      "epoch": 0.6472372950580547,
      "grad_norm": 0.6561390125954926,
      "learning_rate": 2.923550032749599e-06,
      "loss": 0.1146,
      "step": 22186
    },
    {
      "epoch": 0.6472664682886983,
      "grad_norm": 0.9459923822715989,
      "learning_rate": 2.9231202738263596e-06,
      "loss": 0.1154,
      "step": 22187
    },
    {
      "epoch": 0.6472956415193418,
      "grad_norm": 0.7443980986626273,
      "learning_rate": 2.922690533445005e-06,
      "loss": 0.1258,
      "step": 22188
    },
    {
      "epoch": 0.6473248147499854,
      "grad_norm": 0.9890264743048002,
      "learning_rate": 2.922260811609375e-06,
      "loss": 0.1093,
      "step": 22189
    },
    {
      "epoch": 0.6473539879806289,
      "grad_norm": 0.7692884800769954,
      "learning_rate": 2.9218311083233043e-06,
      "loss": 0.1207,
      "step": 22190
    },
    {
      "epoch": 0.6473831612112725,
      "grad_norm": 0.8194871660919252,
      "learning_rate": 2.921401423590631e-06,
      "loss": 0.1304,
      "step": 22191
    },
    {
      "epoch": 0.6474123344419161,
      "grad_norm": 0.9914336772632879,
      "learning_rate": 2.9209717574151876e-06,
      "loss": 0.1135,
      "step": 22192
    },
    {
      "epoch": 0.6474415076725597,
      "grad_norm": 0.8285517709083561,
      "learning_rate": 2.9205421098008125e-06,
      "loss": 0.1224,
      "step": 22193
    },
    {
      "epoch": 0.6474706809032033,
      "grad_norm": 0.7174786091560991,
      "learning_rate": 2.9201124807513404e-06,
      "loss": 0.1307,
      "step": 22194
    },
    {
      "epoch": 0.6474998541338468,
      "grad_norm": 0.8336481348727043,
      "learning_rate": 2.9196828702706093e-06,
      "loss": 0.1233,
      "step": 22195
    },
    {
      "epoch": 0.6475290273644904,
      "grad_norm": 0.7776439857809159,
      "learning_rate": 2.9192532783624503e-06,
      "loss": 0.1339,
      "step": 22196
    },
    {
      "epoch": 0.6475582005951339,
      "grad_norm": 0.8855835433072633,
      "learning_rate": 2.9188237050307043e-06,
      "loss": 0.1128,
      "step": 22197
    },
    {
      "epoch": 0.6475873738257775,
      "grad_norm": 0.8987721106669736,
      "learning_rate": 2.9183941502792024e-06,
      "loss": 0.1206,
      "step": 22198
    },
    {
      "epoch": 0.647616547056421,
      "grad_norm": 0.8379566254560279,
      "learning_rate": 2.9179646141117796e-06,
      "loss": 0.1129,
      "step": 22199
    },
    {
      "epoch": 0.6476457202870646,
      "grad_norm": 0.9057008007230078,
      "learning_rate": 2.917535096532271e-06,
      "loss": 0.1265,
      "step": 22200
    },
    {
      "epoch": 0.6476748935177081,
      "grad_norm": 0.8331903270729928,
      "learning_rate": 2.9171055975445146e-06,
      "loss": 0.1172,
      "step": 22201
    },
    {
      "epoch": 0.6477040667483517,
      "grad_norm": 0.8653612302633052,
      "learning_rate": 2.916676117152342e-06,
      "loss": 0.1283,
      "step": 22202
    },
    {
      "epoch": 0.6477332399789952,
      "grad_norm": 0.7786705846466023,
      "learning_rate": 2.9162466553595855e-06,
      "loss": 0.1117,
      "step": 22203
    },
    {
      "epoch": 0.6477624132096388,
      "grad_norm": 0.8807180720103558,
      "learning_rate": 2.9158172121700832e-06,
      "loss": 0.1131,
      "step": 22204
    },
    {
      "epoch": 0.6477915864402825,
      "grad_norm": 0.8487521008138452,
      "learning_rate": 2.9153877875876676e-06,
      "loss": 0.1184,
      "step": 22205
    },
    {
      "epoch": 0.647820759670926,
      "grad_norm": 1.059747550967466,
      "learning_rate": 2.9149583816161696e-06,
      "loss": 0.1237,
      "step": 22206
    },
    {
      "epoch": 0.6478499329015696,
      "grad_norm": 0.883438067013105,
      "learning_rate": 2.9145289942594264e-06,
      "loss": 0.1162,
      "step": 22207
    },
    {
      "epoch": 0.6478791061322131,
      "grad_norm": 0.8808293893054353,
      "learning_rate": 2.9140996255212717e-06,
      "loss": 0.1392,
      "step": 22208
    },
    {
      "epoch": 0.6479082793628567,
      "grad_norm": 0.8223902277286501,
      "learning_rate": 2.9136702754055378e-06,
      "loss": 0.1249,
      "step": 22209
    },
    {
      "epoch": 0.6479374525935002,
      "grad_norm": 0.7806718802515316,
      "learning_rate": 2.9132409439160563e-06,
      "loss": 0.1365,
      "step": 22210
    },
    {
      "epoch": 0.6479666258241438,
      "grad_norm": 0.8111719380116086,
      "learning_rate": 2.912811631056663e-06,
      "loss": 0.1052,
      "step": 22211
    },
    {
      "epoch": 0.6479957990547873,
      "grad_norm": 0.8420319153966669,
      "learning_rate": 2.9123823368311872e-06,
      "loss": 0.1345,
      "step": 22212
    },
    {
      "epoch": 0.6480249722854309,
      "grad_norm": 0.8771429884939764,
      "learning_rate": 2.9119530612434632e-06,
      "loss": 0.1652,
      "step": 22213
    },
    {
      "epoch": 0.6480541455160744,
      "grad_norm": 0.8131677053655297,
      "learning_rate": 2.9115238042973263e-06,
      "loss": 0.1281,
      "step": 22214
    },
    {
      "epoch": 0.648083318746718,
      "grad_norm": 0.7371308360770922,
      "learning_rate": 2.9110945659966063e-06,
      "loss": 0.1211,
      "step": 22215
    },
    {
      "epoch": 0.6481124919773615,
      "grad_norm": 0.9234163657485489,
      "learning_rate": 2.9106653463451327e-06,
      "loss": 0.1186,
      "step": 22216
    },
    {
      "epoch": 0.6481416652080051,
      "grad_norm": 1.0125152645211053,
      "learning_rate": 2.9102361453467434e-06,
      "loss": 0.143,
      "step": 22217
    },
    {
      "epoch": 0.6481708384386486,
      "grad_norm": 0.8084206160272337,
      "learning_rate": 2.909806963005264e-06,
      "loss": 0.1068,
      "step": 22218
    },
    {
      "epoch": 0.6482000116692923,
      "grad_norm": 0.6277330122588356,
      "learning_rate": 2.909377799324531e-06,
      "loss": 0.1231,
      "step": 22219
    },
    {
      "epoch": 0.6482291848999359,
      "grad_norm": 0.6551189963208536,
      "learning_rate": 2.9089486543083724e-06,
      "loss": 0.1126,
      "step": 22220
    },
    {
      "epoch": 0.6482583581305794,
      "grad_norm": 0.8168921311399865,
      "learning_rate": 2.9085195279606226e-06,
      "loss": 0.1136,
      "step": 22221
    },
    {
      "epoch": 0.648287531361223,
      "grad_norm": 0.6861027445830455,
      "learning_rate": 2.908090420285112e-06,
      "loss": 0.127,
      "step": 22222
    },
    {
      "epoch": 0.6483167045918665,
      "grad_norm": 0.7218587338935585,
      "learning_rate": 2.9076613312856662e-06,
      "loss": 0.1196,
      "step": 22223
    },
    {
      "epoch": 0.6483458778225101,
      "grad_norm": 0.7896318321198683,
      "learning_rate": 2.907232260966124e-06,
      "loss": 0.1199,
      "step": 22224
    },
    {
      "epoch": 0.6483750510531536,
      "grad_norm": 0.837601289541288,
      "learning_rate": 2.906803209330313e-06,
      "loss": 0.1209,
      "step": 22225
    },
    {
      "epoch": 0.6484042242837972,
      "grad_norm": 0.7454560796760552,
      "learning_rate": 2.906374176382062e-06,
      "loss": 0.1191,
      "step": 22226
    },
    {
      "epoch": 0.6484333975144407,
      "grad_norm": 0.85999224330815,
      "learning_rate": 2.9059451621252035e-06,
      "loss": 0.1383,
      "step": 22227
    },
    {
      "epoch": 0.6484625707450843,
      "grad_norm": 0.9845153676921726,
      "learning_rate": 2.9055161665635665e-06,
      "loss": 0.1326,
      "step": 22228
    },
    {
      "epoch": 0.6484917439757278,
      "grad_norm": 0.8082920375886358,
      "learning_rate": 2.9050871897009803e-06,
      "loss": 0.1037,
      "step": 22229
    },
    {
      "epoch": 0.6485209172063714,
      "grad_norm": 0.8762478996095815,
      "learning_rate": 2.9046582315412753e-06,
      "loss": 0.1245,
      "step": 22230
    },
    {
      "epoch": 0.648550090437015,
      "grad_norm": 0.8291544298446543,
      "learning_rate": 2.904229292088283e-06,
      "loss": 0.1234,
      "step": 22231
    },
    {
      "epoch": 0.6485792636676585,
      "grad_norm": 0.65625097007985,
      "learning_rate": 2.903800371345832e-06,
      "loss": 0.1023,
      "step": 22232
    },
    {
      "epoch": 0.6486084368983022,
      "grad_norm": 0.9834768611648416,
      "learning_rate": 2.9033714693177476e-06,
      "loss": 0.0977,
      "step": 22233
    },
    {
      "epoch": 0.6486376101289457,
      "grad_norm": 0.8343521171507311,
      "learning_rate": 2.9029425860078654e-06,
      "loss": 0.1226,
      "step": 22234
    },
    {
      "epoch": 0.6486667833595893,
      "grad_norm": 1.1112147369985677,
      "learning_rate": 2.9025137214200083e-06,
      "loss": 0.1363,
      "step": 22235
    },
    {
      "epoch": 0.6486959565902328,
      "grad_norm": 0.8137001270267556,
      "learning_rate": 2.9020848755580105e-06,
      "loss": 0.1277,
      "step": 22236
    },
    {
      "epoch": 0.6487251298208764,
      "grad_norm": 0.9032656744694125,
      "learning_rate": 2.9016560484256962e-06,
      "loss": 0.1174,
      "step": 22237
    },
    {
      "epoch": 0.6487543030515199,
      "grad_norm": 1.0388719305762362,
      "learning_rate": 2.9012272400268975e-06,
      "loss": 0.1263,
      "step": 22238
    },
    {
      "epoch": 0.6487834762821635,
      "grad_norm": 0.9447980528238217,
      "learning_rate": 2.9007984503654413e-06,
      "loss": 0.1392,
      "step": 22239
    },
    {
      "epoch": 0.648812649512807,
      "grad_norm": 0.8465938837144383,
      "learning_rate": 2.900369679445153e-06,
      "loss": 0.1165,
      "step": 22240
    },
    {
      "epoch": 0.6488418227434506,
      "grad_norm": 1.0450775181172034,
      "learning_rate": 2.899940927269863e-06,
      "loss": 0.1288,
      "step": 22241
    },
    {
      "epoch": 0.6488709959740941,
      "grad_norm": 0.9843836485440016,
      "learning_rate": 2.8995121938434013e-06,
      "loss": 0.134,
      "step": 22242
    },
    {
      "epoch": 0.6489001692047377,
      "grad_norm": 0.8441332050784572,
      "learning_rate": 2.8990834791695915e-06,
      "loss": 0.1363,
      "step": 22243
    },
    {
      "epoch": 0.6489293424353813,
      "grad_norm": 0.8157436532775706,
      "learning_rate": 2.898654783252265e-06,
      "loss": 0.1223,
      "step": 22244
    },
    {
      "epoch": 0.6489585156660248,
      "grad_norm": 1.0417248356619506,
      "learning_rate": 2.8982261060952464e-06,
      "loss": 0.1162,
      "step": 22245
    },
    {
      "epoch": 0.6489876888966685,
      "grad_norm": 0.8010471495555376,
      "learning_rate": 2.897797447702362e-06,
      "loss": 0.1068,
      "step": 22246
    },
    {
      "epoch": 0.649016862127312,
      "grad_norm": 0.8060256648823838,
      "learning_rate": 2.897368808077439e-06,
      "loss": 0.1405,
      "step": 22247
    },
    {
      "epoch": 0.6490460353579556,
      "grad_norm": 0.7801619476066202,
      "learning_rate": 2.8969401872243087e-06,
      "loss": 0.1295,
      "step": 22248
    },
    {
      "epoch": 0.6490752085885991,
      "grad_norm": 0.9191791421847849,
      "learning_rate": 2.8965115851467935e-06,
      "loss": 0.1606,
      "step": 22249
    },
    {
      "epoch": 0.6491043818192427,
      "grad_norm": 1.120677276714752,
      "learning_rate": 2.8960830018487183e-06,
      "loss": 0.161,
      "step": 22250
    },
    {
      "epoch": 0.6491335550498862,
      "grad_norm": 1.481076613089939,
      "learning_rate": 2.895654437333915e-06,
      "loss": 0.1237,
      "step": 22251
    },
    {
      "epoch": 0.6491627282805298,
      "grad_norm": 0.7821748784121355,
      "learning_rate": 2.895225891606206e-06,
      "loss": 0.1246,
      "step": 22252
    },
    {
      "epoch": 0.6491919015111733,
      "grad_norm": 0.7540817950701243,
      "learning_rate": 2.894797364669414e-06,
      "loss": 0.087,
      "step": 22253
    },
    {
      "epoch": 0.6492210747418169,
      "grad_norm": 0.7995361905697157,
      "learning_rate": 2.894368856527372e-06,
      "loss": 0.11,
      "step": 22254
    },
    {
      "epoch": 0.6492502479724604,
      "grad_norm": 0.9061402087324139,
      "learning_rate": 2.8939403671839027e-06,
      "loss": 0.1057,
      "step": 22255
    },
    {
      "epoch": 0.649279421203104,
      "grad_norm": 0.9513215363869687,
      "learning_rate": 2.893511896642829e-06,
      "loss": 0.122,
      "step": 22256
    },
    {
      "epoch": 0.6493085944337476,
      "grad_norm": 0.9415533149661969,
      "learning_rate": 2.8930834449079803e-06,
      "loss": 0.0895,
      "step": 22257
    },
    {
      "epoch": 0.6493377676643911,
      "grad_norm": 0.999627909313702,
      "learning_rate": 2.8926550119831798e-06,
      "loss": 0.1393,
      "step": 22258
    },
    {
      "epoch": 0.6493669408950347,
      "grad_norm": 0.8792576086743242,
      "learning_rate": 2.89222659787225e-06,
      "loss": 0.1368,
      "step": 22259
    },
    {
      "epoch": 0.6493961141256783,
      "grad_norm": 1.1220494267093675,
      "learning_rate": 2.891798202579018e-06,
      "loss": 0.1321,
      "step": 22260
    },
    {
      "epoch": 0.6494252873563219,
      "grad_norm": 1.2925017141032562,
      "learning_rate": 2.8913698261073097e-06,
      "loss": 0.1006,
      "step": 22261
    },
    {
      "epoch": 0.6494544605869654,
      "grad_norm": 1.1950285563697882,
      "learning_rate": 2.890941468460949e-06,
      "loss": 0.1339,
      "step": 22262
    },
    {
      "epoch": 0.649483633817609,
      "grad_norm": 0.9762538718653299,
      "learning_rate": 2.890513129643757e-06,
      "loss": 0.1421,
      "step": 22263
    },
    {
      "epoch": 0.6495128070482525,
      "grad_norm": 0.818201095500948,
      "learning_rate": 2.890084809659563e-06,
      "loss": 0.1297,
      "step": 22264
    },
    {
      "epoch": 0.6495419802788961,
      "grad_norm": 0.8716190450685624,
      "learning_rate": 2.8896565085121854e-06,
      "loss": 0.1294,
      "step": 22265
    },
    {
      "epoch": 0.6495711535095396,
      "grad_norm": 0.8326342416327545,
      "learning_rate": 2.8892282262054533e-06,
      "loss": 0.1141,
      "step": 22266
    },
    {
      "epoch": 0.6496003267401832,
      "grad_norm": 1.1768202664076068,
      "learning_rate": 2.8887999627431853e-06,
      "loss": 0.1527,
      "step": 22267
    },
    {
      "epoch": 0.6496294999708268,
      "grad_norm": 0.7728175670162283,
      "learning_rate": 2.8883717181292092e-06,
      "loss": 0.1076,
      "step": 22268
    },
    {
      "epoch": 0.6496586732014703,
      "grad_norm": 0.8384587145072449,
      "learning_rate": 2.8879434923673465e-06,
      "loss": 0.1268,
      "step": 22269
    },
    {
      "epoch": 0.6496878464321139,
      "grad_norm": 0.9827271946730417,
      "learning_rate": 2.887515285461418e-06,
      "loss": 0.1324,
      "step": 22270
    },
    {
      "epoch": 0.6497170196627574,
      "grad_norm": 0.7941963284896321,
      "learning_rate": 2.8870870974152485e-06,
      "loss": 0.1143,
      "step": 22271
    },
    {
      "epoch": 0.649746192893401,
      "grad_norm": 0.8922433027313872,
      "learning_rate": 2.8866589282326633e-06,
      "loss": 0.1311,
      "step": 22272
    },
    {
      "epoch": 0.6497753661240446,
      "grad_norm": 0.8916129357864748,
      "learning_rate": 2.886230777917481e-06,
      "loss": 0.1082,
      "step": 22273
    },
    {
      "epoch": 0.6498045393546882,
      "grad_norm": 0.8046660369678841,
      "learning_rate": 2.8858026464735275e-06,
      "loss": 0.1031,
      "step": 22274
    },
    {
      "epoch": 0.6498337125853317,
      "grad_norm": 0.7749585521185539,
      "learning_rate": 2.885374533904623e-06,
      "loss": 0.1054,
      "step": 22275
    },
    {
      "epoch": 0.6498628858159753,
      "grad_norm": 0.7019637526795982,
      "learning_rate": 2.8849464402145878e-06,
      "loss": 0.1207,
      "step": 22276
    },
    {
      "epoch": 0.6498920590466188,
      "grad_norm": 0.8911156397946876,
      "learning_rate": 2.8845183654072463e-06,
      "loss": 0.1156,
      "step": 22277
    },
    {
      "epoch": 0.6499212322772624,
      "grad_norm": 0.8485200099235773,
      "learning_rate": 2.8840903094864213e-06,
      "loss": 0.1494,
      "step": 22278
    },
    {
      "epoch": 0.649950405507906,
      "grad_norm": 0.7812087982819907,
      "learning_rate": 2.8836622724559332e-06,
      "loss": 0.1099,
      "step": 22279
    },
    {
      "epoch": 0.6499795787385495,
      "grad_norm": 0.9901185834870755,
      "learning_rate": 2.8832342543196013e-06,
      "loss": 0.1353,
      "step": 22280
    },
    {
      "epoch": 0.650008751969193,
      "grad_norm": 0.8845551968744959,
      "learning_rate": 2.882806255081251e-06,
      "loss": 0.1255,
      "step": 22281
    },
    {
      "epoch": 0.6500379251998366,
      "grad_norm": 0.7359573079446841,
      "learning_rate": 2.8823782747447002e-06,
      "loss": 0.0956,
      "step": 22282
    },
    {
      "epoch": 0.6500670984304802,
      "grad_norm": 1.1051803047028599,
      "learning_rate": 2.881950313313767e-06,
      "loss": 0.141,
      "step": 22283
    },
    {
      "epoch": 0.6500962716611237,
      "grad_norm": 1.2176424935196708,
      "learning_rate": 2.88152237079228e-06,
      "loss": 0.109,
      "step": 22284
    },
    {
      "epoch": 0.6501254448917673,
      "grad_norm": 0.8490213342693431,
      "learning_rate": 2.8810944471840553e-06,
      "loss": 0.1254,
      "step": 22285
    },
    {
      "epoch": 0.6501546181224108,
      "grad_norm": 0.7544935484334007,
      "learning_rate": 2.8806665424929115e-06,
      "loss": 0.1262,
      "step": 22286
    },
    {
      "epoch": 0.6501837913530545,
      "grad_norm": 1.1875890504706237,
      "learning_rate": 2.8802386567226724e-06,
      "loss": 0.1321,
      "step": 22287
    },
    {
      "epoch": 0.650212964583698,
      "grad_norm": 0.8672871483778205,
      "learning_rate": 2.8798107898771577e-06,
      "loss": 0.1198,
      "step": 22288
    },
    {
      "epoch": 0.6502421378143416,
      "grad_norm": 0.6797266463043273,
      "learning_rate": 2.879382941960183e-06,
      "loss": 0.1107,
      "step": 22289
    },
    {
      "epoch": 0.6502713110449851,
      "grad_norm": 0.7605994544537991,
      "learning_rate": 2.878955112975572e-06,
      "loss": 0.1037,
      "step": 22290
    },
    {
      "epoch": 0.6503004842756287,
      "grad_norm": 0.8885139352006814,
      "learning_rate": 2.8785273029271447e-06,
      "loss": 0.1225,
      "step": 22291
    },
    {
      "epoch": 0.6503296575062723,
      "grad_norm": 0.8725811308371497,
      "learning_rate": 2.87809951181872e-06,
      "loss": 0.1145,
      "step": 22292
    },
    {
      "epoch": 0.6503588307369158,
      "grad_norm": 0.946947772713514,
      "learning_rate": 2.8776717396541145e-06,
      "loss": 0.1336,
      "step": 22293
    },
    {
      "epoch": 0.6503880039675594,
      "grad_norm": 0.9082505042413088,
      "learning_rate": 2.8772439864371497e-06,
      "loss": 0.1463,
      "step": 22294
    },
    {
      "epoch": 0.6504171771982029,
      "grad_norm": 0.8276712450899966,
      "learning_rate": 2.8768162521716426e-06,
      "loss": 0.1227,
      "step": 22295
    },
    {
      "epoch": 0.6504463504288465,
      "grad_norm": 0.8256386614093127,
      "learning_rate": 2.876388536861415e-06,
      "loss": 0.0924,
      "step": 22296
    },
    {
      "epoch": 0.65047552365949,
      "grad_norm": 0.8900106813547162,
      "learning_rate": 2.875960840510282e-06,
      "loss": 0.1032,
      "step": 22297
    },
    {
      "epoch": 0.6505046968901336,
      "grad_norm": 1.0424582409899887,
      "learning_rate": 2.8755331631220654e-06,
      "loss": 0.1373,
      "step": 22298
    },
    {
      "epoch": 0.6505338701207771,
      "grad_norm": 0.9105610222122973,
      "learning_rate": 2.8751055047005817e-06,
      "loss": 0.1181,
      "step": 22299
    },
    {
      "epoch": 0.6505630433514208,
      "grad_norm": 0.8846220500941803,
      "learning_rate": 2.8746778652496467e-06,
      "loss": 0.1198,
      "step": 22300
    },
    {
      "epoch": 0.6505922165820643,
      "grad_norm": 1.130978155378496,
      "learning_rate": 2.8742502447730803e-06,
      "loss": 0.1324,
      "step": 22301
    },
    {
      "epoch": 0.6506213898127079,
      "grad_norm": 0.8745998816816701,
      "learning_rate": 2.8738226432747025e-06,
      "loss": 0.0988,
      "step": 22302
    },
    {
      "epoch": 0.6506505630433514,
      "grad_norm": 0.9903151874375687,
      "learning_rate": 2.873395060758326e-06,
      "loss": 0.1363,
      "step": 22303
    },
    {
      "epoch": 0.650679736273995,
      "grad_norm": 1.0525509613535189,
      "learning_rate": 2.872967497227773e-06,
      "loss": 0.1336,
      "step": 22304
    },
    {
      "epoch": 0.6507089095046386,
      "grad_norm": 0.932936869396191,
      "learning_rate": 2.872539952686859e-06,
      "loss": 0.1295,
      "step": 22305
    },
    {
      "epoch": 0.6507380827352821,
      "grad_norm": 0.9742411972903376,
      "learning_rate": 2.8721124271393973e-06,
      "loss": 0.103,
      "step": 22306
    },
    {
      "epoch": 0.6507672559659257,
      "grad_norm": 0.7703058739276191,
      "learning_rate": 2.8716849205892087e-06,
      "loss": 0.1104,
      "step": 22307
    },
    {
      "epoch": 0.6507964291965692,
      "grad_norm": 0.9885490723901301,
      "learning_rate": 2.8712574330401112e-06,
      "loss": 0.1156,
      "step": 22308
    },
    {
      "epoch": 0.6508256024272128,
      "grad_norm": 1.0284538432567505,
      "learning_rate": 2.8708299644959187e-06,
      "loss": 0.1092,
      "step": 22309
    },
    {
      "epoch": 0.6508547756578563,
      "grad_norm": 0.8335080222930304,
      "learning_rate": 2.8704025149604465e-06,
      "loss": 0.0943,
      "step": 22310
    },
    {
      "epoch": 0.6508839488884999,
      "grad_norm": 0.7431695653097307,
      "learning_rate": 2.8699750844375136e-06,
      "loss": 0.1068,
      "step": 22311
    },
    {
      "epoch": 0.6509131221191434,
      "grad_norm": 0.9368502487206658,
      "learning_rate": 2.8695476729309345e-06,
      "loss": 0.1296,
      "step": 22312
    },
    {
      "epoch": 0.650942295349787,
      "grad_norm": 0.668798187950919,
      "learning_rate": 2.869120280444522e-06,
      "loss": 0.1136,
      "step": 22313
    },
    {
      "epoch": 0.6509714685804306,
      "grad_norm": 1.0603467585020838,
      "learning_rate": 2.868692906982099e-06,
      "loss": 0.1578,
      "step": 22314
    },
    {
      "epoch": 0.6510006418110742,
      "grad_norm": 0.9109501761270273,
      "learning_rate": 2.868265552547477e-06,
      "loss": 0.1064,
      "step": 22315
    },
    {
      "epoch": 0.6510298150417178,
      "grad_norm": 0.8593863084108118,
      "learning_rate": 2.8678382171444686e-06,
      "loss": 0.1272,
      "step": 22316
    },
    {
      "epoch": 0.6510589882723613,
      "grad_norm": 1.026785609717716,
      "learning_rate": 2.8674109007768935e-06,
      "loss": 0.1226,
      "step": 22317
    },
    {
      "epoch": 0.6510881615030049,
      "grad_norm": 0.945703504374666,
      "learning_rate": 2.8669836034485655e-06,
      "loss": 0.1079,
      "step": 22318
    },
    {
      "epoch": 0.6511173347336484,
      "grad_norm": 0.7709154267247766,
      "learning_rate": 2.866556325163296e-06,
      "loss": 0.1226,
      "step": 22319
    },
    {
      "epoch": 0.651146507964292,
      "grad_norm": 0.7851121998357765,
      "learning_rate": 2.866129065924903e-06,
      "loss": 0.1164,
      "step": 22320
    },
    {
      "epoch": 0.6511756811949355,
      "grad_norm": 0.7446738456615879,
      "learning_rate": 2.8657018257372017e-06,
      "loss": 0.1214,
      "step": 22321
    },
    {
      "epoch": 0.6512048544255791,
      "grad_norm": 1.1456257114106072,
      "learning_rate": 2.8652746046040053e-06,
      "loss": 0.1531,
      "step": 22322
    },
    {
      "epoch": 0.6512340276562226,
      "grad_norm": 0.8452625852629456,
      "learning_rate": 2.8648474025291257e-06,
      "loss": 0.1209,
      "step": 22323
    },
    {
      "epoch": 0.6512632008868662,
      "grad_norm": 0.7001595159123495,
      "learning_rate": 2.8644202195163807e-06,
      "loss": 0.1125,
      "step": 22324
    },
    {
      "epoch": 0.6512923741175097,
      "grad_norm": 0.9826576706862223,
      "learning_rate": 2.86399305556958e-06,
      "loss": 0.123,
      "step": 22325
    },
    {
      "epoch": 0.6513215473481533,
      "grad_norm": 0.9083825273559737,
      "learning_rate": 2.8635659106925415e-06,
      "loss": 0.1181,
      "step": 22326
    },
    {
      "epoch": 0.651350720578797,
      "grad_norm": 0.8339662508236567,
      "learning_rate": 2.8631387848890744e-06,
      "loss": 0.1312,
      "step": 22327
    },
    {
      "epoch": 0.6513798938094405,
      "grad_norm": 1.1447321900342426,
      "learning_rate": 2.8627116781629966e-06,
      "loss": 0.1097,
      "step": 22328
    },
    {
      "epoch": 0.651409067040084,
      "grad_norm": 0.7953141510279437,
      "learning_rate": 2.8622845905181185e-06,
      "loss": 0.1135,
      "step": 22329
    },
    {
      "epoch": 0.6514382402707276,
      "grad_norm": 0.8991002693315734,
      "learning_rate": 2.8618575219582514e-06,
      "loss": 0.1194,
      "step": 22330
    },
    {
      "epoch": 0.6514674135013712,
      "grad_norm": 0.7034638652643109,
      "learning_rate": 2.8614304724872094e-06,
      "loss": 0.1043,
      "step": 22331
    },
    {
      "epoch": 0.6514965867320147,
      "grad_norm": 1.0133834699574427,
      "learning_rate": 2.8610034421088084e-06,
      "loss": 0.105,
      "step": 22332
    },
    {
      "epoch": 0.6515257599626583,
      "grad_norm": 0.815816599781631,
      "learning_rate": 2.8605764308268554e-06,
      "loss": 0.1279,
      "step": 22333
    },
    {
      "epoch": 0.6515549331933018,
      "grad_norm": 0.9157008933986431,
      "learning_rate": 2.860149438645168e-06,
      "loss": 0.1249,
      "step": 22334
    },
    {
      "epoch": 0.6515841064239454,
      "grad_norm": 0.8890247586150002,
      "learning_rate": 2.859722465567555e-06,
      "loss": 0.1009,
      "step": 22335
    },
    {
      "epoch": 0.6516132796545889,
      "grad_norm": 0.9735215141512003,
      "learning_rate": 2.8592955115978268e-06,
      "loss": 0.1254,
      "step": 22336
    },
    {
      "epoch": 0.6516424528852325,
      "grad_norm": 0.877890120340445,
      "learning_rate": 2.858868576739797e-06,
      "loss": 0.1311,
      "step": 22337
    },
    {
      "epoch": 0.651671626115876,
      "grad_norm": 0.7703131794966079,
      "learning_rate": 2.85844166099728e-06,
      "loss": 0.1164,
      "step": 22338
    },
    {
      "epoch": 0.6517007993465196,
      "grad_norm": 1.033143415958569,
      "learning_rate": 2.8580147643740847e-06,
      "loss": 0.146,
      "step": 22339
    },
    {
      "epoch": 0.6517299725771631,
      "grad_norm": 0.8663714746089348,
      "learning_rate": 2.8575878868740197e-06,
      "loss": 0.1347,
      "step": 22340
    },
    {
      "epoch": 0.6517591458078068,
      "grad_norm": 0.9090693280832018,
      "learning_rate": 2.857161028500901e-06,
      "loss": 0.1191,
      "step": 22341
    },
    {
      "epoch": 0.6517883190384504,
      "grad_norm": 0.6568736409347923,
      "learning_rate": 2.8567341892585373e-06,
      "loss": 0.1088,
      "step": 22342
    },
    {
      "epoch": 0.6518174922690939,
      "grad_norm": 0.9747536446220381,
      "learning_rate": 2.8563073691507346e-06,
      "loss": 0.1181,
      "step": 22343
    },
    {
      "epoch": 0.6518466654997375,
      "grad_norm": 0.8898675146297077,
      "learning_rate": 2.8558805681813123e-06,
      "loss": 0.1284,
      "step": 22344
    },
    {
      "epoch": 0.651875838730381,
      "grad_norm": 0.7639550982067947,
      "learning_rate": 2.8554537863540766e-06,
      "loss": 0.1247,
      "step": 22345
    },
    {
      "epoch": 0.6519050119610246,
      "grad_norm": 0.9532939328691233,
      "learning_rate": 2.855027023672835e-06,
      "loss": 0.1101,
      "step": 22346
    },
    {
      "epoch": 0.6519341851916681,
      "grad_norm": 0.8518130061041179,
      "learning_rate": 2.854600280141403e-06,
      "loss": 0.1437,
      "step": 22347
    },
    {
      "epoch": 0.6519633584223117,
      "grad_norm": 0.7728245534145937,
      "learning_rate": 2.8541735557635863e-06,
      "loss": 0.1003,
      "step": 22348
    },
    {
      "epoch": 0.6519925316529552,
      "grad_norm": 0.9433016687814527,
      "learning_rate": 2.853746850543195e-06,
      "loss": 0.1256,
      "step": 22349
    },
    {
      "epoch": 0.6520217048835988,
      "grad_norm": 0.8137539866414318,
      "learning_rate": 2.8533201644840392e-06,
      "loss": 0.0998,
      "step": 22350
    },
    {
      "epoch": 0.6520508781142423,
      "grad_norm": 0.5994125075179363,
      "learning_rate": 2.8528934975899303e-06,
      "loss": 0.1085,
      "step": 22351
    },
    {
      "epoch": 0.6520800513448859,
      "grad_norm": 0.8790534188690425,
      "learning_rate": 2.8524668498646755e-06,
      "loss": 0.141,
      "step": 22352
    },
    {
      "epoch": 0.6521092245755294,
      "grad_norm": 1.018077477680946,
      "learning_rate": 2.852040221312082e-06,
      "loss": 0.1224,
      "step": 22353
    },
    {
      "epoch": 0.6521383978061731,
      "grad_norm": 0.8866096984268983,
      "learning_rate": 2.851613611935963e-06,
      "loss": 0.1205,
      "step": 22354
    },
    {
      "epoch": 0.6521675710368167,
      "grad_norm": 0.697688642917243,
      "learning_rate": 2.8511870217401227e-06,
      "loss": 0.1191,
      "step": 22355
    },
    {
      "epoch": 0.6521967442674602,
      "grad_norm": 0.919552829781995,
      "learning_rate": 2.8507604507283736e-06,
      "loss": 0.1152,
      "step": 22356
    },
    {
      "epoch": 0.6522259174981038,
      "grad_norm": 0.9177034296130838,
      "learning_rate": 2.8503338989045202e-06,
      "loss": 0.092,
      "step": 22357
    },
    {
      "epoch": 0.6522550907287473,
      "grad_norm": 1.0123413429136259,
      "learning_rate": 2.8499073662723743e-06,
      "loss": 0.1375,
      "step": 22358
    },
    {
      "epoch": 0.6522842639593909,
      "grad_norm": 0.8239520774706534,
      "learning_rate": 2.8494808528357424e-06,
      "loss": 0.1055,
      "step": 22359
    },
    {
      "epoch": 0.6523134371900344,
      "grad_norm": 0.8823536883204104,
      "learning_rate": 2.8490543585984303e-06,
      "loss": 0.114,
      "step": 22360
    },
    {
      "epoch": 0.652342610420678,
      "grad_norm": 0.8723999640850891,
      "learning_rate": 2.8486278835642474e-06,
      "loss": 0.1116,
      "step": 22361
    },
    {
      "epoch": 0.6523717836513215,
      "grad_norm": 0.8112371910259645,
      "learning_rate": 2.848201427737003e-06,
      "loss": 0.1235,
      "step": 22362
    },
    {
      "epoch": 0.6524009568819651,
      "grad_norm": 0.8758281179562669,
      "learning_rate": 2.8477749911205007e-06,
      "loss": 0.1195,
      "step": 22363
    },
    {
      "epoch": 0.6524301301126086,
      "grad_norm": 0.7635111746276324,
      "learning_rate": 2.8473485737185513e-06,
      "loss": 0.118,
      "step": 22364
    },
    {
      "epoch": 0.6524593033432522,
      "grad_norm": 0.6954318845365827,
      "learning_rate": 2.8469221755349596e-06,
      "loss": 0.1204,
      "step": 22365
    },
    {
      "epoch": 0.6524884765738957,
      "grad_norm": 1.0995960484615341,
      "learning_rate": 2.8464957965735317e-06,
      "loss": 0.1341,
      "step": 22366
    },
    {
      "epoch": 0.6525176498045393,
      "grad_norm": 0.8960672492372462,
      "learning_rate": 2.846069436838075e-06,
      "loss": 0.1214,
      "step": 22367
    },
    {
      "epoch": 0.652546823035183,
      "grad_norm": 0.9486649441543449,
      "learning_rate": 2.8456430963323977e-06,
      "loss": 0.1361,
      "step": 22368
    },
    {
      "epoch": 0.6525759962658265,
      "grad_norm": 0.7028028926129187,
      "learning_rate": 2.8452167750603044e-06,
      "loss": 0.1102,
      "step": 22369
    },
    {
      "epoch": 0.6526051694964701,
      "grad_norm": 1.4042321279524002,
      "learning_rate": 2.8447904730256e-06,
      "loss": 0.1386,
      "step": 22370
    },
    {
      "epoch": 0.6526343427271136,
      "grad_norm": 1.2118493118809086,
      "learning_rate": 2.8443641902320935e-06,
      "loss": 0.1075,
      "step": 22371
    },
    {
      "epoch": 0.6526635159577572,
      "grad_norm": 0.8708631911218182,
      "learning_rate": 2.8439379266835888e-06,
      "loss": 0.1152,
      "step": 22372
    },
    {
      "epoch": 0.6526926891884007,
      "grad_norm": 0.8939611154119125,
      "learning_rate": 2.843511682383888e-06,
      "loss": 0.1383,
      "step": 22373
    },
    {
      "epoch": 0.6527218624190443,
      "grad_norm": 1.197413725856025,
      "learning_rate": 2.843085457336804e-06,
      "loss": 0.1162,
      "step": 22374
    },
    {
      "epoch": 0.6527510356496878,
      "grad_norm": 1.2039479656733154,
      "learning_rate": 2.842659251546137e-06,
      "loss": 0.1101,
      "step": 22375
    },
    {
      "epoch": 0.6527802088803314,
      "grad_norm": 0.7770492126751283,
      "learning_rate": 2.8422330650156926e-06,
      "loss": 0.1082,
      "step": 22376
    },
    {
      "epoch": 0.652809382110975,
      "grad_norm": 0.9534063290817145,
      "learning_rate": 2.8418068977492773e-06,
      "loss": 0.1278,
      "step": 22377
    },
    {
      "epoch": 0.6528385553416185,
      "grad_norm": 1.0784775794238626,
      "learning_rate": 2.841380749750696e-06,
      "loss": 0.118,
      "step": 22378
    },
    {
      "epoch": 0.652867728572262,
      "grad_norm": 1.0676492238026483,
      "learning_rate": 2.840954621023749e-06,
      "loss": 0.1132,
      "step": 22379
    },
    {
      "epoch": 0.6528969018029056,
      "grad_norm": 0.8335513307074479,
      "learning_rate": 2.840528511572245e-06,
      "loss": 0.139,
      "step": 22380
    },
    {
      "epoch": 0.6529260750335493,
      "grad_norm": 0.9354201849273656,
      "learning_rate": 2.840102421399987e-06,
      "loss": 0.1319,
      "step": 22381
    },
    {
      "epoch": 0.6529552482641928,
      "grad_norm": 1.068194468994753,
      "learning_rate": 2.8396763505107804e-06,
      "loss": 0.1268,
      "step": 22382
    },
    {
      "epoch": 0.6529844214948364,
      "grad_norm": 0.8156923346856937,
      "learning_rate": 2.8392502989084255e-06,
      "loss": 0.1394,
      "step": 22383
    },
    {
      "epoch": 0.6530135947254799,
      "grad_norm": 1.0070871330619462,
      "learning_rate": 2.8388242665967296e-06,
      "loss": 0.1086,
      "step": 22384
    },
    {
      "epoch": 0.6530427679561235,
      "grad_norm": 1.074138388207415,
      "learning_rate": 2.838398253579493e-06,
      "loss": 0.1111,
      "step": 22385
    },
    {
      "epoch": 0.653071941186767,
      "grad_norm": 0.756491479175004,
      "learning_rate": 2.8379722598605233e-06,
      "loss": 0.106,
      "step": 22386
    },
    {
      "epoch": 0.6531011144174106,
      "grad_norm": 0.7841927221775205,
      "learning_rate": 2.8375462854436187e-06,
      "loss": 0.1163,
      "step": 22387
    },
    {
      "epoch": 0.6531302876480541,
      "grad_norm": 0.78904380879566,
      "learning_rate": 2.837120330332587e-06,
      "loss": 0.1191,
      "step": 22388
    },
    {
      "epoch": 0.6531594608786977,
      "grad_norm": 1.0917631612426648,
      "learning_rate": 2.8366943945312274e-06,
      "loss": 0.1252,
      "step": 22389
    },
    {
      "epoch": 0.6531886341093412,
      "grad_norm": 0.7741510810631309,
      "learning_rate": 2.836268478043343e-06,
      "loss": 0.1294,
      "step": 22390
    },
    {
      "epoch": 0.6532178073399848,
      "grad_norm": 0.7935141619877567,
      "learning_rate": 2.835842580872737e-06,
      "loss": 0.1197,
      "step": 22391
    },
    {
      "epoch": 0.6532469805706284,
      "grad_norm": 0.9354382676729552,
      "learning_rate": 2.835416703023214e-06,
      "loss": 0.1092,
      "step": 22392
    },
    {
      "epoch": 0.6532761538012719,
      "grad_norm": 1.1299759732247512,
      "learning_rate": 2.8349908444985706e-06,
      "loss": 0.1147,
      "step": 22393
    },
    {
      "epoch": 0.6533053270319155,
      "grad_norm": 0.8956681349786876,
      "learning_rate": 2.834565005302615e-06,
      "loss": 0.1234,
      "step": 22394
    },
    {
      "epoch": 0.6533345002625591,
      "grad_norm": 0.8416045818463777,
      "learning_rate": 2.8341391854391466e-06,
      "loss": 0.1204,
      "step": 22395
    },
    {
      "epoch": 0.6533636734932027,
      "grad_norm": 0.873502656204317,
      "learning_rate": 2.8337133849119643e-06,
      "loss": 0.125,
      "step": 22396
    },
    {
      "epoch": 0.6533928467238462,
      "grad_norm": 0.8876938485626714,
      "learning_rate": 2.8332876037248714e-06,
      "loss": 0.1262,
      "step": 22397
    },
    {
      "epoch": 0.6534220199544898,
      "grad_norm": 0.8923825238580209,
      "learning_rate": 2.8328618418816715e-06,
      "loss": 0.1073,
      "step": 22398
    },
    {
      "epoch": 0.6534511931851333,
      "grad_norm": 0.7873329537573146,
      "learning_rate": 2.8324360993861644e-06,
      "loss": 0.1056,
      "step": 22399
    },
    {
      "epoch": 0.6534803664157769,
      "grad_norm": 0.7710899821948057,
      "learning_rate": 2.832010376242148e-06,
      "loss": 0.1298,
      "step": 22400
    },
    {
      "epoch": 0.6535095396464204,
      "grad_norm": 1.0411339849268668,
      "learning_rate": 2.831584672453427e-06,
      "loss": 0.108,
      "step": 22401
    },
    {
      "epoch": 0.653538712877064,
      "grad_norm": 0.9291247455436145,
      "learning_rate": 2.831158988023801e-06,
      "loss": 0.1155,
      "step": 22402
    },
    {
      "epoch": 0.6535678861077076,
      "grad_norm": 0.8797433662321947,
      "learning_rate": 2.8307333229570653e-06,
      "loss": 0.1177,
      "step": 22403
    },
    {
      "epoch": 0.6535970593383511,
      "grad_norm": 0.8237735555776691,
      "learning_rate": 2.8303076772570292e-06,
      "loss": 0.1317,
      "step": 22404
    },
    {
      "epoch": 0.6536262325689947,
      "grad_norm": 0.8579782609770397,
      "learning_rate": 2.8298820509274876e-06,
      "loss": 0.1307,
      "step": 22405
    },
    {
      "epoch": 0.6536554057996382,
      "grad_norm": 0.8855548023531,
      "learning_rate": 2.8294564439722395e-06,
      "loss": 0.107,
      "step": 22406
    },
    {
      "epoch": 0.6536845790302818,
      "grad_norm": 1.0135499412180398,
      "learning_rate": 2.8290308563950876e-06,
      "loss": 0.1073,
      "step": 22407
    },
    {
      "epoch": 0.6537137522609254,
      "grad_norm": 0.8882936866312848,
      "learning_rate": 2.8286052881998303e-06,
      "loss": 0.1173,
      "step": 22408
    },
    {
      "epoch": 0.653742925491569,
      "grad_norm": 0.8248935598199801,
      "learning_rate": 2.8281797393902643e-06,
      "loss": 0.1424,
      "step": 22409
    },
    {
      "epoch": 0.6537720987222125,
      "grad_norm": 0.9835032623361226,
      "learning_rate": 2.8277542099701916e-06,
      "loss": 0.1118,
      "step": 22410
    },
    {
      "epoch": 0.6538012719528561,
      "grad_norm": 1.080433309396559,
      "learning_rate": 2.827328699943413e-06,
      "loss": 0.1374,
      "step": 22411
    },
    {
      "epoch": 0.6538304451834996,
      "grad_norm": 1.0401450762281055,
      "learning_rate": 2.826903209313725e-06,
      "loss": 0.1389,
      "step": 22412
    },
    {
      "epoch": 0.6538596184141432,
      "grad_norm": 0.8951298171915354,
      "learning_rate": 2.826477738084924e-06,
      "loss": 0.131,
      "step": 22413
    },
    {
      "epoch": 0.6538887916447867,
      "grad_norm": 0.7924965449107844,
      "learning_rate": 2.8260522862608123e-06,
      "loss": 0.1204,
      "step": 22414
    },
    {
      "epoch": 0.6539179648754303,
      "grad_norm": 0.8684814252637882,
      "learning_rate": 2.825626853845186e-06,
      "loss": 0.1343,
      "step": 22415
    },
    {
      "epoch": 0.6539471381060739,
      "grad_norm": 1.14344951259655,
      "learning_rate": 2.8252014408418455e-06,
      "loss": 0.132,
      "step": 22416
    },
    {
      "epoch": 0.6539763113367174,
      "grad_norm": 1.650113471243881,
      "learning_rate": 2.8247760472545856e-06,
      "loss": 0.1187,
      "step": 22417
    },
    {
      "epoch": 0.654005484567361,
      "grad_norm": 0.9882441329202631,
      "learning_rate": 2.8243506730872072e-06,
      "loss": 0.109,
      "step": 22418
    },
    {
      "epoch": 0.6540346577980045,
      "grad_norm": 0.806671554677657,
      "learning_rate": 2.8239253183435078e-06,
      "loss": 0.1143,
      "step": 22419
    },
    {
      "epoch": 0.6540638310286481,
      "grad_norm": 0.8737769216924756,
      "learning_rate": 2.8234999830272793e-06,
      "loss": 0.1258,
      "step": 22420
    },
    {
      "epoch": 0.6540930042592916,
      "grad_norm": 1.027183654696904,
      "learning_rate": 2.823074667142327e-06,
      "loss": 0.1008,
      "step": 22421
    },
    {
      "epoch": 0.6541221774899353,
      "grad_norm": 1.0784696427280314,
      "learning_rate": 2.822649370692444e-06,
      "loss": 0.1229,
      "step": 22422
    },
    {
      "epoch": 0.6541513507205788,
      "grad_norm": 1.0854113484420793,
      "learning_rate": 2.822224093681426e-06,
      "loss": 0.1082,
      "step": 22423
    },
    {
      "epoch": 0.6541805239512224,
      "grad_norm": 0.8773620291506504,
      "learning_rate": 2.8217988361130745e-06,
      "loss": 0.1538,
      "step": 22424
    },
    {
      "epoch": 0.654209697181866,
      "grad_norm": 1.052376493376747,
      "learning_rate": 2.8213735979911815e-06,
      "loss": 0.1084,
      "step": 22425
    },
    {
      "epoch": 0.6542388704125095,
      "grad_norm": 1.00853733576621,
      "learning_rate": 2.8209483793195434e-06,
      "loss": 0.1258,
      "step": 22426
    },
    {
      "epoch": 0.654268043643153,
      "grad_norm": 0.9335007307215261,
      "learning_rate": 2.8205231801019584e-06,
      "loss": 0.116,
      "step": 22427
    },
    {
      "epoch": 0.6542972168737966,
      "grad_norm": 0.7882432973844449,
      "learning_rate": 2.820098000342224e-06,
      "loss": 0.1072,
      "step": 22428
    },
    {
      "epoch": 0.6543263901044402,
      "grad_norm": 0.9348378620044736,
      "learning_rate": 2.8196728400441343e-06,
      "loss": 0.1013,
      "step": 22429
    },
    {
      "epoch": 0.6543555633350837,
      "grad_norm": 1.0358406809412704,
      "learning_rate": 2.8192476992114825e-06,
      "loss": 0.1209,
      "step": 22430
    },
    {
      "epoch": 0.6543847365657273,
      "grad_norm": 0.9515981882527773,
      "learning_rate": 2.8188225778480694e-06,
      "loss": 0.1123,
      "step": 22431
    },
    {
      "epoch": 0.6544139097963708,
      "grad_norm": 0.854083675080113,
      "learning_rate": 2.818397475957685e-06,
      "loss": 0.1463,
      "step": 22432
    },
    {
      "epoch": 0.6544430830270144,
      "grad_norm": 1.0071188848555603,
      "learning_rate": 2.8179723935441273e-06,
      "loss": 0.1338,
      "step": 22433
    },
    {
      "epoch": 0.6544722562576579,
      "grad_norm": 0.9228795996135992,
      "learning_rate": 2.8175473306111932e-06,
      "loss": 0.1131,
      "step": 22434
    },
    {
      "epoch": 0.6545014294883016,
      "grad_norm": 1.0131591896620302,
      "learning_rate": 2.817122287162676e-06,
      "loss": 0.147,
      "step": 22435
    },
    {
      "epoch": 0.6545306027189451,
      "grad_norm": 0.9948899904967381,
      "learning_rate": 2.816697263202367e-06,
      "loss": 0.1174,
      "step": 22436
    },
    {
      "epoch": 0.6545597759495887,
      "grad_norm": 0.8279068293792077,
      "learning_rate": 2.8162722587340663e-06,
      "loss": 0.1318,
      "step": 22437
    },
    {
      "epoch": 0.6545889491802322,
      "grad_norm": 0.9452393538791887,
      "learning_rate": 2.815847273761564e-06,
      "loss": 0.1519,
      "step": 22438
    },
    {
      "epoch": 0.6546181224108758,
      "grad_norm": 0.8785567153294748,
      "learning_rate": 2.8154223082886568e-06,
      "loss": 0.1204,
      "step": 22439
    },
    {
      "epoch": 0.6546472956415194,
      "grad_norm": 0.8682213748099582,
      "learning_rate": 2.8149973623191363e-06,
      "loss": 0.1223,
      "step": 22440
    },
    {
      "epoch": 0.6546764688721629,
      "grad_norm": 0.794535610763149,
      "learning_rate": 2.8145724358567994e-06,
      "loss": 0.1071,
      "step": 22441
    },
    {
      "epoch": 0.6547056421028065,
      "grad_norm": 0.8324289979702626,
      "learning_rate": 2.8141475289054387e-06,
      "loss": 0.1357,
      "step": 22442
    },
    {
      "epoch": 0.65473481533345,
      "grad_norm": 0.946816262503394,
      "learning_rate": 2.8137226414688447e-06,
      "loss": 0.1198,
      "step": 22443
    },
    {
      "epoch": 0.6547639885640936,
      "grad_norm": 0.9347654787220115,
      "learning_rate": 2.8132977735508125e-06,
      "loss": 0.1156,
      "step": 22444
    },
    {
      "epoch": 0.6547931617947371,
      "grad_norm": 1.054201416734486,
      "learning_rate": 2.812872925155139e-06,
      "loss": 0.1205,
      "step": 22445
    },
    {
      "epoch": 0.6548223350253807,
      "grad_norm": 0.8743155275048713,
      "learning_rate": 2.812448096285613e-06,
      "loss": 0.1164,
      "step": 22446
    },
    {
      "epoch": 0.6548515082560242,
      "grad_norm": 0.989058469367441,
      "learning_rate": 2.812023286946026e-06,
      "loss": 0.1224,
      "step": 22447
    },
    {
      "epoch": 0.6548806814866678,
      "grad_norm": 0.8370971723206385,
      "learning_rate": 2.8115984971401753e-06,
      "loss": 0.1297,
      "step": 22448
    },
    {
      "epoch": 0.6549098547173114,
      "grad_norm": 1.2293559844157345,
      "learning_rate": 2.8111737268718507e-06,
      "loss": 0.1396,
      "step": 22449
    },
    {
      "epoch": 0.654939027947955,
      "grad_norm": 0.7159436549623293,
      "learning_rate": 2.8107489761448416e-06,
      "loss": 0.1094,
      "step": 22450
    },
    {
      "epoch": 0.6549682011785986,
      "grad_norm": 1.059710812761776,
      "learning_rate": 2.8103242449629455e-06,
      "loss": 0.1245,
      "step": 22451
    },
    {
      "epoch": 0.6549973744092421,
      "grad_norm": 1.0762803144274677,
      "learning_rate": 2.8098995333299522e-06,
      "loss": 0.132,
      "step": 22452
    },
    {
      "epoch": 0.6550265476398857,
      "grad_norm": 0.9713609543462169,
      "learning_rate": 2.8094748412496507e-06,
      "loss": 0.1087,
      "step": 22453
    },
    {
      "epoch": 0.6550557208705292,
      "grad_norm": 0.9352136934925658,
      "learning_rate": 2.8090501687258378e-06,
      "loss": 0.135,
      "step": 22454
    },
    {
      "epoch": 0.6550848941011728,
      "grad_norm": 1.009512518613979,
      "learning_rate": 2.8086255157623017e-06,
      "loss": 0.1451,
      "step": 22455
    },
    {
      "epoch": 0.6551140673318163,
      "grad_norm": 1.3412461828688422,
      "learning_rate": 2.8082008823628313e-06,
      "loss": 0.1209,
      "step": 22456
    },
    {
      "epoch": 0.6551432405624599,
      "grad_norm": 1.0134594124748277,
      "learning_rate": 2.807776268531221e-06,
      "loss": 0.1165,
      "step": 22457
    },
    {
      "epoch": 0.6551724137931034,
      "grad_norm": 0.9105265471124363,
      "learning_rate": 2.8073516742712626e-06,
      "loss": 0.1167,
      "step": 22458
    },
    {
      "epoch": 0.655201587023747,
      "grad_norm": 0.9796945647382419,
      "learning_rate": 2.8069270995867447e-06,
      "loss": 0.1277,
      "step": 22459
    },
    {
      "epoch": 0.6552307602543905,
      "grad_norm": 1.2798380530827744,
      "learning_rate": 2.8065025444814566e-06,
      "loss": 0.1201,
      "step": 22460
    },
    {
      "epoch": 0.6552599334850341,
      "grad_norm": 0.8342012696107435,
      "learning_rate": 2.8060780089591915e-06,
      "loss": 0.1418,
      "step": 22461
    },
    {
      "epoch": 0.6552891067156777,
      "grad_norm": 0.77476631915906,
      "learning_rate": 2.8056534930237367e-06,
      "loss": 0.1372,
      "step": 22462
    },
    {
      "epoch": 0.6553182799463213,
      "grad_norm": 0.7909487902491348,
      "learning_rate": 2.8052289966788838e-06,
      "loss": 0.1076,
      "step": 22463
    },
    {
      "epoch": 0.6553474531769649,
      "grad_norm": 0.6720644960359576,
      "learning_rate": 2.804804519928424e-06,
      "loss": 0.0971,
      "step": 22464
    },
    {
      "epoch": 0.6553766264076084,
      "grad_norm": 0.793948908039111,
      "learning_rate": 2.8043800627761453e-06,
      "loss": 0.111,
      "step": 22465
    },
    {
      "epoch": 0.655405799638252,
      "grad_norm": 1.0834419645421325,
      "learning_rate": 2.803955625225836e-06,
      "loss": 0.1469,
      "step": 22466
    },
    {
      "epoch": 0.6554349728688955,
      "grad_norm": 0.8105128864027233,
      "learning_rate": 2.803531207281288e-06,
      "loss": 0.1049,
      "step": 22467
    },
    {
      "epoch": 0.6554641460995391,
      "grad_norm": 0.8490429674101937,
      "learning_rate": 2.8031068089462874e-06,
      "loss": 0.1274,
      "step": 22468
    },
    {
      "epoch": 0.6554933193301826,
      "grad_norm": 0.7610165214629974,
      "learning_rate": 2.802682430224627e-06,
      "loss": 0.1364,
      "step": 22469
    },
    {
      "epoch": 0.6555224925608262,
      "grad_norm": 1.0817781727835747,
      "learning_rate": 2.802258071120091e-06,
      "loss": 0.1397,
      "step": 22470
    },
    {
      "epoch": 0.6555516657914697,
      "grad_norm": 0.8705608230901531,
      "learning_rate": 2.801833731636472e-06,
      "loss": 0.1193,
      "step": 22471
    },
    {
      "epoch": 0.6555808390221133,
      "grad_norm": 0.808086343932562,
      "learning_rate": 2.801409411777557e-06,
      "loss": 0.1137,
      "step": 22472
    },
    {
      "epoch": 0.6556100122527568,
      "grad_norm": 0.8992175199035769,
      "learning_rate": 2.800985111547132e-06,
      "loss": 0.1204,
      "step": 22473
    },
    {
      "epoch": 0.6556391854834004,
      "grad_norm": 0.7156651496369794,
      "learning_rate": 2.800560830948987e-06,
      "loss": 0.11,
      "step": 22474
    },
    {
      "epoch": 0.6556683587140439,
      "grad_norm": 0.7464135277631306,
      "learning_rate": 2.8001365699869108e-06,
      "loss": 0.1284,
      "step": 22475
    },
    {
      "epoch": 0.6556975319446876,
      "grad_norm": 1.0967097120862654,
      "learning_rate": 2.7997123286646916e-06,
      "loss": 0.1497,
      "step": 22476
    },
    {
      "epoch": 0.6557267051753312,
      "grad_norm": 0.9632295410992674,
      "learning_rate": 2.7992881069861135e-06,
      "loss": 0.1275,
      "step": 22477
    },
    {
      "epoch": 0.6557558784059747,
      "grad_norm": 0.8300706982170465,
      "learning_rate": 2.798863904954967e-06,
      "loss": 0.1543,
      "step": 22478
    },
    {
      "epoch": 0.6557850516366183,
      "grad_norm": 1.1260893811778192,
      "learning_rate": 2.798439722575038e-06,
      "loss": 0.1329,
      "step": 22479
    },
    {
      "epoch": 0.6558142248672618,
      "grad_norm": 0.8562290905610475,
      "learning_rate": 2.79801555985011e-06,
      "loss": 0.1233,
      "step": 22480
    },
    {
      "epoch": 0.6558433980979054,
      "grad_norm": 0.8569677028540854,
      "learning_rate": 2.797591416783978e-06,
      "loss": 0.1196,
      "step": 22481
    },
    {
      "epoch": 0.6558725713285489,
      "grad_norm": 0.7703821745089594,
      "learning_rate": 2.7971672933804227e-06,
      "loss": 0.133,
      "step": 22482
    },
    {
      "epoch": 0.6559017445591925,
      "grad_norm": 0.7191233824796355,
      "learning_rate": 2.796743189643231e-06,
      "loss": 0.1125,
      "step": 22483
    },
    {
      "epoch": 0.655930917789836,
      "grad_norm": 0.9049161664824638,
      "learning_rate": 2.7963191055761916e-06,
      "loss": 0.1316,
      "step": 22484
    },
    {
      "epoch": 0.6559600910204796,
      "grad_norm": 0.7079508217902415,
      "learning_rate": 2.79589504118309e-06,
      "loss": 0.098,
      "step": 22485
    },
    {
      "epoch": 0.6559892642511231,
      "grad_norm": 0.8930720409388776,
      "learning_rate": 2.7954709964677083e-06,
      "loss": 0.129,
      "step": 22486
    },
    {
      "epoch": 0.6560184374817667,
      "grad_norm": 0.802453894170231,
      "learning_rate": 2.7950469714338356e-06,
      "loss": 0.1348,
      "step": 22487
    },
    {
      "epoch": 0.6560476107124102,
      "grad_norm": 0.9234038288708448,
      "learning_rate": 2.7946229660852598e-06,
      "loss": 0.1159,
      "step": 22488
    },
    {
      "epoch": 0.6560767839430538,
      "grad_norm": 0.7178968242083876,
      "learning_rate": 2.7941989804257628e-06,
      "loss": 0.1225,
      "step": 22489
    },
    {
      "epoch": 0.6561059571736975,
      "grad_norm": 0.8047241178207227,
      "learning_rate": 2.793775014459129e-06,
      "loss": 0.1288,
      "step": 22490
    },
    {
      "epoch": 0.656135130404341,
      "grad_norm": 0.8627289103067527,
      "learning_rate": 2.7933510681891477e-06,
      "loss": 0.1203,
      "step": 22491
    },
    {
      "epoch": 0.6561643036349846,
      "grad_norm": 0.8776410857739505,
      "learning_rate": 2.792927141619599e-06,
      "loss": 0.121,
      "step": 22492
    },
    {
      "epoch": 0.6561934768656281,
      "grad_norm": 0.9019677742950784,
      "learning_rate": 2.79250323475427e-06,
      "loss": 0.1078,
      "step": 22493
    },
    {
      "epoch": 0.6562226500962717,
      "grad_norm": 0.6099422452034862,
      "learning_rate": 2.7920793475969465e-06,
      "loss": 0.1132,
      "step": 22494
    },
    {
      "epoch": 0.6562518233269152,
      "grad_norm": 1.3063584220997537,
      "learning_rate": 2.7916554801514124e-06,
      "loss": 0.1269,
      "step": 22495
    },
    {
      "epoch": 0.6562809965575588,
      "grad_norm": 0.7329641995200221,
      "learning_rate": 2.7912316324214485e-06,
      "loss": 0.1181,
      "step": 22496
    },
    {
      "epoch": 0.6563101697882023,
      "grad_norm": 0.7523293310272482,
      "learning_rate": 2.790807804410843e-06,
      "loss": 0.1166,
      "step": 22497
    },
    {
      "epoch": 0.6563393430188459,
      "grad_norm": 0.8334991656815292,
      "learning_rate": 2.790383996123377e-06,
      "loss": 0.1153,
      "step": 22498
    },
    {
      "epoch": 0.6563685162494894,
      "grad_norm": 0.8202842930485915,
      "learning_rate": 2.7899602075628366e-06,
      "loss": 0.1288,
      "step": 22499
    },
    {
      "epoch": 0.656397689480133,
      "grad_norm": 1.188380710720048,
      "learning_rate": 2.789536438733002e-06,
      "loss": 0.0935,
      "step": 22500
    },
    {
      "epoch": 0.6564268627107765,
      "grad_norm": 0.810922022589193,
      "learning_rate": 2.7891126896376603e-06,
      "loss": 0.1157,
      "step": 22501
    },
    {
      "epoch": 0.6564560359414201,
      "grad_norm": 0.7876301060910118,
      "learning_rate": 2.7886889602805926e-06,
      "loss": 0.1291,
      "step": 22502
    },
    {
      "epoch": 0.6564852091720638,
      "grad_norm": 0.7881536119215248,
      "learning_rate": 2.7882652506655807e-06,
      "loss": 0.1145,
      "step": 22503
    },
    {
      "epoch": 0.6565143824027073,
      "grad_norm": 0.8918753539973705,
      "learning_rate": 2.787841560796408e-06,
      "loss": 0.1267,
      "step": 22504
    },
    {
      "epoch": 0.6565435556333509,
      "grad_norm": 1.1072156778997373,
      "learning_rate": 2.78741789067686e-06,
      "loss": 0.1108,
      "step": 22505
    },
    {
      "epoch": 0.6565727288639944,
      "grad_norm": 0.8877621015890508,
      "learning_rate": 2.7869942403107163e-06,
      "loss": 0.1276,
      "step": 22506
    },
    {
      "epoch": 0.656601902094638,
      "grad_norm": 0.8072881168843168,
      "learning_rate": 2.7865706097017585e-06,
      "loss": 0.1032,
      "step": 22507
    },
    {
      "epoch": 0.6566310753252815,
      "grad_norm": 0.9372479949765035,
      "learning_rate": 2.7861469988537714e-06,
      "loss": 0.1358,
      "step": 22508
    },
    {
      "epoch": 0.6566602485559251,
      "grad_norm": 0.8541840180347505,
      "learning_rate": 2.7857234077705355e-06,
      "loss": 0.131,
      "step": 22509
    },
    {
      "epoch": 0.6566894217865686,
      "grad_norm": 0.7326392057265572,
      "learning_rate": 2.7852998364558287e-06,
      "loss": 0.1091,
      "step": 22510
    },
    {
      "epoch": 0.6567185950172122,
      "grad_norm": 1.046935756928821,
      "learning_rate": 2.7848762849134405e-06,
      "loss": 0.1423,
      "step": 22511
    },
    {
      "epoch": 0.6567477682478557,
      "grad_norm": 0.9414506643276448,
      "learning_rate": 2.784452753147147e-06,
      "loss": 0.14,
      "step": 22512
    },
    {
      "epoch": 0.6567769414784993,
      "grad_norm": 0.7318518189924511,
      "learning_rate": 2.7840292411607296e-06,
      "loss": 0.1243,
      "step": 22513
    },
    {
      "epoch": 0.6568061147091429,
      "grad_norm": 0.818140835759817,
      "learning_rate": 2.7836057489579714e-06,
      "loss": 0.1148,
      "step": 22514
    },
    {
      "epoch": 0.6568352879397864,
      "grad_norm": 0.9527922379739329,
      "learning_rate": 2.783182276542652e-06,
      "loss": 0.1245,
      "step": 22515
    },
    {
      "epoch": 0.65686446117043,
      "grad_norm": 0.8002737478246307,
      "learning_rate": 2.7827588239185497e-06,
      "loss": 0.123,
      "step": 22516
    },
    {
      "epoch": 0.6568936344010736,
      "grad_norm": 0.8946948698519541,
      "learning_rate": 2.7823353910894486e-06,
      "loss": 0.1199,
      "step": 22517
    },
    {
      "epoch": 0.6569228076317172,
      "grad_norm": 0.7873289502771038,
      "learning_rate": 2.7819119780591284e-06,
      "loss": 0.0966,
      "step": 22518
    },
    {
      "epoch": 0.6569519808623607,
      "grad_norm": 0.8008015819465731,
      "learning_rate": 2.7814885848313692e-06,
      "loss": 0.1136,
      "step": 22519
    },
    {
      "epoch": 0.6569811540930043,
      "grad_norm": 1.103038646793943,
      "learning_rate": 2.7810652114099483e-06,
      "loss": 0.1178,
      "step": 22520
    },
    {
      "epoch": 0.6570103273236478,
      "grad_norm": 0.9026383226586517,
      "learning_rate": 2.7806418577986494e-06,
      "loss": 0.1279,
      "step": 22521
    },
    {
      "epoch": 0.6570395005542914,
      "grad_norm": 0.8547687783967982,
      "learning_rate": 2.7802185240012485e-06,
      "loss": 0.1179,
      "step": 22522
    },
    {
      "epoch": 0.6570686737849349,
      "grad_norm": 1.0004540488073015,
      "learning_rate": 2.7797952100215263e-06,
      "loss": 0.1589,
      "step": 22523
    },
    {
      "epoch": 0.6570978470155785,
      "grad_norm": 0.8353649868482187,
      "learning_rate": 2.779371915863265e-06,
      "loss": 0.1183,
      "step": 22524
    },
    {
      "epoch": 0.657127020246222,
      "grad_norm": 0.7423876304035788,
      "learning_rate": 2.7789486415302404e-06,
      "loss": 0.1019,
      "step": 22525
    },
    {
      "epoch": 0.6571561934768656,
      "grad_norm": 0.8084602701288659,
      "learning_rate": 2.778525387026231e-06,
      "loss": 0.111,
      "step": 22526
    },
    {
      "epoch": 0.6571853667075092,
      "grad_norm": 0.823783401402058,
      "learning_rate": 2.7781021523550177e-06,
      "loss": 0.1175,
      "step": 22527
    },
    {
      "epoch": 0.6572145399381527,
      "grad_norm": 0.8878706143548062,
      "learning_rate": 2.777678937520376e-06,
      "loss": 0.1321,
      "step": 22528
    },
    {
      "epoch": 0.6572437131687963,
      "grad_norm": 0.9299055328002069,
      "learning_rate": 2.7772557425260886e-06,
      "loss": 0.114,
      "step": 22529
    },
    {
      "epoch": 0.6572728863994399,
      "grad_norm": 0.8653418937087174,
      "learning_rate": 2.7768325673759296e-06,
      "loss": 0.1359,
      "step": 22530
    },
    {
      "epoch": 0.6573020596300835,
      "grad_norm": 0.7045029678143745,
      "learning_rate": 2.7764094120736805e-06,
      "loss": 0.1325,
      "step": 22531
    },
    {
      "epoch": 0.657331232860727,
      "grad_norm": 2.7500356859860577,
      "learning_rate": 2.775986276623117e-06,
      "loss": 0.1112,
      "step": 22532
    },
    {
      "epoch": 0.6573604060913706,
      "grad_norm": 0.7649597187975548,
      "learning_rate": 2.7755631610280154e-06,
      "loss": 0.1313,
      "step": 22533
    },
    {
      "epoch": 0.6573895793220141,
      "grad_norm": 0.7479922180573809,
      "learning_rate": 2.775140065292155e-06,
      "loss": 0.1211,
      "step": 22534
    },
    {
      "epoch": 0.6574187525526577,
      "grad_norm": 0.7180944342727001,
      "learning_rate": 2.7747169894193148e-06,
      "loss": 0.1061,
      "step": 22535
    },
    {
      "epoch": 0.6574479257833012,
      "grad_norm": 0.6721148205570775,
      "learning_rate": 2.77429393341327e-06,
      "loss": 0.0963,
      "step": 22536
    },
    {
      "epoch": 0.6574770990139448,
      "grad_norm": 0.7612768261335503,
      "learning_rate": 2.7738708972777963e-06,
      "loss": 0.1313,
      "step": 22537
    },
    {
      "epoch": 0.6575062722445884,
      "grad_norm": 0.9427584560829441,
      "learning_rate": 2.7734478810166734e-06,
      "loss": 0.1453,
      "step": 22538
    },
    {
      "epoch": 0.6575354454752319,
      "grad_norm": 0.7583288904935364,
      "learning_rate": 2.773024884633676e-06,
      "loss": 0.1043,
      "step": 22539
    },
    {
      "epoch": 0.6575646187058755,
      "grad_norm": 0.9393309194213926,
      "learning_rate": 2.772601908132577e-06,
      "loss": 0.1476,
      "step": 22540
    },
    {
      "epoch": 0.657593791936519,
      "grad_norm": 0.9863844353901349,
      "learning_rate": 2.7721789515171605e-06,
      "loss": 0.1364,
      "step": 22541
    },
    {
      "epoch": 0.6576229651671626,
      "grad_norm": 1.2494265439675063,
      "learning_rate": 2.771756014791198e-06,
      "loss": 0.1439,
      "step": 22542
    },
    {
      "epoch": 0.6576521383978061,
      "grad_norm": 0.5955301698206025,
      "learning_rate": 2.7713330979584645e-06,
      "loss": 0.0968,
      "step": 22543
    },
    {
      "epoch": 0.6576813116284498,
      "grad_norm": 1.136010740585745,
      "learning_rate": 2.770910201022739e-06,
      "loss": 0.1314,
      "step": 22544
    },
    {
      "epoch": 0.6577104848590933,
      "grad_norm": 1.0458930875849077,
      "learning_rate": 2.770487323987795e-06,
      "loss": 0.1283,
      "step": 22545
    },
    {
      "epoch": 0.6577396580897369,
      "grad_norm": 1.3174283796579231,
      "learning_rate": 2.770064466857406e-06,
      "loss": 0.1136,
      "step": 22546
    },
    {
      "epoch": 0.6577688313203804,
      "grad_norm": 0.8495506250413206,
      "learning_rate": 2.769641629635349e-06,
      "loss": 0.1291,
      "step": 22547
    },
    {
      "epoch": 0.657798004551024,
      "grad_norm": 0.8198223648100067,
      "learning_rate": 2.769218812325401e-06,
      "loss": 0.1226,
      "step": 22548
    },
    {
      "epoch": 0.6578271777816675,
      "grad_norm": 0.7818460254895462,
      "learning_rate": 2.7687960149313354e-06,
      "loss": 0.128,
      "step": 22549
    },
    {
      "epoch": 0.6578563510123111,
      "grad_norm": 1.0414744926522435,
      "learning_rate": 2.7683732374569237e-06,
      "loss": 0.1172,
      "step": 22550
    },
    {
      "epoch": 0.6578855242429547,
      "grad_norm": 0.8192019799411547,
      "learning_rate": 2.7679504799059454e-06,
      "loss": 0.1012,
      "step": 22551
    },
    {
      "epoch": 0.6579146974735982,
      "grad_norm": 0.662398181258038,
      "learning_rate": 2.76752774228217e-06,
      "loss": 0.1066,
      "step": 22552
    },
    {
      "epoch": 0.6579438707042418,
      "grad_norm": 1.0746020643569365,
      "learning_rate": 2.767105024589375e-06,
      "loss": 0.1206,
      "step": 22553
    },
    {
      "epoch": 0.6579730439348853,
      "grad_norm": 0.9684590729721944,
      "learning_rate": 2.7666823268313342e-06,
      "loss": 0.1069,
      "step": 22554
    },
    {
      "epoch": 0.6580022171655289,
      "grad_norm": 1.0419397473849004,
      "learning_rate": 2.766259649011821e-06,
      "loss": 0.134,
      "step": 22555
    },
    {
      "epoch": 0.6580313903961724,
      "grad_norm": 0.8649779098765733,
      "learning_rate": 2.765836991134606e-06,
      "loss": 0.1424,
      "step": 22556
    },
    {
      "epoch": 0.6580605636268161,
      "grad_norm": 0.7707057213352337,
      "learning_rate": 2.765414353203467e-06,
      "loss": 0.086,
      "step": 22557
    },
    {
      "epoch": 0.6580897368574596,
      "grad_norm": 0.7544210102811724,
      "learning_rate": 2.7649917352221738e-06,
      "loss": 0.1119,
      "step": 22558
    },
    {
      "epoch": 0.6581189100881032,
      "grad_norm": 1.0636440454375342,
      "learning_rate": 2.764569137194503e-06,
      "loss": 0.1225,
      "step": 22559
    },
    {
      "epoch": 0.6581480833187467,
      "grad_norm": 0.7504238018266771,
      "learning_rate": 2.7641465591242224e-06,
      "loss": 0.0913,
      "step": 22560
    },
    {
      "epoch": 0.6581772565493903,
      "grad_norm": 0.8251551102768702,
      "learning_rate": 2.7637240010151103e-06,
      "loss": 0.1096,
      "step": 22561
    },
    {
      "epoch": 0.6582064297800339,
      "grad_norm": 0.8602708857579717,
      "learning_rate": 2.763301462870936e-06,
      "loss": 0.1281,
      "step": 22562
    },
    {
      "epoch": 0.6582356030106774,
      "grad_norm": 0.7887120908935958,
      "learning_rate": 2.7628789446954705e-06,
      "loss": 0.1217,
      "step": 22563
    },
    {
      "epoch": 0.658264776241321,
      "grad_norm": 0.9049183673836019,
      "learning_rate": 2.7624564464924874e-06,
      "loss": 0.1034,
      "step": 22564
    },
    {
      "epoch": 0.6582939494719645,
      "grad_norm": 0.889621781164339,
      "learning_rate": 2.7620339682657616e-06,
      "loss": 0.1307,
      "step": 22565
    },
    {
      "epoch": 0.6583231227026081,
      "grad_norm": 0.7334398805214061,
      "learning_rate": 2.761611510019062e-06,
      "loss": 0.1121,
      "step": 22566
    },
    {
      "epoch": 0.6583522959332516,
      "grad_norm": 0.7767238014675791,
      "learning_rate": 2.7611890717561584e-06,
      "loss": 0.142,
      "step": 22567
    },
    {
      "epoch": 0.6583814691638952,
      "grad_norm": 0.8985569388201397,
      "learning_rate": 2.7607666534808262e-06,
      "loss": 0.1152,
      "step": 22568
    },
    {
      "epoch": 0.6584106423945387,
      "grad_norm": 0.9604261215054245,
      "learning_rate": 2.760344255196835e-06,
      "loss": 0.1212,
      "step": 22569
    },
    {
      "epoch": 0.6584398156251823,
      "grad_norm": 1.0756881300950472,
      "learning_rate": 2.7599218769079518e-06,
      "loss": 0.1533,
      "step": 22570
    },
    {
      "epoch": 0.6584689888558259,
      "grad_norm": 0.6519123585154943,
      "learning_rate": 2.759499518617955e-06,
      "loss": 0.1068,
      "step": 22571
    },
    {
      "epoch": 0.6584981620864695,
      "grad_norm": 1.0883905142442654,
      "learning_rate": 2.759077180330612e-06,
      "loss": 0.1266,
      "step": 22572
    },
    {
      "epoch": 0.658527335317113,
      "grad_norm": 1.0998632526864929,
      "learning_rate": 2.758654862049691e-06,
      "loss": 0.1146,
      "step": 22573
    },
    {
      "epoch": 0.6585565085477566,
      "grad_norm": 0.9522597459448454,
      "learning_rate": 2.758232563778966e-06,
      "loss": 0.1441,
      "step": 22574
    },
    {
      "epoch": 0.6585856817784002,
      "grad_norm": 0.8295697964866922,
      "learning_rate": 2.7578102855222056e-06,
      "loss": 0.1267,
      "step": 22575
    },
    {
      "epoch": 0.6586148550090437,
      "grad_norm": 0.8743768041029046,
      "learning_rate": 2.757388027283178e-06,
      "loss": 0.1113,
      "step": 22576
    },
    {
      "epoch": 0.6586440282396873,
      "grad_norm": 1.2465440196922168,
      "learning_rate": 2.7569657890656543e-06,
      "loss": 0.131,
      "step": 22577
    },
    {
      "epoch": 0.6586732014703308,
      "grad_norm": 0.8671282291886849,
      "learning_rate": 2.7565435708734067e-06,
      "loss": 0.087,
      "step": 22578
    },
    {
      "epoch": 0.6587023747009744,
      "grad_norm": 1.0194614465372704,
      "learning_rate": 2.7561213727102026e-06,
      "loss": 0.114,
      "step": 22579
    },
    {
      "epoch": 0.6587315479316179,
      "grad_norm": 1.0730631218781865,
      "learning_rate": 2.7556991945798097e-06,
      "loss": 0.1359,
      "step": 22580
    },
    {
      "epoch": 0.6587607211622615,
      "grad_norm": 1.0049835996455878,
      "learning_rate": 2.755277036486e-06,
      "loss": 0.1217,
      "step": 22581
    },
    {
      "epoch": 0.658789894392905,
      "grad_norm": 0.9253939123222444,
      "learning_rate": 2.7548548984325392e-06,
      "loss": 0.1341,
      "step": 22582
    },
    {
      "epoch": 0.6588190676235486,
      "grad_norm": 0.8551010319335021,
      "learning_rate": 2.754432780423198e-06,
      "loss": 0.1269,
      "step": 22583
    },
    {
      "epoch": 0.6588482408541922,
      "grad_norm": 1.2062317792160597,
      "learning_rate": 2.7540106824617467e-06,
      "loss": 0.1574,
      "step": 22584
    },
    {
      "epoch": 0.6588774140848358,
      "grad_norm": 1.0425488044374756,
      "learning_rate": 2.753588604551952e-06,
      "loss": 0.1338,
      "step": 22585
    },
    {
      "epoch": 0.6589065873154794,
      "grad_norm": 0.6753586540234627,
      "learning_rate": 2.75316654669758e-06,
      "loss": 0.1133,
      "step": 22586
    },
    {
      "epoch": 0.6589357605461229,
      "grad_norm": 0.8399699469968134,
      "learning_rate": 2.752744508902403e-06,
      "loss": 0.1196,
      "step": 22587
    },
    {
      "epoch": 0.6589649337767665,
      "grad_norm": 1.1790994215607702,
      "learning_rate": 2.752322491170184e-06,
      "loss": 0.1364,
      "step": 22588
    },
    {
      "epoch": 0.65899410700741,
      "grad_norm": 0.8149822434798201,
      "learning_rate": 2.7519004935046955e-06,
      "loss": 0.1149,
      "step": 22589
    },
    {
      "epoch": 0.6590232802380536,
      "grad_norm": 1.0824127306644329,
      "learning_rate": 2.7514785159097006e-06,
      "loss": 0.1208,
      "step": 22590
    },
    {
      "epoch": 0.6590524534686971,
      "grad_norm": 0.8803409266018206,
      "learning_rate": 2.751056558388971e-06,
      "loss": 0.1272,
      "step": 22591
    },
    {
      "epoch": 0.6590816266993407,
      "grad_norm": 0.8494313396501025,
      "learning_rate": 2.7506346209462715e-06,
      "loss": 0.1227,
      "step": 22592
    },
    {
      "epoch": 0.6591107999299842,
      "grad_norm": 0.7793815655046239,
      "learning_rate": 2.7502127035853666e-06,
      "loss": 0.1403,
      "step": 22593
    },
    {
      "epoch": 0.6591399731606278,
      "grad_norm": 0.7266458890049697,
      "learning_rate": 2.7497908063100266e-06,
      "loss": 0.1105,
      "step": 22594
    },
    {
      "epoch": 0.6591691463912713,
      "grad_norm": 0.7358696537864176,
      "learning_rate": 2.7493689291240185e-06,
      "loss": 0.1348,
      "step": 22595
    },
    {
      "epoch": 0.6591983196219149,
      "grad_norm": 0.9577455005381101,
      "learning_rate": 2.7489470720311074e-06,
      "loss": 0.117,
      "step": 22596
    },
    {
      "epoch": 0.6592274928525584,
      "grad_norm": 0.8449180607899556,
      "learning_rate": 2.7485252350350576e-06,
      "loss": 0.1227,
      "step": 22597
    },
    {
      "epoch": 0.6592566660832021,
      "grad_norm": 0.8158164345241355,
      "learning_rate": 2.748103418139639e-06,
      "loss": 0.1136,
      "step": 22598
    },
    {
      "epoch": 0.6592858393138457,
      "grad_norm": 0.8134861595351345,
      "learning_rate": 2.747681621348615e-06,
      "loss": 0.1282,
      "step": 22599
    },
    {
      "epoch": 0.6593150125444892,
      "grad_norm": 1.0964996014100363,
      "learning_rate": 2.7472598446657484e-06,
      "loss": 0.1115,
      "step": 22600
    },
    {
      "epoch": 0.6593441857751328,
      "grad_norm": 0.828303490228993,
      "learning_rate": 2.746838088094812e-06,
      "loss": 0.116,
      "step": 22601
    },
    {
      "epoch": 0.6593733590057763,
      "grad_norm": 0.7322896981011644,
      "learning_rate": 2.746416351639567e-06,
      "loss": 0.1378,
      "step": 22602
    },
    {
      "epoch": 0.6594025322364199,
      "grad_norm": 0.7766145382743532,
      "learning_rate": 2.7459946353037775e-06,
      "loss": 0.1154,
      "step": 22603
    },
    {
      "epoch": 0.6594317054670634,
      "grad_norm": 0.751708619639476,
      "learning_rate": 2.7455729390912113e-06,
      "loss": 0.1155,
      "step": 22604
    },
    {
      "epoch": 0.659460878697707,
      "grad_norm": 0.6654296957356924,
      "learning_rate": 2.7451512630056323e-06,
      "loss": 0.1212,
      "step": 22605
    },
    {
      "epoch": 0.6594900519283505,
      "grad_norm": 0.7198116486010239,
      "learning_rate": 2.7447296070508017e-06,
      "loss": 0.1178,
      "step": 22606
    },
    {
      "epoch": 0.6595192251589941,
      "grad_norm": 1.5396030220829788,
      "learning_rate": 2.744307971230487e-06,
      "loss": 0.1119,
      "step": 22607
    },
    {
      "epoch": 0.6595483983896376,
      "grad_norm": 0.7797775196398998,
      "learning_rate": 2.7438863555484545e-06,
      "loss": 0.1219,
      "step": 22608
    },
    {
      "epoch": 0.6595775716202812,
      "grad_norm": 0.7060434245789803,
      "learning_rate": 2.7434647600084662e-06,
      "loss": 0.1189,
      "step": 22609
    },
    {
      "epoch": 0.6596067448509247,
      "grad_norm": 0.7733708686479651,
      "learning_rate": 2.7430431846142837e-06,
      "loss": 0.1101,
      "step": 22610
    },
    {
      "epoch": 0.6596359180815684,
      "grad_norm": 0.6482630170163397,
      "learning_rate": 2.742621629369675e-06,
      "loss": 0.1187,
      "step": 22611
    },
    {
      "epoch": 0.659665091312212,
      "grad_norm": 0.9201612115909791,
      "learning_rate": 2.742200094278399e-06,
      "loss": 0.125,
      "step": 22612
    },
    {
      "epoch": 0.6596942645428555,
      "grad_norm": 0.8470097144589287,
      "learning_rate": 2.741778579344222e-06,
      "loss": 0.1421,
      "step": 22613
    },
    {
      "epoch": 0.6597234377734991,
      "grad_norm": 0.7590184952460127,
      "learning_rate": 2.7413570845709086e-06,
      "loss": 0.1166,
      "step": 22614
    },
    {
      "epoch": 0.6597526110041426,
      "grad_norm": 0.8141803945777077,
      "learning_rate": 2.74093560996222e-06,
      "loss": 0.1323,
      "step": 22615
    },
    {
      "epoch": 0.6597817842347862,
      "grad_norm": 0.8725507264487203,
      "learning_rate": 2.740514155521917e-06,
      "loss": 0.1274,
      "step": 22616
    },
    {
      "epoch": 0.6598109574654297,
      "grad_norm": 0.9987485743600544,
      "learning_rate": 2.7400927212537643e-06,
      "loss": 0.139,
      "step": 22617
    },
    {
      "epoch": 0.6598401306960733,
      "grad_norm": 0.751603426942623,
      "learning_rate": 2.7396713071615262e-06,
      "loss": 0.1146,
      "step": 22618
    },
    {
      "epoch": 0.6598693039267168,
      "grad_norm": 0.7431142504141429,
      "learning_rate": 2.739249913248963e-06,
      "loss": 0.1342,
      "step": 22619
    },
    {
      "epoch": 0.6598984771573604,
      "grad_norm": 0.9901582677371695,
      "learning_rate": 2.7388285395198354e-06,
      "loss": 0.1384,
      "step": 22620
    },
    {
      "epoch": 0.6599276503880039,
      "grad_norm": 0.6486016174974297,
      "learning_rate": 2.738407185977908e-06,
      "loss": 0.1129,
      "step": 22621
    },
    {
      "epoch": 0.6599568236186475,
      "grad_norm": 0.7825818162792993,
      "learning_rate": 2.7379858526269422e-06,
      "loss": 0.105,
      "step": 22622
    },
    {
      "epoch": 0.659985996849291,
      "grad_norm": 0.7434131882275584,
      "learning_rate": 2.7375645394706963e-06,
      "loss": 0.119,
      "step": 22623
    },
    {
      "epoch": 0.6600151700799346,
      "grad_norm": 0.8849720606460794,
      "learning_rate": 2.7371432465129343e-06,
      "loss": 0.134,
      "step": 22624
    },
    {
      "epoch": 0.6600443433105783,
      "grad_norm": 0.749407026158925,
      "learning_rate": 2.736721973757419e-06,
      "loss": 0.0968,
      "step": 22625
    },
    {
      "epoch": 0.6600735165412218,
      "grad_norm": 0.7722278356763757,
      "learning_rate": 2.7363007212079097e-06,
      "loss": 0.1402,
      "step": 22626
    },
    {
      "epoch": 0.6601026897718654,
      "grad_norm": 0.9857979943000369,
      "learning_rate": 2.735879488868165e-06,
      "loss": 0.1364,
      "step": 22627
    },
    {
      "epoch": 0.6601318630025089,
      "grad_norm": 0.9618799872613633,
      "learning_rate": 2.7354582767419498e-06,
      "loss": 0.1221,
      "step": 22628
    },
    {
      "epoch": 0.6601610362331525,
      "grad_norm": 0.7715563866110873,
      "learning_rate": 2.7350370848330204e-06,
      "loss": 0.1237,
      "step": 22629
    },
    {
      "epoch": 0.660190209463796,
      "grad_norm": 0.9087764788045213,
      "learning_rate": 2.7346159131451396e-06,
      "loss": 0.1066,
      "step": 22630
    },
    {
      "epoch": 0.6602193826944396,
      "grad_norm": 1.2365590407456843,
      "learning_rate": 2.7341947616820686e-06,
      "loss": 0.1238,
      "step": 22631
    },
    {
      "epoch": 0.6602485559250831,
      "grad_norm": 0.8406230466167327,
      "learning_rate": 2.7337736304475665e-06,
      "loss": 0.1078,
      "step": 22632
    },
    {
      "epoch": 0.6602777291557267,
      "grad_norm": 0.6884332220566729,
      "learning_rate": 2.7333525194453904e-06,
      "loss": 0.1041,
      "step": 22633
    },
    {
      "epoch": 0.6603069023863702,
      "grad_norm": 0.8460422483031798,
      "learning_rate": 2.732931428679303e-06,
      "loss": 0.1381,
      "step": 22634
    },
    {
      "epoch": 0.6603360756170138,
      "grad_norm": 0.7864247637944488,
      "learning_rate": 2.7325103581530616e-06,
      "loss": 0.1057,
      "step": 22635
    },
    {
      "epoch": 0.6603652488476573,
      "grad_norm": 0.9469238268633753,
      "learning_rate": 2.732089307870428e-06,
      "loss": 0.1331,
      "step": 22636
    },
    {
      "epoch": 0.6603944220783009,
      "grad_norm": 0.8255544281769466,
      "learning_rate": 2.7316682778351576e-06,
      "loss": 0.1067,
      "step": 22637
    },
    {
      "epoch": 0.6604235953089446,
      "grad_norm": 0.7599272483817766,
      "learning_rate": 2.731247268051014e-06,
      "loss": 0.1138,
      "step": 22638
    },
    {
      "epoch": 0.6604527685395881,
      "grad_norm": 1.4022651437557578,
      "learning_rate": 2.730826278521753e-06,
      "loss": 0.1035,
      "step": 22639
    },
    {
      "epoch": 0.6604819417702317,
      "grad_norm": 0.8874944794084177,
      "learning_rate": 2.7304053092511307e-06,
      "loss": 0.098,
      "step": 22640
    },
    {
      "epoch": 0.6605111150008752,
      "grad_norm": 0.7912420615086282,
      "learning_rate": 2.7299843602429076e-06,
      "loss": 0.1167,
      "step": 22641
    },
    {
      "epoch": 0.6605402882315188,
      "grad_norm": 0.899065096134132,
      "learning_rate": 2.7295634315008456e-06,
      "loss": 0.1204,
      "step": 22642
    },
    {
      "epoch": 0.6605694614621623,
      "grad_norm": 1.2234695633380441,
      "learning_rate": 2.7291425230286962e-06,
      "loss": 0.1114,
      "step": 22643
    },
    {
      "epoch": 0.6605986346928059,
      "grad_norm": 0.7589885649474262,
      "learning_rate": 2.7287216348302225e-06,
      "loss": 0.1191,
      "step": 22644
    },
    {
      "epoch": 0.6606278079234494,
      "grad_norm": 1.024785208722725,
      "learning_rate": 2.7283007669091804e-06,
      "loss": 0.123,
      "step": 22645
    },
    {
      "epoch": 0.660656981154093,
      "grad_norm": 0.7557866587988469,
      "learning_rate": 2.727879919269324e-06,
      "loss": 0.1158,
      "step": 22646
    },
    {
      "epoch": 0.6606861543847365,
      "grad_norm": 0.8675824733934782,
      "learning_rate": 2.727459091914414e-06,
      "loss": 0.1318,
      "step": 22647
    },
    {
      "epoch": 0.6607153276153801,
      "grad_norm": 0.9281154785377084,
      "learning_rate": 2.727038284848208e-06,
      "loss": 0.1035,
      "step": 22648
    },
    {
      "epoch": 0.6607445008460237,
      "grad_norm": 0.973165458035548,
      "learning_rate": 2.726617498074462e-06,
      "loss": 0.1208,
      "step": 22649
    },
    {
      "epoch": 0.6607736740766672,
      "grad_norm": 0.7990752633850764,
      "learning_rate": 2.7261967315969307e-06,
      "loss": 0.1154,
      "step": 22650
    },
    {
      "epoch": 0.6608028473073108,
      "grad_norm": 0.9689587981984759,
      "learning_rate": 2.7257759854193735e-06,
      "loss": 0.1187,
      "step": 22651
    },
    {
      "epoch": 0.6608320205379544,
      "grad_norm": 1.0824105336467058,
      "learning_rate": 2.7253552595455458e-06,
      "loss": 0.1379,
      "step": 22652
    },
    {
      "epoch": 0.660861193768598,
      "grad_norm": 0.976149906546021,
      "learning_rate": 2.724934553979201e-06,
      "loss": 0.1237,
      "step": 22653
    },
    {
      "epoch": 0.6608903669992415,
      "grad_norm": 0.8239511261071738,
      "learning_rate": 2.724513868724098e-06,
      "loss": 0.1063,
      "step": 22654
    },
    {
      "epoch": 0.6609195402298851,
      "grad_norm": 0.7308597033836158,
      "learning_rate": 2.724093203783993e-06,
      "loss": 0.1142,
      "step": 22655
    },
    {
      "epoch": 0.6609487134605286,
      "grad_norm": 1.0008807120680314,
      "learning_rate": 2.7236725591626413e-06,
      "loss": 0.0989,
      "step": 22656
    },
    {
      "epoch": 0.6609778866911722,
      "grad_norm": 0.8664122835198478,
      "learning_rate": 2.7232519348637955e-06,
      "loss": 0.1116,
      "step": 22657
    },
    {
      "epoch": 0.6610070599218157,
      "grad_norm": 0.7248420383427195,
      "learning_rate": 2.7228313308912145e-06,
      "loss": 0.1204,
      "step": 22658
    },
    {
      "epoch": 0.6610362331524593,
      "grad_norm": 0.7085192623477988,
      "learning_rate": 2.7224107472486504e-06,
      "loss": 0.133,
      "step": 22659
    },
    {
      "epoch": 0.6610654063831028,
      "grad_norm": 0.9273281691664607,
      "learning_rate": 2.721990183939859e-06,
      "loss": 0.1119,
      "step": 22660
    },
    {
      "epoch": 0.6610945796137464,
      "grad_norm": 0.7399050138849338,
      "learning_rate": 2.7215696409685977e-06,
      "loss": 0.1035,
      "step": 22661
    },
    {
      "epoch": 0.66112375284439,
      "grad_norm": 0.7656988761837674,
      "learning_rate": 2.7211491183386185e-06,
      "loss": 0.1336,
      "step": 22662
    },
    {
      "epoch": 0.6611529260750335,
      "grad_norm": 1.0817766966059668,
      "learning_rate": 2.720728616053674e-06,
      "loss": 0.1288,
      "step": 22663
    },
    {
      "epoch": 0.6611820993056771,
      "grad_norm": 0.9326493487858707,
      "learning_rate": 2.7203081341175225e-06,
      "loss": 0.1007,
      "step": 22664
    },
    {
      "epoch": 0.6612112725363207,
      "grad_norm": 0.8270822126515731,
      "learning_rate": 2.7198876725339143e-06,
      "loss": 0.1072,
      "step": 22665
    },
    {
      "epoch": 0.6612404457669643,
      "grad_norm": 0.9552120809115704,
      "learning_rate": 2.719467231306605e-06,
      "loss": 0.143,
      "step": 22666
    },
    {
      "epoch": 0.6612696189976078,
      "grad_norm": 1.0792521373628325,
      "learning_rate": 2.7190468104393474e-06,
      "loss": 0.1209,
      "step": 22667
    },
    {
      "epoch": 0.6612987922282514,
      "grad_norm": 0.8389385598404163,
      "learning_rate": 2.7186264099358965e-06,
      "loss": 0.0945,
      "step": 22668
    },
    {
      "epoch": 0.6613279654588949,
      "grad_norm": 0.8101273566871517,
      "learning_rate": 2.7182060298000047e-06,
      "loss": 0.1457,
      "step": 22669
    },
    {
      "epoch": 0.6613571386895385,
      "grad_norm": 0.8277979793603644,
      "learning_rate": 2.7177856700354233e-06,
      "loss": 0.1285,
      "step": 22670
    },
    {
      "epoch": 0.661386311920182,
      "grad_norm": 1.0234917416377036,
      "learning_rate": 2.7173653306459056e-06,
      "loss": 0.1152,
      "step": 22671
    },
    {
      "epoch": 0.6614154851508256,
      "grad_norm": 0.6416059728778798,
      "learning_rate": 2.716945011635208e-06,
      "loss": 0.0961,
      "step": 22672
    },
    {
      "epoch": 0.6614446583814692,
      "grad_norm": 0.7594503786367398,
      "learning_rate": 2.716524713007078e-06,
      "loss": 0.1093,
      "step": 22673
    },
    {
      "epoch": 0.6614738316121127,
      "grad_norm": 0.7648313826222802,
      "learning_rate": 2.716104434765273e-06,
      "loss": 0.1218,
      "step": 22674
    },
    {
      "epoch": 0.6615030048427563,
      "grad_norm": 0.6918303444233517,
      "learning_rate": 2.715684176913542e-06,
      "loss": 0.1183,
      "step": 22675
    },
    {
      "epoch": 0.6615321780733998,
      "grad_norm": 0.8202650141884076,
      "learning_rate": 2.7152639394556345e-06,
      "loss": 0.1297,
      "step": 22676
    },
    {
      "epoch": 0.6615613513040434,
      "grad_norm": 0.8170915936510572,
      "learning_rate": 2.7148437223953063e-06,
      "loss": 0.1175,
      "step": 22677
    },
    {
      "epoch": 0.6615905245346869,
      "grad_norm": 1.0014751947777412,
      "learning_rate": 2.7144235257363095e-06,
      "loss": 0.1346,
      "step": 22678
    },
    {
      "epoch": 0.6616196977653306,
      "grad_norm": 1.1731135235685226,
      "learning_rate": 2.7140033494823937e-06,
      "loss": 0.1249,
      "step": 22679
    },
    {
      "epoch": 0.6616488709959741,
      "grad_norm": 0.813509199094576,
      "learning_rate": 2.713583193637308e-06,
      "loss": 0.1167,
      "step": 22680
    },
    {
      "epoch": 0.6616780442266177,
      "grad_norm": 0.9376931570758963,
      "learning_rate": 2.713163058204808e-06,
      "loss": 0.118,
      "step": 22681
    },
    {
      "epoch": 0.6617072174572612,
      "grad_norm": 0.9350101979027772,
      "learning_rate": 2.712742943188642e-06,
      "loss": 0.1092,
      "step": 22682
    },
    {
      "epoch": 0.6617363906879048,
      "grad_norm": 0.7997215927470465,
      "learning_rate": 2.7123228485925603e-06,
      "loss": 0.1516,
      "step": 22683
    },
    {
      "epoch": 0.6617655639185483,
      "grad_norm": 1.004204197024303,
      "learning_rate": 2.7119027744203125e-06,
      "loss": 0.1301,
      "step": 22684
    },
    {
      "epoch": 0.6617947371491919,
      "grad_norm": 0.8437709970552765,
      "learning_rate": 2.7114827206756534e-06,
      "loss": 0.1384,
      "step": 22685
    },
    {
      "epoch": 0.6618239103798355,
      "grad_norm": 0.7818249507810892,
      "learning_rate": 2.71106268736233e-06,
      "loss": 0.1193,
      "step": 22686
    },
    {
      "epoch": 0.661853083610479,
      "grad_norm": 0.8459262924633701,
      "learning_rate": 2.7106426744840903e-06,
      "loss": 0.1061,
      "step": 22687
    },
    {
      "epoch": 0.6618822568411226,
      "grad_norm": 0.7941108554831526,
      "learning_rate": 2.710222682044689e-06,
      "loss": 0.1364,
      "step": 22688
    },
    {
      "epoch": 0.6619114300717661,
      "grad_norm": 0.8446789782799237,
      "learning_rate": 2.70980271004787e-06,
      "loss": 0.1131,
      "step": 22689
    },
    {
      "epoch": 0.6619406033024097,
      "grad_norm": 0.7177839383770583,
      "learning_rate": 2.7093827584973864e-06,
      "loss": 0.1033,
      "step": 22690
    },
    {
      "epoch": 0.6619697765330532,
      "grad_norm": 0.9387945384857022,
      "learning_rate": 2.708962827396988e-06,
      "loss": 0.1453,
      "step": 22691
    },
    {
      "epoch": 0.6619989497636969,
      "grad_norm": 0.8395679812808373,
      "learning_rate": 2.7085429167504227e-06,
      "loss": 0.1007,
      "step": 22692
    },
    {
      "epoch": 0.6620281229943404,
      "grad_norm": 0.7686783234017451,
      "learning_rate": 2.708123026561438e-06,
      "loss": 0.1278,
      "step": 22693
    },
    {
      "epoch": 0.662057296224984,
      "grad_norm": 0.8732786275286675,
      "learning_rate": 2.7077031568337853e-06,
      "loss": 0.1374,
      "step": 22694
    },
    {
      "epoch": 0.6620864694556275,
      "grad_norm": 0.9427103113246084,
      "learning_rate": 2.7072833075712102e-06,
      "loss": 0.1275,
      "step": 22695
    },
    {
      "epoch": 0.6621156426862711,
      "grad_norm": 0.7449211040189512,
      "learning_rate": 2.7068634787774637e-06,
      "loss": 0.1084,
      "step": 22696
    },
    {
      "epoch": 0.6621448159169147,
      "grad_norm": 1.0419258901937143,
      "learning_rate": 2.7064436704562906e-06,
      "loss": 0.1332,
      "step": 22697
    },
    {
      "epoch": 0.6621739891475582,
      "grad_norm": 0.6996777628115296,
      "learning_rate": 2.706023882611443e-06,
      "loss": 0.1061,
      "step": 22698
    },
    {
      "epoch": 0.6622031623782018,
      "grad_norm": 1.0122235070797678,
      "learning_rate": 2.705604115246667e-06,
      "loss": 0.1055,
      "step": 22699
    },
    {
      "epoch": 0.6622323356088453,
      "grad_norm": 0.7493860508219115,
      "learning_rate": 2.7051843683657073e-06,
      "loss": 0.1146,
      "step": 22700
    },
    {
      "epoch": 0.6622615088394889,
      "grad_norm": 0.9118083070252305,
      "learning_rate": 2.704764641972314e-06,
      "loss": 0.1298,
      "step": 22701
    },
    {
      "epoch": 0.6622906820701324,
      "grad_norm": 0.7962610988844777,
      "learning_rate": 2.7043449360702356e-06,
      "loss": 0.0997,
      "step": 22702
    },
    {
      "epoch": 0.662319855300776,
      "grad_norm": 0.8788015974195829,
      "learning_rate": 2.703925250663216e-06,
      "loss": 0.1139,
      "step": 22703
    },
    {
      "epoch": 0.6623490285314195,
      "grad_norm": 0.8997812162323178,
      "learning_rate": 2.7035055857550056e-06,
      "loss": 0.1232,
      "step": 22704
    },
    {
      "epoch": 0.6623782017620631,
      "grad_norm": 0.7622724133070561,
      "learning_rate": 2.703085941349349e-06,
      "loss": 0.106,
      "step": 22705
    },
    {
      "epoch": 0.6624073749927067,
      "grad_norm": 0.715834222217085,
      "learning_rate": 2.702666317449991e-06,
      "loss": 0.1294,
      "step": 22706
    },
    {
      "epoch": 0.6624365482233503,
      "grad_norm": 0.863459359701019,
      "learning_rate": 2.70224671406068e-06,
      "loss": 0.1324,
      "step": 22707
    },
    {
      "epoch": 0.6624657214539938,
      "grad_norm": 1.007169623792825,
      "learning_rate": 2.701827131185163e-06,
      "loss": 0.1229,
      "step": 22708
    },
    {
      "epoch": 0.6624948946846374,
      "grad_norm": 0.9935501999166708,
      "learning_rate": 2.7014075688271857e-06,
      "loss": 0.1311,
      "step": 22709
    },
    {
      "epoch": 0.662524067915281,
      "grad_norm": 0.8176213043713191,
      "learning_rate": 2.70098802699049e-06,
      "loss": 0.1334,
      "step": 22710
    },
    {
      "epoch": 0.6625532411459245,
      "grad_norm": 0.8131923698562828,
      "learning_rate": 2.7005685056788266e-06,
      "loss": 0.1098,
      "step": 22711
    },
    {
      "epoch": 0.6625824143765681,
      "grad_norm": 0.6752129161337445,
      "learning_rate": 2.700149004895939e-06,
      "loss": 0.1169,
      "step": 22712
    },
    {
      "epoch": 0.6626115876072116,
      "grad_norm": 1.3347273643967854,
      "learning_rate": 2.69972952464557e-06,
      "loss": 0.1615,
      "step": 22713
    },
    {
      "epoch": 0.6626407608378552,
      "grad_norm": 0.9448109125956424,
      "learning_rate": 2.6993100649314663e-06,
      "loss": 0.1235,
      "step": 22714
    },
    {
      "epoch": 0.6626699340684987,
      "grad_norm": 0.7656346529520892,
      "learning_rate": 2.6988906257573757e-06,
      "loss": 0.1326,
      "step": 22715
    },
    {
      "epoch": 0.6626991072991423,
      "grad_norm": 0.9833753913203368,
      "learning_rate": 2.6984712071270396e-06,
      "loss": 0.1242,
      "step": 22716
    },
    {
      "epoch": 0.6627282805297858,
      "grad_norm": 1.089224248732522,
      "learning_rate": 2.6980518090442016e-06,
      "loss": 0.1582,
      "step": 22717
    },
    {
      "epoch": 0.6627574537604294,
      "grad_norm": 0.7869744316955992,
      "learning_rate": 2.697632431512609e-06,
      "loss": 0.119,
      "step": 22718
    },
    {
      "epoch": 0.662786626991073,
      "grad_norm": 0.8584606434500435,
      "learning_rate": 2.6972130745360033e-06,
      "loss": 0.1059,
      "step": 22719
    },
    {
      "epoch": 0.6628158002217166,
      "grad_norm": 0.8352241109709454,
      "learning_rate": 2.696793738118129e-06,
      "loss": 0.1352,
      "step": 22720
    },
    {
      "epoch": 0.6628449734523602,
      "grad_norm": 0.9355569458030719,
      "learning_rate": 2.6963744222627326e-06,
      "loss": 0.1433,
      "step": 22721
    },
    {
      "epoch": 0.6628741466830037,
      "grad_norm": 0.7610063806664835,
      "learning_rate": 2.6959551269735553e-06,
      "loss": 0.112,
      "step": 22722
    },
    {
      "epoch": 0.6629033199136473,
      "grad_norm": 0.8125572547701,
      "learning_rate": 2.6955358522543385e-06,
      "loss": 0.1512,
      "step": 22723
    },
    {
      "epoch": 0.6629324931442908,
      "grad_norm": 0.8369608260762295,
      "learning_rate": 2.6951165981088303e-06,
      "loss": 0.1122,
      "step": 22724
    },
    {
      "epoch": 0.6629616663749344,
      "grad_norm": 0.7809482849014473,
      "learning_rate": 2.6946973645407674e-06,
      "loss": 0.1174,
      "step": 22725
    },
    {
      "epoch": 0.6629908396055779,
      "grad_norm": 0.8024793983428733,
      "learning_rate": 2.6942781515538996e-06,
      "loss": 0.118,
      "step": 22726
    },
    {
      "epoch": 0.6630200128362215,
      "grad_norm": 0.7561825368964618,
      "learning_rate": 2.6938589591519624e-06,
      "loss": 0.109,
      "step": 22727
    },
    {
      "epoch": 0.663049186066865,
      "grad_norm": 0.8774031918796692,
      "learning_rate": 2.693439787338705e-06,
      "loss": 0.1175,
      "step": 22728
    },
    {
      "epoch": 0.6630783592975086,
      "grad_norm": 0.7858477310656956,
      "learning_rate": 2.693020636117867e-06,
      "loss": 0.1382,
      "step": 22729
    },
    {
      "epoch": 0.6631075325281521,
      "grad_norm": 0.8101829820640726,
      "learning_rate": 2.6926015054931876e-06,
      "loss": 0.1137,
      "step": 22730
    },
    {
      "epoch": 0.6631367057587957,
      "grad_norm": 0.949393329970762,
      "learning_rate": 2.6921823954684105e-06,
      "loss": 0.1251,
      "step": 22731
    },
    {
      "epoch": 0.6631658789894392,
      "grad_norm": 0.7565575831085826,
      "learning_rate": 2.691763306047281e-06,
      "loss": 0.0978,
      "step": 22732
    },
    {
      "epoch": 0.6631950522200829,
      "grad_norm": 0.8482425123804851,
      "learning_rate": 2.6913442372335353e-06,
      "loss": 0.1201,
      "step": 22733
    },
    {
      "epoch": 0.6632242254507265,
      "grad_norm": 1.0875602580466324,
      "learning_rate": 2.6909251890309185e-06,
      "loss": 0.1484,
      "step": 22734
    },
    {
      "epoch": 0.66325339868137,
      "grad_norm": 0.8051571625003154,
      "learning_rate": 2.6905061614431716e-06,
      "loss": 0.116,
      "step": 22735
    },
    {
      "epoch": 0.6632825719120136,
      "grad_norm": 0.897759965070597,
      "learning_rate": 2.6900871544740315e-06,
      "loss": 0.1321,
      "step": 22736
    },
    {
      "epoch": 0.6633117451426571,
      "grad_norm": 0.9536557279590591,
      "learning_rate": 2.6896681681272417e-06,
      "loss": 0.1246,
      "step": 22737
    },
    {
      "epoch": 0.6633409183733007,
      "grad_norm": 0.7645782420215563,
      "learning_rate": 2.6892492024065453e-06,
      "loss": 0.1268,
      "step": 22738
    },
    {
      "epoch": 0.6633700916039442,
      "grad_norm": 0.7491515181046098,
      "learning_rate": 2.688830257315681e-06,
      "loss": 0.1105,
      "step": 22739
    },
    {
      "epoch": 0.6633992648345878,
      "grad_norm": 0.921236417838504,
      "learning_rate": 2.688411332858386e-06,
      "loss": 0.1299,
      "step": 22740
    },
    {
      "epoch": 0.6634284380652313,
      "grad_norm": 0.9820780602449843,
      "learning_rate": 2.687992429038404e-06,
      "loss": 0.1477,
      "step": 22741
    },
    {
      "epoch": 0.6634576112958749,
      "grad_norm": 0.9562363784714465,
      "learning_rate": 2.687573545859475e-06,
      "loss": 0.1301,
      "step": 22742
    },
    {
      "epoch": 0.6634867845265184,
      "grad_norm": 0.7733876220060898,
      "learning_rate": 2.6871546833253347e-06,
      "loss": 0.1111,
      "step": 22743
    },
    {
      "epoch": 0.663515957757162,
      "grad_norm": 0.7413071223379776,
      "learning_rate": 2.686735841439725e-06,
      "loss": 0.12,
      "step": 22744
    },
    {
      "epoch": 0.6635451309878055,
      "grad_norm": 1.0183380551511207,
      "learning_rate": 2.6863170202063884e-06,
      "loss": 0.115,
      "step": 22745
    },
    {
      "epoch": 0.6635743042184491,
      "grad_norm": 0.786501491655702,
      "learning_rate": 2.68589821962906e-06,
      "loss": 0.098,
      "step": 22746
    },
    {
      "epoch": 0.6636034774490928,
      "grad_norm": 0.8241351984514618,
      "learning_rate": 2.6854794397114785e-06,
      "loss": 0.1419,
      "step": 22747
    },
    {
      "epoch": 0.6636326506797363,
      "grad_norm": 0.7353921423165194,
      "learning_rate": 2.685060680457386e-06,
      "loss": 0.1212,
      "step": 22748
    },
    {
      "epoch": 0.6636618239103799,
      "grad_norm": 1.0368551549822878,
      "learning_rate": 2.684641941870517e-06,
      "loss": 0.1134,
      "step": 22749
    },
    {
      "epoch": 0.6636909971410234,
      "grad_norm": 0.767425619343563,
      "learning_rate": 2.6842232239546118e-06,
      "loss": 0.1149,
      "step": 22750
    },
    {
      "epoch": 0.663720170371667,
      "grad_norm": 0.6842819277001925,
      "learning_rate": 2.6838045267134115e-06,
      "loss": 0.1067,
      "step": 22751
    },
    {
      "epoch": 0.6637493436023105,
      "grad_norm": 0.7447247423431499,
      "learning_rate": 2.683385850150651e-06,
      "loss": 0.1198,
      "step": 22752
    },
    {
      "epoch": 0.6637785168329541,
      "grad_norm": 1.1057954706132223,
      "learning_rate": 2.6829671942700665e-06,
      "loss": 0.1349,
      "step": 22753
    },
    {
      "epoch": 0.6638076900635976,
      "grad_norm": 0.7679389187018296,
      "learning_rate": 2.6825485590754e-06,
      "loss": 0.1342,
      "step": 22754
    },
    {
      "epoch": 0.6638368632942412,
      "grad_norm": 0.6422875569214838,
      "learning_rate": 2.682129944570385e-06,
      "loss": 0.1242,
      "step": 22755
    },
    {
      "epoch": 0.6638660365248847,
      "grad_norm": 0.787111958851915,
      "learning_rate": 2.6817113507587623e-06,
      "loss": 0.1137,
      "step": 22756
    },
    {
      "epoch": 0.6638952097555283,
      "grad_norm": 0.8947541133808778,
      "learning_rate": 2.6812927776442647e-06,
      "loss": 0.144,
      "step": 22757
    },
    {
      "epoch": 0.6639243829861718,
      "grad_norm": 0.7308854387359449,
      "learning_rate": 2.680874225230634e-06,
      "loss": 0.1208,
      "step": 22758
    },
    {
      "epoch": 0.6639535562168154,
      "grad_norm": 0.8570165983911302,
      "learning_rate": 2.680455693521605e-06,
      "loss": 0.1278,
      "step": 22759
    },
    {
      "epoch": 0.6639827294474591,
      "grad_norm": 0.6814366635582152,
      "learning_rate": 2.6800371825209114e-06,
      "loss": 0.1177,
      "step": 22760
    },
    {
      "epoch": 0.6640119026781026,
      "grad_norm": 0.757997692071325,
      "learning_rate": 2.6796186922322926e-06,
      "loss": 0.112,
      "step": 22761
    },
    {
      "epoch": 0.6640410759087462,
      "grad_norm": 0.9121638917545138,
      "learning_rate": 2.679200222659486e-06,
      "loss": 0.1344,
      "step": 22762
    },
    {
      "epoch": 0.6640702491393897,
      "grad_norm": 0.7417214753064422,
      "learning_rate": 2.6787817738062233e-06,
      "loss": 0.1396,
      "step": 22763
    },
    {
      "epoch": 0.6640994223700333,
      "grad_norm": 0.8072939739497127,
      "learning_rate": 2.678363345676245e-06,
      "loss": 0.1063,
      "step": 22764
    },
    {
      "epoch": 0.6641285956006768,
      "grad_norm": 0.6786321051846522,
      "learning_rate": 2.6779449382732846e-06,
      "loss": 0.0934,
      "step": 22765
    },
    {
      "epoch": 0.6641577688313204,
      "grad_norm": 0.7770781535074133,
      "learning_rate": 2.677526551601076e-06,
      "loss": 0.1086,
      "step": 22766
    },
    {
      "epoch": 0.6641869420619639,
      "grad_norm": 0.6871878643258786,
      "learning_rate": 2.6771081856633552e-06,
      "loss": 0.1175,
      "step": 22767
    },
    {
      "epoch": 0.6642161152926075,
      "grad_norm": 0.7890643089896222,
      "learning_rate": 2.6766898404638604e-06,
      "loss": 0.1386,
      "step": 22768
    },
    {
      "epoch": 0.664245288523251,
      "grad_norm": 0.7658669956462429,
      "learning_rate": 2.6762715160063236e-06,
      "loss": 0.1233,
      "step": 22769
    },
    {
      "epoch": 0.6642744617538946,
      "grad_norm": 0.9075248415891648,
      "learning_rate": 2.675853212294478e-06,
      "loss": 0.1158,
      "step": 22770
    },
    {
      "epoch": 0.6643036349845381,
      "grad_norm": 0.6410609193505,
      "learning_rate": 2.6754349293320625e-06,
      "loss": 0.105,
      "step": 22771
    },
    {
      "epoch": 0.6643328082151817,
      "grad_norm": 0.84713033760718,
      "learning_rate": 2.6750166671228094e-06,
      "loss": 0.1306,
      "step": 22772
    },
    {
      "epoch": 0.6643619814458253,
      "grad_norm": 0.7736255291218663,
      "learning_rate": 2.6745984256704498e-06,
      "loss": 0.1182,
      "step": 22773
    },
    {
      "epoch": 0.6643911546764689,
      "grad_norm": 0.9803888247539395,
      "learning_rate": 2.6741802049787202e-06,
      "loss": 0.1155,
      "step": 22774
    },
    {
      "epoch": 0.6644203279071125,
      "grad_norm": 1.0128989948567748,
      "learning_rate": 2.6737620050513567e-06,
      "loss": 0.1372,
      "step": 22775
    },
    {
      "epoch": 0.664449501137756,
      "grad_norm": 0.8742612390528713,
      "learning_rate": 2.6733438258920912e-06,
      "loss": 0.1428,
      "step": 22776
    },
    {
      "epoch": 0.6644786743683996,
      "grad_norm": 0.8847815637561429,
      "learning_rate": 2.6729256675046545e-06,
      "loss": 0.1233,
      "step": 22777
    },
    {
      "epoch": 0.6645078475990431,
      "grad_norm": 0.9185805914733591,
      "learning_rate": 2.6725075298927837e-06,
      "loss": 0.1385,
      "step": 22778
    },
    {
      "epoch": 0.6645370208296867,
      "grad_norm": 0.9081917259864982,
      "learning_rate": 2.672089413060208e-06,
      "loss": 0.1149,
      "step": 22779
    },
    {
      "epoch": 0.6645661940603302,
      "grad_norm": 0.9060151823771639,
      "learning_rate": 2.6716713170106622e-06,
      "loss": 0.12,
      "step": 22780
    },
    {
      "epoch": 0.6645953672909738,
      "grad_norm": 0.8265405242520284,
      "learning_rate": 2.6712532417478817e-06,
      "loss": 0.1203,
      "step": 22781
    },
    {
      "epoch": 0.6646245405216173,
      "grad_norm": 0.9303997829894811,
      "learning_rate": 2.6708351872755955e-06,
      "loss": 0.1205,
      "step": 22782
    },
    {
      "epoch": 0.6646537137522609,
      "grad_norm": 0.6978932775368064,
      "learning_rate": 2.6704171535975353e-06,
      "loss": 0.1163,
      "step": 22783
    },
    {
      "epoch": 0.6646828869829045,
      "grad_norm": 0.9416123650054097,
      "learning_rate": 2.669999140717436e-06,
      "loss": 0.1141,
      "step": 22784
    },
    {
      "epoch": 0.664712060213548,
      "grad_norm": 0.8858413083104199,
      "learning_rate": 2.6695811486390267e-06,
      "loss": 0.1127,
      "step": 22785
    },
    {
      "epoch": 0.6647412334441916,
      "grad_norm": 0.9055770930977479,
      "learning_rate": 2.6691631773660427e-06,
      "loss": 0.1231,
      "step": 22786
    },
    {
      "epoch": 0.6647704066748352,
      "grad_norm": 1.0147306886536345,
      "learning_rate": 2.6687452269022107e-06,
      "loss": 0.1046,
      "step": 22787
    },
    {
      "epoch": 0.6647995799054788,
      "grad_norm": 0.8475171238017849,
      "learning_rate": 2.6683272972512674e-06,
      "loss": 0.1185,
      "step": 22788
    },
    {
      "epoch": 0.6648287531361223,
      "grad_norm": 0.8877258272993748,
      "learning_rate": 2.6679093884169415e-06,
      "loss": 0.0949,
      "step": 22789
    },
    {
      "epoch": 0.6648579263667659,
      "grad_norm": 0.8681780252684836,
      "learning_rate": 2.6674915004029615e-06,
      "loss": 0.1303,
      "step": 22790
    },
    {
      "epoch": 0.6648870995974094,
      "grad_norm": 0.7965396402025787,
      "learning_rate": 2.6670736332130608e-06,
      "loss": 0.1159,
      "step": 22791
    },
    {
      "epoch": 0.664916272828053,
      "grad_norm": 0.9841908263698371,
      "learning_rate": 2.666655786850972e-06,
      "loss": 0.1137,
      "step": 22792
    },
    {
      "epoch": 0.6649454460586965,
      "grad_norm": 0.9079873660443103,
      "learning_rate": 2.666237961320421e-06,
      "loss": 0.1362,
      "step": 22793
    },
    {
      "epoch": 0.6649746192893401,
      "grad_norm": 1.3539266253683224,
      "learning_rate": 2.665820156625142e-06,
      "loss": 0.1111,
      "step": 22794
    },
    {
      "epoch": 0.6650037925199836,
      "grad_norm": 0.9942759204181132,
      "learning_rate": 2.6654023727688637e-06,
      "loss": 0.1299,
      "step": 22795
    },
    {
      "epoch": 0.6650329657506272,
      "grad_norm": 0.9141475331994009,
      "learning_rate": 2.6649846097553144e-06,
      "loss": 0.1284,
      "step": 22796
    },
    {
      "epoch": 0.6650621389812708,
      "grad_norm": 0.8335895823147706,
      "learning_rate": 2.664566867588224e-06,
      "loss": 0.1221,
      "step": 22797
    },
    {
      "epoch": 0.6650913122119143,
      "grad_norm": 0.9376403578406329,
      "learning_rate": 2.664149146271326e-06,
      "loss": 0.1081,
      "step": 22798
    },
    {
      "epoch": 0.6651204854425579,
      "grad_norm": 0.7709837010877325,
      "learning_rate": 2.6637314458083465e-06,
      "loss": 0.1039,
      "step": 22799
    },
    {
      "epoch": 0.6651496586732014,
      "grad_norm": 0.7341095668651686,
      "learning_rate": 2.6633137662030128e-06,
      "loss": 0.113,
      "step": 22800
    },
    {
      "epoch": 0.6651788319038451,
      "grad_norm": 0.8577788157092703,
      "learning_rate": 2.6628961074590575e-06,
      "loss": 0.1331,
      "step": 22801
    },
    {
      "epoch": 0.6652080051344886,
      "grad_norm": 1.0247188704207537,
      "learning_rate": 2.6624784695802087e-06,
      "loss": 0.1204,
      "step": 22802
    },
    {
      "epoch": 0.6652371783651322,
      "grad_norm": 0.7539953384403465,
      "learning_rate": 2.662060852570192e-06,
      "loss": 0.1102,
      "step": 22803
    },
    {
      "epoch": 0.6652663515957757,
      "grad_norm": 0.8177840501455511,
      "learning_rate": 2.6616432564327375e-06,
      "loss": 0.123,
      "step": 22804
    },
    {
      "epoch": 0.6652955248264193,
      "grad_norm": 0.6556082097341025,
      "learning_rate": 2.6612256811715758e-06,
      "loss": 0.1325,
      "step": 22805
    },
    {
      "epoch": 0.6653246980570628,
      "grad_norm": 0.9077159408637516,
      "learning_rate": 2.660808126790433e-06,
      "loss": 0.1088,
      "step": 22806
    },
    {
      "epoch": 0.6653538712877064,
      "grad_norm": 0.8344040674081844,
      "learning_rate": 2.6603905932930353e-06,
      "loss": 0.1328,
      "step": 22807
    },
    {
      "epoch": 0.66538304451835,
      "grad_norm": 0.8027804473541733,
      "learning_rate": 2.6599730806831114e-06,
      "loss": 0.1232,
      "step": 22808
    },
    {
      "epoch": 0.6654122177489935,
      "grad_norm": 0.7289277425195105,
      "learning_rate": 2.659555588964391e-06,
      "loss": 0.1079,
      "step": 22809
    },
    {
      "epoch": 0.6654413909796371,
      "grad_norm": 0.7811310114777291,
      "learning_rate": 2.6591381181405982e-06,
      "loss": 0.1022,
      "step": 22810
    },
    {
      "epoch": 0.6654705642102806,
      "grad_norm": 0.6699944434434246,
      "learning_rate": 2.6587206682154632e-06,
      "loss": 0.1115,
      "step": 22811
    },
    {
      "epoch": 0.6654997374409242,
      "grad_norm": 0.8527080409705557,
      "learning_rate": 2.658303239192711e-06,
      "loss": 0.1171,
      "step": 22812
    },
    {
      "epoch": 0.6655289106715677,
      "grad_norm": 0.8503458504227392,
      "learning_rate": 2.657885831076067e-06,
      "loss": 0.13,
      "step": 22813
    },
    {
      "epoch": 0.6655580839022114,
      "grad_norm": 0.8610452118826941,
      "learning_rate": 2.657468443869259e-06,
      "loss": 0.1096,
      "step": 22814
    },
    {
      "epoch": 0.6655872571328549,
      "grad_norm": 0.8117637844797436,
      "learning_rate": 2.657051077576015e-06,
      "loss": 0.1137,
      "step": 22815
    },
    {
      "epoch": 0.6656164303634985,
      "grad_norm": 1.0340139159103572,
      "learning_rate": 2.6566337322000604e-06,
      "loss": 0.1161,
      "step": 22816
    },
    {
      "epoch": 0.665645603594142,
      "grad_norm": 1.0225009069481235,
      "learning_rate": 2.656216407745118e-06,
      "loss": 0.1208,
      "step": 22817
    },
    {
      "epoch": 0.6656747768247856,
      "grad_norm": 0.883007858803241,
      "learning_rate": 2.655799104214918e-06,
      "loss": 0.1284,
      "step": 22818
    },
    {
      "epoch": 0.6657039500554292,
      "grad_norm": 1.3535224376299615,
      "learning_rate": 2.6553818216131837e-06,
      "loss": 0.1147,
      "step": 22819
    },
    {
      "epoch": 0.6657331232860727,
      "grad_norm": 1.1754917276501897,
      "learning_rate": 2.654964559943639e-06,
      "loss": 0.1333,
      "step": 22820
    },
    {
      "epoch": 0.6657622965167163,
      "grad_norm": 0.909508895822343,
      "learning_rate": 2.654547319210011e-06,
      "loss": 0.1188,
      "step": 22821
    },
    {
      "epoch": 0.6657914697473598,
      "grad_norm": 0.756381712481357,
      "learning_rate": 2.6541300994160267e-06,
      "loss": 0.1047,
      "step": 22822
    },
    {
      "epoch": 0.6658206429780034,
      "grad_norm": 0.8862876683421308,
      "learning_rate": 2.653712900565407e-06,
      "loss": 0.1345,
      "step": 22823
    },
    {
      "epoch": 0.6658498162086469,
      "grad_norm": 0.8249110733240348,
      "learning_rate": 2.6532957226618805e-06,
      "loss": 0.1335,
      "step": 22824
    },
    {
      "epoch": 0.6658789894392905,
      "grad_norm": 1.007246692300794,
      "learning_rate": 2.6528785657091696e-06,
      "loss": 0.1023,
      "step": 22825
    },
    {
      "epoch": 0.665908162669934,
      "grad_norm": 0.8183854563361862,
      "learning_rate": 2.652461429710996e-06,
      "loss": 0.1175,
      "step": 22826
    },
    {
      "epoch": 0.6659373359005776,
      "grad_norm": 0.9746215184008209,
      "learning_rate": 2.652044314671086e-06,
      "loss": 0.1166,
      "step": 22827
    },
    {
      "epoch": 0.6659665091312212,
      "grad_norm": 0.7990369652570128,
      "learning_rate": 2.6516272205931672e-06,
      "loss": 0.1196,
      "step": 22828
    },
    {
      "epoch": 0.6659956823618648,
      "grad_norm": 0.7056059919518601,
      "learning_rate": 2.6512101474809595e-06,
      "loss": 0.1273,
      "step": 22829
    },
    {
      "epoch": 0.6660248555925083,
      "grad_norm": 1.076309111226193,
      "learning_rate": 2.6507930953381844e-06,
      "loss": 0.1254,
      "step": 22830
    },
    {
      "epoch": 0.6660540288231519,
      "grad_norm": 0.7191027710196798,
      "learning_rate": 2.6503760641685698e-06,
      "loss": 0.0996,
      "step": 22831
    },
    {
      "epoch": 0.6660832020537955,
      "grad_norm": 0.8633287497024185,
      "learning_rate": 2.6499590539758354e-06,
      "loss": 0.1207,
      "step": 22832
    },
    {
      "epoch": 0.666112375284439,
      "grad_norm": 0.7809386224903961,
      "learning_rate": 2.6495420647637073e-06,
      "loss": 0.1511,
      "step": 22833
    },
    {
      "epoch": 0.6661415485150826,
      "grad_norm": 0.801690683183648,
      "learning_rate": 2.649125096535904e-06,
      "loss": 0.1244,
      "step": 22834
    },
    {
      "epoch": 0.6661707217457261,
      "grad_norm": 0.8307182356116696,
      "learning_rate": 2.648708149296153e-06,
      "loss": 0.1535,
      "step": 22835
    },
    {
      "epoch": 0.6661998949763697,
      "grad_norm": 0.7847049312689528,
      "learning_rate": 2.648291223048175e-06,
      "loss": 0.1113,
      "step": 22836
    },
    {
      "epoch": 0.6662290682070132,
      "grad_norm": 0.648994948062591,
      "learning_rate": 2.6478743177956888e-06,
      "loss": 0.1291,
      "step": 22837
    },
    {
      "epoch": 0.6662582414376568,
      "grad_norm": 1.0227885258841345,
      "learning_rate": 2.6474574335424193e-06,
      "loss": 0.1692,
      "step": 22838
    },
    {
      "epoch": 0.6662874146683003,
      "grad_norm": 0.8035949903899916,
      "learning_rate": 2.64704057029209e-06,
      "loss": 0.1081,
      "step": 22839
    },
    {
      "epoch": 0.6663165878989439,
      "grad_norm": 0.8940843548417005,
      "learning_rate": 2.6466237280484197e-06,
      "loss": 0.1176,
      "step": 22840
    },
    {
      "epoch": 0.6663457611295875,
      "grad_norm": 0.7227817489635756,
      "learning_rate": 2.646206906815132e-06,
      "loss": 0.1291,
      "step": 22841
    },
    {
      "epoch": 0.6663749343602311,
      "grad_norm": 1.041345619677843,
      "learning_rate": 2.6457901065959474e-06,
      "loss": 0.1174,
      "step": 22842
    },
    {
      "epoch": 0.6664041075908747,
      "grad_norm": 0.8271141276860493,
      "learning_rate": 2.645373327394585e-06,
      "loss": 0.1318,
      "step": 22843
    },
    {
      "epoch": 0.6664332808215182,
      "grad_norm": 0.7784161027827586,
      "learning_rate": 2.6449565692147673e-06,
      "loss": 0.1233,
      "step": 22844
    },
    {
      "epoch": 0.6664624540521618,
      "grad_norm": 0.913431319897687,
      "learning_rate": 2.6445398320602168e-06,
      "loss": 0.1175,
      "step": 22845
    },
    {
      "epoch": 0.6664916272828053,
      "grad_norm": 0.7544592426231707,
      "learning_rate": 2.644123115934653e-06,
      "loss": 0.1218,
      "step": 22846
    },
    {
      "epoch": 0.6665208005134489,
      "grad_norm": 0.7237926195654799,
      "learning_rate": 2.6437064208417934e-06,
      "loss": 0.1255,
      "step": 22847
    },
    {
      "epoch": 0.6665499737440924,
      "grad_norm": 0.7785536203346751,
      "learning_rate": 2.6432897467853626e-06,
      "loss": 0.1359,
      "step": 22848
    },
    {
      "epoch": 0.666579146974736,
      "grad_norm": 0.9023965626581943,
      "learning_rate": 2.642873093769078e-06,
      "loss": 0.1294,
      "step": 22849
    },
    {
      "epoch": 0.6666083202053795,
      "grad_norm": 1.1122519111675686,
      "learning_rate": 2.6424564617966574e-06,
      "loss": 0.1302,
      "step": 22850
    },
    {
      "epoch": 0.6666374934360231,
      "grad_norm": 0.7810632635125973,
      "learning_rate": 2.6420398508718237e-06,
      "loss": 0.1274,
      "step": 22851
    },
    {
      "epoch": 0.6666666666666666,
      "grad_norm": 1.1671355798287721,
      "learning_rate": 2.641623260998296e-06,
      "loss": 0.1267,
      "step": 22852
    },
    {
      "epoch": 0.6666958398973102,
      "grad_norm": 0.7620712314961029,
      "learning_rate": 2.641206692179794e-06,
      "loss": 0.1082,
      "step": 22853
    },
    {
      "epoch": 0.6667250131279537,
      "grad_norm": 0.9685592833678796,
      "learning_rate": 2.6407901444200335e-06,
      "loss": 0.1198,
      "step": 22854
    },
    {
      "epoch": 0.6667541863585974,
      "grad_norm": 1.3405049572231604,
      "learning_rate": 2.640373617722737e-06,
      "loss": 0.1386,
      "step": 22855
    },
    {
      "epoch": 0.666783359589241,
      "grad_norm": 0.7881531449010316,
      "learning_rate": 2.639957112091619e-06,
      "loss": 0.1266,
      "step": 22856
    },
    {
      "epoch": 0.6668125328198845,
      "grad_norm": 0.9545364113754767,
      "learning_rate": 2.6395406275304014e-06,
      "loss": 0.1009,
      "step": 22857
    },
    {
      "epoch": 0.6668417060505281,
      "grad_norm": 0.9211951782557177,
      "learning_rate": 2.6391241640428034e-06,
      "loss": 0.1227,
      "step": 22858
    },
    {
      "epoch": 0.6668708792811716,
      "grad_norm": 0.8779482312824567,
      "learning_rate": 2.6387077216325407e-06,
      "loss": 0.1228,
      "step": 22859
    },
    {
      "epoch": 0.6669000525118152,
      "grad_norm": 0.9842666461074883,
      "learning_rate": 2.6382913003033305e-06,
      "loss": 0.1368,
      "step": 22860
    },
    {
      "epoch": 0.6669292257424587,
      "grad_norm": 0.81804116491733,
      "learning_rate": 2.637874900058893e-06,
      "loss": 0.1422,
      "step": 22861
    },
    {
      "epoch": 0.6669583989731023,
      "grad_norm": 1.093203373968162,
      "learning_rate": 2.6374585209029435e-06,
      "loss": 0.1342,
      "step": 22862
    },
    {
      "epoch": 0.6669875722037458,
      "grad_norm": 0.8712050536001551,
      "learning_rate": 2.637042162839202e-06,
      "loss": 0.1317,
      "step": 22863
    },
    {
      "epoch": 0.6670167454343894,
      "grad_norm": 0.8753929827276967,
      "learning_rate": 2.6366258258713816e-06,
      "loss": 0.1203,
      "step": 22864
    },
    {
      "epoch": 0.6670459186650329,
      "grad_norm": 0.9179428116205727,
      "learning_rate": 2.636209510003204e-06,
      "loss": 0.1156,
      "step": 22865
    },
    {
      "epoch": 0.6670750918956765,
      "grad_norm": 0.9122813596479129,
      "learning_rate": 2.6357932152383837e-06,
      "loss": 0.1229,
      "step": 22866
    },
    {
      "epoch": 0.66710426512632,
      "grad_norm": 0.9132962926045334,
      "learning_rate": 2.635376941580635e-06,
      "loss": 0.1178,
      "step": 22867
    },
    {
      "epoch": 0.6671334383569637,
      "grad_norm": 0.746429558365203,
      "learning_rate": 2.6349606890336765e-06,
      "loss": 0.1243,
      "step": 22868
    },
    {
      "epoch": 0.6671626115876073,
      "grad_norm": 0.8487321079667792,
      "learning_rate": 2.634544457601227e-06,
      "loss": 0.1306,
      "step": 22869
    },
    {
      "epoch": 0.6671917848182508,
      "grad_norm": 1.071069474226585,
      "learning_rate": 2.6341282472869968e-06,
      "loss": 0.1121,
      "step": 22870
    },
    {
      "epoch": 0.6672209580488944,
      "grad_norm": 0.7754749506074043,
      "learning_rate": 2.6337120580947074e-06,
      "loss": 0.0795,
      "step": 22871
    },
    {
      "epoch": 0.6672501312795379,
      "grad_norm": 0.8771783417649982,
      "learning_rate": 2.6332958900280715e-06,
      "loss": 0.1316,
      "step": 22872
    },
    {
      "epoch": 0.6672793045101815,
      "grad_norm": 1.0235420465187148,
      "learning_rate": 2.6328797430908038e-06,
      "loss": 0.1148,
      "step": 22873
    },
    {
      "epoch": 0.667308477740825,
      "grad_norm": 0.7937782970107664,
      "learning_rate": 2.63246361728662e-06,
      "loss": 0.108,
      "step": 22874
    },
    {
      "epoch": 0.6673376509714686,
      "grad_norm": 0.8326839070925353,
      "learning_rate": 2.6320475126192378e-06,
      "loss": 0.0956,
      "step": 22875
    },
    {
      "epoch": 0.6673668242021121,
      "grad_norm": 0.7108209346505884,
      "learning_rate": 2.6316314290923705e-06,
      "loss": 0.1105,
      "step": 22876
    },
    {
      "epoch": 0.6673959974327557,
      "grad_norm": 0.7732239015891544,
      "learning_rate": 2.6312153667097297e-06,
      "loss": 0.1262,
      "step": 22877
    },
    {
      "epoch": 0.6674251706633992,
      "grad_norm": 0.8739867117762663,
      "learning_rate": 2.6307993254750354e-06,
      "loss": 0.1309,
      "step": 22878
    },
    {
      "epoch": 0.6674543438940428,
      "grad_norm": 0.7112937245112858,
      "learning_rate": 2.630383305391999e-06,
      "loss": 0.1312,
      "step": 22879
    },
    {
      "epoch": 0.6674835171246863,
      "grad_norm": 0.9659848731887509,
      "learning_rate": 2.629967306464333e-06,
      "loss": 0.1247,
      "step": 22880
    },
    {
      "epoch": 0.6675126903553299,
      "grad_norm": 0.9564188845262732,
      "learning_rate": 2.629551328695752e-06,
      "loss": 0.123,
      "step": 22881
    },
    {
      "epoch": 0.6675418635859736,
      "grad_norm": 0.9836315585119073,
      "learning_rate": 2.629135372089974e-06,
      "loss": 0.1247,
      "step": 22882
    },
    {
      "epoch": 0.6675710368166171,
      "grad_norm": 1.0055943275658235,
      "learning_rate": 2.628719436650709e-06,
      "loss": 0.1459,
      "step": 22883
    },
    {
      "epoch": 0.6676002100472607,
      "grad_norm": 0.9715136891055601,
      "learning_rate": 2.628303522381669e-06,
      "loss": 0.1388,
      "step": 22884
    },
    {
      "epoch": 0.6676293832779042,
      "grad_norm": 0.9111219117398697,
      "learning_rate": 2.6278876292865705e-06,
      "loss": 0.0897,
      "step": 22885
    },
    {
      "epoch": 0.6676585565085478,
      "grad_norm": 0.8552261691374892,
      "learning_rate": 2.627471757369123e-06,
      "loss": 0.1279,
      "step": 22886
    },
    {
      "epoch": 0.6676877297391913,
      "grad_norm": 0.7070872845443794,
      "learning_rate": 2.6270559066330425e-06,
      "loss": 0.1202,
      "step": 22887
    },
    {
      "epoch": 0.6677169029698349,
      "grad_norm": 0.9344461378859874,
      "learning_rate": 2.626640077082041e-06,
      "loss": 0.1433,
      "step": 22888
    },
    {
      "epoch": 0.6677460762004784,
      "grad_norm": 0.9546904343586825,
      "learning_rate": 2.626224268719831e-06,
      "loss": 0.126,
      "step": 22889
    },
    {
      "epoch": 0.667775249431122,
      "grad_norm": 0.8914692488127292,
      "learning_rate": 2.6258084815501217e-06,
      "loss": 0.133,
      "step": 22890
    },
    {
      "epoch": 0.6678044226617655,
      "grad_norm": 1.0945021448766752,
      "learning_rate": 2.62539271557663e-06,
      "loss": 0.1349,
      "step": 22891
    },
    {
      "epoch": 0.6678335958924091,
      "grad_norm": 0.8442179469964343,
      "learning_rate": 2.6249769708030626e-06,
      "loss": 0.1411,
      "step": 22892
    },
    {
      "epoch": 0.6678627691230526,
      "grad_norm": 0.6690842340004873,
      "learning_rate": 2.624561247233136e-06,
      "loss": 0.1161,
      "step": 22893
    },
    {
      "epoch": 0.6678919423536962,
      "grad_norm": 1.055252963845563,
      "learning_rate": 2.6241455448705585e-06,
      "loss": 0.1389,
      "step": 22894
    },
    {
      "epoch": 0.6679211155843399,
      "grad_norm": 0.9517407132556734,
      "learning_rate": 2.6237298637190433e-06,
      "loss": 0.1278,
      "step": 22895
    },
    {
      "epoch": 0.6679502888149834,
      "grad_norm": 0.714132413605472,
      "learning_rate": 2.6233142037823013e-06,
      "loss": 0.104,
      "step": 22896
    },
    {
      "epoch": 0.667979462045627,
      "grad_norm": 0.7384307375035443,
      "learning_rate": 2.6228985650640405e-06,
      "loss": 0.1249,
      "step": 22897
    },
    {
      "epoch": 0.6680086352762705,
      "grad_norm": 0.6353591443588497,
      "learning_rate": 2.6224829475679737e-06,
      "loss": 0.1235,
      "step": 22898
    },
    {
      "epoch": 0.6680378085069141,
      "grad_norm": 0.913948058624623,
      "learning_rate": 2.6220673512978135e-06,
      "loss": 0.1174,
      "step": 22899
    },
    {
      "epoch": 0.6680669817375576,
      "grad_norm": 0.8425906355515902,
      "learning_rate": 2.621651776257266e-06,
      "loss": 0.1102,
      "step": 22900
    },
    {
      "epoch": 0.6680961549682012,
      "grad_norm": 0.8877934953357128,
      "learning_rate": 2.6212362224500467e-06,
      "loss": 0.1241,
      "step": 22901
    },
    {
      "epoch": 0.6681253281988447,
      "grad_norm": 0.8366168848950594,
      "learning_rate": 2.6208206898798618e-06,
      "loss": 0.115,
      "step": 22902
    },
    {
      "epoch": 0.6681545014294883,
      "grad_norm": 0.9015235413550581,
      "learning_rate": 2.6204051785504197e-06,
      "loss": 0.1199,
      "step": 22903
    },
    {
      "epoch": 0.6681836746601318,
      "grad_norm": 0.7936000443709997,
      "learning_rate": 2.619989688465433e-06,
      "loss": 0.1208,
      "step": 22904
    },
    {
      "epoch": 0.6682128478907754,
      "grad_norm": 0.905018323752349,
      "learning_rate": 2.619574219628611e-06,
      "loss": 0.1142,
      "step": 22905
    },
    {
      "epoch": 0.668242021121419,
      "grad_norm": 0.9977333843447267,
      "learning_rate": 2.619158772043663e-06,
      "loss": 0.1455,
      "step": 22906
    },
    {
      "epoch": 0.6682711943520625,
      "grad_norm": 0.7992462283310806,
      "learning_rate": 2.6187433457142953e-06,
      "loss": 0.1124,
      "step": 22907
    },
    {
      "epoch": 0.6683003675827061,
      "grad_norm": 0.8310542157884723,
      "learning_rate": 2.6183279406442195e-06,
      "loss": 0.127,
      "step": 22908
    },
    {
      "epoch": 0.6683295408133497,
      "grad_norm": 1.1425389556738446,
      "learning_rate": 2.6179125568371444e-06,
      "loss": 0.1313,
      "step": 22909
    },
    {
      "epoch": 0.6683587140439933,
      "grad_norm": 1.0003638408942155,
      "learning_rate": 2.617497194296774e-06,
      "loss": 0.1308,
      "step": 22910
    },
    {
      "epoch": 0.6683878872746368,
      "grad_norm": 0.7607575092540827,
      "learning_rate": 2.6170818530268218e-06,
      "loss": 0.1416,
      "step": 22911
    },
    {
      "epoch": 0.6684170605052804,
      "grad_norm": 1.035812661802313,
      "learning_rate": 2.616666533030995e-06,
      "loss": 0.1503,
      "step": 22912
    },
    {
      "epoch": 0.6684462337359239,
      "grad_norm": 0.9809074352486825,
      "learning_rate": 2.6162512343129996e-06,
      "loss": 0.1125,
      "step": 22913
    },
    {
      "epoch": 0.6684754069665675,
      "grad_norm": 0.8755276446003115,
      "learning_rate": 2.6158359568765436e-06,
      "loss": 0.1218,
      "step": 22914
    },
    {
      "epoch": 0.668504580197211,
      "grad_norm": 0.931614951643964,
      "learning_rate": 2.6154207007253364e-06,
      "loss": 0.1165,
      "step": 22915
    },
    {
      "epoch": 0.6685337534278546,
      "grad_norm": 1.1166821915032044,
      "learning_rate": 2.6150054658630814e-06,
      "loss": 0.1401,
      "step": 22916
    },
    {
      "epoch": 0.6685629266584981,
      "grad_norm": 0.896098729123017,
      "learning_rate": 2.6145902522934886e-06,
      "loss": 0.1289,
      "step": 22917
    },
    {
      "epoch": 0.6685920998891417,
      "grad_norm": 0.7827358265099456,
      "learning_rate": 2.614175060020267e-06,
      "loss": 0.1031,
      "step": 22918
    },
    {
      "epoch": 0.6686212731197853,
      "grad_norm": 0.8556313979924545,
      "learning_rate": 2.6137598890471204e-06,
      "loss": 0.1316,
      "step": 22919
    },
    {
      "epoch": 0.6686504463504288,
      "grad_norm": 0.9517679559457114,
      "learning_rate": 2.6133447393777545e-06,
      "loss": 0.127,
      "step": 22920
    },
    {
      "epoch": 0.6686796195810724,
      "grad_norm": 0.9916652286106274,
      "learning_rate": 2.6129296110158784e-06,
      "loss": 0.0907,
      "step": 22921
    },
    {
      "epoch": 0.668708792811716,
      "grad_norm": 0.6660238125799548,
      "learning_rate": 2.6125145039651955e-06,
      "loss": 0.1211,
      "step": 22922
    },
    {
      "epoch": 0.6687379660423596,
      "grad_norm": 0.859101359514304,
      "learning_rate": 2.612099418229415e-06,
      "loss": 0.1568,
      "step": 22923
    },
    {
      "epoch": 0.6687671392730031,
      "grad_norm": 1.0702947707584312,
      "learning_rate": 2.6116843538122383e-06,
      "loss": 0.1346,
      "step": 22924
    },
    {
      "epoch": 0.6687963125036467,
      "grad_norm": 0.8805219805749532,
      "learning_rate": 2.611269310717376e-06,
      "loss": 0.1029,
      "step": 22925
    },
    {
      "epoch": 0.6688254857342902,
      "grad_norm": 0.8034430681476991,
      "learning_rate": 2.6108542889485304e-06,
      "loss": 0.1282,
      "step": 22926
    },
    {
      "epoch": 0.6688546589649338,
      "grad_norm": 0.8488474482376444,
      "learning_rate": 2.6104392885094067e-06,
      "loss": 0.1199,
      "step": 22927
    },
    {
      "epoch": 0.6688838321955773,
      "grad_norm": 1.282805480030664,
      "learning_rate": 2.610024309403709e-06,
      "loss": 0.118,
      "step": 22928
    },
    {
      "epoch": 0.6689130054262209,
      "grad_norm": 0.8617636404005417,
      "learning_rate": 2.6096093516351473e-06,
      "loss": 0.1236,
      "step": 22929
    },
    {
      "epoch": 0.6689421786568645,
      "grad_norm": 1.2089206394358851,
      "learning_rate": 2.60919441520742e-06,
      "loss": 0.1504,
      "step": 22930
    },
    {
      "epoch": 0.668971351887508,
      "grad_norm": 0.8843065407926914,
      "learning_rate": 2.6087795001242357e-06,
      "loss": 0.1226,
      "step": 22931
    },
    {
      "epoch": 0.6690005251181516,
      "grad_norm": 1.1493588882110541,
      "learning_rate": 2.6083646063892976e-06,
      "loss": 0.13,
      "step": 22932
    },
    {
      "epoch": 0.6690296983487951,
      "grad_norm": 1.1155950339039007,
      "learning_rate": 2.6079497340063077e-06,
      "loss": 0.1407,
      "step": 22933
    },
    {
      "epoch": 0.6690588715794387,
      "grad_norm": 0.9143638603560748,
      "learning_rate": 2.6075348829789716e-06,
      "loss": 0.1232,
      "step": 22934
    },
    {
      "epoch": 0.6690880448100822,
      "grad_norm": 0.8037598879231852,
      "learning_rate": 2.6071200533109943e-06,
      "loss": 0.1097,
      "step": 22935
    },
    {
      "epoch": 0.6691172180407259,
      "grad_norm": 1.1392089839495685,
      "learning_rate": 2.606705245006078e-06,
      "loss": 0.1231,
      "step": 22936
    },
    {
      "epoch": 0.6691463912713694,
      "grad_norm": 0.8531527364776602,
      "learning_rate": 2.6062904580679243e-06,
      "loss": 0.1279,
      "step": 22937
    },
    {
      "epoch": 0.669175564502013,
      "grad_norm": 0.815867906913901,
      "learning_rate": 2.6058756925002405e-06,
      "loss": 0.1155,
      "step": 22938
    },
    {
      "epoch": 0.6692047377326565,
      "grad_norm": 0.8291403675860054,
      "learning_rate": 2.605460948306726e-06,
      "loss": 0.1115,
      "step": 22939
    },
    {
      "epoch": 0.6692339109633001,
      "grad_norm": 0.7748396013848297,
      "learning_rate": 2.6050462254910825e-06,
      "loss": 0.1493,
      "step": 22940
    },
    {
      "epoch": 0.6692630841939436,
      "grad_norm": 0.8602687148445679,
      "learning_rate": 2.604631524057015e-06,
      "loss": 0.1066,
      "step": 22941
    },
    {
      "epoch": 0.6692922574245872,
      "grad_norm": 1.0815187177418915,
      "learning_rate": 2.6042168440082278e-06,
      "loss": 0.1051,
      "step": 22942
    },
    {
      "epoch": 0.6693214306552308,
      "grad_norm": 0.7993337586485786,
      "learning_rate": 2.6038021853484204e-06,
      "loss": 0.1247,
      "step": 22943
    },
    {
      "epoch": 0.6693506038858743,
      "grad_norm": 0.8172989834362377,
      "learning_rate": 2.6033875480812932e-06,
      "loss": 0.1323,
      "step": 22944
    },
    {
      "epoch": 0.6693797771165179,
      "grad_norm": 1.0694196145230908,
      "learning_rate": 2.602972932210551e-06,
      "loss": 0.1185,
      "step": 22945
    },
    {
      "epoch": 0.6694089503471614,
      "grad_norm": 0.8139093409115654,
      "learning_rate": 2.6025583377398933e-06,
      "loss": 0.1233,
      "step": 22946
    },
    {
      "epoch": 0.669438123577805,
      "grad_norm": 1.2906758880216815,
      "learning_rate": 2.602143764673022e-06,
      "loss": 0.1207,
      "step": 22947
    },
    {
      "epoch": 0.6694672968084485,
      "grad_norm": 0.8766576779177476,
      "learning_rate": 2.6017292130136406e-06,
      "loss": 0.1222,
      "step": 22948
    },
    {
      "epoch": 0.6694964700390922,
      "grad_norm": 0.7039154293282055,
      "learning_rate": 2.6013146827654485e-06,
      "loss": 0.0965,
      "step": 22949
    },
    {
      "epoch": 0.6695256432697357,
      "grad_norm": 0.7745158648055794,
      "learning_rate": 2.600900173932144e-06,
      "loss": 0.1098,
      "step": 22950
    },
    {
      "epoch": 0.6695548165003793,
      "grad_norm": 0.9171519033405305,
      "learning_rate": 2.600485686517432e-06,
      "loss": 0.1288,
      "step": 22951
    },
    {
      "epoch": 0.6695839897310228,
      "grad_norm": 1.0745458521225597,
      "learning_rate": 2.60007122052501e-06,
      "loss": 0.133,
      "step": 22952
    },
    {
      "epoch": 0.6696131629616664,
      "grad_norm": 1.0622810261729105,
      "learning_rate": 2.59965677595858e-06,
      "loss": 0.1201,
      "step": 22953
    },
    {
      "epoch": 0.66964233619231,
      "grad_norm": 0.6532650285431103,
      "learning_rate": 2.5992423528218404e-06,
      "loss": 0.1056,
      "step": 22954
    },
    {
      "epoch": 0.6696715094229535,
      "grad_norm": 1.0279845866111847,
      "learning_rate": 2.5988279511184934e-06,
      "loss": 0.1158,
      "step": 22955
    },
    {
      "epoch": 0.6697006826535971,
      "grad_norm": 0.8731483651185638,
      "learning_rate": 2.598413570852237e-06,
      "loss": 0.0986,
      "step": 22956
    },
    {
      "epoch": 0.6697298558842406,
      "grad_norm": 0.9696134873233425,
      "learning_rate": 2.597999212026769e-06,
      "loss": 0.1212,
      "step": 22957
    },
    {
      "epoch": 0.6697590291148842,
      "grad_norm": 1.1347937170465638,
      "learning_rate": 2.597584874645791e-06,
      "loss": 0.1624,
      "step": 22958
    },
    {
      "epoch": 0.6697882023455277,
      "grad_norm": 1.0999434347085677,
      "learning_rate": 2.5971705587130036e-06,
      "loss": 0.1261,
      "step": 22959
    },
    {
      "epoch": 0.6698173755761713,
      "grad_norm": 0.9410433692833908,
      "learning_rate": 2.5967562642321014e-06,
      "loss": 0.1308,
      "step": 22960
    },
    {
      "epoch": 0.6698465488068148,
      "grad_norm": 1.110225390909667,
      "learning_rate": 2.596341991206788e-06,
      "loss": 0.1168,
      "step": 22961
    },
    {
      "epoch": 0.6698757220374584,
      "grad_norm": 1.1251422889019116,
      "learning_rate": 2.5959277396407588e-06,
      "loss": 0.1421,
      "step": 22962
    },
    {
      "epoch": 0.669904895268102,
      "grad_norm": 0.9735252353434014,
      "learning_rate": 2.595513509537712e-06,
      "loss": 0.1162,
      "step": 22963
    },
    {
      "epoch": 0.6699340684987456,
      "grad_norm": 0.7471502557532341,
      "learning_rate": 2.595099300901346e-06,
      "loss": 0.0935,
      "step": 22964
    },
    {
      "epoch": 0.6699632417293891,
      "grad_norm": 0.7418263137633314,
      "learning_rate": 2.5946851137353614e-06,
      "loss": 0.1179,
      "step": 22965
    },
    {
      "epoch": 0.6699924149600327,
      "grad_norm": 1.192634862037079,
      "learning_rate": 2.594270948043454e-06,
      "loss": 0.1106,
      "step": 22966
    },
    {
      "epoch": 0.6700215881906763,
      "grad_norm": 0.7710972242554188,
      "learning_rate": 2.5938568038293193e-06,
      "loss": 0.1288,
      "step": 22967
    },
    {
      "epoch": 0.6700507614213198,
      "grad_norm": 0.8604964494284084,
      "learning_rate": 2.5934426810966585e-06,
      "loss": 0.117,
      "step": 22968
    },
    {
      "epoch": 0.6700799346519634,
      "grad_norm": 0.9719138250761021,
      "learning_rate": 2.593028579849167e-06,
      "loss": 0.1361,
      "step": 22969
    },
    {
      "epoch": 0.6701091078826069,
      "grad_norm": 0.9418603652946018,
      "learning_rate": 2.5926145000905402e-06,
      "loss": 0.1196,
      "step": 22970
    },
    {
      "epoch": 0.6701382811132505,
      "grad_norm": 0.8049131281734525,
      "learning_rate": 2.5922004418244758e-06,
      "loss": 0.1009,
      "step": 22971
    },
    {
      "epoch": 0.670167454343894,
      "grad_norm": 0.9613558638504005,
      "learning_rate": 2.591786405054673e-06,
      "loss": 0.1411,
      "step": 22972
    },
    {
      "epoch": 0.6701966275745376,
      "grad_norm": 1.01183092493497,
      "learning_rate": 2.5913723897848264e-06,
      "loss": 0.117,
      "step": 22973
    },
    {
      "epoch": 0.6702258008051811,
      "grad_norm": 0.9235575685507096,
      "learning_rate": 2.5909583960186306e-06,
      "loss": 0.0919,
      "step": 22974
    },
    {
      "epoch": 0.6702549740358247,
      "grad_norm": 0.8172537684550482,
      "learning_rate": 2.590544423759785e-06,
      "loss": 0.1164,
      "step": 22975
    },
    {
      "epoch": 0.6702841472664683,
      "grad_norm": 0.7142541635523699,
      "learning_rate": 2.5901304730119816e-06,
      "loss": 0.121,
      "step": 22976
    },
    {
      "epoch": 0.6703133204971119,
      "grad_norm": 0.905901228042321,
      "learning_rate": 2.5897165437789175e-06,
      "loss": 0.1393,
      "step": 22977
    },
    {
      "epoch": 0.6703424937277555,
      "grad_norm": 0.8181608904803088,
      "learning_rate": 2.5893026360642912e-06,
      "loss": 0.1244,
      "step": 22978
    },
    {
      "epoch": 0.670371666958399,
      "grad_norm": 0.9139770807479383,
      "learning_rate": 2.588888749871795e-06,
      "loss": 0.1453,
      "step": 22979
    },
    {
      "epoch": 0.6704008401890426,
      "grad_norm": 0.782312618193292,
      "learning_rate": 2.5884748852051236e-06,
      "loss": 0.1265,
      "step": 22980
    },
    {
      "epoch": 0.6704300134196861,
      "grad_norm": 0.6666164999326576,
      "learning_rate": 2.588061042067974e-06,
      "loss": 0.1319,
      "step": 22981
    },
    {
      "epoch": 0.6704591866503297,
      "grad_norm": 0.7876602526271521,
      "learning_rate": 2.5876472204640375e-06,
      "loss": 0.1064,
      "step": 22982
    },
    {
      "epoch": 0.6704883598809732,
      "grad_norm": 0.8579907867622241,
      "learning_rate": 2.587233420397013e-06,
      "loss": 0.1264,
      "step": 22983
    },
    {
      "epoch": 0.6705175331116168,
      "grad_norm": 0.9408215174558893,
      "learning_rate": 2.5868196418705906e-06,
      "loss": 0.1386,
      "step": 22984
    },
    {
      "epoch": 0.6705467063422603,
      "grad_norm": 0.8042278480049216,
      "learning_rate": 2.5864058848884678e-06,
      "loss": 0.1271,
      "step": 22985
    },
    {
      "epoch": 0.6705758795729039,
      "grad_norm": 0.8255218498327359,
      "learning_rate": 2.585992149454337e-06,
      "loss": 0.1257,
      "step": 22986
    },
    {
      "epoch": 0.6706050528035474,
      "grad_norm": 0.8001131882840679,
      "learning_rate": 2.585578435571891e-06,
      "loss": 0.1128,
      "step": 22987
    },
    {
      "epoch": 0.670634226034191,
      "grad_norm": 0.7617123776116494,
      "learning_rate": 2.5851647432448242e-06,
      "loss": 0.1418,
      "step": 22988
    },
    {
      "epoch": 0.6706633992648345,
      "grad_norm": 0.9987195121392268,
      "learning_rate": 2.5847510724768315e-06,
      "loss": 0.1189,
      "step": 22989
    },
    {
      "epoch": 0.6706925724954782,
      "grad_norm": 0.8461177176633972,
      "learning_rate": 2.5843374232716035e-06,
      "loss": 0.1251,
      "step": 22990
    },
    {
      "epoch": 0.6707217457261218,
      "grad_norm": 0.798303148708433,
      "learning_rate": 2.5839237956328356e-06,
      "loss": 0.1353,
      "step": 22991
    },
    {
      "epoch": 0.6707509189567653,
      "grad_norm": 0.8383877968105283,
      "learning_rate": 2.583510189564219e-06,
      "loss": 0.1175,
      "step": 22992
    },
    {
      "epoch": 0.6707800921874089,
      "grad_norm": 0.9793682524815928,
      "learning_rate": 2.583096605069445e-06,
      "loss": 0.1246,
      "step": 22993
    },
    {
      "epoch": 0.6708092654180524,
      "grad_norm": 0.9327028157121722,
      "learning_rate": 2.5826830421522075e-06,
      "loss": 0.1317,
      "step": 22994
    },
    {
      "epoch": 0.670838438648696,
      "grad_norm": 0.8195649016502793,
      "learning_rate": 2.5822695008162015e-06,
      "loss": 0.1238,
      "step": 22995
    },
    {
      "epoch": 0.6708676118793395,
      "grad_norm": 1.2466977300984077,
      "learning_rate": 2.581855981065115e-06,
      "loss": 0.1332,
      "step": 22996
    },
    {
      "epoch": 0.6708967851099831,
      "grad_norm": 0.9484199779616992,
      "learning_rate": 2.5814424829026395e-06,
      "loss": 0.1276,
      "step": 22997
    },
    {
      "epoch": 0.6709259583406266,
      "grad_norm": 0.9945373859264935,
      "learning_rate": 2.5810290063324705e-06,
      "loss": 0.1365,
      "step": 22998
    },
    {
      "epoch": 0.6709551315712702,
      "grad_norm": 0.8312290435145775,
      "learning_rate": 2.5806155513582963e-06,
      "loss": 0.1144,
      "step": 22999
    },
    {
      "epoch": 0.6709843048019137,
      "grad_norm": 0.912826024619701,
      "learning_rate": 2.580202117983808e-06,
      "loss": 0.1289,
      "step": 23000
    },
    {
      "epoch": 0.6710134780325573,
      "grad_norm": 0.8686912167702016,
      "learning_rate": 2.579788706212697e-06,
      "loss": 0.1068,
      "step": 23001
    },
    {
      "epoch": 0.6710426512632008,
      "grad_norm": 1.3427708204630477,
      "learning_rate": 2.5793753160486566e-06,
      "loss": 0.1316,
      "step": 23002
    },
    {
      "epoch": 0.6710718244938444,
      "grad_norm": 0.8592042705285448,
      "learning_rate": 2.5789619474953753e-06,
      "loss": 0.117,
      "step": 23003
    },
    {
      "epoch": 0.6711009977244881,
      "grad_norm": 1.082975401321015,
      "learning_rate": 2.578548600556542e-06,
      "loss": 0.1173,
      "step": 23004
    },
    {
      "epoch": 0.6711301709551316,
      "grad_norm": 1.2965524748252881,
      "learning_rate": 2.5781352752358492e-06,
      "loss": 0.1339,
      "step": 23005
    },
    {
      "epoch": 0.6711593441857752,
      "grad_norm": 1.1511374378045174,
      "learning_rate": 2.5777219715369876e-06,
      "loss": 0.1469,
      "step": 23006
    },
    {
      "epoch": 0.6711885174164187,
      "grad_norm": 1.0427632821341766,
      "learning_rate": 2.5773086894636446e-06,
      "loss": 0.1263,
      "step": 23007
    },
    {
      "epoch": 0.6712176906470623,
      "grad_norm": 0.929663944711284,
      "learning_rate": 2.5768954290195136e-06,
      "loss": 0.1361,
      "step": 23008
    },
    {
      "epoch": 0.6712468638777058,
      "grad_norm": 1.1165350096418751,
      "learning_rate": 2.5764821902082814e-06,
      "loss": 0.1264,
      "step": 23009
    },
    {
      "epoch": 0.6712760371083494,
      "grad_norm": 0.9749342362429257,
      "learning_rate": 2.576068973033635e-06,
      "loss": 0.1267,
      "step": 23010
    },
    {
      "epoch": 0.6713052103389929,
      "grad_norm": 0.8730546817614292,
      "learning_rate": 2.5756557774992676e-06,
      "loss": 0.1284,
      "step": 23011
    },
    {
      "epoch": 0.6713343835696365,
      "grad_norm": 0.9547620352732499,
      "learning_rate": 2.575242603608867e-06,
      "loss": 0.1297,
      "step": 23012
    },
    {
      "epoch": 0.67136355680028,
      "grad_norm": 1.1379441054263437,
      "learning_rate": 2.5748294513661233e-06,
      "loss": 0.1372,
      "step": 23013
    },
    {
      "epoch": 0.6713927300309236,
      "grad_norm": 0.9335225321098604,
      "learning_rate": 2.5744163207747202e-06,
      "loss": 0.1269,
      "step": 23014
    },
    {
      "epoch": 0.6714219032615671,
      "grad_norm": 1.6534952123573796,
      "learning_rate": 2.574003211838352e-06,
      "loss": 0.1327,
      "step": 23015
    },
    {
      "epoch": 0.6714510764922107,
      "grad_norm": 1.0425208606698102,
      "learning_rate": 2.573590124560703e-06,
      "loss": 0.1117,
      "step": 23016
    },
    {
      "epoch": 0.6714802497228544,
      "grad_norm": 0.8787349795954643,
      "learning_rate": 2.5731770589454584e-06,
      "loss": 0.0986,
      "step": 23017
    },
    {
      "epoch": 0.6715094229534979,
      "grad_norm": 0.9389100598612916,
      "learning_rate": 2.572764014996314e-06,
      "loss": 0.116,
      "step": 23018
    },
    {
      "epoch": 0.6715385961841415,
      "grad_norm": 0.8437284003147788,
      "learning_rate": 2.5723509927169526e-06,
      "loss": 0.1278,
      "step": 23019
    },
    {
      "epoch": 0.671567769414785,
      "grad_norm": 1.1954962997715544,
      "learning_rate": 2.5719379921110605e-06,
      "loss": 0.1513,
      "step": 23020
    },
    {
      "epoch": 0.6715969426454286,
      "grad_norm": 1.0029319571789357,
      "learning_rate": 2.5715250131823284e-06,
      "loss": 0.1262,
      "step": 23021
    },
    {
      "epoch": 0.6716261158760721,
      "grad_norm": 0.6272414702708596,
      "learning_rate": 2.5711120559344404e-06,
      "loss": 0.1005,
      "step": 23022
    },
    {
      "epoch": 0.6716552891067157,
      "grad_norm": 1.0685323592073221,
      "learning_rate": 2.570699120371083e-06,
      "loss": 0.1135,
      "step": 23023
    },
    {
      "epoch": 0.6716844623373592,
      "grad_norm": 0.848090512308395,
      "learning_rate": 2.5702862064959445e-06,
      "loss": 0.1227,
      "step": 23024
    },
    {
      "epoch": 0.6717136355680028,
      "grad_norm": 0.8136112669022519,
      "learning_rate": 2.569873314312712e-06,
      "loss": 0.1136,
      "step": 23025
    },
    {
      "epoch": 0.6717428087986463,
      "grad_norm": 0.7260969938467134,
      "learning_rate": 2.5694604438250697e-06,
      "loss": 0.1245,
      "step": 23026
    },
    {
      "epoch": 0.6717719820292899,
      "grad_norm": 0.9505974670743264,
      "learning_rate": 2.5690475950367035e-06,
      "loss": 0.1424,
      "step": 23027
    },
    {
      "epoch": 0.6718011552599334,
      "grad_norm": 0.8703509019340886,
      "learning_rate": 2.5686347679513013e-06,
      "loss": 0.1247,
      "step": 23028
    },
    {
      "epoch": 0.671830328490577,
      "grad_norm": 0.9355258205340619,
      "learning_rate": 2.5682219625725456e-06,
      "loss": 0.1345,
      "step": 23029
    },
    {
      "epoch": 0.6718595017212206,
      "grad_norm": 1.1087803126689815,
      "learning_rate": 2.5678091789041258e-06,
      "loss": 0.1325,
      "step": 23030
    },
    {
      "epoch": 0.6718886749518642,
      "grad_norm": 0.7716792466995319,
      "learning_rate": 2.5673964169497233e-06,
      "loss": 0.1189,
      "step": 23031
    },
    {
      "epoch": 0.6719178481825078,
      "grad_norm": 0.9071085343732047,
      "learning_rate": 2.5669836767130266e-06,
      "loss": 0.0846,
      "step": 23032
    },
    {
      "epoch": 0.6719470214131513,
      "grad_norm": 0.7954196170911573,
      "learning_rate": 2.5665709581977195e-06,
      "loss": 0.1092,
      "step": 23033
    },
    {
      "epoch": 0.6719761946437949,
      "grad_norm": 0.7621613749477639,
      "learning_rate": 2.566158261407483e-06,
      "loss": 0.122,
      "step": 23034
    },
    {
      "epoch": 0.6720053678744384,
      "grad_norm": 0.7136243545915051,
      "learning_rate": 2.565745586346005e-06,
      "loss": 0.105,
      "step": 23035
    },
    {
      "epoch": 0.672034541105082,
      "grad_norm": 0.8073084554454861,
      "learning_rate": 2.5653329330169713e-06,
      "loss": 0.1144,
      "step": 23036
    },
    {
      "epoch": 0.6720637143357255,
      "grad_norm": 1.0988537169193622,
      "learning_rate": 2.564920301424062e-06,
      "loss": 0.1342,
      "step": 23037
    },
    {
      "epoch": 0.6720928875663691,
      "grad_norm": 1.8690672441944067,
      "learning_rate": 2.5645076915709644e-06,
      "loss": 0.1218,
      "step": 23038
    },
    {
      "epoch": 0.6721220607970126,
      "grad_norm": 0.9031188759708206,
      "learning_rate": 2.5640951034613613e-06,
      "loss": 0.1322,
      "step": 23039
    },
    {
      "epoch": 0.6721512340276562,
      "grad_norm": 0.913525689768466,
      "learning_rate": 2.5636825370989336e-06,
      "loss": 0.1274,
      "step": 23040
    },
    {
      "epoch": 0.6721804072582998,
      "grad_norm": 0.8978105971050229,
      "learning_rate": 2.5632699924873667e-06,
      "loss": 0.1275,
      "step": 23041
    },
    {
      "epoch": 0.6722095804889433,
      "grad_norm": 0.8385083518811937,
      "learning_rate": 2.5628574696303452e-06,
      "loss": 0.1061,
      "step": 23042
    },
    {
      "epoch": 0.6722387537195869,
      "grad_norm": 0.7681185201675401,
      "learning_rate": 2.562444968531551e-06,
      "loss": 0.1372,
      "step": 23043
    },
    {
      "epoch": 0.6722679269502305,
      "grad_norm": 0.9136671236931833,
      "learning_rate": 2.5620324891946636e-06,
      "loss": 0.1218,
      "step": 23044
    },
    {
      "epoch": 0.6722971001808741,
      "grad_norm": 0.9791057087025754,
      "learning_rate": 2.5616200316233706e-06,
      "loss": 0.1517,
      "step": 23045
    },
    {
      "epoch": 0.6723262734115176,
      "grad_norm": 0.9491180681649855,
      "learning_rate": 2.5612075958213516e-06,
      "loss": 0.1194,
      "step": 23046
    },
    {
      "epoch": 0.6723554466421612,
      "grad_norm": 0.8517097631766665,
      "learning_rate": 2.560795181792285e-06,
      "loss": 0.1548,
      "step": 23047
    },
    {
      "epoch": 0.6723846198728047,
      "grad_norm": 0.816692508907859,
      "learning_rate": 2.5603827895398613e-06,
      "loss": 0.1373,
      "step": 23048
    },
    {
      "epoch": 0.6724137931034483,
      "grad_norm": 1.037973862130898,
      "learning_rate": 2.5599704190677567e-06,
      "loss": 0.1269,
      "step": 23049
    },
    {
      "epoch": 0.6724429663340918,
      "grad_norm": 0.8269344422895314,
      "learning_rate": 2.5595580703796526e-06,
      "loss": 0.1118,
      "step": 23050
    },
    {
      "epoch": 0.6724721395647354,
      "grad_norm": 0.7313766193411093,
      "learning_rate": 2.5591457434792332e-06,
      "loss": 0.1201,
      "step": 23051
    },
    {
      "epoch": 0.672501312795379,
      "grad_norm": 1.0086293225702918,
      "learning_rate": 2.5587334383701777e-06,
      "loss": 0.133,
      "step": 23052
    },
    {
      "epoch": 0.6725304860260225,
      "grad_norm": 0.8973611634538674,
      "learning_rate": 2.5583211550561654e-06,
      "loss": 0.1119,
      "step": 23053
    },
    {
      "epoch": 0.672559659256666,
      "grad_norm": 1.1141740133744749,
      "learning_rate": 2.5579088935408793e-06,
      "loss": 0.1111,
      "step": 23054
    },
    {
      "epoch": 0.6725888324873096,
      "grad_norm": 0.8410195825188868,
      "learning_rate": 2.557496653828001e-06,
      "loss": 0.117,
      "step": 23055
    },
    {
      "epoch": 0.6726180057179532,
      "grad_norm": 0.915494174963484,
      "learning_rate": 2.5570844359212098e-06,
      "loss": 0.1195,
      "step": 23056
    },
    {
      "epoch": 0.6726471789485967,
      "grad_norm": 0.7680153816881505,
      "learning_rate": 2.556672239824183e-06,
      "loss": 0.1282,
      "step": 23057
    },
    {
      "epoch": 0.6726763521792404,
      "grad_norm": 0.8294072737514747,
      "learning_rate": 2.556260065540606e-06,
      "loss": 0.1017,
      "step": 23058
    },
    {
      "epoch": 0.6727055254098839,
      "grad_norm": 1.1684656397570399,
      "learning_rate": 2.5558479130741537e-06,
      "loss": 0.1243,
      "step": 23059
    },
    {
      "epoch": 0.6727346986405275,
      "grad_norm": 0.7347464654050591,
      "learning_rate": 2.555435782428509e-06,
      "loss": 0.1146,
      "step": 23060
    },
    {
      "epoch": 0.672763871871171,
      "grad_norm": 0.8680076540846722,
      "learning_rate": 2.555023673607349e-06,
      "loss": 0.1244,
      "step": 23061
    },
    {
      "epoch": 0.6727930451018146,
      "grad_norm": 0.8891559959126526,
      "learning_rate": 2.5546115866143555e-06,
      "loss": 0.1412,
      "step": 23062
    },
    {
      "epoch": 0.6728222183324581,
      "grad_norm": 0.9379800251188479,
      "learning_rate": 2.5541995214532066e-06,
      "loss": 0.11,
      "step": 23063
    },
    {
      "epoch": 0.6728513915631017,
      "grad_norm": 0.9374131690397216,
      "learning_rate": 2.5537874781275777e-06,
      "loss": 0.1059,
      "step": 23064
    },
    {
      "epoch": 0.6728805647937453,
      "grad_norm": 0.9932545360671732,
      "learning_rate": 2.5533754566411505e-06,
      "loss": 0.1339,
      "step": 23065
    },
    {
      "epoch": 0.6729097380243888,
      "grad_norm": 1.0481182700216478,
      "learning_rate": 2.5529634569976053e-06,
      "loss": 0.1207,
      "step": 23066
    },
    {
      "epoch": 0.6729389112550324,
      "grad_norm": 0.8243975726970814,
      "learning_rate": 2.552551479200616e-06,
      "loss": 0.1338,
      "step": 23067
    },
    {
      "epoch": 0.6729680844856759,
      "grad_norm": 0.7311252626071658,
      "learning_rate": 2.5521395232538647e-06,
      "loss": 0.0994,
      "step": 23068
    },
    {
      "epoch": 0.6729972577163195,
      "grad_norm": 0.8570503512909122,
      "learning_rate": 2.5517275891610283e-06,
      "loss": 0.1192,
      "step": 23069
    },
    {
      "epoch": 0.673026430946963,
      "grad_norm": 0.9790333521513247,
      "learning_rate": 2.551315676925781e-06,
      "loss": 0.122,
      "step": 23070
    },
    {
      "epoch": 0.6730556041776067,
      "grad_norm": 1.162734332338707,
      "learning_rate": 2.5509037865518026e-06,
      "loss": 0.1191,
      "step": 23071
    },
    {
      "epoch": 0.6730847774082502,
      "grad_norm": 0.8244726491109521,
      "learning_rate": 2.5504919180427723e-06,
      "loss": 0.1099,
      "step": 23072
    },
    {
      "epoch": 0.6731139506388938,
      "grad_norm": 0.906523145815668,
      "learning_rate": 2.5500800714023654e-06,
      "loss": 0.1252,
      "step": 23073
    },
    {
      "epoch": 0.6731431238695373,
      "grad_norm": 1.21337257274489,
      "learning_rate": 2.5496682466342576e-06,
      "loss": 0.1236,
      "step": 23074
    },
    {
      "epoch": 0.6731722971001809,
      "grad_norm": 1.0699147048227882,
      "learning_rate": 2.5492564437421287e-06,
      "loss": 0.11,
      "step": 23075
    },
    {
      "epoch": 0.6732014703308244,
      "grad_norm": 0.7315953389906471,
      "learning_rate": 2.5488446627296525e-06,
      "loss": 0.1431,
      "step": 23076
    },
    {
      "epoch": 0.673230643561468,
      "grad_norm": 1.1274509195859086,
      "learning_rate": 2.5484329036005024e-06,
      "loss": 0.1323,
      "step": 23077
    },
    {
      "epoch": 0.6732598167921116,
      "grad_norm": 1.2727683190101309,
      "learning_rate": 2.548021166358362e-06,
      "loss": 0.1141,
      "step": 23078
    },
    {
      "epoch": 0.6732889900227551,
      "grad_norm": 0.8429130965203371,
      "learning_rate": 2.5476094510069025e-06,
      "loss": 0.1226,
      "step": 23079
    },
    {
      "epoch": 0.6733181632533987,
      "grad_norm": 0.7867535761335129,
      "learning_rate": 2.5471977575497995e-06,
      "loss": 0.1034,
      "step": 23080
    },
    {
      "epoch": 0.6733473364840422,
      "grad_norm": 0.7939388142724333,
      "learning_rate": 2.5467860859907314e-06,
      "loss": 0.1424,
      "step": 23081
    },
    {
      "epoch": 0.6733765097146858,
      "grad_norm": 0.9056405970328766,
      "learning_rate": 2.546374436333371e-06,
      "loss": 0.1102,
      "step": 23082
    },
    {
      "epoch": 0.6734056829453293,
      "grad_norm": 0.9434431303885463,
      "learning_rate": 2.5459628085813924e-06,
      "loss": 0.1173,
      "step": 23083
    },
    {
      "epoch": 0.6734348561759729,
      "grad_norm": 0.802263753656373,
      "learning_rate": 2.5455512027384717e-06,
      "loss": 0.1192,
      "step": 23084
    },
    {
      "epoch": 0.6734640294066165,
      "grad_norm": 1.0040014999455318,
      "learning_rate": 2.5451396188082853e-06,
      "loss": 0.1215,
      "step": 23085
    },
    {
      "epoch": 0.6734932026372601,
      "grad_norm": 0.9112107723131383,
      "learning_rate": 2.5447280567945077e-06,
      "loss": 0.1028,
      "step": 23086
    },
    {
      "epoch": 0.6735223758679036,
      "grad_norm": 0.9070384194223957,
      "learning_rate": 2.544316516700809e-06,
      "loss": 0.1201,
      "step": 23087
    },
    {
      "epoch": 0.6735515490985472,
      "grad_norm": 0.8828354313074755,
      "learning_rate": 2.543904998530868e-06,
      "loss": 0.1157,
      "step": 23088
    },
    {
      "epoch": 0.6735807223291908,
      "grad_norm": 0.9397248081116105,
      "learning_rate": 2.5434935022883557e-06,
      "loss": 0.0965,
      "step": 23089
    },
    {
      "epoch": 0.6736098955598343,
      "grad_norm": 0.8354630477146472,
      "learning_rate": 2.5430820279769487e-06,
      "loss": 0.1424,
      "step": 23090
    },
    {
      "epoch": 0.6736390687904779,
      "grad_norm": 0.9887642570087085,
      "learning_rate": 2.5426705756003167e-06,
      "loss": 0.1349,
      "step": 23091
    },
    {
      "epoch": 0.6736682420211214,
      "grad_norm": 0.9275770073484282,
      "learning_rate": 2.542259145162137e-06,
      "loss": 0.1225,
      "step": 23092
    },
    {
      "epoch": 0.673697415251765,
      "grad_norm": 0.792629227694484,
      "learning_rate": 2.5418477366660808e-06,
      "loss": 0.1283,
      "step": 23093
    },
    {
      "epoch": 0.6737265884824085,
      "grad_norm": 0.8784732543703531,
      "learning_rate": 2.54143635011582e-06,
      "loss": 0.1232,
      "step": 23094
    },
    {
      "epoch": 0.6737557617130521,
      "grad_norm": 0.9103302091094058,
      "learning_rate": 2.541024985515028e-06,
      "loss": 0.1427,
      "step": 23095
    },
    {
      "epoch": 0.6737849349436956,
      "grad_norm": 0.8114707191285135,
      "learning_rate": 2.54061364286738e-06,
      "loss": 0.1133,
      "step": 23096
    },
    {
      "epoch": 0.6738141081743392,
      "grad_norm": 0.8646502927639343,
      "learning_rate": 2.540202322176544e-06,
      "loss": 0.1179,
      "step": 23097
    },
    {
      "epoch": 0.6738432814049828,
      "grad_norm": 1.0437988076601405,
      "learning_rate": 2.539791023446197e-06,
      "loss": 0.1272,
      "step": 23098
    },
    {
      "epoch": 0.6738724546356264,
      "grad_norm": 0.8614424288493707,
      "learning_rate": 2.5393797466800084e-06,
      "loss": 0.1208,
      "step": 23099
    },
    {
      "epoch": 0.67390162786627,
      "grad_norm": 1.023278228041395,
      "learning_rate": 2.5389684918816477e-06,
      "loss": 0.1463,
      "step": 23100
    },
    {
      "epoch": 0.6739308010969135,
      "grad_norm": 1.0339157199520221,
      "learning_rate": 2.5385572590547893e-06,
      "loss": 0.1144,
      "step": 23101
    },
    {
      "epoch": 0.673959974327557,
      "grad_norm": 1.023955827056682,
      "learning_rate": 2.538146048203105e-06,
      "loss": 0.1308,
      "step": 23102
    },
    {
      "epoch": 0.6739891475582006,
      "grad_norm": 0.8152534885073576,
      "learning_rate": 2.5377348593302664e-06,
      "loss": 0.1205,
      "step": 23103
    },
    {
      "epoch": 0.6740183207888442,
      "grad_norm": 0.7159288195277267,
      "learning_rate": 2.5373236924399402e-06,
      "loss": 0.1116,
      "step": 23104
    },
    {
      "epoch": 0.6740474940194877,
      "grad_norm": 0.8480227129254346,
      "learning_rate": 2.5369125475358027e-06,
      "loss": 0.121,
      "step": 23105
    },
    {
      "epoch": 0.6740766672501313,
      "grad_norm": 0.8506173762262046,
      "learning_rate": 2.536501424621522e-06,
      "loss": 0.1289,
      "step": 23106
    },
    {
      "epoch": 0.6741058404807748,
      "grad_norm": 0.7761814065000823,
      "learning_rate": 2.5360903237007647e-06,
      "loss": 0.1193,
      "step": 23107
    },
    {
      "epoch": 0.6741350137114184,
      "grad_norm": 0.7300850631860111,
      "learning_rate": 2.5356792447772084e-06,
      "loss": 0.1126,
      "step": 23108
    },
    {
      "epoch": 0.6741641869420619,
      "grad_norm": 0.7781441208719955,
      "learning_rate": 2.5352681878545195e-06,
      "loss": 0.1141,
      "step": 23109
    },
    {
      "epoch": 0.6741933601727055,
      "grad_norm": 0.8628981366748527,
      "learning_rate": 2.5348571529363658e-06,
      "loss": 0.1056,
      "step": 23110
    },
    {
      "epoch": 0.674222533403349,
      "grad_norm": 0.8131652804182944,
      "learning_rate": 2.534446140026421e-06,
      "loss": 0.1397,
      "step": 23111
    },
    {
      "epoch": 0.6742517066339927,
      "grad_norm": 1.1178345651148511,
      "learning_rate": 2.5340351491283527e-06,
      "loss": 0.141,
      "step": 23112
    },
    {
      "epoch": 0.6742808798646363,
      "grad_norm": 1.02264296395628,
      "learning_rate": 2.5336241802458283e-06,
      "loss": 0.1127,
      "step": 23113
    },
    {
      "epoch": 0.6743100530952798,
      "grad_norm": 0.8421944935825467,
      "learning_rate": 2.5332132333825177e-06,
      "loss": 0.1017,
      "step": 23114
    },
    {
      "epoch": 0.6743392263259234,
      "grad_norm": 0.9686863631015941,
      "learning_rate": 2.5328023085420926e-06,
      "loss": 0.1251,
      "step": 23115
    },
    {
      "epoch": 0.6743683995565669,
      "grad_norm": 0.9521066364827775,
      "learning_rate": 2.5323914057282194e-06,
      "loss": 0.1168,
      "step": 23116
    },
    {
      "epoch": 0.6743975727872105,
      "grad_norm": 0.9256275405216868,
      "learning_rate": 2.531980524944565e-06,
      "loss": 0.1321,
      "step": 23117
    },
    {
      "epoch": 0.674426746017854,
      "grad_norm": 1.1994132199038094,
      "learning_rate": 2.531569666194802e-06,
      "loss": 0.1153,
      "step": 23118
    },
    {
      "epoch": 0.6744559192484976,
      "grad_norm": 0.9828038265013251,
      "learning_rate": 2.531158829482593e-06,
      "loss": 0.1283,
      "step": 23119
    },
    {
      "epoch": 0.6744850924791411,
      "grad_norm": 1.0409838458988905,
      "learning_rate": 2.5307480148116105e-06,
      "loss": 0.1171,
      "step": 23120
    },
    {
      "epoch": 0.6745142657097847,
      "grad_norm": 1.014336557559262,
      "learning_rate": 2.5303372221855183e-06,
      "loss": 0.1108,
      "step": 23121
    },
    {
      "epoch": 0.6745434389404282,
      "grad_norm": 0.7821874423805307,
      "learning_rate": 2.529926451607988e-06,
      "loss": 0.1219,
      "step": 23122
    },
    {
      "epoch": 0.6745726121710718,
      "grad_norm": 0.7416077641116059,
      "learning_rate": 2.5295157030826844e-06,
      "loss": 0.1171,
      "step": 23123
    },
    {
      "epoch": 0.6746017854017153,
      "grad_norm": 2.0238546409117313,
      "learning_rate": 2.529104976613273e-06,
      "loss": 0.1265,
      "step": 23124
    },
    {
      "epoch": 0.674630958632359,
      "grad_norm": 0.8823217553091093,
      "learning_rate": 2.5286942722034226e-06,
      "loss": 0.1006,
      "step": 23125
    },
    {
      "epoch": 0.6746601318630026,
      "grad_norm": 1.0362855400209419,
      "learning_rate": 2.5282835898568014e-06,
      "loss": 0.1182,
      "step": 23126
    },
    {
      "epoch": 0.6746893050936461,
      "grad_norm": 0.8452556499659798,
      "learning_rate": 2.5278729295770733e-06,
      "loss": 0.1147,
      "step": 23127
    },
    {
      "epoch": 0.6747184783242897,
      "grad_norm": 1.0348333314274814,
      "learning_rate": 2.5274622913679063e-06,
      "loss": 0.107,
      "step": 23128
    },
    {
      "epoch": 0.6747476515549332,
      "grad_norm": 0.7822383658747245,
      "learning_rate": 2.5270516752329667e-06,
      "loss": 0.1183,
      "step": 23129
    },
    {
      "epoch": 0.6747768247855768,
      "grad_norm": 1.1259794431640837,
      "learning_rate": 2.526641081175917e-06,
      "loss": 0.1284,
      "step": 23130
    },
    {
      "epoch": 0.6748059980162203,
      "grad_norm": 1.5440868563599224,
      "learning_rate": 2.5262305092004246e-06,
      "loss": 0.1159,
      "step": 23131
    },
    {
      "epoch": 0.6748351712468639,
      "grad_norm": 0.9044928359421482,
      "learning_rate": 2.5258199593101586e-06,
      "loss": 0.1301,
      "step": 23132
    },
    {
      "epoch": 0.6748643444775074,
      "grad_norm": 0.7338884647055344,
      "learning_rate": 2.5254094315087814e-06,
      "loss": 0.1256,
      "step": 23133
    },
    {
      "epoch": 0.674893517708151,
      "grad_norm": 1.3777881612596148,
      "learning_rate": 2.524998925799956e-06,
      "loss": 0.1367,
      "step": 23134
    },
    {
      "epoch": 0.6749226909387945,
      "grad_norm": 1.0969808987305798,
      "learning_rate": 2.5245884421873507e-06,
      "loss": 0.1078,
      "step": 23135
    },
    {
      "epoch": 0.6749518641694381,
      "grad_norm": 0.8023092456942991,
      "learning_rate": 2.524177980674629e-06,
      "loss": 0.1297,
      "step": 23136
    },
    {
      "epoch": 0.6749810374000816,
      "grad_norm": 0.9949631306497734,
      "learning_rate": 2.523767541265452e-06,
      "loss": 0.1406,
      "step": 23137
    },
    {
      "epoch": 0.6750102106307252,
      "grad_norm": 1.4188378041600502,
      "learning_rate": 2.523357123963491e-06,
      "loss": 0.1247,
      "step": 23138
    },
    {
      "epoch": 0.6750393838613689,
      "grad_norm": 0.7450129807835786,
      "learning_rate": 2.5229467287724065e-06,
      "loss": 0.1013,
      "step": 23139
    },
    {
      "epoch": 0.6750685570920124,
      "grad_norm": 0.906348749249676,
      "learning_rate": 2.5225363556958594e-06,
      "loss": 0.1029,
      "step": 23140
    },
    {
      "epoch": 0.675097730322656,
      "grad_norm": 0.8819105263323815,
      "learning_rate": 2.522126004737519e-06,
      "loss": 0.1214,
      "step": 23141
    },
    {
      "epoch": 0.6751269035532995,
      "grad_norm": 0.9888744549332726,
      "learning_rate": 2.521715675901046e-06,
      "loss": 0.1137,
      "step": 23142
    },
    {
      "epoch": 0.6751560767839431,
      "grad_norm": 1.0263899287360436,
      "learning_rate": 2.521305369190102e-06,
      "loss": 0.1259,
      "step": 23143
    },
    {
      "epoch": 0.6751852500145866,
      "grad_norm": 0.7178879697205601,
      "learning_rate": 2.520895084608351e-06,
      "loss": 0.1275,
      "step": 23144
    },
    {
      "epoch": 0.6752144232452302,
      "grad_norm": 1.0230194022667145,
      "learning_rate": 2.5204848221594604e-06,
      "loss": 0.1074,
      "step": 23145
    },
    {
      "epoch": 0.6752435964758737,
      "grad_norm": 0.7791910482511467,
      "learning_rate": 2.5200745818470883e-06,
      "loss": 0.104,
      "step": 23146
    },
    {
      "epoch": 0.6752727697065173,
      "grad_norm": 0.9135505313843247,
      "learning_rate": 2.519664363674897e-06,
      "loss": 0.1194,
      "step": 23147
    },
    {
      "epoch": 0.6753019429371608,
      "grad_norm": 0.9800595424980734,
      "learning_rate": 2.519254167646552e-06,
      "loss": 0.1422,
      "step": 23148
    },
    {
      "epoch": 0.6753311161678044,
      "grad_norm": 0.9179820691529096,
      "learning_rate": 2.518843993765711e-06,
      "loss": 0.1163,
      "step": 23149
    },
    {
      "epoch": 0.6753602893984479,
      "grad_norm": 0.8089301134893563,
      "learning_rate": 2.518433842036041e-06,
      "loss": 0.1203,
      "step": 23150
    },
    {
      "epoch": 0.6753894626290915,
      "grad_norm": 0.9199195743294921,
      "learning_rate": 2.5180237124611996e-06,
      "loss": 0.1134,
      "step": 23151
    },
    {
      "epoch": 0.6754186358597352,
      "grad_norm": 1.1399409120340327,
      "learning_rate": 2.517613605044851e-06,
      "loss": 0.1124,
      "step": 23152
    },
    {
      "epoch": 0.6754478090903787,
      "grad_norm": 0.9247520371924938,
      "learning_rate": 2.5172035197906565e-06,
      "loss": 0.1116,
      "step": 23153
    },
    {
      "epoch": 0.6754769823210223,
      "grad_norm": 1.4068196431459592,
      "learning_rate": 2.516793456702274e-06,
      "loss": 0.1515,
      "step": 23154
    },
    {
      "epoch": 0.6755061555516658,
      "grad_norm": 0.8356682916380015,
      "learning_rate": 2.516383415783367e-06,
      "loss": 0.1261,
      "step": 23155
    },
    {
      "epoch": 0.6755353287823094,
      "grad_norm": 0.8440280544594345,
      "learning_rate": 2.5159733970375978e-06,
      "loss": 0.1133,
      "step": 23156
    },
    {
      "epoch": 0.6755645020129529,
      "grad_norm": 0.7475092168729427,
      "learning_rate": 2.5155634004686226e-06,
      "loss": 0.1201,
      "step": 23157
    },
    {
      "epoch": 0.6755936752435965,
      "grad_norm": 0.8433739199698794,
      "learning_rate": 2.5151534260801068e-06,
      "loss": 0.1226,
      "step": 23158
    },
    {
      "epoch": 0.67562284847424,
      "grad_norm": 0.8682621054044367,
      "learning_rate": 2.5147434738757074e-06,
      "loss": 0.1373,
      "step": 23159
    },
    {
      "epoch": 0.6756520217048836,
      "grad_norm": 0.8544899021696603,
      "learning_rate": 2.5143335438590837e-06,
      "loss": 0.1284,
      "step": 23160
    },
    {
      "epoch": 0.6756811949355271,
      "grad_norm": 0.7681728523315022,
      "learning_rate": 2.513923636033897e-06,
      "loss": 0.1035,
      "step": 23161
    },
    {
      "epoch": 0.6757103681661707,
      "grad_norm": 0.7930441141389791,
      "learning_rate": 2.5135137504038076e-06,
      "loss": 0.1314,
      "step": 23162
    },
    {
      "epoch": 0.6757395413968142,
      "grad_norm": 0.8554197424505521,
      "learning_rate": 2.5131038869724743e-06,
      "loss": 0.104,
      "step": 23163
    },
    {
      "epoch": 0.6757687146274578,
      "grad_norm": 1.0880113516953294,
      "learning_rate": 2.5126940457435543e-06,
      "loss": 0.1251,
      "step": 23164
    },
    {
      "epoch": 0.6757978878581014,
      "grad_norm": 0.8938954356306851,
      "learning_rate": 2.5122842267207092e-06,
      "loss": 0.0999,
      "step": 23165
    },
    {
      "epoch": 0.675827061088745,
      "grad_norm": 0.8132571370356921,
      "learning_rate": 2.511874429907597e-06,
      "loss": 0.1107,
      "step": 23166
    },
    {
      "epoch": 0.6758562343193886,
      "grad_norm": 1.1041847456088503,
      "learning_rate": 2.5114646553078726e-06,
      "loss": 0.1412,
      "step": 23167
    },
    {
      "epoch": 0.6758854075500321,
      "grad_norm": 0.9164250587438241,
      "learning_rate": 2.5110549029252006e-06,
      "loss": 0.117,
      "step": 23168
    },
    {
      "epoch": 0.6759145807806757,
      "grad_norm": 0.7557817034171268,
      "learning_rate": 2.5106451727632374e-06,
      "loss": 0.1219,
      "step": 23169
    },
    {
      "epoch": 0.6759437540113192,
      "grad_norm": 0.8457012666065661,
      "learning_rate": 2.5102354648256373e-06,
      "loss": 0.1018,
      "step": 23170
    },
    {
      "epoch": 0.6759729272419628,
      "grad_norm": 1.0266255610670945,
      "learning_rate": 2.5098257791160623e-06,
      "loss": 0.1123,
      "step": 23171
    },
    {
      "epoch": 0.6760021004726063,
      "grad_norm": 0.7980441916870344,
      "learning_rate": 2.509416115638169e-06,
      "loss": 0.1038,
      "step": 23172
    },
    {
      "epoch": 0.6760312737032499,
      "grad_norm": 0.8657842501474882,
      "learning_rate": 2.509006474395612e-06,
      "loss": 0.1331,
      "step": 23173
    },
    {
      "epoch": 0.6760604469338934,
      "grad_norm": 0.8752634771948503,
      "learning_rate": 2.5085968553920498e-06,
      "loss": 0.1236,
      "step": 23174
    },
    {
      "epoch": 0.676089620164537,
      "grad_norm": 1.3284849313228773,
      "learning_rate": 2.508187258631143e-06,
      "loss": 0.1306,
      "step": 23175
    },
    {
      "epoch": 0.6761187933951806,
      "grad_norm": 0.7788279697705117,
      "learning_rate": 2.507777684116545e-06,
      "loss": 0.118,
      "step": 23176
    },
    {
      "epoch": 0.6761479666258241,
      "grad_norm": 0.8955701524584327,
      "learning_rate": 2.5073681318519106e-06,
      "loss": 0.1276,
      "step": 23177
    },
    {
      "epoch": 0.6761771398564677,
      "grad_norm": 0.8405903324101602,
      "learning_rate": 2.506958601840901e-06,
      "loss": 0.1032,
      "step": 23178
    },
    {
      "epoch": 0.6762063130871113,
      "grad_norm": 0.8614963663971591,
      "learning_rate": 2.5065490940871674e-06,
      "loss": 0.109,
      "step": 23179
    },
    {
      "epoch": 0.6762354863177549,
      "grad_norm": 0.7247493205722442,
      "learning_rate": 2.50613960859437e-06,
      "loss": 0.1139,
      "step": 23180
    },
    {
      "epoch": 0.6762646595483984,
      "grad_norm": 0.94560884041923,
      "learning_rate": 2.505730145366162e-06,
      "loss": 0.1227,
      "step": 23181
    },
    {
      "epoch": 0.676293832779042,
      "grad_norm": 0.6935816823347132,
      "learning_rate": 2.505320704406201e-06,
      "loss": 0.0908,
      "step": 23182
    },
    {
      "epoch": 0.6763230060096855,
      "grad_norm": 0.992465071574614,
      "learning_rate": 2.5049112857181413e-06,
      "loss": 0.1393,
      "step": 23183
    },
    {
      "epoch": 0.6763521792403291,
      "grad_norm": 0.7836895910781367,
      "learning_rate": 2.504501889305636e-06,
      "loss": 0.1205,
      "step": 23184
    },
    {
      "epoch": 0.6763813524709726,
      "grad_norm": 0.8185898009347492,
      "learning_rate": 2.5040925151723428e-06,
      "loss": 0.1215,
      "step": 23185
    },
    {
      "epoch": 0.6764105257016162,
      "grad_norm": 0.8721580626868752,
      "learning_rate": 2.5036831633219173e-06,
      "loss": 0.1138,
      "step": 23186
    },
    {
      "epoch": 0.6764396989322597,
      "grad_norm": 0.8459028226000596,
      "learning_rate": 2.5032738337580107e-06,
      "loss": 0.1358,
      "step": 23187
    },
    {
      "epoch": 0.6764688721629033,
      "grad_norm": 0.8127755401220963,
      "learning_rate": 2.502864526484281e-06,
      "loss": 0.1182,
      "step": 23188
    },
    {
      "epoch": 0.6764980453935469,
      "grad_norm": 0.6487232810150235,
      "learning_rate": 2.5024552415043805e-06,
      "loss": 0.1226,
      "step": 23189
    },
    {
      "epoch": 0.6765272186241904,
      "grad_norm": 0.7757210954520243,
      "learning_rate": 2.502045978821962e-06,
      "loss": 0.1428,
      "step": 23190
    },
    {
      "epoch": 0.676556391854834,
      "grad_norm": 1.1467945440571865,
      "learning_rate": 2.5016367384406803e-06,
      "loss": 0.1327,
      "step": 23191
    },
    {
      "epoch": 0.6765855650854775,
      "grad_norm": 1.2988519775489853,
      "learning_rate": 2.5012275203641917e-06,
      "loss": 0.1091,
      "step": 23192
    },
    {
      "epoch": 0.6766147383161212,
      "grad_norm": 0.8683144536525187,
      "learning_rate": 2.500818324596147e-06,
      "loss": 0.1483,
      "step": 23193
    },
    {
      "epoch": 0.6766439115467647,
      "grad_norm": 0.7776973811963013,
      "learning_rate": 2.500409151140198e-06,
      "loss": 0.129,
      "step": 23194
    },
    {
      "epoch": 0.6766730847774083,
      "grad_norm": 1.0010403591909511,
      "learning_rate": 2.5000000000000015e-06,
      "loss": 0.116,
      "step": 23195
    },
    {
      "epoch": 0.6767022580080518,
      "grad_norm": 0.7641616983467394,
      "learning_rate": 2.4995908711792057e-06,
      "loss": 0.1117,
      "step": 23196
    },
    {
      "epoch": 0.6767314312386954,
      "grad_norm": 0.8650166714865422,
      "learning_rate": 2.499181764681466e-06,
      "loss": 0.1278,
      "step": 23197
    },
    {
      "epoch": 0.6767606044693389,
      "grad_norm": 0.7524820034499831,
      "learning_rate": 2.498772680510436e-06,
      "loss": 0.1183,
      "step": 23198
    },
    {
      "epoch": 0.6767897776999825,
      "grad_norm": 0.9294925244917861,
      "learning_rate": 2.498363618669767e-06,
      "loss": 0.1315,
      "step": 23199
    },
    {
      "epoch": 0.676818950930626,
      "grad_norm": 0.8821962518874622,
      "learning_rate": 2.497954579163108e-06,
      "loss": 0.1318,
      "step": 23200
    },
    {
      "epoch": 0.6768481241612696,
      "grad_norm": 0.8173712125778281,
      "learning_rate": 2.4975455619941158e-06,
      "loss": 0.1093,
      "step": 23201
    },
    {
      "epoch": 0.6768772973919132,
      "grad_norm": 0.8654358545193849,
      "learning_rate": 2.4971365671664373e-06,
      "loss": 0.1112,
      "step": 23202
    },
    {
      "epoch": 0.6769064706225567,
      "grad_norm": 1.0899199397860233,
      "learning_rate": 2.4967275946837276e-06,
      "loss": 0.1514,
      "step": 23203
    },
    {
      "epoch": 0.6769356438532003,
      "grad_norm": 0.901102202281327,
      "learning_rate": 2.496318644549635e-06,
      "loss": 0.1279,
      "step": 23204
    },
    {
      "epoch": 0.6769648170838438,
      "grad_norm": 0.8632830006164858,
      "learning_rate": 2.4959097167678135e-06,
      "loss": 0.1291,
      "step": 23205
    },
    {
      "epoch": 0.6769939903144875,
      "grad_norm": 1.075004156894414,
      "learning_rate": 2.495500811341912e-06,
      "loss": 0.1289,
      "step": 23206
    },
    {
      "epoch": 0.677023163545131,
      "grad_norm": 1.0066407992973547,
      "learning_rate": 2.4950919282755796e-06,
      "loss": 0.1234,
      "step": 23207
    },
    {
      "epoch": 0.6770523367757746,
      "grad_norm": 1.9631922248190352,
      "learning_rate": 2.4946830675724694e-06,
      "loss": 0.1149,
      "step": 23208
    },
    {
      "epoch": 0.6770815100064181,
      "grad_norm": 1.09266457228653,
      "learning_rate": 2.4942742292362316e-06,
      "loss": 0.1295,
      "step": 23209
    },
    {
      "epoch": 0.6771106832370617,
      "grad_norm": 1.2044147096693798,
      "learning_rate": 2.4938654132705154e-06,
      "loss": 0.1139,
      "step": 23210
    },
    {
      "epoch": 0.6771398564677052,
      "grad_norm": 0.6787450334175764,
      "learning_rate": 2.4934566196789687e-06,
      "loss": 0.1176,
      "step": 23211
    },
    {
      "epoch": 0.6771690296983488,
      "grad_norm": 1.0005079052068275,
      "learning_rate": 2.4930478484652447e-06,
      "loss": 0.1224,
      "step": 23212
    },
    {
      "epoch": 0.6771982029289924,
      "grad_norm": 0.9564861776070009,
      "learning_rate": 2.4926390996329912e-06,
      "loss": 0.1349,
      "step": 23213
    },
    {
      "epoch": 0.6772273761596359,
      "grad_norm": 0.9929649457165906,
      "learning_rate": 2.492230373185854e-06,
      "loss": 0.115,
      "step": 23214
    },
    {
      "epoch": 0.6772565493902795,
      "grad_norm": 0.9268754442125966,
      "learning_rate": 2.4918216691274888e-06,
      "loss": 0.1324,
      "step": 23215
    },
    {
      "epoch": 0.677285722620923,
      "grad_norm": 1.1756847446121403,
      "learning_rate": 2.4914129874615404e-06,
      "loss": 0.1494,
      "step": 23216
    },
    {
      "epoch": 0.6773148958515666,
      "grad_norm": 1.383475103297312,
      "learning_rate": 2.491004328191657e-06,
      "loss": 0.1198,
      "step": 23217
    },
    {
      "epoch": 0.6773440690822101,
      "grad_norm": 1.0030078487186729,
      "learning_rate": 2.4905956913214897e-06,
      "loss": 0.1442,
      "step": 23218
    },
    {
      "epoch": 0.6773732423128537,
      "grad_norm": 0.7002481196687629,
      "learning_rate": 2.4901870768546842e-06,
      "loss": 0.117,
      "step": 23219
    },
    {
      "epoch": 0.6774024155434973,
      "grad_norm": 0.97035648522706,
      "learning_rate": 2.4897784847948885e-06,
      "loss": 0.1112,
      "step": 23220
    },
    {
      "epoch": 0.6774315887741409,
      "grad_norm": 0.9056461538362122,
      "learning_rate": 2.4893699151457507e-06,
      "loss": 0.1136,
      "step": 23221
    },
    {
      "epoch": 0.6774607620047844,
      "grad_norm": 0.9779488105303187,
      "learning_rate": 2.4889613679109208e-06,
      "loss": 0.1217,
      "step": 23222
    },
    {
      "epoch": 0.677489935235428,
      "grad_norm": 0.7770129971333952,
      "learning_rate": 2.4885528430940447e-06,
      "loss": 0.1272,
      "step": 23223
    },
    {
      "epoch": 0.6775191084660716,
      "grad_norm": 0.9252881197770451,
      "learning_rate": 2.488144340698767e-06,
      "loss": 0.1407,
      "step": 23224
    },
    {
      "epoch": 0.6775482816967151,
      "grad_norm": 0.9574349860481227,
      "learning_rate": 2.4877358607287393e-06,
      "loss": 0.1341,
      "step": 23225
    },
    {
      "epoch": 0.6775774549273587,
      "grad_norm": 1.138847368745896,
      "learning_rate": 2.4873274031876045e-06,
      "loss": 0.1203,
      "step": 23226
    },
    {
      "epoch": 0.6776066281580022,
      "grad_norm": 1.0141871485961593,
      "learning_rate": 2.48691896807901e-06,
      "loss": 0.1262,
      "step": 23227
    },
    {
      "epoch": 0.6776358013886458,
      "grad_norm": 1.0605770219692743,
      "learning_rate": 2.4865105554066056e-06,
      "loss": 0.1182,
      "step": 23228
    },
    {
      "epoch": 0.6776649746192893,
      "grad_norm": 0.7354830297829584,
      "learning_rate": 2.4861021651740343e-06,
      "loss": 0.1202,
      "step": 23229
    },
    {
      "epoch": 0.6776941478499329,
      "grad_norm": 0.7369375301099033,
      "learning_rate": 2.485693797384941e-06,
      "loss": 0.1203,
      "step": 23230
    },
    {
      "epoch": 0.6777233210805764,
      "grad_norm": 1.1670457478783296,
      "learning_rate": 2.4852854520429754e-06,
      "loss": 0.1199,
      "step": 23231
    },
    {
      "epoch": 0.67775249431122,
      "grad_norm": 0.850800968871462,
      "learning_rate": 2.484877129151779e-06,
      "loss": 0.1025,
      "step": 23232
    },
    {
      "epoch": 0.6777816675418635,
      "grad_norm": 0.7398335044777398,
      "learning_rate": 2.4844688287150014e-06,
      "loss": 0.1115,
      "step": 23233
    },
    {
      "epoch": 0.6778108407725072,
      "grad_norm": 0.7725895973816064,
      "learning_rate": 2.484060550736283e-06,
      "loss": 0.1315,
      "step": 23234
    },
    {
      "epoch": 0.6778400140031507,
      "grad_norm": 1.172249753279124,
      "learning_rate": 2.4836522952192743e-06,
      "loss": 0.1013,
      "step": 23235
    },
    {
      "epoch": 0.6778691872337943,
      "grad_norm": 0.8574820241241642,
      "learning_rate": 2.483244062167616e-06,
      "loss": 0.1296,
      "step": 23236
    },
    {
      "epoch": 0.6778983604644379,
      "grad_norm": 0.7456131632183346,
      "learning_rate": 2.4828358515849532e-06,
      "loss": 0.1089,
      "step": 23237
    },
    {
      "epoch": 0.6779275336950814,
      "grad_norm": 1.4838246913619229,
      "learning_rate": 2.48242766347493e-06,
      "loss": 0.1022,
      "step": 23238
    },
    {
      "epoch": 0.677956706925725,
      "grad_norm": 0.9926797533852754,
      "learning_rate": 2.4820194978411944e-06,
      "loss": 0.1247,
      "step": 23239
    },
    {
      "epoch": 0.6779858801563685,
      "grad_norm": 0.8703493259399564,
      "learning_rate": 2.481611354687387e-06,
      "loss": 0.1317,
      "step": 23240
    },
    {
      "epoch": 0.6780150533870121,
      "grad_norm": 0.7748205997640173,
      "learning_rate": 2.4812032340171504e-06,
      "loss": 0.1159,
      "step": 23241
    },
    {
      "epoch": 0.6780442266176556,
      "grad_norm": 0.8303628657101826,
      "learning_rate": 2.480795135834132e-06,
      "loss": 0.12,
      "step": 23242
    },
    {
      "epoch": 0.6780733998482992,
      "grad_norm": 1.0797060426992753,
      "learning_rate": 2.480387060141974e-06,
      "loss": 0.1258,
      "step": 23243
    },
    {
      "epoch": 0.6781025730789427,
      "grad_norm": 0.7337524852911939,
      "learning_rate": 2.479979006944314e-06,
      "loss": 0.1139,
      "step": 23244
    },
    {
      "epoch": 0.6781317463095863,
      "grad_norm": 0.9822969250196009,
      "learning_rate": 2.479570976244804e-06,
      "loss": 0.1213,
      "step": 23245
    },
    {
      "epoch": 0.6781609195402298,
      "grad_norm": 0.9337499917483004,
      "learning_rate": 2.4791629680470826e-06,
      "loss": 0.1207,
      "step": 23246
    },
    {
      "epoch": 0.6781900927708735,
      "grad_norm": 0.7617695585390399,
      "learning_rate": 2.4787549823547906e-06,
      "loss": 0.108,
      "step": 23247
    },
    {
      "epoch": 0.678219266001517,
      "grad_norm": 0.8697977727272415,
      "learning_rate": 2.478347019171574e-06,
      "loss": 0.1329,
      "step": 23248
    },
    {
      "epoch": 0.6782484392321606,
      "grad_norm": 1.1181899706718783,
      "learning_rate": 2.477939078501074e-06,
      "loss": 0.1184,
      "step": 23249
    },
    {
      "epoch": 0.6782776124628042,
      "grad_norm": 0.9719237304218606,
      "learning_rate": 2.4775311603469294e-06,
      "loss": 0.1056,
      "step": 23250
    },
    {
      "epoch": 0.6783067856934477,
      "grad_norm": 0.74337342749288,
      "learning_rate": 2.4771232647127842e-06,
      "loss": 0.1435,
      "step": 23251
    },
    {
      "epoch": 0.6783359589240913,
      "grad_norm": 0.7109216846044918,
      "learning_rate": 2.4767153916022823e-06,
      "loss": 0.1187,
      "step": 23252
    },
    {
      "epoch": 0.6783651321547348,
      "grad_norm": 1.506068787002971,
      "learning_rate": 2.476307541019063e-06,
      "loss": 0.1273,
      "step": 23253
    },
    {
      "epoch": 0.6783943053853784,
      "grad_norm": 0.8728675770934057,
      "learning_rate": 2.4758997129667654e-06,
      "loss": 0.1036,
      "step": 23254
    },
    {
      "epoch": 0.6784234786160219,
      "grad_norm": 0.9665068777593661,
      "learning_rate": 2.4754919074490353e-06,
      "loss": 0.1018,
      "step": 23255
    },
    {
      "epoch": 0.6784526518466655,
      "grad_norm": 0.9857849818076866,
      "learning_rate": 2.4750841244695076e-06,
      "loss": 0.1378,
      "step": 23256
    },
    {
      "epoch": 0.678481825077309,
      "grad_norm": 1.0120718578592216,
      "learning_rate": 2.4746763640318273e-06,
      "loss": 0.1201,
      "step": 23257
    },
    {
      "epoch": 0.6785109983079526,
      "grad_norm": 1.1232237256642754,
      "learning_rate": 2.474268626139635e-06,
      "loss": 0.1418,
      "step": 23258
    },
    {
      "epoch": 0.6785401715385961,
      "grad_norm": 1.0372873118648187,
      "learning_rate": 2.47386091079657e-06,
      "loss": 0.1141,
      "step": 23259
    },
    {
      "epoch": 0.6785693447692397,
      "grad_norm": 0.8610873900766065,
      "learning_rate": 2.4734532180062694e-06,
      "loss": 0.1058,
      "step": 23260
    },
    {
      "epoch": 0.6785985179998834,
      "grad_norm": 0.7117429116053213,
      "learning_rate": 2.4730455477723768e-06,
      "loss": 0.094,
      "step": 23261
    },
    {
      "epoch": 0.6786276912305269,
      "grad_norm": 0.7874848766282998,
      "learning_rate": 2.472637900098529e-06,
      "loss": 0.1319,
      "step": 23262
    },
    {
      "epoch": 0.6786568644611705,
      "grad_norm": 1.1463798161372984,
      "learning_rate": 2.472230274988368e-06,
      "loss": 0.1305,
      "step": 23263
    },
    {
      "epoch": 0.678686037691814,
      "grad_norm": 0.9051252263064438,
      "learning_rate": 2.4718226724455307e-06,
      "loss": 0.1164,
      "step": 23264
    },
    {
      "epoch": 0.6787152109224576,
      "grad_norm": 0.8160666109047723,
      "learning_rate": 2.4714150924736586e-06,
      "loss": 0.1357,
      "step": 23265
    },
    {
      "epoch": 0.6787443841531011,
      "grad_norm": 1.2440188524743452,
      "learning_rate": 2.4710075350763884e-06,
      "loss": 0.1135,
      "step": 23266
    },
    {
      "epoch": 0.6787735573837447,
      "grad_norm": 1.0329275208505135,
      "learning_rate": 2.4706000002573575e-06,
      "loss": 0.1193,
      "step": 23267
    },
    {
      "epoch": 0.6788027306143882,
      "grad_norm": 0.8850802153830637,
      "learning_rate": 2.4701924880202068e-06,
      "loss": 0.1219,
      "step": 23268
    },
    {
      "epoch": 0.6788319038450318,
      "grad_norm": 1.5421189082104674,
      "learning_rate": 2.4697849983685746e-06,
      "loss": 0.1263,
      "step": 23269
    },
    {
      "epoch": 0.6788610770756753,
      "grad_norm": 0.8565440088863334,
      "learning_rate": 2.469377531306098e-06,
      "loss": 0.1098,
      "step": 23270
    },
    {
      "epoch": 0.6788902503063189,
      "grad_norm": 0.8226132125700635,
      "learning_rate": 2.4689700868364134e-06,
      "loss": 0.1082,
      "step": 23271
    },
    {
      "epoch": 0.6789194235369624,
      "grad_norm": 0.7672631597649043,
      "learning_rate": 2.4685626649631612e-06,
      "loss": 0.1036,
      "step": 23272
    },
    {
      "epoch": 0.678948596767606,
      "grad_norm": 0.8840749501854094,
      "learning_rate": 2.468155265689977e-06,
      "loss": 0.1105,
      "step": 23273
    },
    {
      "epoch": 0.6789777699982497,
      "grad_norm": 1.033047072091153,
      "learning_rate": 2.467747889020495e-06,
      "loss": 0.1145,
      "step": 23274
    },
    {
      "epoch": 0.6790069432288932,
      "grad_norm": 1.0287247916203313,
      "learning_rate": 2.4673405349583584e-06,
      "loss": 0.1327,
      "step": 23275
    },
    {
      "epoch": 0.6790361164595368,
      "grad_norm": 0.7547137343987765,
      "learning_rate": 2.4669332035072015e-06,
      "loss": 0.1206,
      "step": 23276
    },
    {
      "epoch": 0.6790652896901803,
      "grad_norm": 0.8254254707409097,
      "learning_rate": 2.4665258946706584e-06,
      "loss": 0.1152,
      "step": 23277
    },
    {
      "epoch": 0.6790944629208239,
      "grad_norm": 1.1018030493050908,
      "learning_rate": 2.4661186084523687e-06,
      "loss": 0.1147,
      "step": 23278
    },
    {
      "epoch": 0.6791236361514674,
      "grad_norm": 0.9821404654476183,
      "learning_rate": 2.465711344855967e-06,
      "loss": 0.1059,
      "step": 23279
    },
    {
      "epoch": 0.679152809382111,
      "grad_norm": 0.7834246814515112,
      "learning_rate": 2.4653041038850885e-06,
      "loss": 0.1076,
      "step": 23280
    },
    {
      "epoch": 0.6791819826127545,
      "grad_norm": 0.9638182775170588,
      "learning_rate": 2.464896885543369e-06,
      "loss": 0.1245,
      "step": 23281
    },
    {
      "epoch": 0.6792111558433981,
      "grad_norm": 0.9872090026624819,
      "learning_rate": 2.4644896898344474e-06,
      "loss": 0.1158,
      "step": 23282
    },
    {
      "epoch": 0.6792403290740416,
      "grad_norm": 1.0236346161098253,
      "learning_rate": 2.4640825167619565e-06,
      "loss": 0.1208,
      "step": 23283
    },
    {
      "epoch": 0.6792695023046852,
      "grad_norm": 0.8528396822851665,
      "learning_rate": 2.4636753663295293e-06,
      "loss": 0.1361,
      "step": 23284
    },
    {
      "epoch": 0.6792986755353287,
      "grad_norm": 1.0815056572565798,
      "learning_rate": 2.463268238540805e-06,
      "loss": 0.135,
      "step": 23285
    },
    {
      "epoch": 0.6793278487659723,
      "grad_norm": 0.8551548620165316,
      "learning_rate": 2.4628611333994147e-06,
      "loss": 0.1169,
      "step": 23286
    },
    {
      "epoch": 0.6793570219966159,
      "grad_norm": 1.1690525290461162,
      "learning_rate": 2.462454050908994e-06,
      "loss": 0.1496,
      "step": 23287
    },
    {
      "epoch": 0.6793861952272595,
      "grad_norm": 1.056309387325001,
      "learning_rate": 2.4620469910731805e-06,
      "loss": 0.1404,
      "step": 23288
    },
    {
      "epoch": 0.6794153684579031,
      "grad_norm": 0.8149182654688826,
      "learning_rate": 2.461639953895605e-06,
      "loss": 0.0967,
      "step": 23289
    },
    {
      "epoch": 0.6794445416885466,
      "grad_norm": 0.7322182970506431,
      "learning_rate": 2.4612329393799e-06,
      "loss": 0.1298,
      "step": 23290
    },
    {
      "epoch": 0.6794737149191902,
      "grad_norm": 0.9476505209053838,
      "learning_rate": 2.460825947529703e-06,
      "loss": 0.142,
      "step": 23291
    },
    {
      "epoch": 0.6795028881498337,
      "grad_norm": 0.9886733701256886,
      "learning_rate": 2.4604189783486445e-06,
      "loss": 0.1106,
      "step": 23292
    },
    {
      "epoch": 0.6795320613804773,
      "grad_norm": 0.8564438238412084,
      "learning_rate": 2.4600120318403607e-06,
      "loss": 0.1278,
      "step": 23293
    },
    {
      "epoch": 0.6795612346111208,
      "grad_norm": 0.8189582019618686,
      "learning_rate": 2.4596051080084814e-06,
      "loss": 0.1189,
      "step": 23294
    },
    {
      "epoch": 0.6795904078417644,
      "grad_norm": 1.2801099694260838,
      "learning_rate": 2.4591982068566427e-06,
      "loss": 0.111,
      "step": 23295
    },
    {
      "epoch": 0.6796195810724079,
      "grad_norm": 1.0073932176205675,
      "learning_rate": 2.458791328388477e-06,
      "loss": 0.1099,
      "step": 23296
    },
    {
      "epoch": 0.6796487543030515,
      "grad_norm": 1.0406894851352735,
      "learning_rate": 2.4583844726076124e-06,
      "loss": 0.1475,
      "step": 23297
    },
    {
      "epoch": 0.679677927533695,
      "grad_norm": 0.8681268505669584,
      "learning_rate": 2.4579776395176853e-06,
      "loss": 0.1234,
      "step": 23298
    },
    {
      "epoch": 0.6797071007643386,
      "grad_norm": 0.8673681118810458,
      "learning_rate": 2.457570829122329e-06,
      "loss": 0.1345,
      "step": 23299
    },
    {
      "epoch": 0.6797362739949822,
      "grad_norm": 1.1524444999473435,
      "learning_rate": 2.457164041425173e-06,
      "loss": 0.1229,
      "step": 23300
    },
    {
      "epoch": 0.6797654472256258,
      "grad_norm": 0.7966303620764652,
      "learning_rate": 2.4567572764298476e-06,
      "loss": 0.1173,
      "step": 23301
    },
    {
      "epoch": 0.6797946204562694,
      "grad_norm": 1.0292047862244138,
      "learning_rate": 2.456350534139988e-06,
      "loss": 0.1459,
      "step": 23302
    },
    {
      "epoch": 0.6798237936869129,
      "grad_norm": 0.7958008433308223,
      "learning_rate": 2.4559438145592234e-06,
      "loss": 0.1225,
      "step": 23303
    },
    {
      "epoch": 0.6798529669175565,
      "grad_norm": 1.1017273521197222,
      "learning_rate": 2.4555371176911817e-06,
      "loss": 0.1108,
      "step": 23304
    },
    {
      "epoch": 0.6798821401482,
      "grad_norm": 0.7683430539172763,
      "learning_rate": 2.4551304435395007e-06,
      "loss": 0.0946,
      "step": 23305
    },
    {
      "epoch": 0.6799113133788436,
      "grad_norm": 0.9074795400999534,
      "learning_rate": 2.4547237921078077e-06,
      "loss": 0.1348,
      "step": 23306
    },
    {
      "epoch": 0.6799404866094871,
      "grad_norm": 0.7755105568608713,
      "learning_rate": 2.4543171633997314e-06,
      "loss": 0.1034,
      "step": 23307
    },
    {
      "epoch": 0.6799696598401307,
      "grad_norm": 0.9758079951548948,
      "learning_rate": 2.4539105574189052e-06,
      "loss": 0.1347,
      "step": 23308
    },
    {
      "epoch": 0.6799988330707742,
      "grad_norm": 0.9060779365644929,
      "learning_rate": 2.453503974168958e-06,
      "loss": 0.1168,
      "step": 23309
    },
    {
      "epoch": 0.6800280063014178,
      "grad_norm": 0.9526106057205321,
      "learning_rate": 2.453097413653518e-06,
      "loss": 0.1333,
      "step": 23310
    },
    {
      "epoch": 0.6800571795320614,
      "grad_norm": 0.7123504495795431,
      "learning_rate": 2.4526908758762156e-06,
      "loss": 0.1195,
      "step": 23311
    },
    {
      "epoch": 0.6800863527627049,
      "grad_norm": 0.9783550740959671,
      "learning_rate": 2.4522843608406834e-06,
      "loss": 0.1203,
      "step": 23312
    },
    {
      "epoch": 0.6801155259933485,
      "grad_norm": 0.7570682363192021,
      "learning_rate": 2.451877868550548e-06,
      "loss": 0.1185,
      "step": 23313
    },
    {
      "epoch": 0.680144699223992,
      "grad_norm": 0.7957456151917327,
      "learning_rate": 2.451471399009437e-06,
      "loss": 0.1077,
      "step": 23314
    },
    {
      "epoch": 0.6801738724546357,
      "grad_norm": 0.7098774031726814,
      "learning_rate": 2.4510649522209825e-06,
      "loss": 0.1129,
      "step": 23315
    },
    {
      "epoch": 0.6802030456852792,
      "grad_norm": 0.7862130562280645,
      "learning_rate": 2.4506585281888096e-06,
      "loss": 0.1103,
      "step": 23316
    },
    {
      "epoch": 0.6802322189159228,
      "grad_norm": 0.7523670724296232,
      "learning_rate": 2.450252126916549e-06,
      "loss": 0.1062,
      "step": 23317
    },
    {
      "epoch": 0.6802613921465663,
      "grad_norm": 0.8176813521525674,
      "learning_rate": 2.449845748407831e-06,
      "loss": 0.1055,
      "step": 23318
    },
    {
      "epoch": 0.6802905653772099,
      "grad_norm": 0.8821584704883535,
      "learning_rate": 2.4494393926662807e-06,
      "loss": 0.1234,
      "step": 23319
    },
    {
      "epoch": 0.6803197386078534,
      "grad_norm": 0.7341871562175372,
      "learning_rate": 2.4490330596955254e-06,
      "loss": 0.1178,
      "step": 23320
    },
    {
      "epoch": 0.680348911838497,
      "grad_norm": 0.7906863096959323,
      "learning_rate": 2.4486267494991956e-06,
      "loss": 0.1069,
      "step": 23321
    },
    {
      "epoch": 0.6803780850691405,
      "grad_norm": 0.8430909846833469,
      "learning_rate": 2.4482204620809154e-06,
      "loss": 0.1163,
      "step": 23322
    },
    {
      "epoch": 0.6804072582997841,
      "grad_norm": 0.9695821294279957,
      "learning_rate": 2.4478141974443148e-06,
      "loss": 0.1032,
      "step": 23323
    },
    {
      "epoch": 0.6804364315304277,
      "grad_norm": 0.8744977225367986,
      "learning_rate": 2.4474079555930186e-06,
      "loss": 0.126,
      "step": 23324
    },
    {
      "epoch": 0.6804656047610712,
      "grad_norm": 0.789867314354223,
      "learning_rate": 2.447001736530657e-06,
      "loss": 0.0942,
      "step": 23325
    },
    {
      "epoch": 0.6804947779917148,
      "grad_norm": 0.8290402506456502,
      "learning_rate": 2.446595540260854e-06,
      "loss": 0.1289,
      "step": 23326
    },
    {
      "epoch": 0.6805239512223583,
      "grad_norm": 0.8054387956901801,
      "learning_rate": 2.446189366787235e-06,
      "loss": 0.1369,
      "step": 23327
    },
    {
      "epoch": 0.680553124453002,
      "grad_norm": 0.902868084152141,
      "learning_rate": 2.445783216113427e-06,
      "loss": 0.1257,
      "step": 23328
    },
    {
      "epoch": 0.6805822976836455,
      "grad_norm": 0.837937168979911,
      "learning_rate": 2.445377088243059e-06,
      "loss": 0.1069,
      "step": 23329
    },
    {
      "epoch": 0.6806114709142891,
      "grad_norm": 0.7170138498137587,
      "learning_rate": 2.4449709831797546e-06,
      "loss": 0.1297,
      "step": 23330
    },
    {
      "epoch": 0.6806406441449326,
      "grad_norm": 0.7305979998787503,
      "learning_rate": 2.4445649009271373e-06,
      "loss": 0.1198,
      "step": 23331
    },
    {
      "epoch": 0.6806698173755762,
      "grad_norm": 0.8470847097306455,
      "learning_rate": 2.444158841488836e-06,
      "loss": 0.1161,
      "step": 23332
    },
    {
      "epoch": 0.6806989906062197,
      "grad_norm": 0.8936664292927026,
      "learning_rate": 2.4437528048684757e-06,
      "loss": 0.1345,
      "step": 23333
    },
    {
      "epoch": 0.6807281638368633,
      "grad_norm": 0.8879470216622817,
      "learning_rate": 2.4433467910696752e-06,
      "loss": 0.143,
      "step": 23334
    },
    {
      "epoch": 0.6807573370675069,
      "grad_norm": 0.8787725909890597,
      "learning_rate": 2.442940800096068e-06,
      "loss": 0.1298,
      "step": 23335
    },
    {
      "epoch": 0.6807865102981504,
      "grad_norm": 0.8491824379892106,
      "learning_rate": 2.4425348319512753e-06,
      "loss": 0.1059,
      "step": 23336
    },
    {
      "epoch": 0.680815683528794,
      "grad_norm": 0.9070472465815405,
      "learning_rate": 2.4421288866389193e-06,
      "loss": 0.1427,
      "step": 23337
    },
    {
      "epoch": 0.6808448567594375,
      "grad_norm": 0.8201645344592756,
      "learning_rate": 2.441722964162628e-06,
      "loss": 0.1233,
      "step": 23338
    },
    {
      "epoch": 0.6808740299900811,
      "grad_norm": 0.673680317420867,
      "learning_rate": 2.441317064526023e-06,
      "loss": 0.0976,
      "step": 23339
    },
    {
      "epoch": 0.6809032032207246,
      "grad_norm": 0.7401891538857096,
      "learning_rate": 2.440911187732727e-06,
      "loss": 0.109,
      "step": 23340
    },
    {
      "epoch": 0.6809323764513682,
      "grad_norm": 0.8609853245740076,
      "learning_rate": 2.440505333786364e-06,
      "loss": 0.121,
      "step": 23341
    },
    {
      "epoch": 0.6809615496820118,
      "grad_norm": 0.852046250461729,
      "learning_rate": 2.4400995026905612e-06,
      "loss": 0.1045,
      "step": 23342
    },
    {
      "epoch": 0.6809907229126554,
      "grad_norm": 0.9457424621288142,
      "learning_rate": 2.4396936944489384e-06,
      "loss": 0.1028,
      "step": 23343
    },
    {
      "epoch": 0.6810198961432989,
      "grad_norm": 0.8938133279893553,
      "learning_rate": 2.439287909065118e-06,
      "loss": 0.1303,
      "step": 23344
    },
    {
      "epoch": 0.6810490693739425,
      "grad_norm": 0.9570157452775915,
      "learning_rate": 2.4388821465427252e-06,
      "loss": 0.136,
      "step": 23345
    },
    {
      "epoch": 0.681078242604586,
      "grad_norm": 0.791793052052301,
      "learning_rate": 2.4384764068853796e-06,
      "loss": 0.1152,
      "step": 23346
    },
    {
      "epoch": 0.6811074158352296,
      "grad_norm": 1.0417459373225932,
      "learning_rate": 2.4380706900967043e-06,
      "loss": 0.1149,
      "step": 23347
    },
    {
      "epoch": 0.6811365890658732,
      "grad_norm": 1.1372400470501136,
      "learning_rate": 2.437664996180325e-06,
      "loss": 0.1345,
      "step": 23348
    },
    {
      "epoch": 0.6811657622965167,
      "grad_norm": 1.0135776593769665,
      "learning_rate": 2.437259325139861e-06,
      "loss": 0.1228,
      "step": 23349
    },
    {
      "epoch": 0.6811949355271603,
      "grad_norm": 1.053747311183959,
      "learning_rate": 2.436853676978932e-06,
      "loss": 0.1064,
      "step": 23350
    },
    {
      "epoch": 0.6812241087578038,
      "grad_norm": 1.011768761569344,
      "learning_rate": 2.436448051701163e-06,
      "loss": 0.1208,
      "step": 23351
    },
    {
      "epoch": 0.6812532819884474,
      "grad_norm": 1.1087044230681251,
      "learning_rate": 2.436042449310172e-06,
      "loss": 0.1082,
      "step": 23352
    },
    {
      "epoch": 0.6812824552190909,
      "grad_norm": 1.2907939712860053,
      "learning_rate": 2.4356368698095838e-06,
      "loss": 0.1215,
      "step": 23353
    },
    {
      "epoch": 0.6813116284497345,
      "grad_norm": 0.9932325754634937,
      "learning_rate": 2.435231313203016e-06,
      "loss": 0.1348,
      "step": 23354
    },
    {
      "epoch": 0.6813408016803781,
      "grad_norm": 1.09646618133375,
      "learning_rate": 2.4348257794940925e-06,
      "loss": 0.1049,
      "step": 23355
    },
    {
      "epoch": 0.6813699749110217,
      "grad_norm": 0.9649665998018226,
      "learning_rate": 2.4344202686864323e-06,
      "loss": 0.1439,
      "step": 23356
    },
    {
      "epoch": 0.6813991481416652,
      "grad_norm": 0.7197520872665651,
      "learning_rate": 2.434014780783653e-06,
      "loss": 0.1191,
      "step": 23357
    },
    {
      "epoch": 0.6814283213723088,
      "grad_norm": 1.0539640534846044,
      "learning_rate": 2.4336093157893774e-06,
      "loss": 0.1028,
      "step": 23358
    },
    {
      "epoch": 0.6814574946029524,
      "grad_norm": 0.8796372851471373,
      "learning_rate": 2.433203873707227e-06,
      "loss": 0.1293,
      "step": 23359
    },
    {
      "epoch": 0.6814866678335959,
      "grad_norm": 0.6840906635886067,
      "learning_rate": 2.4327984545408203e-06,
      "loss": 0.1033,
      "step": 23360
    },
    {
      "epoch": 0.6815158410642395,
      "grad_norm": 0.6951351396440043,
      "learning_rate": 2.4323930582937737e-06,
      "loss": 0.1202,
      "step": 23361
    },
    {
      "epoch": 0.681545014294883,
      "grad_norm": 0.8606714595639731,
      "learning_rate": 2.4319876849697112e-06,
      "loss": 0.1125,
      "step": 23362
    },
    {
      "epoch": 0.6815741875255266,
      "grad_norm": 0.9876280544161635,
      "learning_rate": 2.431582334572249e-06,
      "loss": 0.1281,
      "step": 23363
    },
    {
      "epoch": 0.6816033607561701,
      "grad_norm": 0.8772466950283561,
      "learning_rate": 2.4311770071050035e-06,
      "loss": 0.1073,
      "step": 23364
    },
    {
      "epoch": 0.6816325339868137,
      "grad_norm": 0.6179190694491767,
      "learning_rate": 2.430771702571599e-06,
      "loss": 0.1002,
      "step": 23365
    },
    {
      "epoch": 0.6816617072174572,
      "grad_norm": 0.7588800959108316,
      "learning_rate": 2.4303664209756526e-06,
      "loss": 0.1166,
      "step": 23366
    },
    {
      "epoch": 0.6816908804481008,
      "grad_norm": 1.0027587154496904,
      "learning_rate": 2.42996116232078e-06,
      "loss": 0.1236,
      "step": 23367
    },
    {
      "epoch": 0.6817200536787443,
      "grad_norm": 0.8885632551773563,
      "learning_rate": 2.429555926610601e-06,
      "loss": 0.1244,
      "step": 23368
    },
    {
      "epoch": 0.681749226909388,
      "grad_norm": 0.6295678571108029,
      "learning_rate": 2.429150713848734e-06,
      "loss": 0.1096,
      "step": 23369
    },
    {
      "epoch": 0.6817784001400315,
      "grad_norm": 0.7654281934622027,
      "learning_rate": 2.428745524038794e-06,
      "loss": 0.1164,
      "step": 23370
    },
    {
      "epoch": 0.6818075733706751,
      "grad_norm": 0.7794154015509821,
      "learning_rate": 2.4283403571843994e-06,
      "loss": 0.1033,
      "step": 23371
    },
    {
      "epoch": 0.6818367466013187,
      "grad_norm": 0.8584749842108826,
      "learning_rate": 2.4279352132891705e-06,
      "loss": 0.1308,
      "step": 23372
    },
    {
      "epoch": 0.6818659198319622,
      "grad_norm": 0.7726199129739065,
      "learning_rate": 2.427530092356722e-06,
      "loss": 0.1203,
      "step": 23373
    },
    {
      "epoch": 0.6818950930626058,
      "grad_norm": 0.8598346354331359,
      "learning_rate": 2.427124994390669e-06,
      "loss": 0.1148,
      "step": 23374
    },
    {
      "epoch": 0.6819242662932493,
      "grad_norm": 0.7132961658887523,
      "learning_rate": 2.4267199193946313e-06,
      "loss": 0.1044,
      "step": 23375
    },
    {
      "epoch": 0.6819534395238929,
      "grad_norm": 0.8904430262186038,
      "learning_rate": 2.426314867372222e-06,
      "loss": 0.1567,
      "step": 23376
    },
    {
      "epoch": 0.6819826127545364,
      "grad_norm": 0.816336462451866,
      "learning_rate": 2.4259098383270596e-06,
      "loss": 0.1299,
      "step": 23377
    },
    {
      "epoch": 0.68201178598518,
      "grad_norm": 0.7746041696509879,
      "learning_rate": 2.425504832262761e-06,
      "loss": 0.0963,
      "step": 23378
    },
    {
      "epoch": 0.6820409592158235,
      "grad_norm": 0.8023654822258212,
      "learning_rate": 2.4250998491829414e-06,
      "loss": 0.1102,
      "step": 23379
    },
    {
      "epoch": 0.6820701324464671,
      "grad_norm": 1.3227585921290468,
      "learning_rate": 2.424694889091213e-06,
      "loss": 0.1295,
      "step": 23380
    },
    {
      "epoch": 0.6820993056771106,
      "grad_norm": 0.9538132455914216,
      "learning_rate": 2.4242899519911966e-06,
      "loss": 0.1253,
      "step": 23381
    },
    {
      "epoch": 0.6821284789077543,
      "grad_norm": 0.7747601742899379,
      "learning_rate": 2.423885037886502e-06,
      "loss": 0.108,
      "step": 23382
    },
    {
      "epoch": 0.6821576521383979,
      "grad_norm": 0.7359015483255598,
      "learning_rate": 2.4234801467807487e-06,
      "loss": 0.1091,
      "step": 23383
    },
    {
      "epoch": 0.6821868253690414,
      "grad_norm": 0.8063697329646766,
      "learning_rate": 2.4230752786775485e-06,
      "loss": 0.1227,
      "step": 23384
    },
    {
      "epoch": 0.682215998599685,
      "grad_norm": 1.1138567153813508,
      "learning_rate": 2.4226704335805186e-06,
      "loss": 0.1132,
      "step": 23385
    },
    {
      "epoch": 0.6822451718303285,
      "grad_norm": 0.7221672197416532,
      "learning_rate": 2.4222656114932713e-06,
      "loss": 0.1253,
      "step": 23386
    },
    {
      "epoch": 0.6822743450609721,
      "grad_norm": 0.6745835099362463,
      "learning_rate": 2.42186081241942e-06,
      "loss": 0.125,
      "step": 23387
    },
    {
      "epoch": 0.6823035182916156,
      "grad_norm": 0.7813051552278909,
      "learning_rate": 2.4214560363625794e-06,
      "loss": 0.1053,
      "step": 23388
    },
    {
      "epoch": 0.6823326915222592,
      "grad_norm": 0.8889806545809844,
      "learning_rate": 2.421051283326366e-06,
      "loss": 0.1277,
      "step": 23389
    },
    {
      "epoch": 0.6823618647529027,
      "grad_norm": 0.7596732140411573,
      "learning_rate": 2.4206465533143906e-06,
      "loss": 0.1298,
      "step": 23390
    },
    {
      "epoch": 0.6823910379835463,
      "grad_norm": 0.7427801645031388,
      "learning_rate": 2.420241846330266e-06,
      "loss": 0.1117,
      "step": 23391
    },
    {
      "epoch": 0.6824202112141898,
      "grad_norm": 0.7627876825591198,
      "learning_rate": 2.4198371623776077e-06,
      "loss": 0.1102,
      "step": 23392
    },
    {
      "epoch": 0.6824493844448334,
      "grad_norm": 0.9159339260042754,
      "learning_rate": 2.4194325014600254e-06,
      "loss": 0.1354,
      "step": 23393
    },
    {
      "epoch": 0.6824785576754769,
      "grad_norm": 0.8578802680763596,
      "learning_rate": 2.4190278635811336e-06,
      "loss": 0.1204,
      "step": 23394
    },
    {
      "epoch": 0.6825077309061205,
      "grad_norm": 1.0237255170517903,
      "learning_rate": 2.418623248744547e-06,
      "loss": 0.1217,
      "step": 23395
    },
    {
      "epoch": 0.6825369041367642,
      "grad_norm": 0.7705445245732929,
      "learning_rate": 2.4182186569538763e-06,
      "loss": 0.1173,
      "step": 23396
    },
    {
      "epoch": 0.6825660773674077,
      "grad_norm": 1.022543322282196,
      "learning_rate": 2.4178140882127304e-06,
      "loss": 0.1085,
      "step": 23397
    },
    {
      "epoch": 0.6825952505980513,
      "grad_norm": 0.8291564370473499,
      "learning_rate": 2.4174095425247263e-06,
      "loss": 0.105,
      "step": 23398
    },
    {
      "epoch": 0.6826244238286948,
      "grad_norm": 0.8308205991615312,
      "learning_rate": 2.4170050198934707e-06,
      "loss": 0.1126,
      "step": 23399
    },
    {
      "epoch": 0.6826535970593384,
      "grad_norm": 1.0059662857379013,
      "learning_rate": 2.4166005203225803e-06,
      "loss": 0.1048,
      "step": 23400
    },
    {
      "epoch": 0.6826827702899819,
      "grad_norm": 0.7635590561700802,
      "learning_rate": 2.416196043815662e-06,
      "loss": 0.1321,
      "step": 23401
    },
    {
      "epoch": 0.6827119435206255,
      "grad_norm": 0.91562126553842,
      "learning_rate": 2.4157915903763295e-06,
      "loss": 0.1234,
      "step": 23402
    },
    {
      "epoch": 0.682741116751269,
      "grad_norm": 0.8778772330689085,
      "learning_rate": 2.4153871600081936e-06,
      "loss": 0.0964,
      "step": 23403
    },
    {
      "epoch": 0.6827702899819126,
      "grad_norm": 0.7183699973220936,
      "learning_rate": 2.414982752714862e-06,
      "loss": 0.1201,
      "step": 23404
    },
    {
      "epoch": 0.6827994632125561,
      "grad_norm": 1.0354123592567526,
      "learning_rate": 2.4145783684999472e-06,
      "loss": 0.1278,
      "step": 23405
    },
    {
      "epoch": 0.6828286364431997,
      "grad_norm": 0.9630908241330794,
      "learning_rate": 2.4141740073670617e-06,
      "loss": 0.141,
      "step": 23406
    },
    {
      "epoch": 0.6828578096738432,
      "grad_norm": 0.7248657130182223,
      "learning_rate": 2.4137696693198113e-06,
      "loss": 0.1249,
      "step": 23407
    },
    {
      "epoch": 0.6828869829044868,
      "grad_norm": 0.7290492842727565,
      "learning_rate": 2.41336535436181e-06,
      "loss": 0.112,
      "step": 23408
    },
    {
      "epoch": 0.6829161561351305,
      "grad_norm": 0.6970817164852366,
      "learning_rate": 2.4129610624966654e-06,
      "loss": 0.1118,
      "step": 23409
    },
    {
      "epoch": 0.682945329365774,
      "grad_norm": 0.8694633343773415,
      "learning_rate": 2.412556793727985e-06,
      "loss": 0.1289,
      "step": 23410
    },
    {
      "epoch": 0.6829745025964176,
      "grad_norm": 0.810799855740774,
      "learning_rate": 2.4121525480593793e-06,
      "loss": 0.1214,
      "step": 23411
    },
    {
      "epoch": 0.6830036758270611,
      "grad_norm": 0.7721906771436191,
      "learning_rate": 2.41174832549446e-06,
      "loss": 0.1128,
      "step": 23412
    },
    {
      "epoch": 0.6830328490577047,
      "grad_norm": 0.8932376975416071,
      "learning_rate": 2.4113441260368335e-06,
      "loss": 0.1086,
      "step": 23413
    },
    {
      "epoch": 0.6830620222883482,
      "grad_norm": 0.8040576828677514,
      "learning_rate": 2.4109399496901074e-06,
      "loss": 0.1142,
      "step": 23414
    },
    {
      "epoch": 0.6830911955189918,
      "grad_norm": 1.060179750476534,
      "learning_rate": 2.4105357964578928e-06,
      "loss": 0.1019,
      "step": 23415
    },
    {
      "epoch": 0.6831203687496353,
      "grad_norm": 0.974518952914883,
      "learning_rate": 2.4101316663437966e-06,
      "loss": 0.1313,
      "step": 23416
    },
    {
      "epoch": 0.6831495419802789,
      "grad_norm": 0.9598616199303697,
      "learning_rate": 2.409727559351425e-06,
      "loss": 0.1114,
      "step": 23417
    },
    {
      "epoch": 0.6831787152109224,
      "grad_norm": 0.9963924525348814,
      "learning_rate": 2.4093234754843873e-06,
      "loss": 0.1005,
      "step": 23418
    },
    {
      "epoch": 0.683207888441566,
      "grad_norm": 1.0680388047755742,
      "learning_rate": 2.408919414746293e-06,
      "loss": 0.1324,
      "step": 23419
    },
    {
      "epoch": 0.6832370616722095,
      "grad_norm": 0.9501564668487673,
      "learning_rate": 2.4085153771407477e-06,
      "loss": 0.1359,
      "step": 23420
    },
    {
      "epoch": 0.6832662349028531,
      "grad_norm": 1.0012804483359667,
      "learning_rate": 2.4081113626713564e-06,
      "loss": 0.1265,
      "step": 23421
    },
    {
      "epoch": 0.6832954081334967,
      "grad_norm": 0.9585963243980063,
      "learning_rate": 2.4077073713417304e-06,
      "loss": 0.1282,
      "step": 23422
    },
    {
      "epoch": 0.6833245813641403,
      "grad_norm": 0.8267488349081108,
      "learning_rate": 2.407303403155472e-06,
      "loss": 0.14,
      "step": 23423
    },
    {
      "epoch": 0.6833537545947839,
      "grad_norm": 0.886117093413851,
      "learning_rate": 2.4068994581161898e-06,
      "loss": 0.1294,
      "step": 23424
    },
    {
      "epoch": 0.6833829278254274,
      "grad_norm": 0.8729574453971385,
      "learning_rate": 2.4064955362274924e-06,
      "loss": 0.1081,
      "step": 23425
    },
    {
      "epoch": 0.683412101056071,
      "grad_norm": 0.8968191632170365,
      "learning_rate": 2.406091637492983e-06,
      "loss": 0.12,
      "step": 23426
    },
    {
      "epoch": 0.6834412742867145,
      "grad_norm": 0.8399957092302629,
      "learning_rate": 2.4056877619162674e-06,
      "loss": 0.1321,
      "step": 23427
    },
    {
      "epoch": 0.6834704475173581,
      "grad_norm": 0.9490961480081912,
      "learning_rate": 2.4052839095009535e-06,
      "loss": 0.1234,
      "step": 23428
    },
    {
      "epoch": 0.6834996207480016,
      "grad_norm": 1.1315340538929324,
      "learning_rate": 2.404880080250643e-06,
      "loss": 0.1459,
      "step": 23429
    },
    {
      "epoch": 0.6835287939786452,
      "grad_norm": 0.8190062475940487,
      "learning_rate": 2.4044762741689464e-06,
      "loss": 0.1302,
      "step": 23430
    },
    {
      "epoch": 0.6835579672092887,
      "grad_norm": 1.027433233966887,
      "learning_rate": 2.404072491259464e-06,
      "loss": 0.135,
      "step": 23431
    },
    {
      "epoch": 0.6835871404399323,
      "grad_norm": 1.0991760340368277,
      "learning_rate": 2.403668731525804e-06,
      "loss": 0.0995,
      "step": 23432
    },
    {
      "epoch": 0.6836163136705758,
      "grad_norm": 1.0180612157341427,
      "learning_rate": 2.4032649949715703e-06,
      "loss": 0.1236,
      "step": 23433
    },
    {
      "epoch": 0.6836454869012194,
      "grad_norm": 0.7992345966043113,
      "learning_rate": 2.402861281600365e-06,
      "loss": 0.1304,
      "step": 23434
    },
    {
      "epoch": 0.683674660131863,
      "grad_norm": 1.0817479691060017,
      "learning_rate": 2.402457591415794e-06,
      "loss": 0.1264,
      "step": 23435
    },
    {
      "epoch": 0.6837038333625066,
      "grad_norm": 0.7583473275234861,
      "learning_rate": 2.402053924421463e-06,
      "loss": 0.1282,
      "step": 23436
    },
    {
      "epoch": 0.6837330065931502,
      "grad_norm": 0.8215215240016145,
      "learning_rate": 2.401650280620973e-06,
      "loss": 0.1194,
      "step": 23437
    },
    {
      "epoch": 0.6837621798237937,
      "grad_norm": 0.9652272594761065,
      "learning_rate": 2.401246660017931e-06,
      "loss": 0.1548,
      "step": 23438
    },
    {
      "epoch": 0.6837913530544373,
      "grad_norm": 0.9839096272142991,
      "learning_rate": 2.4008430626159383e-06,
      "loss": 0.1234,
      "step": 23439
    },
    {
      "epoch": 0.6838205262850808,
      "grad_norm": 0.8477160412619479,
      "learning_rate": 2.4004394884185965e-06,
      "loss": 0.1224,
      "step": 23440
    },
    {
      "epoch": 0.6838496995157244,
      "grad_norm": 0.9105366763503833,
      "learning_rate": 2.40003593742951e-06,
      "loss": 0.1489,
      "step": 23441
    },
    {
      "epoch": 0.6838788727463679,
      "grad_norm": 0.8676494595180941,
      "learning_rate": 2.3996324096522844e-06,
      "loss": 0.1178,
      "step": 23442
    },
    {
      "epoch": 0.6839080459770115,
      "grad_norm": 0.8251007198965699,
      "learning_rate": 2.3992289050905194e-06,
      "loss": 0.1004,
      "step": 23443
    },
    {
      "epoch": 0.683937219207655,
      "grad_norm": 0.8984564437788387,
      "learning_rate": 2.3988254237478164e-06,
      "loss": 0.1245,
      "step": 23444
    },
    {
      "epoch": 0.6839663924382986,
      "grad_norm": 0.7892369663173008,
      "learning_rate": 2.3984219656277807e-06,
      "loss": 0.1111,
      "step": 23445
    },
    {
      "epoch": 0.6839955656689422,
      "grad_norm": 1.003280951784191,
      "learning_rate": 2.3980185307340127e-06,
      "loss": 0.1026,
      "step": 23446
    },
    {
      "epoch": 0.6840247388995857,
      "grad_norm": 0.7889691182335306,
      "learning_rate": 2.3976151190701123e-06,
      "loss": 0.1122,
      "step": 23447
    },
    {
      "epoch": 0.6840539121302293,
      "grad_norm": 0.6360571341951934,
      "learning_rate": 2.3972117306396823e-06,
      "loss": 0.121,
      "step": 23448
    },
    {
      "epoch": 0.6840830853608728,
      "grad_norm": 1.0484686636854017,
      "learning_rate": 2.3968083654463277e-06,
      "loss": 0.1325,
      "step": 23449
    },
    {
      "epoch": 0.6841122585915165,
      "grad_norm": 1.1633400083088463,
      "learning_rate": 2.396405023493646e-06,
      "loss": 0.1189,
      "step": 23450
    },
    {
      "epoch": 0.68414143182216,
      "grad_norm": 0.7852866233186016,
      "learning_rate": 2.3960017047852362e-06,
      "loss": 0.14,
      "step": 23451
    },
    {
      "epoch": 0.6841706050528036,
      "grad_norm": 0.9356886034288822,
      "learning_rate": 2.395598409324704e-06,
      "loss": 0.1323,
      "step": 23452
    },
    {
      "epoch": 0.6841997782834471,
      "grad_norm": 0.9572955367971694,
      "learning_rate": 2.395195137115646e-06,
      "loss": 0.1307,
      "step": 23453
    },
    {
      "epoch": 0.6842289515140907,
      "grad_norm": 0.7671869608111163,
      "learning_rate": 2.394791888161663e-06,
      "loss": 0.1137,
      "step": 23454
    },
    {
      "epoch": 0.6842581247447342,
      "grad_norm": 0.9668753842441362,
      "learning_rate": 2.3943886624663586e-06,
      "loss": 0.1277,
      "step": 23455
    },
    {
      "epoch": 0.6842872979753778,
      "grad_norm": 0.7377271997149412,
      "learning_rate": 2.393985460033331e-06,
      "loss": 0.1213,
      "step": 23456
    },
    {
      "epoch": 0.6843164712060213,
      "grad_norm": 0.707175469957673,
      "learning_rate": 2.393582280866176e-06,
      "loss": 0.1252,
      "step": 23457
    },
    {
      "epoch": 0.6843456444366649,
      "grad_norm": 0.8718198956609994,
      "learning_rate": 2.393179124968498e-06,
      "loss": 0.1152,
      "step": 23458
    },
    {
      "epoch": 0.6843748176673085,
      "grad_norm": 0.8221659544288544,
      "learning_rate": 2.3927759923438936e-06,
      "loss": 0.1276,
      "step": 23459
    },
    {
      "epoch": 0.684403990897952,
      "grad_norm": 0.7789014250704809,
      "learning_rate": 2.392372882995964e-06,
      "loss": 0.1136,
      "step": 23460
    },
    {
      "epoch": 0.6844331641285956,
      "grad_norm": 0.7690363735768622,
      "learning_rate": 2.391969796928305e-06,
      "loss": 0.1068,
      "step": 23461
    },
    {
      "epoch": 0.6844623373592391,
      "grad_norm": 0.8627675179824307,
      "learning_rate": 2.3915667341445194e-06,
      "loss": 0.1178,
      "step": 23462
    },
    {
      "epoch": 0.6844915105898828,
      "grad_norm": 0.9365312772700242,
      "learning_rate": 2.3911636946482024e-06,
      "loss": 0.1415,
      "step": 23463
    },
    {
      "epoch": 0.6845206838205263,
      "grad_norm": 1.0855533313368741,
      "learning_rate": 2.390760678442952e-06,
      "loss": 0.1212,
      "step": 23464
    },
    {
      "epoch": 0.6845498570511699,
      "grad_norm": 0.8870097026272866,
      "learning_rate": 2.3903576855323676e-06,
      "loss": 0.1198,
      "step": 23465
    },
    {
      "epoch": 0.6845790302818134,
      "grad_norm": 0.6458907704108451,
      "learning_rate": 2.3899547159200478e-06,
      "loss": 0.1033,
      "step": 23466
    },
    {
      "epoch": 0.684608203512457,
      "grad_norm": 0.7754345369737338,
      "learning_rate": 2.389551769609588e-06,
      "loss": 0.1012,
      "step": 23467
    },
    {
      "epoch": 0.6846373767431005,
      "grad_norm": 0.8907205336921865,
      "learning_rate": 2.389148846604588e-06,
      "loss": 0.1336,
      "step": 23468
    },
    {
      "epoch": 0.6846665499737441,
      "grad_norm": 0.9164401893153775,
      "learning_rate": 2.388745946908645e-06,
      "loss": 0.1102,
      "step": 23469
    },
    {
      "epoch": 0.6846957232043877,
      "grad_norm": 0.9711317861214254,
      "learning_rate": 2.3883430705253517e-06,
      "loss": 0.1224,
      "step": 23470
    },
    {
      "epoch": 0.6847248964350312,
      "grad_norm": 0.9728623130958626,
      "learning_rate": 2.387940217458309e-06,
      "loss": 0.1204,
      "step": 23471
    },
    {
      "epoch": 0.6847540696656748,
      "grad_norm": 0.7746221587657227,
      "learning_rate": 2.387537387711114e-06,
      "loss": 0.0982,
      "step": 23472
    },
    {
      "epoch": 0.6847832428963183,
      "grad_norm": 1.0602203978021345,
      "learning_rate": 2.3871345812873614e-06,
      "loss": 0.1259,
      "step": 23473
    },
    {
      "epoch": 0.6848124161269619,
      "grad_norm": 0.7457289504548994,
      "learning_rate": 2.386731798190646e-06,
      "loss": 0.0918,
      "step": 23474
    },
    {
      "epoch": 0.6848415893576054,
      "grad_norm": 0.9372285937573585,
      "learning_rate": 2.386329038424567e-06,
      "loss": 0.1355,
      "step": 23475
    },
    {
      "epoch": 0.684870762588249,
      "grad_norm": 0.8393391738534541,
      "learning_rate": 2.3859263019927183e-06,
      "loss": 0.116,
      "step": 23476
    },
    {
      "epoch": 0.6848999358188926,
      "grad_norm": 0.8456078402600375,
      "learning_rate": 2.3855235888986934e-06,
      "loss": 0.1154,
      "step": 23477
    },
    {
      "epoch": 0.6849291090495362,
      "grad_norm": 0.9142331599646668,
      "learning_rate": 2.38512089914609e-06,
      "loss": 0.1232,
      "step": 23478
    },
    {
      "epoch": 0.6849582822801797,
      "grad_norm": 0.8034376540527108,
      "learning_rate": 2.384718232738505e-06,
      "loss": 0.1045,
      "step": 23479
    },
    {
      "epoch": 0.6849874555108233,
      "grad_norm": 0.9115998323203565,
      "learning_rate": 2.3843155896795312e-06,
      "loss": 0.1163,
      "step": 23480
    },
    {
      "epoch": 0.6850166287414668,
      "grad_norm": 0.8322445656168338,
      "learning_rate": 2.383912969972762e-06,
      "loss": 0.1149,
      "step": 23481
    },
    {
      "epoch": 0.6850458019721104,
      "grad_norm": 0.9240018036308494,
      "learning_rate": 2.3835103736217946e-06,
      "loss": 0.1094,
      "step": 23482
    },
    {
      "epoch": 0.685074975202754,
      "grad_norm": 0.936371784389214,
      "learning_rate": 2.38310780063022e-06,
      "loss": 0.1487,
      "step": 23483
    },
    {
      "epoch": 0.6851041484333975,
      "grad_norm": 0.8993352478631058,
      "learning_rate": 2.3827052510016345e-06,
      "loss": 0.1371,
      "step": 23484
    },
    {
      "epoch": 0.6851333216640411,
      "grad_norm": 0.866249042145432,
      "learning_rate": 2.3823027247396336e-06,
      "loss": 0.0973,
      "step": 23485
    },
    {
      "epoch": 0.6851624948946846,
      "grad_norm": 0.8120086252175812,
      "learning_rate": 2.3819002218478095e-06,
      "loss": 0.1294,
      "step": 23486
    },
    {
      "epoch": 0.6851916681253282,
      "grad_norm": 0.9075515094803396,
      "learning_rate": 2.3814977423297525e-06,
      "loss": 0.1142,
      "step": 23487
    },
    {
      "epoch": 0.6852208413559717,
      "grad_norm": 0.8194432468452225,
      "learning_rate": 2.381095286189061e-06,
      "loss": 0.0967,
      "step": 23488
    },
    {
      "epoch": 0.6852500145866153,
      "grad_norm": 0.911996593340731,
      "learning_rate": 2.380692853429324e-06,
      "loss": 0.1093,
      "step": 23489
    },
    {
      "epoch": 0.6852791878172588,
      "grad_norm": 0.7433077733892137,
      "learning_rate": 2.380290444054137e-06,
      "loss": 0.1153,
      "step": 23490
    },
    {
      "epoch": 0.6853083610479025,
      "grad_norm": 0.6776526815796657,
      "learning_rate": 2.37988805806709e-06,
      "loss": 0.1125,
      "step": 23491
    },
    {
      "epoch": 0.685337534278546,
      "grad_norm": 0.9523202249609923,
      "learning_rate": 2.379485695471779e-06,
      "loss": 0.1318,
      "step": 23492
    },
    {
      "epoch": 0.6853667075091896,
      "grad_norm": 0.7864939572064003,
      "learning_rate": 2.3790833562717942e-06,
      "loss": 0.1148,
      "step": 23493
    },
    {
      "epoch": 0.6853958807398332,
      "grad_norm": 0.7627730295940247,
      "learning_rate": 2.3786810404707255e-06,
      "loss": 0.1162,
      "step": 23494
    },
    {
      "epoch": 0.6854250539704767,
      "grad_norm": 0.7648168097564796,
      "learning_rate": 2.3782787480721665e-06,
      "loss": 0.1123,
      "step": 23495
    },
    {
      "epoch": 0.6854542272011203,
      "grad_norm": 0.681673275124196,
      "learning_rate": 2.377876479079711e-06,
      "loss": 0.1036,
      "step": 23496
    },
    {
      "epoch": 0.6854834004317638,
      "grad_norm": 0.7671201754749983,
      "learning_rate": 2.3774742334969463e-06,
      "loss": 0.1245,
      "step": 23497
    },
    {
      "epoch": 0.6855125736624074,
      "grad_norm": 0.8428541625357826,
      "learning_rate": 2.3770720113274683e-06,
      "loss": 0.127,
      "step": 23498
    },
    {
      "epoch": 0.6855417468930509,
      "grad_norm": 1.0171462256525663,
      "learning_rate": 2.3766698125748646e-06,
      "loss": 0.1248,
      "step": 23499
    },
    {
      "epoch": 0.6855709201236945,
      "grad_norm": 0.9667422677895646,
      "learning_rate": 2.3762676372427247e-06,
      "loss": 0.1088,
      "step": 23500
    },
    {
      "epoch": 0.685600093354338,
      "grad_norm": 0.6010521933297716,
      "learning_rate": 2.3758654853346407e-06,
      "loss": 0.1219,
      "step": 23501
    },
    {
      "epoch": 0.6856292665849816,
      "grad_norm": 0.93863480082487,
      "learning_rate": 2.3754633568542056e-06,
      "loss": 0.112,
      "step": 23502
    },
    {
      "epoch": 0.6856584398156251,
      "grad_norm": 1.0692147299668187,
      "learning_rate": 2.375061251805007e-06,
      "loss": 0.1307,
      "step": 23503
    },
    {
      "epoch": 0.6856876130462688,
      "grad_norm": 0.7559153663930311,
      "learning_rate": 2.374659170190633e-06,
      "loss": 0.1185,
      "step": 23504
    },
    {
      "epoch": 0.6857167862769123,
      "grad_norm": 0.6871234650276633,
      "learning_rate": 2.3742571120146767e-06,
      "loss": 0.1123,
      "step": 23505
    },
    {
      "epoch": 0.6857459595075559,
      "grad_norm": 1.0453357658913915,
      "learning_rate": 2.373855077280727e-06,
      "loss": 0.1149,
      "step": 23506
    },
    {
      "epoch": 0.6857751327381995,
      "grad_norm": 0.9279121847645624,
      "learning_rate": 2.3734530659923695e-06,
      "loss": 0.1181,
      "step": 23507
    },
    {
      "epoch": 0.685804305968843,
      "grad_norm": 0.7215969775900233,
      "learning_rate": 2.373051078153196e-06,
      "loss": 0.1215,
      "step": 23508
    },
    {
      "epoch": 0.6858334791994866,
      "grad_norm": 0.9561340154637641,
      "learning_rate": 2.372649113766798e-06,
      "loss": 0.1324,
      "step": 23509
    },
    {
      "epoch": 0.6858626524301301,
      "grad_norm": 0.8386322298917255,
      "learning_rate": 2.3722471728367613e-06,
      "loss": 0.1242,
      "step": 23510
    },
    {
      "epoch": 0.6858918256607737,
      "grad_norm": 0.9650038111161826,
      "learning_rate": 2.371845255366672e-06,
      "loss": 0.1396,
      "step": 23511
    },
    {
      "epoch": 0.6859209988914172,
      "grad_norm": 1.0789013915669485,
      "learning_rate": 2.3714433613601236e-06,
      "loss": 0.1324,
      "step": 23512
    },
    {
      "epoch": 0.6859501721220608,
      "grad_norm": 0.8364909352913613,
      "learning_rate": 2.3710414908206993e-06,
      "loss": 0.1165,
      "step": 23513
    },
    {
      "epoch": 0.6859793453527043,
      "grad_norm": 1.068960133563225,
      "learning_rate": 2.3706396437519884e-06,
      "loss": 0.1058,
      "step": 23514
    },
    {
      "epoch": 0.6860085185833479,
      "grad_norm": 1.4907302630864512,
      "learning_rate": 2.3702378201575813e-06,
      "loss": 0.1169,
      "step": 23515
    },
    {
      "epoch": 0.6860376918139914,
      "grad_norm": 0.9768349753225332,
      "learning_rate": 2.3698360200410637e-06,
      "loss": 0.1369,
      "step": 23516
    },
    {
      "epoch": 0.686066865044635,
      "grad_norm": 1.0809606732262722,
      "learning_rate": 2.3694342434060197e-06,
      "loss": 0.1306,
      "step": 23517
    },
    {
      "epoch": 0.6860960382752787,
      "grad_norm": 1.178862064829292,
      "learning_rate": 2.369032490256041e-06,
      "loss": 0.11,
      "step": 23518
    },
    {
      "epoch": 0.6861252115059222,
      "grad_norm": 1.0757149116537466,
      "learning_rate": 2.36863076059471e-06,
      "loss": 0.1141,
      "step": 23519
    },
    {
      "epoch": 0.6861543847365658,
      "grad_norm": 1.0754990610337491,
      "learning_rate": 2.3682290544256177e-06,
      "loss": 0.1072,
      "step": 23520
    },
    {
      "epoch": 0.6861835579672093,
      "grad_norm": 0.9965731383422439,
      "learning_rate": 2.367827371752346e-06,
      "loss": 0.1138,
      "step": 23521
    },
    {
      "epoch": 0.6862127311978529,
      "grad_norm": 1.0173008119921403,
      "learning_rate": 2.367425712578485e-06,
      "loss": 0.0927,
      "step": 23522
    },
    {
      "epoch": 0.6862419044284964,
      "grad_norm": 1.0887430923450627,
      "learning_rate": 2.367024076907619e-06,
      "loss": 0.1315,
      "step": 23523
    },
    {
      "epoch": 0.68627107765914,
      "grad_norm": 0.9403419166037104,
      "learning_rate": 2.3666224647433316e-06,
      "loss": 0.1346,
      "step": 23524
    },
    {
      "epoch": 0.6863002508897835,
      "grad_norm": 1.0754113609847809,
      "learning_rate": 2.36622087608921e-06,
      "loss": 0.1456,
      "step": 23525
    },
    {
      "epoch": 0.6863294241204271,
      "grad_norm": 0.891411413120843,
      "learning_rate": 2.365819310948842e-06,
      "loss": 0.1239,
      "step": 23526
    },
    {
      "epoch": 0.6863585973510706,
      "grad_norm": 0.8652430958392724,
      "learning_rate": 2.365417769325808e-06,
      "loss": 0.1306,
      "step": 23527
    },
    {
      "epoch": 0.6863877705817142,
      "grad_norm": 0.8521861191231433,
      "learning_rate": 2.3650162512236976e-06,
      "loss": 0.123,
      "step": 23528
    },
    {
      "epoch": 0.6864169438123577,
      "grad_norm": 0.7609696299282188,
      "learning_rate": 2.3646147566460925e-06,
      "loss": 0.0889,
      "step": 23529
    },
    {
      "epoch": 0.6864461170430013,
      "grad_norm": 0.8147779881198222,
      "learning_rate": 2.364213285596576e-06,
      "loss": 0.1262,
      "step": 23530
    },
    {
      "epoch": 0.686475290273645,
      "grad_norm": 0.6912119070395232,
      "learning_rate": 2.3638118380787343e-06,
      "loss": 0.1108,
      "step": 23531
    },
    {
      "epoch": 0.6865044635042885,
      "grad_norm": 0.7823489429027166,
      "learning_rate": 2.3634104140961526e-06,
      "loss": 0.1118,
      "step": 23532
    },
    {
      "epoch": 0.6865336367349321,
      "grad_norm": 0.8024496981683377,
      "learning_rate": 2.363009013652414e-06,
      "loss": 0.1224,
      "step": 23533
    },
    {
      "epoch": 0.6865628099655756,
      "grad_norm": 0.8262579075470796,
      "learning_rate": 2.362607636751099e-06,
      "loss": 0.1344,
      "step": 23534
    },
    {
      "epoch": 0.6865919831962192,
      "grad_norm": 0.7887056906078238,
      "learning_rate": 2.362206283395796e-06,
      "loss": 0.1151,
      "step": 23535
    },
    {
      "epoch": 0.6866211564268627,
      "grad_norm": 0.7806153772020534,
      "learning_rate": 2.361804953590085e-06,
      "loss": 0.1355,
      "step": 23536
    },
    {
      "epoch": 0.6866503296575063,
      "grad_norm": 0.8653411093637556,
      "learning_rate": 2.361403647337548e-06,
      "loss": 0.1285,
      "step": 23537
    },
    {
      "epoch": 0.6866795028881498,
      "grad_norm": 1.4017383155451708,
      "learning_rate": 2.361002364641769e-06,
      "loss": 0.1322,
      "step": 23538
    },
    {
      "epoch": 0.6867086761187934,
      "grad_norm": 1.144977948650432,
      "learning_rate": 2.3606011055063334e-06,
      "loss": 0.1307,
      "step": 23539
    },
    {
      "epoch": 0.6867378493494369,
      "grad_norm": 0.7977310722793292,
      "learning_rate": 2.3601998699348204e-06,
      "loss": 0.1241,
      "step": 23540
    },
    {
      "epoch": 0.6867670225800805,
      "grad_norm": 0.9778755835224564,
      "learning_rate": 2.359798657930811e-06,
      "loss": 0.1252,
      "step": 23541
    },
    {
      "epoch": 0.686796195810724,
      "grad_norm": 0.9654940653137173,
      "learning_rate": 2.359397469497891e-06,
      "loss": 0.1346,
      "step": 23542
    },
    {
      "epoch": 0.6868253690413676,
      "grad_norm": 0.8260042062894757,
      "learning_rate": 2.358996304639638e-06,
      "loss": 0.1188,
      "step": 23543
    },
    {
      "epoch": 0.6868545422720111,
      "grad_norm": 0.8274462611216946,
      "learning_rate": 2.3585951633596355e-06,
      "loss": 0.1116,
      "step": 23544
    },
    {
      "epoch": 0.6868837155026548,
      "grad_norm": 1.1001433245277148,
      "learning_rate": 2.358194045661467e-06,
      "loss": 0.1436,
      "step": 23545
    },
    {
      "epoch": 0.6869128887332984,
      "grad_norm": 0.9133574728664574,
      "learning_rate": 2.3577929515487114e-06,
      "loss": 0.1107,
      "step": 23546
    },
    {
      "epoch": 0.6869420619639419,
      "grad_norm": 0.953493345282723,
      "learning_rate": 2.3573918810249474e-06,
      "loss": 0.1138,
      "step": 23547
    },
    {
      "epoch": 0.6869712351945855,
      "grad_norm": 1.194121128930686,
      "learning_rate": 2.35699083409376e-06,
      "loss": 0.1361,
      "step": 23548
    },
    {
      "epoch": 0.687000408425229,
      "grad_norm": 0.8862759394539529,
      "learning_rate": 2.3565898107587252e-06,
      "loss": 0.1255,
      "step": 23549
    },
    {
      "epoch": 0.6870295816558726,
      "grad_norm": 0.9633645413471328,
      "learning_rate": 2.3561888110234282e-06,
      "loss": 0.1089,
      "step": 23550
    },
    {
      "epoch": 0.6870587548865161,
      "grad_norm": 0.7766659798818281,
      "learning_rate": 2.355787834891444e-06,
      "loss": 0.1031,
      "step": 23551
    },
    {
      "epoch": 0.6870879281171597,
      "grad_norm": 0.9616354443413634,
      "learning_rate": 2.3553868823663566e-06,
      "loss": 0.1298,
      "step": 23552
    },
    {
      "epoch": 0.6871171013478032,
      "grad_norm": 0.8658744516447879,
      "learning_rate": 2.354985953451744e-06,
      "loss": 0.1279,
      "step": 23553
    },
    {
      "epoch": 0.6871462745784468,
      "grad_norm": 0.7759188916579862,
      "learning_rate": 2.354585048151183e-06,
      "loss": 0.1247,
      "step": 23554
    },
    {
      "epoch": 0.6871754478090903,
      "grad_norm": 0.9923030343845581,
      "learning_rate": 2.3541841664682557e-06,
      "loss": 0.1286,
      "step": 23555
    },
    {
      "epoch": 0.6872046210397339,
      "grad_norm": 0.9361035162978071,
      "learning_rate": 2.353783308406542e-06,
      "loss": 0.1208,
      "step": 23556
    },
    {
      "epoch": 0.6872337942703775,
      "grad_norm": 0.842792622859444,
      "learning_rate": 2.3533824739696177e-06,
      "loss": 0.1457,
      "step": 23557
    },
    {
      "epoch": 0.6872629675010211,
      "grad_norm": 0.9782272165786156,
      "learning_rate": 2.352981663161065e-06,
      "loss": 0.1268,
      "step": 23558
    },
    {
      "epoch": 0.6872921407316647,
      "grad_norm": 0.8433063502457212,
      "learning_rate": 2.3525808759844597e-06,
      "loss": 0.1263,
      "step": 23559
    },
    {
      "epoch": 0.6873213139623082,
      "grad_norm": 0.8262014928374279,
      "learning_rate": 2.3521801124433785e-06,
      "loss": 0.1188,
      "step": 23560
    },
    {
      "epoch": 0.6873504871929518,
      "grad_norm": 0.8564325546700844,
      "learning_rate": 2.3517793725414012e-06,
      "loss": 0.104,
      "step": 23561
    },
    {
      "epoch": 0.6873796604235953,
      "grad_norm": 0.8299099115305489,
      "learning_rate": 2.3513786562821074e-06,
      "loss": 0.12,
      "step": 23562
    },
    {
      "epoch": 0.6874088336542389,
      "grad_norm": 0.8302422510429294,
      "learning_rate": 2.350977963669073e-06,
      "loss": 0.136,
      "step": 23563
    },
    {
      "epoch": 0.6874380068848824,
      "grad_norm": 1.0926938136093967,
      "learning_rate": 2.3505772947058724e-06,
      "loss": 0.1236,
      "step": 23564
    },
    {
      "epoch": 0.687467180115526,
      "grad_norm": 0.8118820649428803,
      "learning_rate": 2.3501766493960877e-06,
      "loss": 0.1447,
      "step": 23565
    },
    {
      "epoch": 0.6874963533461695,
      "grad_norm": 0.7483750510343007,
      "learning_rate": 2.349776027743293e-06,
      "loss": 0.1215,
      "step": 23566
    },
    {
      "epoch": 0.6875255265768131,
      "grad_norm": 0.9237602169403718,
      "learning_rate": 2.3493754297510633e-06,
      "loss": 0.1183,
      "step": 23567
    },
    {
      "epoch": 0.6875546998074566,
      "grad_norm": 0.9099250676978813,
      "learning_rate": 2.3489748554229776e-06,
      "loss": 0.0973,
      "step": 23568
    },
    {
      "epoch": 0.6875838730381002,
      "grad_norm": 0.8952863050396787,
      "learning_rate": 2.348574304762613e-06,
      "loss": 0.1191,
      "step": 23569
    },
    {
      "epoch": 0.6876130462687438,
      "grad_norm": 0.8276489452699551,
      "learning_rate": 2.3481737777735442e-06,
      "loss": 0.1178,
      "step": 23570
    },
    {
      "epoch": 0.6876422194993873,
      "grad_norm": 1.82843189096159,
      "learning_rate": 2.3477732744593447e-06,
      "loss": 0.1413,
      "step": 23571
    },
    {
      "epoch": 0.687671392730031,
      "grad_norm": 0.906001687367933,
      "learning_rate": 2.3473727948235942e-06,
      "loss": 0.1161,
      "step": 23572
    },
    {
      "epoch": 0.6877005659606745,
      "grad_norm": 0.7681955824426587,
      "learning_rate": 2.3469723388698647e-06,
      "loss": 0.1358,
      "step": 23573
    },
    {
      "epoch": 0.6877297391913181,
      "grad_norm": 0.8080897467609642,
      "learning_rate": 2.3465719066017323e-06,
      "loss": 0.1245,
      "step": 23574
    },
    {
      "epoch": 0.6877589124219616,
      "grad_norm": 0.8433169982377376,
      "learning_rate": 2.3461714980227744e-06,
      "loss": 0.1013,
      "step": 23575
    },
    {
      "epoch": 0.6877880856526052,
      "grad_norm": 0.8831885607690425,
      "learning_rate": 2.345771113136564e-06,
      "loss": 0.1329,
      "step": 23576
    },
    {
      "epoch": 0.6878172588832487,
      "grad_norm": 0.8231088923519766,
      "learning_rate": 2.345370751946674e-06,
      "loss": 0.1281,
      "step": 23577
    },
    {
      "epoch": 0.6878464321138923,
      "grad_norm": 0.8871687192229474,
      "learning_rate": 2.3449704144566817e-06,
      "loss": 0.1291,
      "step": 23578
    },
    {
      "epoch": 0.6878756053445358,
      "grad_norm": 0.8846479280956788,
      "learning_rate": 2.3445701006701576e-06,
      "loss": 0.1237,
      "step": 23579
    },
    {
      "epoch": 0.6879047785751794,
      "grad_norm": 0.8714965548841259,
      "learning_rate": 2.34416981059068e-06,
      "loss": 0.1027,
      "step": 23580
    },
    {
      "epoch": 0.687933951805823,
      "grad_norm": 0.8758882087340438,
      "learning_rate": 2.3437695442218184e-06,
      "loss": 0.1043,
      "step": 23581
    },
    {
      "epoch": 0.6879631250364665,
      "grad_norm": 0.7658119347014151,
      "learning_rate": 2.3433693015671498e-06,
      "loss": 0.105,
      "step": 23582
    },
    {
      "epoch": 0.6879922982671101,
      "grad_norm": 0.9644642036937223,
      "learning_rate": 2.3429690826302464e-06,
      "loss": 0.1235,
      "step": 23583
    },
    {
      "epoch": 0.6880214714977536,
      "grad_norm": 0.7584404917842424,
      "learning_rate": 2.3425688874146787e-06,
      "loss": 0.1224,
      "step": 23584
    },
    {
      "epoch": 0.6880506447283973,
      "grad_norm": 1.0403847680416156,
      "learning_rate": 2.3421687159240214e-06,
      "loss": 0.1183,
      "step": 23585
    },
    {
      "epoch": 0.6880798179590408,
      "grad_norm": 0.9765791641162657,
      "learning_rate": 2.341768568161849e-06,
      "loss": 0.1051,
      "step": 23586
    },
    {
      "epoch": 0.6881089911896844,
      "grad_norm": 0.8023847088816684,
      "learning_rate": 2.341368444131733e-06,
      "loss": 0.0974,
      "step": 23587
    },
    {
      "epoch": 0.6881381644203279,
      "grad_norm": 1.3936574373681199,
      "learning_rate": 2.3409683438372427e-06,
      "loss": 0.1291,
      "step": 23588
    },
    {
      "epoch": 0.6881673376509715,
      "grad_norm": 0.9726354913736999,
      "learning_rate": 2.3405682672819534e-06,
      "loss": 0.1296,
      "step": 23589
    },
    {
      "epoch": 0.688196510881615,
      "grad_norm": 1.2558747537163648,
      "learning_rate": 2.3401682144694347e-06,
      "loss": 0.1268,
      "step": 23590
    },
    {
      "epoch": 0.6882256841122586,
      "grad_norm": 1.049934863308751,
      "learning_rate": 2.339768185403259e-06,
      "loss": 0.1142,
      "step": 23591
    },
    {
      "epoch": 0.6882548573429021,
      "grad_norm": 0.9673425357026465,
      "learning_rate": 2.339368180087e-06,
      "loss": 0.1105,
      "step": 23592
    },
    {
      "epoch": 0.6882840305735457,
      "grad_norm": 0.8431165119550438,
      "learning_rate": 2.338968198524226e-06,
      "loss": 0.1207,
      "step": 23593
    },
    {
      "epoch": 0.6883132038041893,
      "grad_norm": 0.934694810059534,
      "learning_rate": 2.338568240718508e-06,
      "loss": 0.1051,
      "step": 23594
    },
    {
      "epoch": 0.6883423770348328,
      "grad_norm": 1.0677053958956826,
      "learning_rate": 2.3381683066734182e-06,
      "loss": 0.1272,
      "step": 23595
    },
    {
      "epoch": 0.6883715502654764,
      "grad_norm": 0.8241464009186282,
      "learning_rate": 2.3377683963925252e-06,
      "loss": 0.1262,
      "step": 23596
    },
    {
      "epoch": 0.6884007234961199,
      "grad_norm": 0.8876436793994138,
      "learning_rate": 2.3373685098794017e-06,
      "loss": 0.11,
      "step": 23597
    },
    {
      "epoch": 0.6884298967267635,
      "grad_norm": 0.7850310122455091,
      "learning_rate": 2.336968647137615e-06,
      "loss": 0.119,
      "step": 23598
    },
    {
      "epoch": 0.6884590699574071,
      "grad_norm": 0.762904058366715,
      "learning_rate": 2.3365688081707383e-06,
      "loss": 0.111,
      "step": 23599
    },
    {
      "epoch": 0.6884882431880507,
      "grad_norm": 0.8473425197337645,
      "learning_rate": 2.3361689929823396e-06,
      "loss": 0.1203,
      "step": 23600
    },
    {
      "epoch": 0.6885174164186942,
      "grad_norm": 0.7210537937423882,
      "learning_rate": 2.335769201575986e-06,
      "loss": 0.1177,
      "step": 23601
    },
    {
      "epoch": 0.6885465896493378,
      "grad_norm": 0.6633151120612071,
      "learning_rate": 2.335369433955249e-06,
      "loss": 0.1084,
      "step": 23602
    },
    {
      "epoch": 0.6885757628799813,
      "grad_norm": 0.8600509904948564,
      "learning_rate": 2.3349696901236995e-06,
      "loss": 0.1313,
      "step": 23603
    },
    {
      "epoch": 0.6886049361106249,
      "grad_norm": 0.6893909561461504,
      "learning_rate": 2.334569970084903e-06,
      "loss": 0.1178,
      "step": 23604
    },
    {
      "epoch": 0.6886341093412685,
      "grad_norm": 0.698221538234911,
      "learning_rate": 2.334170273842431e-06,
      "loss": 0.1129,
      "step": 23605
    },
    {
      "epoch": 0.688663282571912,
      "grad_norm": 0.8706561084080148,
      "learning_rate": 2.3337706013998508e-06,
      "loss": 0.1436,
      "step": 23606
    },
    {
      "epoch": 0.6886924558025556,
      "grad_norm": 0.7001568737571077,
      "learning_rate": 2.333370952760728e-06,
      "loss": 0.1254,
      "step": 23607
    },
    {
      "epoch": 0.6887216290331991,
      "grad_norm": 0.7872645545012035,
      "learning_rate": 2.3329713279286325e-06,
      "loss": 0.1285,
      "step": 23608
    },
    {
      "epoch": 0.6887508022638427,
      "grad_norm": 0.8688513455437255,
      "learning_rate": 2.3325717269071346e-06,
      "loss": 0.1594,
      "step": 23609
    },
    {
      "epoch": 0.6887799754944862,
      "grad_norm": 0.9447196492204607,
      "learning_rate": 2.332172149699799e-06,
      "loss": 0.1018,
      "step": 23610
    },
    {
      "epoch": 0.6888091487251298,
      "grad_norm": 3.555944155520488,
      "learning_rate": 2.3317725963101923e-06,
      "loss": 0.1203,
      "step": 23611
    },
    {
      "epoch": 0.6888383219557734,
      "grad_norm": 1.043843622347159,
      "learning_rate": 2.3313730667418846e-06,
      "loss": 0.1391,
      "step": 23612
    },
    {
      "epoch": 0.688867495186417,
      "grad_norm": 1.1034160358026852,
      "learning_rate": 2.3309735609984414e-06,
      "loss": 0.1444,
      "step": 23613
    },
    {
      "epoch": 0.6888966684170605,
      "grad_norm": 0.7520143710781712,
      "learning_rate": 2.3305740790834263e-06,
      "loss": 0.1004,
      "step": 23614
    },
    {
      "epoch": 0.6889258416477041,
      "grad_norm": 0.770345667529433,
      "learning_rate": 2.3301746210004094e-06,
      "loss": 0.1314,
      "step": 23615
    },
    {
      "epoch": 0.6889550148783476,
      "grad_norm": 0.7418194805544203,
      "learning_rate": 2.3297751867529578e-06,
      "loss": 0.1275,
      "step": 23616
    },
    {
      "epoch": 0.6889841881089912,
      "grad_norm": 0.991478484394904,
      "learning_rate": 2.329375776344636e-06,
      "loss": 0.1157,
      "step": 23617
    },
    {
      "epoch": 0.6890133613396348,
      "grad_norm": 0.722884649775238,
      "learning_rate": 2.328976389779008e-06,
      "loss": 0.1095,
      "step": 23618
    },
    {
      "epoch": 0.6890425345702783,
      "grad_norm": 0.7579140561546834,
      "learning_rate": 2.3285770270596424e-06,
      "loss": 0.1126,
      "step": 23619
    },
    {
      "epoch": 0.6890717078009219,
      "grad_norm": 0.7967802307443903,
      "learning_rate": 2.328177688190102e-06,
      "loss": 0.1139,
      "step": 23620
    },
    {
      "epoch": 0.6891008810315654,
      "grad_norm": 0.8044319982174284,
      "learning_rate": 2.3277783731739532e-06,
      "loss": 0.1157,
      "step": 23621
    },
    {
      "epoch": 0.689130054262209,
      "grad_norm": 0.593504956725694,
      "learning_rate": 2.3273790820147634e-06,
      "loss": 0.12,
      "step": 23622
    },
    {
      "epoch": 0.6891592274928525,
      "grad_norm": 0.934777912502349,
      "learning_rate": 2.326979814716095e-06,
      "loss": 0.1277,
      "step": 23623
    },
    {
      "epoch": 0.6891884007234961,
      "grad_norm": 0.9523415607532826,
      "learning_rate": 2.326580571281511e-06,
      "loss": 0.1304,
      "step": 23624
    },
    {
      "epoch": 0.6892175739541396,
      "grad_norm": 0.7213247178173505,
      "learning_rate": 2.3261813517145787e-06,
      "loss": 0.108,
      "step": 23625
    },
    {
      "epoch": 0.6892467471847833,
      "grad_norm": 0.5879304315447973,
      "learning_rate": 2.32578215601886e-06,
      "loss": 0.0956,
      "step": 23626
    },
    {
      "epoch": 0.6892759204154268,
      "grad_norm": 0.8829091017552038,
      "learning_rate": 2.325382984197921e-06,
      "loss": 0.1355,
      "step": 23627
    },
    {
      "epoch": 0.6893050936460704,
      "grad_norm": 1.078545661877942,
      "learning_rate": 2.3249838362553224e-06,
      "loss": 0.0971,
      "step": 23628
    },
    {
      "epoch": 0.689334266876714,
      "grad_norm": 0.8490332553845026,
      "learning_rate": 2.3245847121946314e-06,
      "loss": 0.111,
      "step": 23629
    },
    {
      "epoch": 0.6893634401073575,
      "grad_norm": 0.7672113373591892,
      "learning_rate": 2.3241856120194094e-06,
      "loss": 0.1227,
      "step": 23630
    },
    {
      "epoch": 0.6893926133380011,
      "grad_norm": 1.0170675625170897,
      "learning_rate": 2.3237865357332185e-06,
      "loss": 0.1175,
      "step": 23631
    },
    {
      "epoch": 0.6894217865686446,
      "grad_norm": 0.7222560693625352,
      "learning_rate": 2.3233874833396213e-06,
      "loss": 0.1017,
      "step": 23632
    },
    {
      "epoch": 0.6894509597992882,
      "grad_norm": 0.9561382202915274,
      "learning_rate": 2.3229884548421844e-06,
      "loss": 0.1212,
      "step": 23633
    },
    {
      "epoch": 0.6894801330299317,
      "grad_norm": 0.7687430226846184,
      "learning_rate": 2.322589450244465e-06,
      "loss": 0.1221,
      "step": 23634
    },
    {
      "epoch": 0.6895093062605753,
      "grad_norm": 0.8756176070118019,
      "learning_rate": 2.32219046955003e-06,
      "loss": 0.1118,
      "step": 23635
    },
    {
      "epoch": 0.6895384794912188,
      "grad_norm": 0.8219276207287353,
      "learning_rate": 2.32179151276244e-06,
      "loss": 0.1428,
      "step": 23636
    },
    {
      "epoch": 0.6895676527218624,
      "grad_norm": 0.7935008080019541,
      "learning_rate": 2.3213925798852534e-06,
      "loss": 0.1393,
      "step": 23637
    },
    {
      "epoch": 0.6895968259525059,
      "grad_norm": 1.013543929059074,
      "learning_rate": 2.3209936709220343e-06,
      "loss": 0.1336,
      "step": 23638
    },
    {
      "epoch": 0.6896259991831496,
      "grad_norm": 0.8030855768031363,
      "learning_rate": 2.320594785876346e-06,
      "loss": 0.1259,
      "step": 23639
    },
    {
      "epoch": 0.6896551724137931,
      "grad_norm": 0.9639021359278168,
      "learning_rate": 2.320195924751748e-06,
      "loss": 0.1383,
      "step": 23640
    },
    {
      "epoch": 0.6896843456444367,
      "grad_norm": 0.8700975954012806,
      "learning_rate": 2.3197970875517995e-06,
      "loss": 0.1281,
      "step": 23641
    },
    {
      "epoch": 0.6897135188750803,
      "grad_norm": 0.8136724596954176,
      "learning_rate": 2.3193982742800647e-06,
      "loss": 0.1443,
      "step": 23642
    },
    {
      "epoch": 0.6897426921057238,
      "grad_norm": 1.0189556984596226,
      "learning_rate": 2.3189994849401015e-06,
      "loss": 0.1379,
      "step": 23643
    },
    {
      "epoch": 0.6897718653363674,
      "grad_norm": 0.9742464893363215,
      "learning_rate": 2.31860071953547e-06,
      "loss": 0.1059,
      "step": 23644
    },
    {
      "epoch": 0.6898010385670109,
      "grad_norm": 0.7963701868180952,
      "learning_rate": 2.31820197806973e-06,
      "loss": 0.1269,
      "step": 23645
    },
    {
      "epoch": 0.6898302117976545,
      "grad_norm": 0.8386964550617722,
      "learning_rate": 2.317803260546445e-06,
      "loss": 0.131,
      "step": 23646
    },
    {
      "epoch": 0.689859385028298,
      "grad_norm": 0.9893009468384862,
      "learning_rate": 2.3174045669691724e-06,
      "loss": 0.1367,
      "step": 23647
    },
    {
      "epoch": 0.6898885582589416,
      "grad_norm": 0.8806406178431144,
      "learning_rate": 2.3170058973414696e-06,
      "loss": 0.1219,
      "step": 23648
    },
    {
      "epoch": 0.6899177314895851,
      "grad_norm": 0.9552274158140003,
      "learning_rate": 2.3166072516668992e-06,
      "loss": 0.1331,
      "step": 23649
    },
    {
      "epoch": 0.6899469047202287,
      "grad_norm": 1.0930118529298434,
      "learning_rate": 2.316208629949017e-06,
      "loss": 0.1157,
      "step": 23650
    },
    {
      "epoch": 0.6899760779508722,
      "grad_norm": 0.7272985772568562,
      "learning_rate": 2.3158100321913836e-06,
      "loss": 0.1108,
      "step": 23651
    },
    {
      "epoch": 0.6900052511815158,
      "grad_norm": 0.8064813106794204,
      "learning_rate": 2.315411458397559e-06,
      "loss": 0.1249,
      "step": 23652
    },
    {
      "epoch": 0.6900344244121595,
      "grad_norm": 0.9704866191529995,
      "learning_rate": 2.3150129085710998e-06,
      "loss": 0.12,
      "step": 23653
    },
    {
      "epoch": 0.690063597642803,
      "grad_norm": 0.9910425680357245,
      "learning_rate": 2.314614382715563e-06,
      "loss": 0.113,
      "step": 23654
    },
    {
      "epoch": 0.6900927708734466,
      "grad_norm": 0.9334528401558666,
      "learning_rate": 2.31421588083451e-06,
      "loss": 0.123,
      "step": 23655
    },
    {
      "epoch": 0.6901219441040901,
      "grad_norm": 0.9126471488991307,
      "learning_rate": 2.313817402931494e-06,
      "loss": 0.1528,
      "step": 23656
    },
    {
      "epoch": 0.6901511173347337,
      "grad_norm": 0.9908774099220438,
      "learning_rate": 2.3134189490100773e-06,
      "loss": 0.1204,
      "step": 23657
    },
    {
      "epoch": 0.6901802905653772,
      "grad_norm": 0.819538434326238,
      "learning_rate": 2.313020519073813e-06,
      "loss": 0.1182,
      "step": 23658
    },
    {
      "epoch": 0.6902094637960208,
      "grad_norm": 0.800339781816359,
      "learning_rate": 2.3126221131262614e-06,
      "loss": 0.1181,
      "step": 23659
    },
    {
      "epoch": 0.6902386370266643,
      "grad_norm": 0.8457097695409919,
      "learning_rate": 2.312223731170979e-06,
      "loss": 0.1153,
      "step": 23660
    },
    {
      "epoch": 0.6902678102573079,
      "grad_norm": 0.95150576897296,
      "learning_rate": 2.3118253732115186e-06,
      "loss": 0.1182,
      "step": 23661
    },
    {
      "epoch": 0.6902969834879514,
      "grad_norm": 0.8905554901884798,
      "learning_rate": 2.3114270392514404e-06,
      "loss": 0.1168,
      "step": 23662
    },
    {
      "epoch": 0.690326156718595,
      "grad_norm": 1.042156377872879,
      "learning_rate": 2.311028729294301e-06,
      "loss": 0.1083,
      "step": 23663
    },
    {
      "epoch": 0.6903553299492385,
      "grad_norm": 0.8702989898114386,
      "learning_rate": 2.310630443343654e-06,
      "loss": 0.1129,
      "step": 23664
    },
    {
      "epoch": 0.6903845031798821,
      "grad_norm": 1.0316251151985667,
      "learning_rate": 2.3102321814030577e-06,
      "loss": 0.1381,
      "step": 23665
    },
    {
      "epoch": 0.6904136764105258,
      "grad_norm": 1.038377239359597,
      "learning_rate": 2.309833943476067e-06,
      "loss": 0.1058,
      "step": 23666
    },
    {
      "epoch": 0.6904428496411693,
      "grad_norm": 0.8366944025737572,
      "learning_rate": 2.309435729566234e-06,
      "loss": 0.1254,
      "step": 23667
    },
    {
      "epoch": 0.6904720228718129,
      "grad_norm": 0.6714876382825048,
      "learning_rate": 2.309037539677117e-06,
      "loss": 0.1089,
      "step": 23668
    },
    {
      "epoch": 0.6905011961024564,
      "grad_norm": 0.91272258081961,
      "learning_rate": 2.3086393738122718e-06,
      "loss": 0.1167,
      "step": 23669
    },
    {
      "epoch": 0.6905303693331,
      "grad_norm": 1.1446665184893436,
      "learning_rate": 2.3082412319752525e-06,
      "loss": 0.142,
      "step": 23670
    },
    {
      "epoch": 0.6905595425637435,
      "grad_norm": 0.9077590475189545,
      "learning_rate": 2.307843114169611e-06,
      "loss": 0.1242,
      "step": 23671
    },
    {
      "epoch": 0.6905887157943871,
      "grad_norm": 0.6449499241531631,
      "learning_rate": 2.3074450203989046e-06,
      "loss": 0.1024,
      "step": 23672
    },
    {
      "epoch": 0.6906178890250306,
      "grad_norm": 1.0168014356209796,
      "learning_rate": 2.307046950666687e-06,
      "loss": 0.1385,
      "step": 23673
    },
    {
      "epoch": 0.6906470622556742,
      "grad_norm": 0.9092898333155542,
      "learning_rate": 2.3066489049765096e-06,
      "loss": 0.1162,
      "step": 23674
    },
    {
      "epoch": 0.6906762354863177,
      "grad_norm": 0.9193741929850859,
      "learning_rate": 2.3062508833319273e-06,
      "loss": 0.1428,
      "step": 23675
    },
    {
      "epoch": 0.6907054087169613,
      "grad_norm": 0.8523432817325961,
      "learning_rate": 2.3058528857364963e-06,
      "loss": 0.1107,
      "step": 23676
    },
    {
      "epoch": 0.6907345819476048,
      "grad_norm": 0.8534216923236597,
      "learning_rate": 2.3054549121937674e-06,
      "loss": 0.1242,
      "step": 23677
    },
    {
      "epoch": 0.6907637551782484,
      "grad_norm": 0.9272245904195249,
      "learning_rate": 2.305056962707292e-06,
      "loss": 0.1297,
      "step": 23678
    },
    {
      "epoch": 0.690792928408892,
      "grad_norm": 1.1494664409395687,
      "learning_rate": 2.3046590372806268e-06,
      "loss": 0.1071,
      "step": 23679
    },
    {
      "epoch": 0.6908221016395356,
      "grad_norm": 0.9187653162047094,
      "learning_rate": 2.30426113591732e-06,
      "loss": 0.1232,
      "step": 23680
    },
    {
      "epoch": 0.6908512748701792,
      "grad_norm": 0.8784483484111315,
      "learning_rate": 2.3038632586209264e-06,
      "loss": 0.1436,
      "step": 23681
    },
    {
      "epoch": 0.6908804481008227,
      "grad_norm": 0.8719874068627423,
      "learning_rate": 2.303465405395e-06,
      "loss": 0.1136,
      "step": 23682
    },
    {
      "epoch": 0.6909096213314663,
      "grad_norm": 1.1241996492284672,
      "learning_rate": 2.3030675762430906e-06,
      "loss": 0.1352,
      "step": 23683
    },
    {
      "epoch": 0.6909387945621098,
      "grad_norm": 0.9061017200117076,
      "learning_rate": 2.3026697711687477e-06,
      "loss": 0.1456,
      "step": 23684
    },
    {
      "epoch": 0.6909679677927534,
      "grad_norm": 0.9601401034716334,
      "learning_rate": 2.302271990175528e-06,
      "loss": 0.1281,
      "step": 23685
    },
    {
      "epoch": 0.6909971410233969,
      "grad_norm": 0.9931266493491848,
      "learning_rate": 2.3018742332669775e-06,
      "loss": 0.11,
      "step": 23686
    },
    {
      "epoch": 0.6910263142540405,
      "grad_norm": 0.8268954279743886,
      "learning_rate": 2.301476500446652e-06,
      "loss": 0.1063,
      "step": 23687
    },
    {
      "epoch": 0.691055487484684,
      "grad_norm": 0.7578368801598571,
      "learning_rate": 2.301078791718098e-06,
      "loss": 0.1133,
      "step": 23688
    },
    {
      "epoch": 0.6910846607153276,
      "grad_norm": 1.0595097715640953,
      "learning_rate": 2.30068110708487e-06,
      "loss": 0.1094,
      "step": 23689
    },
    {
      "epoch": 0.6911138339459711,
      "grad_norm": 0.7898769517130635,
      "learning_rate": 2.300283446550517e-06,
      "loss": 0.1308,
      "step": 23690
    },
    {
      "epoch": 0.6911430071766147,
      "grad_norm": 1.1145970578791549,
      "learning_rate": 2.2998858101185873e-06,
      "loss": 0.1293,
      "step": 23691
    },
    {
      "epoch": 0.6911721804072583,
      "grad_norm": 0.836399848990084,
      "learning_rate": 2.299488197792632e-06,
      "loss": 0.1221,
      "step": 23692
    },
    {
      "epoch": 0.6912013536379019,
      "grad_norm": 0.7747572602824823,
      "learning_rate": 2.2990906095762033e-06,
      "loss": 0.125,
      "step": 23693
    },
    {
      "epoch": 0.6912305268685455,
      "grad_norm": 0.9102249540121757,
      "learning_rate": 2.2986930454728474e-06,
      "loss": 0.1108,
      "step": 23694
    },
    {
      "epoch": 0.691259700099189,
      "grad_norm": 0.7265958771338673,
      "learning_rate": 2.2982955054861166e-06,
      "loss": 0.1251,
      "step": 23695
    },
    {
      "epoch": 0.6912888733298326,
      "grad_norm": 0.9534936034246931,
      "learning_rate": 2.2978979896195587e-06,
      "loss": 0.1257,
      "step": 23696
    },
    {
      "epoch": 0.6913180465604761,
      "grad_norm": 0.7784759412891694,
      "learning_rate": 2.2975004978767206e-06,
      "loss": 0.1051,
      "step": 23697
    },
    {
      "epoch": 0.6913472197911197,
      "grad_norm": 1.0035190140289736,
      "learning_rate": 2.297103030261153e-06,
      "loss": 0.1273,
      "step": 23698
    },
    {
      "epoch": 0.6913763930217632,
      "grad_norm": 0.8378814986653959,
      "learning_rate": 2.296705586776406e-06,
      "loss": 0.1235,
      "step": 23699
    },
    {
      "epoch": 0.6914055662524068,
      "grad_norm": 0.7593074884696484,
      "learning_rate": 2.2963081674260267e-06,
      "loss": 0.1148,
      "step": 23700
    },
    {
      "epoch": 0.6914347394830503,
      "grad_norm": 0.9424191104104974,
      "learning_rate": 2.2959107722135603e-06,
      "loss": 0.1188,
      "step": 23701
    },
    {
      "epoch": 0.6914639127136939,
      "grad_norm": 0.9697443066315308,
      "learning_rate": 2.295513401142559e-06,
      "loss": 0.1372,
      "step": 23702
    },
    {
      "epoch": 0.6914930859443374,
      "grad_norm": 0.8505360642343872,
      "learning_rate": 2.2951160542165684e-06,
      "loss": 0.1226,
      "step": 23703
    },
    {
      "epoch": 0.691522259174981,
      "grad_norm": 0.6855969249332672,
      "learning_rate": 2.2947187314391346e-06,
      "loss": 0.0891,
      "step": 23704
    },
    {
      "epoch": 0.6915514324056246,
      "grad_norm": 0.906423769640861,
      "learning_rate": 2.294321432813805e-06,
      "loss": 0.1031,
      "step": 23705
    },
    {
      "epoch": 0.6915806056362681,
      "grad_norm": 0.7999588831953454,
      "learning_rate": 2.2939241583441308e-06,
      "loss": 0.1298,
      "step": 23706
    },
    {
      "epoch": 0.6916097788669118,
      "grad_norm": 0.8355790758687139,
      "learning_rate": 2.2935269080336555e-06,
      "loss": 0.1134,
      "step": 23707
    },
    {
      "epoch": 0.6916389520975553,
      "grad_norm": 0.7848295123297799,
      "learning_rate": 2.2931296818859233e-06,
      "loss": 0.1226,
      "step": 23708
    },
    {
      "epoch": 0.6916681253281989,
      "grad_norm": 1.133574432844111,
      "learning_rate": 2.2927324799044858e-06,
      "loss": 0.1296,
      "step": 23709
    },
    {
      "epoch": 0.6916972985588424,
      "grad_norm": 0.8236149312082659,
      "learning_rate": 2.292335302092884e-06,
      "loss": 0.127,
      "step": 23710
    },
    {
      "epoch": 0.691726471789486,
      "grad_norm": 0.8715150804512658,
      "learning_rate": 2.2919381484546665e-06,
      "loss": 0.1163,
      "step": 23711
    },
    {
      "epoch": 0.6917556450201295,
      "grad_norm": 0.9397516361988193,
      "learning_rate": 2.2915410189933807e-06,
      "loss": 0.1062,
      "step": 23712
    },
    {
      "epoch": 0.6917848182507731,
      "grad_norm": 0.8283909892003528,
      "learning_rate": 2.29114391371257e-06,
      "loss": 0.124,
      "step": 23713
    },
    {
      "epoch": 0.6918139914814166,
      "grad_norm": 0.912337308859186,
      "learning_rate": 2.2907468326157777e-06,
      "loss": 0.1146,
      "step": 23714
    },
    {
      "epoch": 0.6918431647120602,
      "grad_norm": 0.8580879529726056,
      "learning_rate": 2.290349775706553e-06,
      "loss": 0.1235,
      "step": 23715
    },
    {
      "epoch": 0.6918723379427038,
      "grad_norm": 0.9868941377315759,
      "learning_rate": 2.289952742988437e-06,
      "loss": 0.1247,
      "step": 23716
    },
    {
      "epoch": 0.6919015111733473,
      "grad_norm": 1.0810218537199563,
      "learning_rate": 2.2895557344649777e-06,
      "loss": 0.124,
      "step": 23717
    },
    {
      "epoch": 0.6919306844039909,
      "grad_norm": 1.0714996206285003,
      "learning_rate": 2.2891587501397157e-06,
      "loss": 0.119,
      "step": 23718
    },
    {
      "epoch": 0.6919598576346344,
      "grad_norm": 1.0334090868066341,
      "learning_rate": 2.2887617900161996e-06,
      "loss": 0.1319,
      "step": 23719
    },
    {
      "epoch": 0.6919890308652781,
      "grad_norm": 1.0229856177819148,
      "learning_rate": 2.28836485409797e-06,
      "loss": 0.1351,
      "step": 23720
    },
    {
      "epoch": 0.6920182040959216,
      "grad_norm": 1.0630437252729676,
      "learning_rate": 2.2879679423885708e-06,
      "loss": 0.1147,
      "step": 23721
    },
    {
      "epoch": 0.6920473773265652,
      "grad_norm": 0.8101866141088259,
      "learning_rate": 2.2875710548915464e-06,
      "loss": 0.1241,
      "step": 23722
    },
    {
      "epoch": 0.6920765505572087,
      "grad_norm": 1.1746830181502605,
      "learning_rate": 2.2871741916104414e-06,
      "loss": 0.133,
      "step": 23723
    },
    {
      "epoch": 0.6921057237878523,
      "grad_norm": 1.3231516665272993,
      "learning_rate": 2.286777352548796e-06,
      "loss": 0.1341,
      "step": 23724
    },
    {
      "epoch": 0.6921348970184958,
      "grad_norm": 0.7157855577153792,
      "learning_rate": 2.2863805377101565e-06,
      "loss": 0.1034,
      "step": 23725
    },
    {
      "epoch": 0.6921640702491394,
      "grad_norm": 0.6594134682756123,
      "learning_rate": 2.2859837470980638e-06,
      "loss": 0.1252,
      "step": 23726
    },
    {
      "epoch": 0.692193243479783,
      "grad_norm": 0.8131794627778848,
      "learning_rate": 2.2855869807160588e-06,
      "loss": 0.1151,
      "step": 23727
    },
    {
      "epoch": 0.6922224167104265,
      "grad_norm": 1.2535305456139525,
      "learning_rate": 2.285190238567685e-06,
      "loss": 0.1341,
      "step": 23728
    },
    {
      "epoch": 0.6922515899410701,
      "grad_norm": 0.778980524726283,
      "learning_rate": 2.2847935206564865e-06,
      "loss": 0.1209,
      "step": 23729
    },
    {
      "epoch": 0.6922807631717136,
      "grad_norm": 0.8349112723321234,
      "learning_rate": 2.284396826986003e-06,
      "loss": 0.1353,
      "step": 23730
    },
    {
      "epoch": 0.6923099364023572,
      "grad_norm": 0.8500357543524237,
      "learning_rate": 2.284000157559775e-06,
      "loss": 0.1287,
      "step": 23731
    },
    {
      "epoch": 0.6923391096330007,
      "grad_norm": 0.7999652808402606,
      "learning_rate": 2.2836035123813466e-06,
      "loss": 0.1012,
      "step": 23732
    },
    {
      "epoch": 0.6923682828636443,
      "grad_norm": 0.967336312167194,
      "learning_rate": 2.2832068914542575e-06,
      "loss": 0.1263,
      "step": 23733
    },
    {
      "epoch": 0.6923974560942879,
      "grad_norm": 0.9032378828977392,
      "learning_rate": 2.2828102947820476e-06,
      "loss": 0.1406,
      "step": 23734
    },
    {
      "epoch": 0.6924266293249315,
      "grad_norm": 0.9399886097246759,
      "learning_rate": 2.282413722368258e-06,
      "loss": 0.1121,
      "step": 23735
    },
    {
      "epoch": 0.692455802555575,
      "grad_norm": 0.8896660325406226,
      "learning_rate": 2.282017174216432e-06,
      "loss": 0.12,
      "step": 23736
    },
    {
      "epoch": 0.6924849757862186,
      "grad_norm": 0.9735989195568626,
      "learning_rate": 2.281620650330108e-06,
      "loss": 0.1147,
      "step": 23737
    },
    {
      "epoch": 0.6925141490168621,
      "grad_norm": 0.8502541808391574,
      "learning_rate": 2.281224150712824e-06,
      "loss": 0.1269,
      "step": 23738
    },
    {
      "epoch": 0.6925433222475057,
      "grad_norm": 0.7925147321656687,
      "learning_rate": 2.2808276753681243e-06,
      "loss": 0.1381,
      "step": 23739
    },
    {
      "epoch": 0.6925724954781493,
      "grad_norm": 0.7868908178188531,
      "learning_rate": 2.280431224299543e-06,
      "loss": 0.1406,
      "step": 23740
    },
    {
      "epoch": 0.6926016687087928,
      "grad_norm": 1.0087179670265431,
      "learning_rate": 2.280034797510623e-06,
      "loss": 0.1135,
      "step": 23741
    },
    {
      "epoch": 0.6926308419394364,
      "grad_norm": 0.7662286779625004,
      "learning_rate": 2.279638395004905e-06,
      "loss": 0.1129,
      "step": 23742
    },
    {
      "epoch": 0.6926600151700799,
      "grad_norm": 0.8378206059463131,
      "learning_rate": 2.279242016785926e-06,
      "loss": 0.1106,
      "step": 23743
    },
    {
      "epoch": 0.6926891884007235,
      "grad_norm": 0.752022430396605,
      "learning_rate": 2.2788456628572227e-06,
      "loss": 0.1391,
      "step": 23744
    },
    {
      "epoch": 0.692718361631367,
      "grad_norm": 1.0032264413742915,
      "learning_rate": 2.2784493332223375e-06,
      "loss": 0.1302,
      "step": 23745
    },
    {
      "epoch": 0.6927475348620106,
      "grad_norm": 0.8548099192713413,
      "learning_rate": 2.278053027884805e-06,
      "loss": 0.1223,
      "step": 23746
    },
    {
      "epoch": 0.6927767080926541,
      "grad_norm": 1.001924147167816,
      "learning_rate": 2.2776567468481674e-06,
      "loss": 0.1323,
      "step": 23747
    },
    {
      "epoch": 0.6928058813232978,
      "grad_norm": 0.7231677851603999,
      "learning_rate": 2.277260490115959e-06,
      "loss": 0.1388,
      "step": 23748
    },
    {
      "epoch": 0.6928350545539413,
      "grad_norm": 0.8067489608143206,
      "learning_rate": 2.2768642576917206e-06,
      "loss": 0.101,
      "step": 23749
    },
    {
      "epoch": 0.6928642277845849,
      "grad_norm": 0.9504971563223352,
      "learning_rate": 2.2764680495789874e-06,
      "loss": 0.1103,
      "step": 23750
    },
    {
      "epoch": 0.6928934010152284,
      "grad_norm": 0.8027638111751217,
      "learning_rate": 2.2760718657812964e-06,
      "loss": 0.1183,
      "step": 23751
    },
    {
      "epoch": 0.692922574245872,
      "grad_norm": 0.9116895444309365,
      "learning_rate": 2.275675706302185e-06,
      "loss": 0.1304,
      "step": 23752
    },
    {
      "epoch": 0.6929517474765156,
      "grad_norm": 0.8590398988302214,
      "learning_rate": 2.2752795711451926e-06,
      "loss": 0.116,
      "step": 23753
    },
    {
      "epoch": 0.6929809207071591,
      "grad_norm": 0.7807675862826012,
      "learning_rate": 2.274883460313852e-06,
      "loss": 0.1227,
      "step": 23754
    },
    {
      "epoch": 0.6930100939378027,
      "grad_norm": 0.8985680026686662,
      "learning_rate": 2.274487373811703e-06,
      "loss": 0.1205,
      "step": 23755
    },
    {
      "epoch": 0.6930392671684462,
      "grad_norm": 0.9048122272525334,
      "learning_rate": 2.2740913116422796e-06,
      "loss": 0.0981,
      "step": 23756
    },
    {
      "epoch": 0.6930684403990898,
      "grad_norm": 0.7144588342863268,
      "learning_rate": 2.2736952738091173e-06,
      "loss": 0.088,
      "step": 23757
    },
    {
      "epoch": 0.6930976136297333,
      "grad_norm": 0.6768792673281033,
      "learning_rate": 2.273299260315752e-06,
      "loss": 0.135,
      "step": 23758
    },
    {
      "epoch": 0.6931267868603769,
      "grad_norm": 0.8819290215802724,
      "learning_rate": 2.2729032711657224e-06,
      "loss": 0.1014,
      "step": 23759
    },
    {
      "epoch": 0.6931559600910204,
      "grad_norm": 0.890492511431514,
      "learning_rate": 2.272507306362561e-06,
      "loss": 0.09,
      "step": 23760
    },
    {
      "epoch": 0.6931851333216641,
      "grad_norm": 0.793613907706273,
      "learning_rate": 2.2721113659098013e-06,
      "loss": 0.1237,
      "step": 23761
    },
    {
      "epoch": 0.6932143065523076,
      "grad_norm": 0.8754751817467925,
      "learning_rate": 2.271715449810982e-06,
      "loss": 0.1342,
      "step": 23762
    },
    {
      "epoch": 0.6932434797829512,
      "grad_norm": 0.8301535246481814,
      "learning_rate": 2.271319558069637e-06,
      "loss": 0.1349,
      "step": 23763
    },
    {
      "epoch": 0.6932726530135948,
      "grad_norm": 0.8639729205611231,
      "learning_rate": 2.2709236906892967e-06,
      "loss": 0.1098,
      "step": 23764
    },
    {
      "epoch": 0.6933018262442383,
      "grad_norm": 0.8410730677761642,
      "learning_rate": 2.2705278476734984e-06,
      "loss": 0.1211,
      "step": 23765
    },
    {
      "epoch": 0.6933309994748819,
      "grad_norm": 0.84913739173186,
      "learning_rate": 2.270132029025777e-06,
      "loss": 0.1245,
      "step": 23766
    },
    {
      "epoch": 0.6933601727055254,
      "grad_norm": 0.8301701775348659,
      "learning_rate": 2.2697362347496665e-06,
      "loss": 0.1298,
      "step": 23767
    },
    {
      "epoch": 0.693389345936169,
      "grad_norm": 0.797361811400939,
      "learning_rate": 2.269340464848697e-06,
      "loss": 0.11,
      "step": 23768
    },
    {
      "epoch": 0.6934185191668125,
      "grad_norm": 0.7244305386077172,
      "learning_rate": 2.268944719326405e-06,
      "loss": 0.1346,
      "step": 23769
    },
    {
      "epoch": 0.6934476923974561,
      "grad_norm": 0.9910751266872838,
      "learning_rate": 2.268548998186321e-06,
      "loss": 0.1052,
      "step": 23770
    },
    {
      "epoch": 0.6934768656280996,
      "grad_norm": 0.6653589091318827,
      "learning_rate": 2.26815330143198e-06,
      "loss": 0.1017,
      "step": 23771
    },
    {
      "epoch": 0.6935060388587432,
      "grad_norm": 0.8051877190686334,
      "learning_rate": 2.2677576290669157e-06,
      "loss": 0.1186,
      "step": 23772
    },
    {
      "epoch": 0.6935352120893867,
      "grad_norm": 0.7642106374320058,
      "learning_rate": 2.267361981094659e-06,
      "loss": 0.1434,
      "step": 23773
    },
    {
      "epoch": 0.6935643853200303,
      "grad_norm": 0.8472071730476597,
      "learning_rate": 2.2669663575187407e-06,
      "loss": 0.1052,
      "step": 23774
    },
    {
      "epoch": 0.693593558550674,
      "grad_norm": 0.8994965254843768,
      "learning_rate": 2.266570758342696e-06,
      "loss": 0.1192,
      "step": 23775
    },
    {
      "epoch": 0.6936227317813175,
      "grad_norm": 0.9891177393959779,
      "learning_rate": 2.266175183570053e-06,
      "loss": 0.1448,
      "step": 23776
    },
    {
      "epoch": 0.6936519050119611,
      "grad_norm": 0.8472114327687793,
      "learning_rate": 2.2657796332043476e-06,
      "loss": 0.1149,
      "step": 23777
    },
    {
      "epoch": 0.6936810782426046,
      "grad_norm": 1.0004615352191524,
      "learning_rate": 2.265384107249106e-06,
      "loss": 0.1123,
      "step": 23778
    },
    {
      "epoch": 0.6937102514732482,
      "grad_norm": 0.9177297504770803,
      "learning_rate": 2.264988605707865e-06,
      "loss": 0.1008,
      "step": 23779
    },
    {
      "epoch": 0.6937394247038917,
      "grad_norm": 0.9711912489017491,
      "learning_rate": 2.2645931285841533e-06,
      "loss": 0.1154,
      "step": 23780
    },
    {
      "epoch": 0.6937685979345353,
      "grad_norm": 0.8451203658486415,
      "learning_rate": 2.2641976758814966e-06,
      "loss": 0.1218,
      "step": 23781
    },
    {
      "epoch": 0.6937977711651788,
      "grad_norm": 0.7791037540092294,
      "learning_rate": 2.263802247603434e-06,
      "loss": 0.0981,
      "step": 23782
    },
    {
      "epoch": 0.6938269443958224,
      "grad_norm": 0.9122742883437229,
      "learning_rate": 2.263406843753492e-06,
      "loss": 0.1196,
      "step": 23783
    },
    {
      "epoch": 0.6938561176264659,
      "grad_norm": 0.9145510104429517,
      "learning_rate": 2.263011464335198e-06,
      "loss": 0.136,
      "step": 23784
    },
    {
      "epoch": 0.6938852908571095,
      "grad_norm": 0.8272283121849784,
      "learning_rate": 2.2626161093520866e-06,
      "loss": 0.1124,
      "step": 23785
    },
    {
      "epoch": 0.693914464087753,
      "grad_norm": 0.9133058887329731,
      "learning_rate": 2.2622207788076848e-06,
      "loss": 0.1052,
      "step": 23786
    },
    {
      "epoch": 0.6939436373183966,
      "grad_norm": 0.7982430957193886,
      "learning_rate": 2.2618254727055206e-06,
      "loss": 0.1194,
      "step": 23787
    },
    {
      "epoch": 0.6939728105490403,
      "grad_norm": 1.0187002693919793,
      "learning_rate": 2.261430191049125e-06,
      "loss": 0.1213,
      "step": 23788
    },
    {
      "epoch": 0.6940019837796838,
      "grad_norm": 0.854908536529277,
      "learning_rate": 2.2610349338420283e-06,
      "loss": 0.1149,
      "step": 23789
    },
    {
      "epoch": 0.6940311570103274,
      "grad_norm": 0.8299541363462521,
      "learning_rate": 2.2606397010877585e-06,
      "loss": 0.1414,
      "step": 23790
    },
    {
      "epoch": 0.6940603302409709,
      "grad_norm": 1.0390679356647874,
      "learning_rate": 2.2602444927898413e-06,
      "loss": 0.1295,
      "step": 23791
    },
    {
      "epoch": 0.6940895034716145,
      "grad_norm": 0.8770794811603092,
      "learning_rate": 2.2598493089518093e-06,
      "loss": 0.1317,
      "step": 23792
    },
    {
      "epoch": 0.694118676702258,
      "grad_norm": 0.818535052142983,
      "learning_rate": 2.2594541495771866e-06,
      "loss": 0.1207,
      "step": 23793
    },
    {
      "epoch": 0.6941478499329016,
      "grad_norm": 0.8774843127880833,
      "learning_rate": 2.2590590146695053e-06,
      "loss": 0.1445,
      "step": 23794
    },
    {
      "epoch": 0.6941770231635451,
      "grad_norm": 0.8805688386968867,
      "learning_rate": 2.258663904232288e-06,
      "loss": 0.1111,
      "step": 23795
    },
    {
      "epoch": 0.6942061963941887,
      "grad_norm": 0.8019469765437267,
      "learning_rate": 2.2582688182690674e-06,
      "loss": 0.1041,
      "step": 23796
    },
    {
      "epoch": 0.6942353696248322,
      "grad_norm": 0.9178817772973342,
      "learning_rate": 2.2578737567833688e-06,
      "loss": 0.1313,
      "step": 23797
    },
    {
      "epoch": 0.6942645428554758,
      "grad_norm": 0.7250859828107897,
      "learning_rate": 2.2574787197787155e-06,
      "loss": 0.1207,
      "step": 23798
    },
    {
      "epoch": 0.6942937160861193,
      "grad_norm": 0.8084459599289897,
      "learning_rate": 2.257083707258639e-06,
      "loss": 0.1133,
      "step": 23799
    },
    {
      "epoch": 0.6943228893167629,
      "grad_norm": 0.9793508490564601,
      "learning_rate": 2.256688719226665e-06,
      "loss": 0.1258,
      "step": 23800
    },
    {
      "epoch": 0.6943520625474064,
      "grad_norm": 0.6774895543187904,
      "learning_rate": 2.256293755686318e-06,
      "loss": 0.1306,
      "step": 23801
    },
    {
      "epoch": 0.6943812357780501,
      "grad_norm": 0.7826264163921307,
      "learning_rate": 2.255898816641127e-06,
      "loss": 0.1206,
      "step": 23802
    },
    {
      "epoch": 0.6944104090086937,
      "grad_norm": 1.5171655582646877,
      "learning_rate": 2.2555039020946163e-06,
      "loss": 0.1008,
      "step": 23803
    },
    {
      "epoch": 0.6944395822393372,
      "grad_norm": 1.0481798594953093,
      "learning_rate": 2.25510901205031e-06,
      "loss": 0.1161,
      "step": 23804
    },
    {
      "epoch": 0.6944687554699808,
      "grad_norm": 0.7933968532635306,
      "learning_rate": 2.254714146511735e-06,
      "loss": 0.1198,
      "step": 23805
    },
    {
      "epoch": 0.6944979287006243,
      "grad_norm": 0.9066459719823079,
      "learning_rate": 2.2543193054824185e-06,
      "loss": 0.1133,
      "step": 23806
    },
    {
      "epoch": 0.6945271019312679,
      "grad_norm": 1.2308532338379525,
      "learning_rate": 2.253924488965884e-06,
      "loss": 0.1051,
      "step": 23807
    },
    {
      "epoch": 0.6945562751619114,
      "grad_norm": 0.9219605085205348,
      "learning_rate": 2.2535296969656547e-06,
      "loss": 0.1212,
      "step": 23808
    },
    {
      "epoch": 0.694585448392555,
      "grad_norm": 0.9368588291885176,
      "learning_rate": 2.253134929485257e-06,
      "loss": 0.118,
      "step": 23809
    },
    {
      "epoch": 0.6946146216231985,
      "grad_norm": 0.8434970865606336,
      "learning_rate": 2.252740186528216e-06,
      "loss": 0.1143,
      "step": 23810
    },
    {
      "epoch": 0.6946437948538421,
      "grad_norm": 1.1473254781592623,
      "learning_rate": 2.252345468098051e-06,
      "loss": 0.0986,
      "step": 23811
    },
    {
      "epoch": 0.6946729680844856,
      "grad_norm": 0.8269881720214735,
      "learning_rate": 2.251950774198294e-06,
      "loss": 0.1225,
      "step": 23812
    },
    {
      "epoch": 0.6947021413151292,
      "grad_norm": 0.9458941903347555,
      "learning_rate": 2.2515561048324637e-06,
      "loss": 0.097,
      "step": 23813
    },
    {
      "epoch": 0.6947313145457727,
      "grad_norm": 0.7442355627088012,
      "learning_rate": 2.251161460004083e-06,
      "loss": 0.1228,
      "step": 23814
    },
    {
      "epoch": 0.6947604877764164,
      "grad_norm": 0.9360742571652984,
      "learning_rate": 2.2507668397166778e-06,
      "loss": 0.1091,
      "step": 23815
    },
    {
      "epoch": 0.69478966100706,
      "grad_norm": 0.9932204254641761,
      "learning_rate": 2.25037224397377e-06,
      "loss": 0.1336,
      "step": 23816
    },
    {
      "epoch": 0.6948188342377035,
      "grad_norm": 1.0242302724736048,
      "learning_rate": 2.2499776727788815e-06,
      "loss": 0.114,
      "step": 23817
    },
    {
      "epoch": 0.6948480074683471,
      "grad_norm": 0.816982814096064,
      "learning_rate": 2.249583126135535e-06,
      "loss": 0.1075,
      "step": 23818
    },
    {
      "epoch": 0.6948771806989906,
      "grad_norm": 1.0143184258574376,
      "learning_rate": 2.249188604047256e-06,
      "loss": 0.1252,
      "step": 23819
    },
    {
      "epoch": 0.6949063539296342,
      "grad_norm": 0.9985708969606071,
      "learning_rate": 2.2487941065175646e-06,
      "loss": 0.1165,
      "step": 23820
    },
    {
      "epoch": 0.6949355271602777,
      "grad_norm": 2.625312710630318,
      "learning_rate": 2.2483996335499804e-06,
      "loss": 0.1235,
      "step": 23821
    },
    {
      "epoch": 0.6949647003909213,
      "grad_norm": 0.8656596326522185,
      "learning_rate": 2.2480051851480296e-06,
      "loss": 0.1151,
      "step": 23822
    },
    {
      "epoch": 0.6949938736215648,
      "grad_norm": 1.0129732594232637,
      "learning_rate": 2.247610761315229e-06,
      "loss": 0.1367,
      "step": 23823
    },
    {
      "epoch": 0.6950230468522084,
      "grad_norm": 0.968424923186662,
      "learning_rate": 2.247216362055105e-06,
      "loss": 0.1194,
      "step": 23824
    },
    {
      "epoch": 0.695052220082852,
      "grad_norm": 1.0327727585847357,
      "learning_rate": 2.2468219873711737e-06,
      "loss": 0.114,
      "step": 23825
    },
    {
      "epoch": 0.6950813933134955,
      "grad_norm": 0.9663400679826721,
      "learning_rate": 2.2464276372669615e-06,
      "loss": 0.1176,
      "step": 23826
    },
    {
      "epoch": 0.695110566544139,
      "grad_norm": 1.0742687267998512,
      "learning_rate": 2.246033311745985e-06,
      "loss": 0.1364,
      "step": 23827
    },
    {
      "epoch": 0.6951397397747826,
      "grad_norm": 0.8803108750502666,
      "learning_rate": 2.245639010811764e-06,
      "loss": 0.1052,
      "step": 23828
    },
    {
      "epoch": 0.6951689130054263,
      "grad_norm": 0.8469585046262382,
      "learning_rate": 2.245244734467821e-06,
      "loss": 0.1337,
      "step": 23829
    },
    {
      "epoch": 0.6951980862360698,
      "grad_norm": 0.8042952636604691,
      "learning_rate": 2.2448504827176767e-06,
      "loss": 0.1168,
      "step": 23830
    },
    {
      "epoch": 0.6952272594667134,
      "grad_norm": 1.339756206521757,
      "learning_rate": 2.2444562555648474e-06,
      "loss": 0.1357,
      "step": 23831
    },
    {
      "epoch": 0.6952564326973569,
      "grad_norm": 0.7626741984499985,
      "learning_rate": 2.2440620530128572e-06,
      "loss": 0.1198,
      "step": 23832
    },
    {
      "epoch": 0.6952856059280005,
      "grad_norm": 0.623325959472245,
      "learning_rate": 2.243667875065223e-06,
      "loss": 0.1342,
      "step": 23833
    },
    {
      "epoch": 0.695314779158644,
      "grad_norm": 1.1762003072366538,
      "learning_rate": 2.2432737217254617e-06,
      "loss": 0.1132,
      "step": 23834
    },
    {
      "epoch": 0.6953439523892876,
      "grad_norm": 0.8413067163168597,
      "learning_rate": 2.2428795929970952e-06,
      "loss": 0.1132,
      "step": 23835
    },
    {
      "epoch": 0.6953731256199311,
      "grad_norm": 0.7803250474449454,
      "learning_rate": 2.2424854888836434e-06,
      "loss": 0.1185,
      "step": 23836
    },
    {
      "epoch": 0.6954022988505747,
      "grad_norm": 0.7790511825822023,
      "learning_rate": 2.2420914093886227e-06,
      "loss": 0.1202,
      "step": 23837
    },
    {
      "epoch": 0.6954314720812182,
      "grad_norm": 0.9583893332195279,
      "learning_rate": 2.2416973545155496e-06,
      "loss": 0.1339,
      "step": 23838
    },
    {
      "epoch": 0.6954606453118618,
      "grad_norm": 0.8739164437819695,
      "learning_rate": 2.2413033242679456e-06,
      "loss": 0.1181,
      "step": 23839
    },
    {
      "epoch": 0.6954898185425054,
      "grad_norm": 0.959295105755949,
      "learning_rate": 2.2409093186493276e-06,
      "loss": 0.1506,
      "step": 23840
    },
    {
      "epoch": 0.6955189917731489,
      "grad_norm": 0.8142623408323446,
      "learning_rate": 2.240515337663208e-06,
      "loss": 0.1186,
      "step": 23841
    },
    {
      "epoch": 0.6955481650037926,
      "grad_norm": 1.064582232367325,
      "learning_rate": 2.2401213813131133e-06,
      "loss": 0.1152,
      "step": 23842
    },
    {
      "epoch": 0.6955773382344361,
      "grad_norm": 1.2373366780234374,
      "learning_rate": 2.239727449602556e-06,
      "loss": 0.118,
      "step": 23843
    },
    {
      "epoch": 0.6956065114650797,
      "grad_norm": 0.7205967437679306,
      "learning_rate": 2.239333542535051e-06,
      "loss": 0.1287,
      "step": 23844
    },
    {
      "epoch": 0.6956356846957232,
      "grad_norm": 0.8186552784652059,
      "learning_rate": 2.2389396601141188e-06,
      "loss": 0.1103,
      "step": 23845
    },
    {
      "epoch": 0.6956648579263668,
      "grad_norm": 0.8412739325173756,
      "learning_rate": 2.2385458023432742e-06,
      "loss": 0.1075,
      "step": 23846
    },
    {
      "epoch": 0.6956940311570103,
      "grad_norm": 1.1737791197407967,
      "learning_rate": 2.2381519692260318e-06,
      "loss": 0.105,
      "step": 23847
    },
    {
      "epoch": 0.6957232043876539,
      "grad_norm": 0.7235996361954059,
      "learning_rate": 2.2377581607659095e-06,
      "loss": 0.1274,
      "step": 23848
    },
    {
      "epoch": 0.6957523776182974,
      "grad_norm": 0.7677692176770433,
      "learning_rate": 2.2373643769664243e-06,
      "loss": 0.126,
      "step": 23849
    },
    {
      "epoch": 0.695781550848941,
      "grad_norm": 0.9732730500905161,
      "learning_rate": 2.236970617831091e-06,
      "loss": 0.1165,
      "step": 23850
    },
    {
      "epoch": 0.6958107240795846,
      "grad_norm": 1.2348858461385546,
      "learning_rate": 2.236576883363422e-06,
      "loss": 0.1369,
      "step": 23851
    },
    {
      "epoch": 0.6958398973102281,
      "grad_norm": 1.0381658883900549,
      "learning_rate": 2.236183173566937e-06,
      "loss": 0.1144,
      "step": 23852
    },
    {
      "epoch": 0.6958690705408717,
      "grad_norm": 0.6237444371201205,
      "learning_rate": 2.235789488445147e-06,
      "loss": 0.0984,
      "step": 23853
    },
    {
      "epoch": 0.6958982437715152,
      "grad_norm": 0.7176785407206546,
      "learning_rate": 2.2353958280015703e-06,
      "loss": 0.1085,
      "step": 23854
    },
    {
      "epoch": 0.6959274170021588,
      "grad_norm": 0.9308137456355501,
      "learning_rate": 2.235002192239718e-06,
      "loss": 0.1161,
      "step": 23855
    },
    {
      "epoch": 0.6959565902328024,
      "grad_norm": 0.7946992493326904,
      "learning_rate": 2.234608581163108e-06,
      "loss": 0.1126,
      "step": 23856
    },
    {
      "epoch": 0.695985763463446,
      "grad_norm": 1.000559179116095,
      "learning_rate": 2.234214994775252e-06,
      "loss": 0.1274,
      "step": 23857
    },
    {
      "epoch": 0.6960149366940895,
      "grad_norm": 0.6762911103499605,
      "learning_rate": 2.2338214330796633e-06,
      "loss": 0.1051,
      "step": 23858
    },
    {
      "epoch": 0.6960441099247331,
      "grad_norm": 1.0782851143233587,
      "learning_rate": 2.233427896079856e-06,
      "loss": 0.0999,
      "step": 23859
    },
    {
      "epoch": 0.6960732831553766,
      "grad_norm": 0.8465079972471626,
      "learning_rate": 2.233034383779346e-06,
      "loss": 0.1377,
      "step": 23860
    },
    {
      "epoch": 0.6961024563860202,
      "grad_norm": 1.0538568765976666,
      "learning_rate": 2.2326408961816425e-06,
      "loss": 0.123,
      "step": 23861
    },
    {
      "epoch": 0.6961316296166637,
      "grad_norm": 1.0404147483084252,
      "learning_rate": 2.232247433290262e-06,
      "loss": 0.1282,
      "step": 23862
    },
    {
      "epoch": 0.6961608028473073,
      "grad_norm": 1.304273376189725,
      "learning_rate": 2.231853995108716e-06,
      "loss": 0.1027,
      "step": 23863
    },
    {
      "epoch": 0.6961899760779509,
      "grad_norm": 1.1026164216958099,
      "learning_rate": 2.231460581640515e-06,
      "loss": 0.1075,
      "step": 23864
    },
    {
      "epoch": 0.6962191493085944,
      "grad_norm": 0.8837947441898395,
      "learning_rate": 2.231067192889173e-06,
      "loss": 0.136,
      "step": 23865
    },
    {
      "epoch": 0.696248322539238,
      "grad_norm": 0.8644623049803704,
      "learning_rate": 2.2306738288582036e-06,
      "loss": 0.1273,
      "step": 23866
    },
    {
      "epoch": 0.6962774957698815,
      "grad_norm": 0.7718012780034156,
      "learning_rate": 2.2302804895511177e-06,
      "loss": 0.0974,
      "step": 23867
    },
    {
      "epoch": 0.6963066690005251,
      "grad_norm": 0.8881331523894219,
      "learning_rate": 2.229887174971424e-06,
      "loss": 0.1084,
      "step": 23868
    },
    {
      "epoch": 0.6963358422311687,
      "grad_norm": 0.9709450734308829,
      "learning_rate": 2.2294938851226387e-06,
      "loss": 0.1181,
      "step": 23869
    },
    {
      "epoch": 0.6963650154618123,
      "grad_norm": 1.0097910025405348,
      "learning_rate": 2.2291006200082705e-06,
      "loss": 0.1385,
      "step": 23870
    },
    {
      "epoch": 0.6963941886924558,
      "grad_norm": 1.2827005423027638,
      "learning_rate": 2.2287073796318266e-06,
      "loss": 0.0969,
      "step": 23871
    },
    {
      "epoch": 0.6964233619230994,
      "grad_norm": 0.708415974217797,
      "learning_rate": 2.2283141639968254e-06,
      "loss": 0.099,
      "step": 23872
    },
    {
      "epoch": 0.696452535153743,
      "grad_norm": 0.9949620544624855,
      "learning_rate": 2.2279209731067736e-06,
      "loss": 0.0995,
      "step": 23873
    },
    {
      "epoch": 0.6964817083843865,
      "grad_norm": 1.0254344330917917,
      "learning_rate": 2.22752780696518e-06,
      "loss": 0.136,
      "step": 23874
    },
    {
      "epoch": 0.69651088161503,
      "grad_norm": 0.9702845823420303,
      "learning_rate": 2.2271346655755577e-06,
      "loss": 0.1169,
      "step": 23875
    },
    {
      "epoch": 0.6965400548456736,
      "grad_norm": 0.7426787782128693,
      "learning_rate": 2.226741548941416e-06,
      "loss": 0.1129,
      "step": 23876
    },
    {
      "epoch": 0.6965692280763172,
      "grad_norm": 0.8579837307191444,
      "learning_rate": 2.226348457066261e-06,
      "loss": 0.1492,
      "step": 23877
    },
    {
      "epoch": 0.6965984013069607,
      "grad_norm": 0.9695186357645721,
      "learning_rate": 2.225955389953605e-06,
      "loss": 0.1313,
      "step": 23878
    },
    {
      "epoch": 0.6966275745376043,
      "grad_norm": 1.0191855552721403,
      "learning_rate": 2.2255623476069595e-06,
      "loss": 0.1281,
      "step": 23879
    },
    {
      "epoch": 0.6966567477682478,
      "grad_norm": 0.9127613348863842,
      "learning_rate": 2.2251693300298306e-06,
      "loss": 0.108,
      "step": 23880
    },
    {
      "epoch": 0.6966859209988914,
      "grad_norm": 1.0601321119294365,
      "learning_rate": 2.2247763372257253e-06,
      "loss": 0.1284,
      "step": 23881
    },
    {
      "epoch": 0.6967150942295349,
      "grad_norm": 0.7341699527352541,
      "learning_rate": 2.224383369198157e-06,
      "loss": 0.1079,
      "step": 23882
    },
    {
      "epoch": 0.6967442674601786,
      "grad_norm": 0.904234228900846,
      "learning_rate": 2.223990425950629e-06,
      "loss": 0.1073,
      "step": 23883
    },
    {
      "epoch": 0.6967734406908221,
      "grad_norm": 0.7317430894835597,
      "learning_rate": 2.223597507486654e-06,
      "loss": 0.1243,
      "step": 23884
    },
    {
      "epoch": 0.6968026139214657,
      "grad_norm": 0.8553002245374118,
      "learning_rate": 2.223204613809736e-06,
      "loss": 0.1077,
      "step": 23885
    },
    {
      "epoch": 0.6968317871521092,
      "grad_norm": 0.8570750394948702,
      "learning_rate": 2.2228117449233853e-06,
      "loss": 0.098,
      "step": 23886
    },
    {
      "epoch": 0.6968609603827528,
      "grad_norm": 0.8867475923460965,
      "learning_rate": 2.2224189008311088e-06,
      "loss": 0.1422,
      "step": 23887
    },
    {
      "epoch": 0.6968901336133964,
      "grad_norm": 0.9701536648985049,
      "learning_rate": 2.2220260815364113e-06,
      "loss": 0.1207,
      "step": 23888
    },
    {
      "epoch": 0.6969193068440399,
      "grad_norm": 0.8527957005656143,
      "learning_rate": 2.2216332870428025e-06,
      "loss": 0.1097,
      "step": 23889
    },
    {
      "epoch": 0.6969484800746835,
      "grad_norm": 0.8667439819336876,
      "learning_rate": 2.22124051735379e-06,
      "loss": 0.1198,
      "step": 23890
    },
    {
      "epoch": 0.696977653305327,
      "grad_norm": 0.6934904212398832,
      "learning_rate": 2.2208477724728765e-06,
      "loss": 0.1255,
      "step": 23891
    },
    {
      "epoch": 0.6970068265359706,
      "grad_norm": 1.0531606372921636,
      "learning_rate": 2.220455052403573e-06,
      "loss": 0.1344,
      "step": 23892
    },
    {
      "epoch": 0.6970359997666141,
      "grad_norm": 1.079285750979756,
      "learning_rate": 2.220062357149383e-06,
      "loss": 0.1121,
      "step": 23893
    },
    {
      "epoch": 0.6970651729972577,
      "grad_norm": 0.6888322008395154,
      "learning_rate": 2.219669686713811e-06,
      "loss": 0.1346,
      "step": 23894
    },
    {
      "epoch": 0.6970943462279012,
      "grad_norm": 0.9814590642120479,
      "learning_rate": 2.2192770411003638e-06,
      "loss": 0.1348,
      "step": 23895
    },
    {
      "epoch": 0.6971235194585449,
      "grad_norm": 1.0259025493308334,
      "learning_rate": 2.21888442031255e-06,
      "loss": 0.1356,
      "step": 23896
    },
    {
      "epoch": 0.6971526926891884,
      "grad_norm": 0.8542920359060812,
      "learning_rate": 2.2184918243538717e-06,
      "loss": 0.1181,
      "step": 23897
    },
    {
      "epoch": 0.697181865919832,
      "grad_norm": 0.8486031292820476,
      "learning_rate": 2.218099253227832e-06,
      "loss": 0.1371,
      "step": 23898
    },
    {
      "epoch": 0.6972110391504756,
      "grad_norm": 0.9135604801082281,
      "learning_rate": 2.217706706937941e-06,
      "loss": 0.1202,
      "step": 23899
    },
    {
      "epoch": 0.6972402123811191,
      "grad_norm": 1.0489641449001594,
      "learning_rate": 2.2173141854877e-06,
      "loss": 0.1291,
      "step": 23900
    },
    {
      "epoch": 0.6972693856117627,
      "grad_norm": 0.9118790562128938,
      "learning_rate": 2.21692168888061e-06,
      "loss": 0.1281,
      "step": 23901
    },
    {
      "epoch": 0.6972985588424062,
      "grad_norm": 0.8182972087568022,
      "learning_rate": 2.216529217120182e-06,
      "loss": 0.1277,
      "step": 23902
    },
    {
      "epoch": 0.6973277320730498,
      "grad_norm": 1.023858829391313,
      "learning_rate": 2.2161367702099172e-06,
      "loss": 0.1135,
      "step": 23903
    },
    {
      "epoch": 0.6973569053036933,
      "grad_norm": 0.8734826668400932,
      "learning_rate": 2.2157443481533165e-06,
      "loss": 0.1329,
      "step": 23904
    },
    {
      "epoch": 0.6973860785343369,
      "grad_norm": 0.7654454023172019,
      "learning_rate": 2.215351950953888e-06,
      "loss": 0.1125,
      "step": 23905
    },
    {
      "epoch": 0.6974152517649804,
      "grad_norm": 1.0366101945281585,
      "learning_rate": 2.214959578615132e-06,
      "loss": 0.1042,
      "step": 23906
    },
    {
      "epoch": 0.697444424995624,
      "grad_norm": 0.8992822757576193,
      "learning_rate": 2.2145672311405505e-06,
      "loss": 0.0937,
      "step": 23907
    },
    {
      "epoch": 0.6974735982262675,
      "grad_norm": 0.8214520386037066,
      "learning_rate": 2.2141749085336476e-06,
      "loss": 0.1563,
      "step": 23908
    },
    {
      "epoch": 0.6975027714569111,
      "grad_norm": 0.849531784424523,
      "learning_rate": 2.213782610797928e-06,
      "loss": 0.0978,
      "step": 23909
    },
    {
      "epoch": 0.6975319446875548,
      "grad_norm": 0.9702522776466446,
      "learning_rate": 2.213390337936892e-06,
      "loss": 0.1175,
      "step": 23910
    },
    {
      "epoch": 0.6975611179181983,
      "grad_norm": 0.6985722729712117,
      "learning_rate": 2.2129980899540403e-06,
      "loss": 0.1029,
      "step": 23911
    },
    {
      "epoch": 0.6975902911488419,
      "grad_norm": 0.6521646867903083,
      "learning_rate": 2.2126058668528784e-06,
      "loss": 0.1161,
      "step": 23912
    },
    {
      "epoch": 0.6976194643794854,
      "grad_norm": 0.8726392551150108,
      "learning_rate": 2.2122136686369038e-06,
      "loss": 0.1104,
      "step": 23913
    },
    {
      "epoch": 0.697648637610129,
      "grad_norm": 1.0279031197328325,
      "learning_rate": 2.2118214953096218e-06,
      "loss": 0.1098,
      "step": 23914
    },
    {
      "epoch": 0.6976778108407725,
      "grad_norm": 0.724605864266765,
      "learning_rate": 2.2114293468745302e-06,
      "loss": 0.1183,
      "step": 23915
    },
    {
      "epoch": 0.6977069840714161,
      "grad_norm": 0.8200723129100194,
      "learning_rate": 2.2110372233351334e-06,
      "loss": 0.1206,
      "step": 23916
    },
    {
      "epoch": 0.6977361573020596,
      "grad_norm": 0.9265442451272636,
      "learning_rate": 2.2106451246949307e-06,
      "loss": 0.1191,
      "step": 23917
    },
    {
      "epoch": 0.6977653305327032,
      "grad_norm": 0.6997308658185218,
      "learning_rate": 2.2102530509574204e-06,
      "loss": 0.1108,
      "step": 23918
    },
    {
      "epoch": 0.6977945037633467,
      "grad_norm": 0.7151739698896734,
      "learning_rate": 2.2098610021261046e-06,
      "loss": 0.1191,
      "step": 23919
    },
    {
      "epoch": 0.6978236769939903,
      "grad_norm": 0.9734372742959848,
      "learning_rate": 2.2094689782044857e-06,
      "loss": 0.1135,
      "step": 23920
    },
    {
      "epoch": 0.6978528502246338,
      "grad_norm": 0.7242728998374421,
      "learning_rate": 2.2090769791960604e-06,
      "loss": 0.1104,
      "step": 23921
    },
    {
      "epoch": 0.6978820234552774,
      "grad_norm": 1.1515168965564313,
      "learning_rate": 2.2086850051043314e-06,
      "loss": 0.1116,
      "step": 23922
    },
    {
      "epoch": 0.697911196685921,
      "grad_norm": 0.7441902855450077,
      "learning_rate": 2.2082930559327955e-06,
      "loss": 0.1283,
      "step": 23923
    },
    {
      "epoch": 0.6979403699165646,
      "grad_norm": 0.8467353741366159,
      "learning_rate": 2.2079011316849515e-06,
      "loss": 0.1259,
      "step": 23924
    },
    {
      "epoch": 0.6979695431472082,
      "grad_norm": 0.7578443751350888,
      "learning_rate": 2.207509232364299e-06,
      "loss": 0.1087,
      "step": 23925
    },
    {
      "epoch": 0.6979987163778517,
      "grad_norm": 0.7694819193593879,
      "learning_rate": 2.2071173579743405e-06,
      "loss": 0.1455,
      "step": 23926
    },
    {
      "epoch": 0.6980278896084953,
      "grad_norm": 0.8157212116854585,
      "learning_rate": 2.2067255085185707e-06,
      "loss": 0.1081,
      "step": 23927
    },
    {
      "epoch": 0.6980570628391388,
      "grad_norm": 0.8149733385337679,
      "learning_rate": 2.2063336840004868e-06,
      "loss": 0.124,
      "step": 23928
    },
    {
      "epoch": 0.6980862360697824,
      "grad_norm": 0.7288876374731305,
      "learning_rate": 2.2059418844235912e-06,
      "loss": 0.1165,
      "step": 23929
    },
    {
      "epoch": 0.6981154093004259,
      "grad_norm": 0.7677110662916525,
      "learning_rate": 2.205550109791379e-06,
      "loss": 0.1052,
      "step": 23930
    },
    {
      "epoch": 0.6981445825310695,
      "grad_norm": 0.8223754948929111,
      "learning_rate": 2.205158360107345e-06,
      "loss": 0.1192,
      "step": 23931
    },
    {
      "epoch": 0.698173755761713,
      "grad_norm": 0.8390319313943528,
      "learning_rate": 2.2047666353749936e-06,
      "loss": 0.1118,
      "step": 23932
    },
    {
      "epoch": 0.6982029289923566,
      "grad_norm": 0.7492390754786155,
      "learning_rate": 2.2043749355978183e-06,
      "loss": 0.1139,
      "step": 23933
    },
    {
      "epoch": 0.6982321022230001,
      "grad_norm": 0.7380429326903747,
      "learning_rate": 2.203983260779314e-06,
      "loss": 0.1162,
      "step": 23934
    },
    {
      "epoch": 0.6982612754536437,
      "grad_norm": 0.8982197852339344,
      "learning_rate": 2.203591610922982e-06,
      "loss": 0.1161,
      "step": 23935
    },
    {
      "epoch": 0.6982904486842872,
      "grad_norm": 0.8736667339876081,
      "learning_rate": 2.2031999860323165e-06,
      "loss": 0.1392,
      "step": 23936
    },
    {
      "epoch": 0.6983196219149309,
      "grad_norm": 0.6820437877376293,
      "learning_rate": 2.2028083861108123e-06,
      "loss": 0.126,
      "step": 23937
    },
    {
      "epoch": 0.6983487951455745,
      "grad_norm": 1.1657274485988667,
      "learning_rate": 2.2024168111619666e-06,
      "loss": 0.1394,
      "step": 23938
    },
    {
      "epoch": 0.698377968376218,
      "grad_norm": 0.720350106438394,
      "learning_rate": 2.202025261189278e-06,
      "loss": 0.1027,
      "step": 23939
    },
    {
      "epoch": 0.6984071416068616,
      "grad_norm": 0.842727327060568,
      "learning_rate": 2.20163373619624e-06,
      "loss": 0.1467,
      "step": 23940
    },
    {
      "epoch": 0.6984363148375051,
      "grad_norm": 0.7339955273043258,
      "learning_rate": 2.2012422361863457e-06,
      "loss": 0.1152,
      "step": 23941
    },
    {
      "epoch": 0.6984654880681487,
      "grad_norm": 0.9130713180378784,
      "learning_rate": 2.200850761163095e-06,
      "loss": 0.1123,
      "step": 23942
    },
    {
      "epoch": 0.6984946612987922,
      "grad_norm": 0.8363496227683581,
      "learning_rate": 2.200459311129978e-06,
      "loss": 0.1122,
      "step": 23943
    },
    {
      "epoch": 0.6985238345294358,
      "grad_norm": 0.9935560450590118,
      "learning_rate": 2.200067886090494e-06,
      "loss": 0.1146,
      "step": 23944
    },
    {
      "epoch": 0.6985530077600793,
      "grad_norm": 0.7691839559194878,
      "learning_rate": 2.1996764860481334e-06,
      "loss": 0.1337,
      "step": 23945
    },
    {
      "epoch": 0.6985821809907229,
      "grad_norm": 0.7693515060529519,
      "learning_rate": 2.1992851110063953e-06,
      "loss": 0.1089,
      "step": 23946
    },
    {
      "epoch": 0.6986113542213664,
      "grad_norm": 0.9126551268711051,
      "learning_rate": 2.1988937609687707e-06,
      "loss": 0.1246,
      "step": 23947
    },
    {
      "epoch": 0.69864052745201,
      "grad_norm": 0.7305308384724504,
      "learning_rate": 2.198502435938752e-06,
      "loss": 0.1282,
      "step": 23948
    },
    {
      "epoch": 0.6986697006826535,
      "grad_norm": 1.1475363486012335,
      "learning_rate": 2.198111135919834e-06,
      "loss": 0.1286,
      "step": 23949
    },
    {
      "epoch": 0.6986988739132972,
      "grad_norm": 1.3417151758689219,
      "learning_rate": 2.197719860915514e-06,
      "loss": 0.1108,
      "step": 23950
    },
    {
      "epoch": 0.6987280471439408,
      "grad_norm": 0.6706037887177457,
      "learning_rate": 2.19732861092928e-06,
      "loss": 0.1268,
      "step": 23951
    },
    {
      "epoch": 0.6987572203745843,
      "grad_norm": 0.86667105947099,
      "learning_rate": 2.1969373859646287e-06,
      "loss": 0.1082,
      "step": 23952
    },
    {
      "epoch": 0.6987863936052279,
      "grad_norm": 1.02242976326532,
      "learning_rate": 2.1965461860250515e-06,
      "loss": 0.1122,
      "step": 23953
    },
    {
      "epoch": 0.6988155668358714,
      "grad_norm": 0.8762455769409735,
      "learning_rate": 2.196155011114039e-06,
      "loss": 0.1131,
      "step": 23954
    },
    {
      "epoch": 0.698844740066515,
      "grad_norm": 0.89106618415591,
      "learning_rate": 2.1957638612350846e-06,
      "loss": 0.1103,
      "step": 23955
    },
    {
      "epoch": 0.6988739132971585,
      "grad_norm": 0.999620249948512,
      "learning_rate": 2.1953727363916833e-06,
      "loss": 0.136,
      "step": 23956
    },
    {
      "epoch": 0.6989030865278021,
      "grad_norm": 0.9502451622796657,
      "learning_rate": 2.194981636587325e-06,
      "loss": 0.1146,
      "step": 23957
    },
    {
      "epoch": 0.6989322597584456,
      "grad_norm": 0.7329955936712534,
      "learning_rate": 2.1945905618254985e-06,
      "loss": 0.098,
      "step": 23958
    },
    {
      "epoch": 0.6989614329890892,
      "grad_norm": 0.771353050670828,
      "learning_rate": 2.1941995121096997e-06,
      "loss": 0.1229,
      "step": 23959
    },
    {
      "epoch": 0.6989906062197327,
      "grad_norm": 0.9019231796641504,
      "learning_rate": 2.1938084874434184e-06,
      "loss": 0.1359,
      "step": 23960
    },
    {
      "epoch": 0.6990197794503763,
      "grad_norm": 0.8461351662171521,
      "learning_rate": 2.193417487830141e-06,
      "loss": 0.1132,
      "step": 23961
    },
    {
      "epoch": 0.6990489526810199,
      "grad_norm": 0.9079653025212666,
      "learning_rate": 2.1930265132733663e-06,
      "loss": 0.1577,
      "step": 23962
    },
    {
      "epoch": 0.6990781259116634,
      "grad_norm": 0.9916480713194713,
      "learning_rate": 2.1926355637765805e-06,
      "loss": 0.1072,
      "step": 23963
    },
    {
      "epoch": 0.6991072991423071,
      "grad_norm": 0.675691073335476,
      "learning_rate": 2.192244639343272e-06,
      "loss": 0.1155,
      "step": 23964
    },
    {
      "epoch": 0.6991364723729506,
      "grad_norm": 0.74496196362572,
      "learning_rate": 2.1918537399769358e-06,
      "loss": 0.1145,
      "step": 23965
    },
    {
      "epoch": 0.6991656456035942,
      "grad_norm": 0.8457333082543786,
      "learning_rate": 2.191462865681058e-06,
      "loss": 0.1355,
      "step": 23966
    },
    {
      "epoch": 0.6991948188342377,
      "grad_norm": 0.9140121292847121,
      "learning_rate": 2.191072016459129e-06,
      "loss": 0.1184,
      "step": 23967
    },
    {
      "epoch": 0.6992239920648813,
      "grad_norm": 0.9728469106059102,
      "learning_rate": 2.190681192314637e-06,
      "loss": 0.1065,
      "step": 23968
    },
    {
      "epoch": 0.6992531652955248,
      "grad_norm": 0.8514995422077425,
      "learning_rate": 2.1902903932510748e-06,
      "loss": 0.1197,
      "step": 23969
    },
    {
      "epoch": 0.6992823385261684,
      "grad_norm": 1.085130976010377,
      "learning_rate": 2.1898996192719297e-06,
      "loss": 0.1294,
      "step": 23970
    },
    {
      "epoch": 0.6993115117568119,
      "grad_norm": 0.8148898121292455,
      "learning_rate": 2.1895088703806877e-06,
      "loss": 0.1123,
      "step": 23971
    },
    {
      "epoch": 0.6993406849874555,
      "grad_norm": 0.8148589495926012,
      "learning_rate": 2.189118146580842e-06,
      "loss": 0.1368,
      "step": 23972
    },
    {
      "epoch": 0.699369858218099,
      "grad_norm": 0.8087628675117582,
      "learning_rate": 2.188727447875876e-06,
      "loss": 0.1269,
      "step": 23973
    },
    {
      "epoch": 0.6993990314487426,
      "grad_norm": 0.8024056352754089,
      "learning_rate": 2.1883367742692824e-06,
      "loss": 0.116,
      "step": 23974
    },
    {
      "epoch": 0.6994282046793862,
      "grad_norm": 0.807474093646638,
      "learning_rate": 2.1879461257645453e-06,
      "loss": 0.1368,
      "step": 23975
    },
    {
      "epoch": 0.6994573779100297,
      "grad_norm": 1.0470631070479548,
      "learning_rate": 2.1875555023651552e-06,
      "loss": 0.1174,
      "step": 23976
    },
    {
      "epoch": 0.6994865511406733,
      "grad_norm": 0.8551891079223305,
      "learning_rate": 2.1871649040745984e-06,
      "loss": 0.1115,
      "step": 23977
    },
    {
      "epoch": 0.6995157243713169,
      "grad_norm": 0.7515422966205455,
      "learning_rate": 2.1867743308963585e-06,
      "loss": 0.0985,
      "step": 23978
    },
    {
      "epoch": 0.6995448976019605,
      "grad_norm": 0.8912524447396101,
      "learning_rate": 2.186383782833929e-06,
      "loss": 0.1059,
      "step": 23979
    },
    {
      "epoch": 0.699574070832604,
      "grad_norm": 0.9428279664495918,
      "learning_rate": 2.1859932598907933e-06,
      "loss": 0.1312,
      "step": 23980
    },
    {
      "epoch": 0.6996032440632476,
      "grad_norm": 1.4812676271230822,
      "learning_rate": 2.1856027620704367e-06,
      "loss": 0.1031,
      "step": 23981
    },
    {
      "epoch": 0.6996324172938911,
      "grad_norm": 0.8477961693049424,
      "learning_rate": 2.1852122893763484e-06,
      "loss": 0.0992,
      "step": 23982
    },
    {
      "epoch": 0.6996615905245347,
      "grad_norm": 0.8264979109448708,
      "learning_rate": 2.1848218418120134e-06,
      "loss": 0.126,
      "step": 23983
    },
    {
      "epoch": 0.6996907637551782,
      "grad_norm": 1.1011081401884897,
      "learning_rate": 2.184431419380914e-06,
      "loss": 0.1329,
      "step": 23984
    },
    {
      "epoch": 0.6997199369858218,
      "grad_norm": 1.0591944467203402,
      "learning_rate": 2.1840410220865394e-06,
      "loss": 0.1013,
      "step": 23985
    },
    {
      "epoch": 0.6997491102164654,
      "grad_norm": 0.789704705507825,
      "learning_rate": 2.183650649932376e-06,
      "loss": 0.1472,
      "step": 23986
    },
    {
      "epoch": 0.6997782834471089,
      "grad_norm": 0.9502062566605105,
      "learning_rate": 2.1832603029219074e-06,
      "loss": 0.1409,
      "step": 23987
    },
    {
      "epoch": 0.6998074566777525,
      "grad_norm": 0.8025599434789594,
      "learning_rate": 2.182869981058617e-06,
      "loss": 0.1229,
      "step": 23988
    },
    {
      "epoch": 0.699836629908396,
      "grad_norm": 0.8018413521504824,
      "learning_rate": 2.1824796843459916e-06,
      "loss": 0.1063,
      "step": 23989
    },
    {
      "epoch": 0.6998658031390396,
      "grad_norm": 0.8754198713761392,
      "learning_rate": 2.182089412787514e-06,
      "loss": 0.1156,
      "step": 23990
    },
    {
      "epoch": 0.6998949763696832,
      "grad_norm": 0.991015694336839,
      "learning_rate": 2.1816991663866692e-06,
      "loss": 0.1253,
      "step": 23991
    },
    {
      "epoch": 0.6999241496003268,
      "grad_norm": 0.8127540572983639,
      "learning_rate": 2.1813089451469436e-06,
      "loss": 0.1432,
      "step": 23992
    },
    {
      "epoch": 0.6999533228309703,
      "grad_norm": 0.9592306562067225,
      "learning_rate": 2.1809187490718185e-06,
      "loss": 0.1228,
      "step": 23993
    },
    {
      "epoch": 0.6999824960616139,
      "grad_norm": 0.8551294010357323,
      "learning_rate": 2.180528578164776e-06,
      "loss": 0.1225,
      "step": 23994
    },
    {
      "epoch": 0.7000116692922574,
      "grad_norm": 1.0370886387783538,
      "learning_rate": 2.1801384324293036e-06,
      "loss": 0.1251,
      "step": 23995
    },
    {
      "epoch": 0.700040842522901,
      "grad_norm": 1.1952787806638514,
      "learning_rate": 2.17974831186888e-06,
      "loss": 0.1325,
      "step": 23996
    },
    {
      "epoch": 0.7000700157535446,
      "grad_norm": 0.7795203523082717,
      "learning_rate": 2.179358216486992e-06,
      "loss": 0.133,
      "step": 23997
    },
    {
      "epoch": 0.7000991889841881,
      "grad_norm": 0.8025040785672175,
      "learning_rate": 2.178968146287119e-06,
      "loss": 0.1296,
      "step": 23998
    },
    {
      "epoch": 0.7001283622148317,
      "grad_norm": 1.0730825866736446,
      "learning_rate": 2.1785781012727457e-06,
      "loss": 0.1214,
      "step": 23999
    },
    {
      "epoch": 0.7001575354454752,
      "grad_norm": 0.6690442535288196,
      "learning_rate": 2.1781880814473545e-06,
      "loss": 0.1107,
      "step": 24000
    },
    {
      "epoch": 0.7001867086761188,
      "grad_norm": 0.8040909148802236,
      "learning_rate": 2.1777980868144245e-06,
      "loss": 0.1437,
      "step": 24001
    },
    {
      "epoch": 0.7002158819067623,
      "grad_norm": 1.084027438536715,
      "learning_rate": 2.17740811737744e-06,
      "loss": 0.1134,
      "step": 24002
    },
    {
      "epoch": 0.7002450551374059,
      "grad_norm": 1.0520530415588623,
      "learning_rate": 2.177018173139883e-06,
      "loss": 0.133,
      "step": 24003
    },
    {
      "epoch": 0.7002742283680494,
      "grad_norm": 0.8252624810035873,
      "learning_rate": 2.176628254105234e-06,
      "loss": 0.102,
      "step": 24004
    },
    {
      "epoch": 0.7003034015986931,
      "grad_norm": 0.9945593723678332,
      "learning_rate": 2.176238360276972e-06,
      "loss": 0.1294,
      "step": 24005
    },
    {
      "epoch": 0.7003325748293366,
      "grad_norm": 1.7703537817533679,
      "learning_rate": 2.1758484916585828e-06,
      "loss": 0.0969,
      "step": 24006
    },
    {
      "epoch": 0.7003617480599802,
      "grad_norm": 0.8443668737230785,
      "learning_rate": 2.175458648253543e-06,
      "loss": 0.1137,
      "step": 24007
    },
    {
      "epoch": 0.7003909212906237,
      "grad_norm": 1.1310173847517508,
      "learning_rate": 2.1750688300653307e-06,
      "loss": 0.1452,
      "step": 24008
    },
    {
      "epoch": 0.7004200945212673,
      "grad_norm": 0.9392790970680069,
      "learning_rate": 2.174679037097433e-06,
      "loss": 0.1296,
      "step": 24009
    },
    {
      "epoch": 0.7004492677519109,
      "grad_norm": 1.1362254631985376,
      "learning_rate": 2.1742892693533263e-06,
      "loss": 0.1098,
      "step": 24010
    },
    {
      "epoch": 0.7004784409825544,
      "grad_norm": 0.7340869338528412,
      "learning_rate": 2.1738995268364893e-06,
      "loss": 0.1163,
      "step": 24011
    },
    {
      "epoch": 0.700507614213198,
      "grad_norm": 0.7099221677250814,
      "learning_rate": 2.1735098095504036e-06,
      "loss": 0.1208,
      "step": 24012
    },
    {
      "epoch": 0.7005367874438415,
      "grad_norm": 1.126775946889232,
      "learning_rate": 2.1731201174985484e-06,
      "loss": 0.1133,
      "step": 24013
    },
    {
      "epoch": 0.7005659606744851,
      "grad_norm": 0.8955328508740298,
      "learning_rate": 2.1727304506843998e-06,
      "loss": 0.1221,
      "step": 24014
    },
    {
      "epoch": 0.7005951339051286,
      "grad_norm": 0.8803672228134498,
      "learning_rate": 2.172340809111439e-06,
      "loss": 0.1302,
      "step": 24015
    },
    {
      "epoch": 0.7006243071357722,
      "grad_norm": 0.7970264155337651,
      "learning_rate": 2.171951192783146e-06,
      "loss": 0.1226,
      "step": 24016
    },
    {
      "epoch": 0.7006534803664157,
      "grad_norm": 0.9698896300487313,
      "learning_rate": 2.171561601702998e-06,
      "loss": 0.1264,
      "step": 24017
    },
    {
      "epoch": 0.7006826535970594,
      "grad_norm": 0.8072280167044456,
      "learning_rate": 2.1711720358744704e-06,
      "loss": 0.1267,
      "step": 24018
    },
    {
      "epoch": 0.7007118268277029,
      "grad_norm": 0.7550862104546195,
      "learning_rate": 2.170782495301046e-06,
      "loss": 0.1096,
      "step": 24019
    },
    {
      "epoch": 0.7007410000583465,
      "grad_norm": 1.1206125222080219,
      "learning_rate": 2.170392979986198e-06,
      "loss": 0.1383,
      "step": 24020
    },
    {
      "epoch": 0.70077017328899,
      "grad_norm": 0.7952524095088658,
      "learning_rate": 2.1700034899334056e-06,
      "loss": 0.11,
      "step": 24021
    },
    {
      "epoch": 0.7007993465196336,
      "grad_norm": 0.6707995429890307,
      "learning_rate": 2.169614025146149e-06,
      "loss": 0.1164,
      "step": 24022
    },
    {
      "epoch": 0.7008285197502772,
      "grad_norm": 1.335294007389897,
      "learning_rate": 2.169224585627902e-06,
      "loss": 0.122,
      "step": 24023
    },
    {
      "epoch": 0.7008576929809207,
      "grad_norm": 0.8019056674990741,
      "learning_rate": 2.168835171382141e-06,
      "loss": 0.1076,
      "step": 24024
    },
    {
      "epoch": 0.7008868662115643,
      "grad_norm": 0.9430484276438996,
      "learning_rate": 2.168445782412345e-06,
      "loss": 0.0955,
      "step": 24025
    },
    {
      "epoch": 0.7009160394422078,
      "grad_norm": 0.9831013496598711,
      "learning_rate": 2.1680564187219877e-06,
      "loss": 0.1253,
      "step": 24026
    },
    {
      "epoch": 0.7009452126728514,
      "grad_norm": 0.9485212158111505,
      "learning_rate": 2.1676670803145483e-06,
      "loss": 0.115,
      "step": 24027
    },
    {
      "epoch": 0.7009743859034949,
      "grad_norm": 1.155568103595562,
      "learning_rate": 2.167277767193499e-06,
      "loss": 0.1115,
      "step": 24028
    },
    {
      "epoch": 0.7010035591341385,
      "grad_norm": 1.0984865711311678,
      "learning_rate": 2.1668884793623202e-06,
      "loss": 0.1152,
      "step": 24029
    },
    {
      "epoch": 0.701032732364782,
      "grad_norm": 0.7952488702239149,
      "learning_rate": 2.166499216824484e-06,
      "loss": 0.1273,
      "step": 24030
    },
    {
      "epoch": 0.7010619055954256,
      "grad_norm": 0.8422218644867794,
      "learning_rate": 2.166109979583465e-06,
      "loss": 0.1226,
      "step": 24031
    },
    {
      "epoch": 0.7010910788260692,
      "grad_norm": 0.8822999547685475,
      "learning_rate": 2.1657207676427395e-06,
      "loss": 0.1188,
      "step": 24032
    },
    {
      "epoch": 0.7011202520567128,
      "grad_norm": 1.0246611848438882,
      "learning_rate": 2.165331581005784e-06,
      "loss": 0.0944,
      "step": 24033
    },
    {
      "epoch": 0.7011494252873564,
      "grad_norm": 1.0312611627298989,
      "learning_rate": 2.1649424196760717e-06,
      "loss": 0.1456,
      "step": 24034
    },
    {
      "epoch": 0.7011785985179999,
      "grad_norm": 0.7709151509435308,
      "learning_rate": 2.1645532836570744e-06,
      "loss": 0.1109,
      "step": 24035
    },
    {
      "epoch": 0.7012077717486435,
      "grad_norm": 0.8516470804910463,
      "learning_rate": 2.1641641729522705e-06,
      "loss": 0.1331,
      "step": 24036
    },
    {
      "epoch": 0.701236944979287,
      "grad_norm": 1.006222626001592,
      "learning_rate": 2.163775087565132e-06,
      "loss": 0.1055,
      "step": 24037
    },
    {
      "epoch": 0.7012661182099306,
      "grad_norm": 2.064272706827919,
      "learning_rate": 2.163386027499129e-06,
      "loss": 0.1243,
      "step": 24038
    },
    {
      "epoch": 0.7012952914405741,
      "grad_norm": 1.2481984411755531,
      "learning_rate": 2.1629969927577417e-06,
      "loss": 0.1146,
      "step": 24039
    },
    {
      "epoch": 0.7013244646712177,
      "grad_norm": 0.6976811043586211,
      "learning_rate": 2.16260798334444e-06,
      "loss": 0.0884,
      "step": 24040
    },
    {
      "epoch": 0.7013536379018612,
      "grad_norm": 0.7852034296196531,
      "learning_rate": 2.1622189992626956e-06,
      "loss": 0.1081,
      "step": 24041
    },
    {
      "epoch": 0.7013828111325048,
      "grad_norm": 1.2743735386200303,
      "learning_rate": 2.1618300405159844e-06,
      "loss": 0.1288,
      "step": 24042
    },
    {
      "epoch": 0.7014119843631483,
      "grad_norm": 1.1277866592727162,
      "learning_rate": 2.1614411071077764e-06,
      "loss": 0.1128,
      "step": 24043
    },
    {
      "epoch": 0.7014411575937919,
      "grad_norm": 0.8391056870980765,
      "learning_rate": 2.161052199041543e-06,
      "loss": 0.1128,
      "step": 24044
    },
    {
      "epoch": 0.7014703308244356,
      "grad_norm": 0.7315497197928056,
      "learning_rate": 2.160663316320758e-06,
      "loss": 0.1439,
      "step": 24045
    },
    {
      "epoch": 0.7014995040550791,
      "grad_norm": 0.7604841604689835,
      "learning_rate": 2.1602744589488944e-06,
      "loss": 0.104,
      "step": 24046
    },
    {
      "epoch": 0.7015286772857227,
      "grad_norm": 0.9601341189787869,
      "learning_rate": 2.1598856269294234e-06,
      "loss": 0.1232,
      "step": 24047
    },
    {
      "epoch": 0.7015578505163662,
      "grad_norm": 0.9800180073239532,
      "learning_rate": 2.159496820265813e-06,
      "loss": 0.1305,
      "step": 24048
    },
    {
      "epoch": 0.7015870237470098,
      "grad_norm": 0.9845668064896056,
      "learning_rate": 2.1591080389615386e-06,
      "loss": 0.1205,
      "step": 24049
    },
    {
      "epoch": 0.7016161969776533,
      "grad_norm": 0.8112349456134588,
      "learning_rate": 2.1587192830200683e-06,
      "loss": 0.0993,
      "step": 24050
    },
    {
      "epoch": 0.7016453702082969,
      "grad_norm": 1.1056914242147518,
      "learning_rate": 2.158330552444874e-06,
      "loss": 0.1197,
      "step": 24051
    },
    {
      "epoch": 0.7016745434389404,
      "grad_norm": 1.0860628694299137,
      "learning_rate": 2.1579418472394274e-06,
      "loss": 0.1228,
      "step": 24052
    },
    {
      "epoch": 0.701703716669584,
      "grad_norm": 0.9195609440088643,
      "learning_rate": 2.157553167407198e-06,
      "loss": 0.1199,
      "step": 24053
    },
    {
      "epoch": 0.7017328899002275,
      "grad_norm": 0.8267432432912861,
      "learning_rate": 2.1571645129516533e-06,
      "loss": 0.1103,
      "step": 24054
    },
    {
      "epoch": 0.7017620631308711,
      "grad_norm": 0.9119221876899662,
      "learning_rate": 2.156775883876267e-06,
      "loss": 0.1102,
      "step": 24055
    },
    {
      "epoch": 0.7017912363615146,
      "grad_norm": 1.1060395320976149,
      "learning_rate": 2.156387280184505e-06,
      "loss": 0.1258,
      "step": 24056
    },
    {
      "epoch": 0.7018204095921582,
      "grad_norm": 1.2418193467656058,
      "learning_rate": 2.1559987018798407e-06,
      "loss": 0.108,
      "step": 24057
    },
    {
      "epoch": 0.7018495828228017,
      "grad_norm": 0.6941779523718972,
      "learning_rate": 2.155610148965739e-06,
      "loss": 0.1126,
      "step": 24058
    },
    {
      "epoch": 0.7018787560534454,
      "grad_norm": 0.7468745610187636,
      "learning_rate": 2.155221621445673e-06,
      "loss": 0.106,
      "step": 24059
    },
    {
      "epoch": 0.701907929284089,
      "grad_norm": 1.3963220490013728,
      "learning_rate": 2.154833119323109e-06,
      "loss": 0.1153,
      "step": 24060
    },
    {
      "epoch": 0.7019371025147325,
      "grad_norm": 1.010510213635547,
      "learning_rate": 2.1544446426015137e-06,
      "loss": 0.119,
      "step": 24061
    },
    {
      "epoch": 0.7019662757453761,
      "grad_norm": 0.843969770784331,
      "learning_rate": 2.1540561912843577e-06,
      "loss": 0.1247,
      "step": 24062
    },
    {
      "epoch": 0.7019954489760196,
      "grad_norm": 0.9308770573205402,
      "learning_rate": 2.1536677653751103e-06,
      "loss": 0.1054,
      "step": 24063
    },
    {
      "epoch": 0.7020246222066632,
      "grad_norm": 1.1167229292695007,
      "learning_rate": 2.1532793648772376e-06,
      "loss": 0.1071,
      "step": 24064
    },
    {
      "epoch": 0.7020537954373067,
      "grad_norm": 0.6777538264692602,
      "learning_rate": 2.152890989794205e-06,
      "loss": 0.1162,
      "step": 24065
    },
    {
      "epoch": 0.7020829686679503,
      "grad_norm": 0.7712540637357135,
      "learning_rate": 2.1525026401294846e-06,
      "loss": 0.1106,
      "step": 24066
    },
    {
      "epoch": 0.7021121418985938,
      "grad_norm": 1.0568101289978415,
      "learning_rate": 2.1521143158865403e-06,
      "loss": 0.1249,
      "step": 24067
    },
    {
      "epoch": 0.7021413151292374,
      "grad_norm": 0.7967857620394879,
      "learning_rate": 2.1517260170688357e-06,
      "loss": 0.1057,
      "step": 24068
    },
    {
      "epoch": 0.7021704883598809,
      "grad_norm": 0.6882672674262725,
      "learning_rate": 2.1513377436798454e-06,
      "loss": 0.1286,
      "step": 24069
    },
    {
      "epoch": 0.7021996615905245,
      "grad_norm": 0.8283639053360913,
      "learning_rate": 2.150949495723032e-06,
      "loss": 0.1169,
      "step": 24070
    },
    {
      "epoch": 0.702228834821168,
      "grad_norm": 0.9816232198395284,
      "learning_rate": 2.1505612732018588e-06,
      "loss": 0.098,
      "step": 24071
    },
    {
      "epoch": 0.7022580080518117,
      "grad_norm": 0.8261055672546759,
      "learning_rate": 2.1501730761197962e-06,
      "loss": 0.1189,
      "step": 24072
    },
    {
      "epoch": 0.7022871812824553,
      "grad_norm": 0.6476406893872703,
      "learning_rate": 2.1497849044803088e-06,
      "loss": 0.1215,
      "step": 24073
    },
    {
      "epoch": 0.7023163545130988,
      "grad_norm": 0.8205112972150803,
      "learning_rate": 2.149396758286859e-06,
      "loss": 0.1185,
      "step": 24074
    },
    {
      "epoch": 0.7023455277437424,
      "grad_norm": 1.075725340492133,
      "learning_rate": 2.1490086375429146e-06,
      "loss": 0.1242,
      "step": 24075
    },
    {
      "epoch": 0.7023747009743859,
      "grad_norm": 0.777168613703323,
      "learning_rate": 2.1486205422519426e-06,
      "loss": 0.1072,
      "step": 24076
    },
    {
      "epoch": 0.7024038742050295,
      "grad_norm": 0.9361846554945391,
      "learning_rate": 2.1482324724174052e-06,
      "loss": 0.1484,
      "step": 24077
    },
    {
      "epoch": 0.702433047435673,
      "grad_norm": 0.8325886361286319,
      "learning_rate": 2.1478444280427657e-06,
      "loss": 0.1271,
      "step": 24078
    },
    {
      "epoch": 0.7024622206663166,
      "grad_norm": 0.706949477634099,
      "learning_rate": 2.1474564091314925e-06,
      "loss": 0.1249,
      "step": 24079
    },
    {
      "epoch": 0.7024913938969601,
      "grad_norm": 0.8312963660706355,
      "learning_rate": 2.1470684156870454e-06,
      "loss": 0.1086,
      "step": 24080
    },
    {
      "epoch": 0.7025205671276037,
      "grad_norm": 1.151050570465602,
      "learning_rate": 2.1466804477128905e-06,
      "loss": 0.1077,
      "step": 24081
    },
    {
      "epoch": 0.7025497403582472,
      "grad_norm": 0.8598811482320603,
      "learning_rate": 2.1462925052124934e-06,
      "loss": 0.1149,
      "step": 24082
    },
    {
      "epoch": 0.7025789135888908,
      "grad_norm": 0.789679013115193,
      "learning_rate": 2.1459045881893154e-06,
      "loss": 0.1103,
      "step": 24083
    },
    {
      "epoch": 0.7026080868195343,
      "grad_norm": 0.7495025644090153,
      "learning_rate": 2.1455166966468177e-06,
      "loss": 0.1204,
      "step": 24084
    },
    {
      "epoch": 0.7026372600501779,
      "grad_norm": 1.095025131927912,
      "learning_rate": 2.1451288305884683e-06,
      "loss": 0.1316,
      "step": 24085
    },
    {
      "epoch": 0.7026664332808216,
      "grad_norm": 0.7283491155742952,
      "learning_rate": 2.144740990017725e-06,
      "loss": 0.1111,
      "step": 24086
    },
    {
      "epoch": 0.7026956065114651,
      "grad_norm": 0.8667311375326322,
      "learning_rate": 2.1443531749380538e-06,
      "loss": 0.1185,
      "step": 24087
    },
    {
      "epoch": 0.7027247797421087,
      "grad_norm": 0.6838411245661873,
      "learning_rate": 2.143965385352914e-06,
      "loss": 0.1259,
      "step": 24088
    },
    {
      "epoch": 0.7027539529727522,
      "grad_norm": 1.052758190353409,
      "learning_rate": 2.1435776212657715e-06,
      "loss": 0.1414,
      "step": 24089
    },
    {
      "epoch": 0.7027831262033958,
      "grad_norm": 0.823716241046075,
      "learning_rate": 2.1431898826800866e-06,
      "loss": 0.1294,
      "step": 24090
    },
    {
      "epoch": 0.7028122994340393,
      "grad_norm": 0.9334328281490369,
      "learning_rate": 2.1428021695993184e-06,
      "loss": 0.1433,
      "step": 24091
    },
    {
      "epoch": 0.7028414726646829,
      "grad_norm": 0.8547639693236345,
      "learning_rate": 2.14241448202693e-06,
      "loss": 0.122,
      "step": 24092
    },
    {
      "epoch": 0.7028706458953264,
      "grad_norm": 0.6344574823642141,
      "learning_rate": 2.1420268199663854e-06,
      "loss": 0.1006,
      "step": 24093
    },
    {
      "epoch": 0.70289981912597,
      "grad_norm": 0.8307319846047498,
      "learning_rate": 2.141639183421142e-06,
      "loss": 0.1277,
      "step": 24094
    },
    {
      "epoch": 0.7029289923566135,
      "grad_norm": 0.792771071556801,
      "learning_rate": 2.141251572394661e-06,
      "loss": 0.1368,
      "step": 24095
    },
    {
      "epoch": 0.7029581655872571,
      "grad_norm": 0.6623395859811816,
      "learning_rate": 2.1408639868904046e-06,
      "loss": 0.1212,
      "step": 24096
    },
    {
      "epoch": 0.7029873388179007,
      "grad_norm": 0.8286602012946489,
      "learning_rate": 2.140476426911832e-06,
      "loss": 0.1317,
      "step": 24097
    },
    {
      "epoch": 0.7030165120485442,
      "grad_norm": 0.8179453115130316,
      "learning_rate": 2.1400888924623995e-06,
      "loss": 0.1441,
      "step": 24098
    },
    {
      "epoch": 0.7030456852791879,
      "grad_norm": 1.248903539581955,
      "learning_rate": 2.139701383545575e-06,
      "loss": 0.0981,
      "step": 24099
    },
    {
      "epoch": 0.7030748585098314,
      "grad_norm": 0.8845381821470489,
      "learning_rate": 2.139313900164813e-06,
      "loss": 0.1057,
      "step": 24100
    },
    {
      "epoch": 0.703104031740475,
      "grad_norm": 0.8787044640074702,
      "learning_rate": 2.1389264423235725e-06,
      "loss": 0.1166,
      "step": 24101
    },
    {
      "epoch": 0.7031332049711185,
      "grad_norm": 0.9290614703170608,
      "learning_rate": 2.138539010025315e-06,
      "loss": 0.1373,
      "step": 24102
    },
    {
      "epoch": 0.7031623782017621,
      "grad_norm": 0.8282834718855829,
      "learning_rate": 2.1381516032734985e-06,
      "loss": 0.1069,
      "step": 24103
    },
    {
      "epoch": 0.7031915514324056,
      "grad_norm": 0.8220651876508512,
      "learning_rate": 2.137764222071579e-06,
      "loss": 0.1075,
      "step": 24104
    },
    {
      "epoch": 0.7032207246630492,
      "grad_norm": 0.9565262772610033,
      "learning_rate": 2.137376866423018e-06,
      "loss": 0.1057,
      "step": 24105
    },
    {
      "epoch": 0.7032498978936927,
      "grad_norm": 1.1878622190301287,
      "learning_rate": 2.1369895363312735e-06,
      "loss": 0.1295,
      "step": 24106
    },
    {
      "epoch": 0.7032790711243363,
      "grad_norm": 0.9622737987465088,
      "learning_rate": 2.1366022317998042e-06,
      "loss": 0.1148,
      "step": 24107
    },
    {
      "epoch": 0.7033082443549799,
      "grad_norm": 0.9526874065691252,
      "learning_rate": 2.1362149528320646e-06,
      "loss": 0.1295,
      "step": 24108
    },
    {
      "epoch": 0.7033374175856234,
      "grad_norm": 1.354666136293969,
      "learning_rate": 2.135827699431516e-06,
      "loss": 0.1227,
      "step": 24109
    },
    {
      "epoch": 0.703366590816267,
      "grad_norm": 1.2650871130812544,
      "learning_rate": 2.135440471601612e-06,
      "loss": 0.1212,
      "step": 24110
    },
    {
      "epoch": 0.7033957640469105,
      "grad_norm": 0.847655465678781,
      "learning_rate": 2.1350532693458117e-06,
      "loss": 0.1034,
      "step": 24111
    },
    {
      "epoch": 0.7034249372775541,
      "grad_norm": 0.9253122018880892,
      "learning_rate": 2.1346660926675732e-06,
      "loss": 0.135,
      "step": 24112
    },
    {
      "epoch": 0.7034541105081977,
      "grad_norm": 0.8972131725035827,
      "learning_rate": 2.1342789415703524e-06,
      "loss": 0.111,
      "step": 24113
    },
    {
      "epoch": 0.7034832837388413,
      "grad_norm": 0.9270774916417938,
      "learning_rate": 2.1338918160576033e-06,
      "loss": 0.1209,
      "step": 24114
    },
    {
      "epoch": 0.7035124569694848,
      "grad_norm": 0.9024003077394835,
      "learning_rate": 2.1335047161327853e-06,
      "loss": 0.1249,
      "step": 24115
    },
    {
      "epoch": 0.7035416302001284,
      "grad_norm": 0.9166456653557409,
      "learning_rate": 2.1331176417993517e-06,
      "loss": 0.1104,
      "step": 24116
    },
    {
      "epoch": 0.7035708034307719,
      "grad_norm": 1.0659154021750539,
      "learning_rate": 2.1327305930607605e-06,
      "loss": 0.1047,
      "step": 24117
    },
    {
      "epoch": 0.7035999766614155,
      "grad_norm": 1.8382653879684283,
      "learning_rate": 2.1323435699204646e-06,
      "loss": 0.1144,
      "step": 24118
    },
    {
      "epoch": 0.703629149892059,
      "grad_norm": 0.8872106360749761,
      "learning_rate": 2.131956572381923e-06,
      "loss": 0.1426,
      "step": 24119
    },
    {
      "epoch": 0.7036583231227026,
      "grad_norm": 0.7547890595575304,
      "learning_rate": 2.131569600448588e-06,
      "loss": 0.1437,
      "step": 24120
    },
    {
      "epoch": 0.7036874963533462,
      "grad_norm": 1.0143542045822773,
      "learning_rate": 2.1311826541239133e-06,
      "loss": 0.1078,
      "step": 24121
    },
    {
      "epoch": 0.7037166695839897,
      "grad_norm": 0.7800617755717483,
      "learning_rate": 2.130795733411355e-06,
      "loss": 0.1429,
      "step": 24122
    },
    {
      "epoch": 0.7037458428146333,
      "grad_norm": 0.8782027026296402,
      "learning_rate": 2.130408838314369e-06,
      "loss": 0.1302,
      "step": 24123
    },
    {
      "epoch": 0.7037750160452768,
      "grad_norm": 0.9126599810947121,
      "learning_rate": 2.1300219688364078e-06,
      "loss": 0.1213,
      "step": 24124
    },
    {
      "epoch": 0.7038041892759204,
      "grad_norm": 0.8159882221397627,
      "learning_rate": 2.1296351249809237e-06,
      "loss": 0.1027,
      "step": 24125
    },
    {
      "epoch": 0.703833362506564,
      "grad_norm": 0.8295197981214928,
      "learning_rate": 2.129248306751374e-06,
      "loss": 0.1145,
      "step": 24126
    },
    {
      "epoch": 0.7038625357372076,
      "grad_norm": 1.2707174944966297,
      "learning_rate": 2.1288615141512098e-06,
      "loss": 0.1279,
      "step": 24127
    },
    {
      "epoch": 0.7038917089678511,
      "grad_norm": 0.9607515254219565,
      "learning_rate": 2.128474747183881e-06,
      "loss": 0.114,
      "step": 24128
    },
    {
      "epoch": 0.7039208821984947,
      "grad_norm": 0.8996369639105667,
      "learning_rate": 2.128088005852848e-06,
      "loss": 0.143,
      "step": 24129
    },
    {
      "epoch": 0.7039500554291382,
      "grad_norm": 0.9824472072603855,
      "learning_rate": 2.1277012901615595e-06,
      "loss": 0.135,
      "step": 24130
    },
    {
      "epoch": 0.7039792286597818,
      "grad_norm": 1.1287121871293229,
      "learning_rate": 2.1273146001134672e-06,
      "loss": 0.1507,
      "step": 24131
    },
    {
      "epoch": 0.7040084018904254,
      "grad_norm": 0.7840416532663803,
      "learning_rate": 2.126927935712025e-06,
      "loss": 0.1395,
      "step": 24132
    },
    {
      "epoch": 0.7040375751210689,
      "grad_norm": 1.1163970757759962,
      "learning_rate": 2.1265412969606846e-06,
      "loss": 0.1093,
      "step": 24133
    },
    {
      "epoch": 0.7040667483517125,
      "grad_norm": 0.8538029088499534,
      "learning_rate": 2.126154683862896e-06,
      "loss": 0.1193,
      "step": 24134
    },
    {
      "epoch": 0.704095921582356,
      "grad_norm": 0.7897568904118584,
      "learning_rate": 2.125768096422113e-06,
      "loss": 0.1015,
      "step": 24135
    },
    {
      "epoch": 0.7041250948129996,
      "grad_norm": 0.583966894031173,
      "learning_rate": 2.1253815346417873e-06,
      "loss": 0.0895,
      "step": 24136
    },
    {
      "epoch": 0.7041542680436431,
      "grad_norm": 0.7785901180582534,
      "learning_rate": 2.1249949985253686e-06,
      "loss": 0.1098,
      "step": 24137
    },
    {
      "epoch": 0.7041834412742867,
      "grad_norm": 0.7387064927061772,
      "learning_rate": 2.1246084880763073e-06,
      "loss": 0.1239,
      "step": 24138
    },
    {
      "epoch": 0.7042126145049302,
      "grad_norm": 0.8556757756360723,
      "learning_rate": 2.1242220032980563e-06,
      "loss": 0.1164,
      "step": 24139
    },
    {
      "epoch": 0.7042417877355739,
      "grad_norm": 0.899373524585836,
      "learning_rate": 2.1238355441940634e-06,
      "loss": 0.1032,
      "step": 24140
    },
    {
      "epoch": 0.7042709609662174,
      "grad_norm": 0.7693524759253813,
      "learning_rate": 2.1234491107677802e-06,
      "loss": 0.123,
      "step": 24141
    },
    {
      "epoch": 0.704300134196861,
      "grad_norm": 0.8646075646004397,
      "learning_rate": 2.123062703022658e-06,
      "loss": 0.098,
      "step": 24142
    },
    {
      "epoch": 0.7043293074275045,
      "grad_norm": 1.0283003112861857,
      "learning_rate": 2.1226763209621452e-06,
      "loss": 0.0983,
      "step": 24143
    },
    {
      "epoch": 0.7043584806581481,
      "grad_norm": 0.7773822812720221,
      "learning_rate": 2.12228996458969e-06,
      "loss": 0.1226,
      "step": 24144
    },
    {
      "epoch": 0.7043876538887917,
      "grad_norm": 1.1564431781030586,
      "learning_rate": 2.1219036339087447e-06,
      "loss": 0.1237,
      "step": 24145
    },
    {
      "epoch": 0.7044168271194352,
      "grad_norm": 0.8573595090024431,
      "learning_rate": 2.121517328922754e-06,
      "loss": 0.1174,
      "step": 24146
    },
    {
      "epoch": 0.7044460003500788,
      "grad_norm": 1.0830024680069363,
      "learning_rate": 2.1211310496351724e-06,
      "loss": 0.1275,
      "step": 24147
    },
    {
      "epoch": 0.7044751735807223,
      "grad_norm": 0.8617612438167792,
      "learning_rate": 2.120744796049443e-06,
      "loss": 0.0998,
      "step": 24148
    },
    {
      "epoch": 0.7045043468113659,
      "grad_norm": 1.0647450513089958,
      "learning_rate": 2.120358568169019e-06,
      "loss": 0.1069,
      "step": 24149
    },
    {
      "epoch": 0.7045335200420094,
      "grad_norm": 0.891693853593121,
      "learning_rate": 2.1199723659973466e-06,
      "loss": 0.1153,
      "step": 24150
    },
    {
      "epoch": 0.704562693272653,
      "grad_norm": 0.7222184254171092,
      "learning_rate": 2.1195861895378704e-06,
      "loss": 0.1167,
      "step": 24151
    },
    {
      "epoch": 0.7045918665032965,
      "grad_norm": 1.0003390452738457,
      "learning_rate": 2.119200038794042e-06,
      "loss": 0.1014,
      "step": 24152
    },
    {
      "epoch": 0.7046210397339402,
      "grad_norm": 1.0467032394858526,
      "learning_rate": 2.11881391376931e-06,
      "loss": 0.1533,
      "step": 24153
    },
    {
      "epoch": 0.7046502129645837,
      "grad_norm": 0.8797110334678755,
      "learning_rate": 2.118427814467119e-06,
      "loss": 0.0998,
      "step": 24154
    },
    {
      "epoch": 0.7046793861952273,
      "grad_norm": 1.316442117975834,
      "learning_rate": 2.118041740890915e-06,
      "loss": 0.1305,
      "step": 24155
    },
    {
      "epoch": 0.7047085594258709,
      "grad_norm": 0.8996957206478129,
      "learning_rate": 2.117655693044148e-06,
      "loss": 0.1029,
      "step": 24156
    },
    {
      "epoch": 0.7047377326565144,
      "grad_norm": 0.9367241576178393,
      "learning_rate": 2.117269670930263e-06,
      "loss": 0.1178,
      "step": 24157
    },
    {
      "epoch": 0.704766905887158,
      "grad_norm": 0.7263129627173697,
      "learning_rate": 2.116883674552703e-06,
      "loss": 0.1061,
      "step": 24158
    },
    {
      "epoch": 0.7047960791178015,
      "grad_norm": 1.4787850330764907,
      "learning_rate": 2.1164977039149203e-06,
      "loss": 0.1385,
      "step": 24159
    },
    {
      "epoch": 0.7048252523484451,
      "grad_norm": 0.798915100234943,
      "learning_rate": 2.116111759020358e-06,
      "loss": 0.1044,
      "step": 24160
    },
    {
      "epoch": 0.7048544255790886,
      "grad_norm": 1.0609816344790315,
      "learning_rate": 2.1157258398724593e-06,
      "loss": 0.1362,
      "step": 24161
    },
    {
      "epoch": 0.7048835988097322,
      "grad_norm": 0.64276459998314,
      "learning_rate": 2.1153399464746736e-06,
      "loss": 0.1334,
      "step": 24162
    },
    {
      "epoch": 0.7049127720403757,
      "grad_norm": 0.8832299790936908,
      "learning_rate": 2.1149540788304452e-06,
      "loss": 0.1103,
      "step": 24163
    },
    {
      "epoch": 0.7049419452710193,
      "grad_norm": 1.2314604621688052,
      "learning_rate": 2.1145682369432153e-06,
      "loss": 0.1119,
      "step": 24164
    },
    {
      "epoch": 0.7049711185016628,
      "grad_norm": 1.0568850293044327,
      "learning_rate": 2.114182420816432e-06,
      "loss": 0.1112,
      "step": 24165
    },
    {
      "epoch": 0.7050002917323064,
      "grad_norm": 0.9384272229348917,
      "learning_rate": 2.1137966304535407e-06,
      "loss": 0.1089,
      "step": 24166
    },
    {
      "epoch": 0.70502946496295,
      "grad_norm": 0.7942333966452567,
      "learning_rate": 2.1134108658579837e-06,
      "loss": 0.1192,
      "step": 24167
    },
    {
      "epoch": 0.7050586381935936,
      "grad_norm": 0.8440102570808579,
      "learning_rate": 2.1130251270332042e-06,
      "loss": 0.1113,
      "step": 24168
    },
    {
      "epoch": 0.7050878114242372,
      "grad_norm": 1.235031544150926,
      "learning_rate": 2.1126394139826468e-06,
      "loss": 0.1291,
      "step": 24169
    },
    {
      "epoch": 0.7051169846548807,
      "grad_norm": 1.1967882927456939,
      "learning_rate": 2.112253726709757e-06,
      "loss": 0.1146,
      "step": 24170
    },
    {
      "epoch": 0.7051461578855243,
      "grad_norm": 0.9534341476626986,
      "learning_rate": 2.111868065217975e-06,
      "loss": 0.1207,
      "step": 24171
    },
    {
      "epoch": 0.7051753311161678,
      "grad_norm": 0.8584359280085143,
      "learning_rate": 2.111482429510748e-06,
      "loss": 0.1253,
      "step": 24172
    },
    {
      "epoch": 0.7052045043468114,
      "grad_norm": 0.8391558648829219,
      "learning_rate": 2.1110968195915153e-06,
      "loss": 0.1258,
      "step": 24173
    },
    {
      "epoch": 0.7052336775774549,
      "grad_norm": 0.9438799524487655,
      "learning_rate": 2.1107112354637194e-06,
      "loss": 0.1256,
      "step": 24174
    },
    {
      "epoch": 0.7052628508080985,
      "grad_norm": 0.7457031989603938,
      "learning_rate": 2.1103256771308033e-06,
      "loss": 0.1043,
      "step": 24175
    },
    {
      "epoch": 0.705292024038742,
      "grad_norm": 0.9067185036060394,
      "learning_rate": 2.109940144596212e-06,
      "loss": 0.1295,
      "step": 24176
    },
    {
      "epoch": 0.7053211972693856,
      "grad_norm": 0.8066912346339389,
      "learning_rate": 2.109554637863385e-06,
      "loss": 0.1361,
      "step": 24177
    },
    {
      "epoch": 0.7053503705000291,
      "grad_norm": 0.7969519618243077,
      "learning_rate": 2.1091691569357626e-06,
      "loss": 0.1051,
      "step": 24178
    },
    {
      "epoch": 0.7053795437306727,
      "grad_norm": 0.8721099242465415,
      "learning_rate": 2.1087837018167893e-06,
      "loss": 0.1085,
      "step": 24179
    },
    {
      "epoch": 0.7054087169613164,
      "grad_norm": 1.0506240532952547,
      "learning_rate": 2.1083982725099055e-06,
      "loss": 0.1113,
      "step": 24180
    },
    {
      "epoch": 0.7054378901919599,
      "grad_norm": 1.008842102514274,
      "learning_rate": 2.108012869018549e-06,
      "loss": 0.1162,
      "step": 24181
    },
    {
      "epoch": 0.7054670634226035,
      "grad_norm": 0.814216316192569,
      "learning_rate": 2.107627491346164e-06,
      "loss": 0.1041,
      "step": 24182
    },
    {
      "epoch": 0.705496236653247,
      "grad_norm": 1.0236797961912931,
      "learning_rate": 2.107242139496192e-06,
      "loss": 0.1177,
      "step": 24183
    },
    {
      "epoch": 0.7055254098838906,
      "grad_norm": 1.13023933406375,
      "learning_rate": 2.1068568134720714e-06,
      "loss": 0.1356,
      "step": 24184
    },
    {
      "epoch": 0.7055545831145341,
      "grad_norm": 0.838739405281954,
      "learning_rate": 2.1064715132772406e-06,
      "loss": 0.1187,
      "step": 24185
    },
    {
      "epoch": 0.7055837563451777,
      "grad_norm": 0.8367746237079655,
      "learning_rate": 2.106086238915143e-06,
      "loss": 0.105,
      "step": 24186
    },
    {
      "epoch": 0.7056129295758212,
      "grad_norm": 0.6874452186299084,
      "learning_rate": 2.1057009903892155e-06,
      "loss": 0.1326,
      "step": 24187
    },
    {
      "epoch": 0.7056421028064648,
      "grad_norm": 0.8836810306274153,
      "learning_rate": 2.1053157677028985e-06,
      "loss": 0.1312,
      "step": 24188
    },
    {
      "epoch": 0.7056712760371083,
      "grad_norm": 0.8753607875102475,
      "learning_rate": 2.1049305708596322e-06,
      "loss": 0.1117,
      "step": 24189
    },
    {
      "epoch": 0.7057004492677519,
      "grad_norm": 0.7125699108263962,
      "learning_rate": 2.1045453998628555e-06,
      "loss": 0.1341,
      "step": 24190
    },
    {
      "epoch": 0.7057296224983954,
      "grad_norm": 0.8221614538570634,
      "learning_rate": 2.1041602547160043e-06,
      "loss": 0.1089,
      "step": 24191
    },
    {
      "epoch": 0.705758795729039,
      "grad_norm": 1.0271439627238976,
      "learning_rate": 2.103775135422521e-06,
      "loss": 0.1272,
      "step": 24192
    },
    {
      "epoch": 0.7057879689596825,
      "grad_norm": 0.7532382507475655,
      "learning_rate": 2.10339004198584e-06,
      "loss": 0.1149,
      "step": 24193
    },
    {
      "epoch": 0.7058171421903262,
      "grad_norm": 0.7786412246151072,
      "learning_rate": 2.1030049744094033e-06,
      "loss": 0.1363,
      "step": 24194
    },
    {
      "epoch": 0.7058463154209698,
      "grad_norm": 0.8096487627525432,
      "learning_rate": 2.1026199326966447e-06,
      "loss": 0.1155,
      "step": 24195
    },
    {
      "epoch": 0.7058754886516133,
      "grad_norm": 0.802356014632334,
      "learning_rate": 2.1022349168510047e-06,
      "loss": 0.1113,
      "step": 24196
    },
    {
      "epoch": 0.7059046618822569,
      "grad_norm": 0.7717028846688864,
      "learning_rate": 2.10184992687592e-06,
      "loss": 0.1165,
      "step": 24197
    },
    {
      "epoch": 0.7059338351129004,
      "grad_norm": 0.8428359819795502,
      "learning_rate": 2.1014649627748262e-06,
      "loss": 0.1439,
      "step": 24198
    },
    {
      "epoch": 0.705963008343544,
      "grad_norm": 1.003031010013575,
      "learning_rate": 2.101080024551161e-06,
      "loss": 0.1208,
      "step": 24199
    },
    {
      "epoch": 0.7059921815741875,
      "grad_norm": 0.8247121288769128,
      "learning_rate": 2.1006951122083626e-06,
      "loss": 0.1353,
      "step": 24200
    },
    {
      "epoch": 0.7060213548048311,
      "grad_norm": 1.0078679597901747,
      "learning_rate": 2.100310225749865e-06,
      "loss": 0.1276,
      "step": 24201
    },
    {
      "epoch": 0.7060505280354746,
      "grad_norm": 1.0383652940890002,
      "learning_rate": 2.099925365179107e-06,
      "loss": 0.1086,
      "step": 24202
    },
    {
      "epoch": 0.7060797012661182,
      "grad_norm": 1.040648646029909,
      "learning_rate": 2.0995405304995227e-06,
      "loss": 0.1285,
      "step": 24203
    },
    {
      "epoch": 0.7061088744967617,
      "grad_norm": 0.9445478766082092,
      "learning_rate": 2.0991557217145464e-06,
      "loss": 0.1142,
      "step": 24204
    },
    {
      "epoch": 0.7061380477274053,
      "grad_norm": 0.7634335003745596,
      "learning_rate": 2.0987709388276155e-06,
      "loss": 0.1146,
      "step": 24205
    },
    {
      "epoch": 0.7061672209580488,
      "grad_norm": 0.8196127515404932,
      "learning_rate": 2.098386181842167e-06,
      "loss": 0.1126,
      "step": 24206
    },
    {
      "epoch": 0.7061963941886925,
      "grad_norm": 1.191064054478148,
      "learning_rate": 2.0980014507616334e-06,
      "loss": 0.1299,
      "step": 24207
    },
    {
      "epoch": 0.7062255674193361,
      "grad_norm": 0.7970035370632912,
      "learning_rate": 2.097616745589449e-06,
      "loss": 0.124,
      "step": 24208
    },
    {
      "epoch": 0.7062547406499796,
      "grad_norm": 0.8143835814356327,
      "learning_rate": 2.097232066329051e-06,
      "loss": 0.1377,
      "step": 24209
    },
    {
      "epoch": 0.7062839138806232,
      "grad_norm": 0.8335081394825827,
      "learning_rate": 2.0968474129838724e-06,
      "loss": 0.1233,
      "step": 24210
    },
    {
      "epoch": 0.7063130871112667,
      "grad_norm": 0.8513562958901598,
      "learning_rate": 2.096462785557345e-06,
      "loss": 0.1177,
      "step": 24211
    },
    {
      "epoch": 0.7063422603419103,
      "grad_norm": 0.8739121203912802,
      "learning_rate": 2.096078184052905e-06,
      "loss": 0.1476,
      "step": 24212
    },
    {
      "epoch": 0.7063714335725538,
      "grad_norm": 0.9402304193632139,
      "learning_rate": 2.095693608473987e-06,
      "loss": 0.1411,
      "step": 24213
    },
    {
      "epoch": 0.7064006068031974,
      "grad_norm": 0.8652467146531886,
      "learning_rate": 2.095309058824024e-06,
      "loss": 0.1398,
      "step": 24214
    },
    {
      "epoch": 0.7064297800338409,
      "grad_norm": 0.7144904842046801,
      "learning_rate": 2.0949245351064456e-06,
      "loss": 0.0986,
      "step": 24215
    },
    {
      "epoch": 0.7064589532644845,
      "grad_norm": 0.8454451122292086,
      "learning_rate": 2.09454003732469e-06,
      "loss": 0.1203,
      "step": 24216
    },
    {
      "epoch": 0.706488126495128,
      "grad_norm": 0.8771193551816933,
      "learning_rate": 2.094155565482185e-06,
      "loss": 0.1024,
      "step": 24217
    },
    {
      "epoch": 0.7065172997257716,
      "grad_norm": 0.963960932283189,
      "learning_rate": 2.0937711195823658e-06,
      "loss": 0.1068,
      "step": 24218
    },
    {
      "epoch": 0.7065464729564152,
      "grad_norm": 0.8074460197791143,
      "learning_rate": 2.0933866996286656e-06,
      "loss": 0.1263,
      "step": 24219
    },
    {
      "epoch": 0.7065756461870587,
      "grad_norm": 0.9488724299713767,
      "learning_rate": 2.0930023056245156e-06,
      "loss": 0.1471,
      "step": 24220
    },
    {
      "epoch": 0.7066048194177024,
      "grad_norm": 0.8089768162625798,
      "learning_rate": 2.092617937573345e-06,
      "loss": 0.1039,
      "step": 24221
    },
    {
      "epoch": 0.7066339926483459,
      "grad_norm": 0.8412545528659938,
      "learning_rate": 2.0922335954785893e-06,
      "loss": 0.1205,
      "step": 24222
    },
    {
      "epoch": 0.7066631658789895,
      "grad_norm": 0.8330026310860443,
      "learning_rate": 2.091849279343676e-06,
      "loss": 0.1301,
      "step": 24223
    },
    {
      "epoch": 0.706692339109633,
      "grad_norm": 1.028899814171179,
      "learning_rate": 2.09146498917204e-06,
      "loss": 0.1276,
      "step": 24224
    },
    {
      "epoch": 0.7067215123402766,
      "grad_norm": 0.9870883693822461,
      "learning_rate": 2.0910807249671085e-06,
      "loss": 0.1294,
      "step": 24225
    },
    {
      "epoch": 0.7067506855709201,
      "grad_norm": 0.7054511588804655,
      "learning_rate": 2.0906964867323154e-06,
      "loss": 0.1016,
      "step": 24226
    },
    {
      "epoch": 0.7067798588015637,
      "grad_norm": 0.8912403946712257,
      "learning_rate": 2.0903122744710896e-06,
      "loss": 0.1027,
      "step": 24227
    },
    {
      "epoch": 0.7068090320322072,
      "grad_norm": 1.1862637949493537,
      "learning_rate": 2.08992808818686e-06,
      "loss": 0.1303,
      "step": 24228
    },
    {
      "epoch": 0.7068382052628508,
      "grad_norm": 0.7571432834830695,
      "learning_rate": 2.089543927883057e-06,
      "loss": 0.0978,
      "step": 24229
    },
    {
      "epoch": 0.7068673784934943,
      "grad_norm": 0.7935119833746682,
      "learning_rate": 2.0891597935631134e-06,
      "loss": 0.1307,
      "step": 24230
    },
    {
      "epoch": 0.7068965517241379,
      "grad_norm": 0.9305514332896524,
      "learning_rate": 2.088775685230454e-06,
      "loss": 0.1373,
      "step": 24231
    },
    {
      "epoch": 0.7069257249547815,
      "grad_norm": 1.0160859880353177,
      "learning_rate": 2.0883916028885126e-06,
      "loss": 0.1078,
      "step": 24232
    },
    {
      "epoch": 0.706954898185425,
      "grad_norm": 0.6911868767085836,
      "learning_rate": 2.0880075465407156e-06,
      "loss": 0.1099,
      "step": 24233
    },
    {
      "epoch": 0.7069840714160686,
      "grad_norm": 0.8930064859506339,
      "learning_rate": 2.08762351619049e-06,
      "loss": 0.106,
      "step": 24234
    },
    {
      "epoch": 0.7070132446467122,
      "grad_norm": 0.9108804388813418,
      "learning_rate": 2.0872395118412667e-06,
      "loss": 0.1099,
      "step": 24235
    },
    {
      "epoch": 0.7070424178773558,
      "grad_norm": 0.8310533896476148,
      "learning_rate": 2.086855533496476e-06,
      "loss": 0.1188,
      "step": 24236
    },
    {
      "epoch": 0.7070715911079993,
      "grad_norm": 0.7936093721437792,
      "learning_rate": 2.0864715811595433e-06,
      "loss": 0.109,
      "step": 24237
    },
    {
      "epoch": 0.7071007643386429,
      "grad_norm": 0.8778617281934136,
      "learning_rate": 2.0860876548338948e-06,
      "loss": 0.1226,
      "step": 24238
    },
    {
      "epoch": 0.7071299375692864,
      "grad_norm": 0.9491890116658876,
      "learning_rate": 2.085703754522962e-06,
      "loss": 0.1257,
      "step": 24239
    },
    {
      "epoch": 0.70715911079993,
      "grad_norm": 0.7687823104756915,
      "learning_rate": 2.0853198802301705e-06,
      "loss": 0.1164,
      "step": 24240
    },
    {
      "epoch": 0.7071882840305735,
      "grad_norm": 0.7390914642422536,
      "learning_rate": 2.0849360319589456e-06,
      "loss": 0.0944,
      "step": 24241
    },
    {
      "epoch": 0.7072174572612171,
      "grad_norm": 0.8037881174333962,
      "learning_rate": 2.0845522097127156e-06,
      "loss": 0.1069,
      "step": 24242
    },
    {
      "epoch": 0.7072466304918607,
      "grad_norm": 0.8585303715610015,
      "learning_rate": 2.08416841349491e-06,
      "loss": 0.1247,
      "step": 24243
    },
    {
      "epoch": 0.7072758037225042,
      "grad_norm": 0.9026023504426673,
      "learning_rate": 2.0837846433089516e-06,
      "loss": 0.118,
      "step": 24244
    },
    {
      "epoch": 0.7073049769531478,
      "grad_norm": 0.9478598025329202,
      "learning_rate": 2.0834008991582666e-06,
      "loss": 0.1312,
      "step": 24245
    },
    {
      "epoch": 0.7073341501837913,
      "grad_norm": 1.3842624142818118,
      "learning_rate": 2.083017181046284e-06,
      "loss": 0.1089,
      "step": 24246
    },
    {
      "epoch": 0.7073633234144349,
      "grad_norm": 0.8416271506515415,
      "learning_rate": 2.0826334889764254e-06,
      "loss": 0.1181,
      "step": 24247
    },
    {
      "epoch": 0.7073924966450785,
      "grad_norm": 0.9323315489613925,
      "learning_rate": 2.0822498229521195e-06,
      "loss": 0.1063,
      "step": 24248
    },
    {
      "epoch": 0.7074216698757221,
      "grad_norm": 1.0434978980243386,
      "learning_rate": 2.0818661829767915e-06,
      "loss": 0.148,
      "step": 24249
    },
    {
      "epoch": 0.7074508431063656,
      "grad_norm": 1.0057344394718042,
      "learning_rate": 2.081482569053866e-06,
      "loss": 0.1129,
      "step": 24250
    },
    {
      "epoch": 0.7074800163370092,
      "grad_norm": 0.8645375539330916,
      "learning_rate": 2.0810989811867656e-06,
      "loss": 0.136,
      "step": 24251
    },
    {
      "epoch": 0.7075091895676527,
      "grad_norm": 1.0281069821712148,
      "learning_rate": 2.0807154193789185e-06,
      "loss": 0.1147,
      "step": 24252
    },
    {
      "epoch": 0.7075383627982963,
      "grad_norm": 0.9376559980706509,
      "learning_rate": 2.0803318836337453e-06,
      "loss": 0.1102,
      "step": 24253
    },
    {
      "epoch": 0.7075675360289398,
      "grad_norm": 0.9411723033745302,
      "learning_rate": 2.0799483739546745e-06,
      "loss": 0.1153,
      "step": 24254
    },
    {
      "epoch": 0.7075967092595834,
      "grad_norm": 0.7318044208269741,
      "learning_rate": 2.0795648903451247e-06,
      "loss": 0.1083,
      "step": 24255
    },
    {
      "epoch": 0.707625882490227,
      "grad_norm": 0.8638369251224559,
      "learning_rate": 2.079181432808525e-06,
      "loss": 0.0976,
      "step": 24256
    },
    {
      "epoch": 0.7076550557208705,
      "grad_norm": 1.1350800421229454,
      "learning_rate": 2.0787980013482963e-06,
      "loss": 0.1056,
      "step": 24257
    },
    {
      "epoch": 0.7076842289515141,
      "grad_norm": 0.9195655840391993,
      "learning_rate": 2.0784145959678592e-06,
      "loss": 0.1165,
      "step": 24258
    },
    {
      "epoch": 0.7077134021821576,
      "grad_norm": 0.8873180874749146,
      "learning_rate": 2.0780312166706396e-06,
      "loss": 0.1298,
      "step": 24259
    },
    {
      "epoch": 0.7077425754128012,
      "grad_norm": 0.8909275358850974,
      "learning_rate": 2.0776478634600616e-06,
      "loss": 0.1162,
      "step": 24260
    },
    {
      "epoch": 0.7077717486434447,
      "grad_norm": 0.802875624164899,
      "learning_rate": 2.077264536339544e-06,
      "loss": 0.1368,
      "step": 24261
    },
    {
      "epoch": 0.7078009218740884,
      "grad_norm": 0.9929377481657958,
      "learning_rate": 2.076881235312512e-06,
      "loss": 0.1111,
      "step": 24262
    },
    {
      "epoch": 0.7078300951047319,
      "grad_norm": 1.263614547830916,
      "learning_rate": 2.0764979603823877e-06,
      "loss": 0.1292,
      "step": 24263
    },
    {
      "epoch": 0.7078592683353755,
      "grad_norm": 0.8915764046134468,
      "learning_rate": 2.076114711552589e-06,
      "loss": 0.1119,
      "step": 24264
    },
    {
      "epoch": 0.707888441566019,
      "grad_norm": 0.9872741977126354,
      "learning_rate": 2.0757314888265404e-06,
      "loss": 0.1076,
      "step": 24265
    },
    {
      "epoch": 0.7079176147966626,
      "grad_norm": 1.0237986457693453,
      "learning_rate": 2.075348292207665e-06,
      "loss": 0.1314,
      "step": 24266
    },
    {
      "epoch": 0.7079467880273062,
      "grad_norm": 1.2074351477954173,
      "learning_rate": 2.074965121699382e-06,
      "loss": 0.1433,
      "step": 24267
    },
    {
      "epoch": 0.7079759612579497,
      "grad_norm": 1.128488236197808,
      "learning_rate": 2.0745819773051103e-06,
      "loss": 0.1077,
      "step": 24268
    },
    {
      "epoch": 0.7080051344885933,
      "grad_norm": 0.6981280622894253,
      "learning_rate": 2.074198859028274e-06,
      "loss": 0.115,
      "step": 24269
    },
    {
      "epoch": 0.7080343077192368,
      "grad_norm": 1.0420269362369856,
      "learning_rate": 2.073815766872292e-06,
      "loss": 0.115,
      "step": 24270
    },
    {
      "epoch": 0.7080634809498804,
      "grad_norm": 0.8642225907002064,
      "learning_rate": 2.073432700840582e-06,
      "loss": 0.1263,
      "step": 24271
    },
    {
      "epoch": 0.7080926541805239,
      "grad_norm": 1.2527782822964186,
      "learning_rate": 2.073049660936567e-06,
      "loss": 0.1155,
      "step": 24272
    },
    {
      "epoch": 0.7081218274111675,
      "grad_norm": 1.03955845050689,
      "learning_rate": 2.072666647163667e-06,
      "loss": 0.1222,
      "step": 24273
    },
    {
      "epoch": 0.708151000641811,
      "grad_norm": 1.1281478865665038,
      "learning_rate": 2.0722836595253004e-06,
      "loss": 0.1242,
      "step": 24274
    },
    {
      "epoch": 0.7081801738724547,
      "grad_norm": 0.638413992170037,
      "learning_rate": 2.071900698024885e-06,
      "loss": 0.1027,
      "step": 24275
    },
    {
      "epoch": 0.7082093471030982,
      "grad_norm": 1.2789385595384781,
      "learning_rate": 2.0715177626658427e-06,
      "loss": 0.1112,
      "step": 24276
    },
    {
      "epoch": 0.7082385203337418,
      "grad_norm": 0.9262113633692753,
      "learning_rate": 2.071134853451589e-06,
      "loss": 0.1096,
      "step": 24277
    },
    {
      "epoch": 0.7082676935643853,
      "grad_norm": 0.9699446681662852,
      "learning_rate": 2.0707519703855446e-06,
      "loss": 0.1058,
      "step": 24278
    },
    {
      "epoch": 0.7082968667950289,
      "grad_norm": 0.8942764422388755,
      "learning_rate": 2.0703691134711284e-06,
      "loss": 0.1041,
      "step": 24279
    },
    {
      "epoch": 0.7083260400256725,
      "grad_norm": 0.8257282047171776,
      "learning_rate": 2.0699862827117576e-06,
      "loss": 0.1106,
      "step": 24280
    },
    {
      "epoch": 0.708355213256316,
      "grad_norm": 0.8997744291216947,
      "learning_rate": 2.069603478110848e-06,
      "loss": 0.1266,
      "step": 24281
    },
    {
      "epoch": 0.7083843864869596,
      "grad_norm": 0.8860122375314241,
      "learning_rate": 2.069220699671821e-06,
      "loss": 0.1163,
      "step": 24282
    },
    {
      "epoch": 0.7084135597176031,
      "grad_norm": 0.7941591419252904,
      "learning_rate": 2.0688379473980904e-06,
      "loss": 0.1143,
      "step": 24283
    },
    {
      "epoch": 0.7084427329482467,
      "grad_norm": 1.0087317633787842,
      "learning_rate": 2.068455221293076e-06,
      "loss": 0.1213,
      "step": 24284
    },
    {
      "epoch": 0.7084719061788902,
      "grad_norm": 0.8425652259654369,
      "learning_rate": 2.068072521360192e-06,
      "loss": 0.1065,
      "step": 24285
    },
    {
      "epoch": 0.7085010794095338,
      "grad_norm": 0.839806586690853,
      "learning_rate": 2.067689847602859e-06,
      "loss": 0.1361,
      "step": 24286
    },
    {
      "epoch": 0.7085302526401773,
      "grad_norm": 0.6598314202266496,
      "learning_rate": 2.0673072000244902e-06,
      "loss": 0.122,
      "step": 24287
    },
    {
      "epoch": 0.7085594258708209,
      "grad_norm": 0.9482659430429057,
      "learning_rate": 2.0669245786285015e-06,
      "loss": 0.1239,
      "step": 24288
    },
    {
      "epoch": 0.7085885991014645,
      "grad_norm": 0.8901788825063378,
      "learning_rate": 2.0665419834183093e-06,
      "loss": 0.1107,
      "step": 24289
    },
    {
      "epoch": 0.7086177723321081,
      "grad_norm": 0.7752036240710392,
      "learning_rate": 2.0661594143973323e-06,
      "loss": 0.1134,
      "step": 24290
    },
    {
      "epoch": 0.7086469455627517,
      "grad_norm": 0.9534933633836123,
      "learning_rate": 2.065776871568982e-06,
      "loss": 0.1192,
      "step": 24291
    },
    {
      "epoch": 0.7086761187933952,
      "grad_norm": 0.7428279053234718,
      "learning_rate": 2.0653943549366768e-06,
      "loss": 0.1058,
      "step": 24292
    },
    {
      "epoch": 0.7087052920240388,
      "grad_norm": 0.8873485982934974,
      "learning_rate": 2.0650118645038304e-06,
      "loss": 0.0999,
      "step": 24293
    },
    {
      "epoch": 0.7087344652546823,
      "grad_norm": 0.6832893915531372,
      "learning_rate": 2.0646294002738555e-06,
      "loss": 0.1284,
      "step": 24294
    },
    {
      "epoch": 0.7087636384853259,
      "grad_norm": 0.9197184713157679,
      "learning_rate": 2.0642469622501686e-06,
      "loss": 0.1205,
      "step": 24295
    },
    {
      "epoch": 0.7087928117159694,
      "grad_norm": 0.7557684728704201,
      "learning_rate": 2.0638645504361858e-06,
      "loss": 0.1118,
      "step": 24296
    },
    {
      "epoch": 0.708821984946613,
      "grad_norm": 0.6931123673153663,
      "learning_rate": 2.0634821648353197e-06,
      "loss": 0.1037,
      "step": 24297
    },
    {
      "epoch": 0.7088511581772565,
      "grad_norm": 0.855582365801452,
      "learning_rate": 2.063099805450982e-06,
      "loss": 0.1334,
      "step": 24298
    },
    {
      "epoch": 0.7088803314079001,
      "grad_norm": 0.8469217777266271,
      "learning_rate": 2.0627174722865894e-06,
      "loss": 0.1021,
      "step": 24299
    },
    {
      "epoch": 0.7089095046385436,
      "grad_norm": 0.8223315291781041,
      "learning_rate": 2.062335165345555e-06,
      "loss": 0.1028,
      "step": 24300
    },
    {
      "epoch": 0.7089386778691872,
      "grad_norm": 0.8780101299214722,
      "learning_rate": 2.0619528846312882e-06,
      "loss": 0.1283,
      "step": 24301
    },
    {
      "epoch": 0.7089678510998308,
      "grad_norm": 0.6556728397295637,
      "learning_rate": 2.061570630147205e-06,
      "loss": 0.1347,
      "step": 24302
    },
    {
      "epoch": 0.7089970243304744,
      "grad_norm": 0.9950550449399701,
      "learning_rate": 2.0611884018967195e-06,
      "loss": 0.1252,
      "step": 24303
    },
    {
      "epoch": 0.709026197561118,
      "grad_norm": 0.8847755573130276,
      "learning_rate": 2.0608061998832423e-06,
      "loss": 0.1046,
      "step": 24304
    },
    {
      "epoch": 0.7090553707917615,
      "grad_norm": 0.7599713932287432,
      "learning_rate": 2.0604240241101843e-06,
      "loss": 0.1364,
      "step": 24305
    },
    {
      "epoch": 0.7090845440224051,
      "grad_norm": 1.1163898676023372,
      "learning_rate": 2.0600418745809602e-06,
      "loss": 0.1397,
      "step": 24306
    },
    {
      "epoch": 0.7091137172530486,
      "grad_norm": 0.7909592846118836,
      "learning_rate": 2.059659751298979e-06,
      "loss": 0.1252,
      "step": 24307
    },
    {
      "epoch": 0.7091428904836922,
      "grad_norm": 0.9164340973327898,
      "learning_rate": 2.0592776542676535e-06,
      "loss": 0.157,
      "step": 24308
    },
    {
      "epoch": 0.7091720637143357,
      "grad_norm": 0.9238638354559386,
      "learning_rate": 2.0588955834903966e-06,
      "loss": 0.1241,
      "step": 24309
    },
    {
      "epoch": 0.7092012369449793,
      "grad_norm": 0.8793393489014347,
      "learning_rate": 2.0585135389706185e-06,
      "loss": 0.1131,
      "step": 24310
    },
    {
      "epoch": 0.7092304101756228,
      "grad_norm": 0.6373542181329356,
      "learning_rate": 2.058131520711727e-06,
      "loss": 0.1114,
      "step": 24311
    },
    {
      "epoch": 0.7092595834062664,
      "grad_norm": 0.8941034902251721,
      "learning_rate": 2.0577495287171374e-06,
      "loss": 0.1167,
      "step": 24312
    },
    {
      "epoch": 0.7092887566369099,
      "grad_norm": 0.7132029447358353,
      "learning_rate": 2.057367562990255e-06,
      "loss": 0.1386,
      "step": 24313
    },
    {
      "epoch": 0.7093179298675535,
      "grad_norm": 1.4641892671757206,
      "learning_rate": 2.0569856235344947e-06,
      "loss": 0.1013,
      "step": 24314
    },
    {
      "epoch": 0.709347103098197,
      "grad_norm": 0.9844328535665606,
      "learning_rate": 2.0566037103532628e-06,
      "loss": 0.1235,
      "step": 24315
    },
    {
      "epoch": 0.7093762763288407,
      "grad_norm": 0.9058529002089648,
      "learning_rate": 2.0562218234499714e-06,
      "loss": 0.1089,
      "step": 24316
    },
    {
      "epoch": 0.7094054495594843,
      "grad_norm": 0.828066655121191,
      "learning_rate": 2.055839962828029e-06,
      "loss": 0.1327,
      "step": 24317
    },
    {
      "epoch": 0.7094346227901278,
      "grad_norm": 0.7649869333445348,
      "learning_rate": 2.055458128490843e-06,
      "loss": 0.1158,
      "step": 24318
    },
    {
      "epoch": 0.7094637960207714,
      "grad_norm": 0.8032000343573327,
      "learning_rate": 2.055076320441824e-06,
      "loss": 0.128,
      "step": 24319
    },
    {
      "epoch": 0.7094929692514149,
      "grad_norm": 0.9063916433840569,
      "learning_rate": 2.0546945386843826e-06,
      "loss": 0.1304,
      "step": 24320
    },
    {
      "epoch": 0.7095221424820585,
      "grad_norm": 0.7374586396840892,
      "learning_rate": 2.0543127832219246e-06,
      "loss": 0.1068,
      "step": 24321
    },
    {
      "epoch": 0.709551315712702,
      "grad_norm": 0.7476665841702215,
      "learning_rate": 2.053931054057857e-06,
      "loss": 0.122,
      "step": 24322
    },
    {
      "epoch": 0.7095804889433456,
      "grad_norm": 0.8550252027256865,
      "learning_rate": 2.0535493511955925e-06,
      "loss": 0.1347,
      "step": 24323
    },
    {
      "epoch": 0.7096096621739891,
      "grad_norm": 0.8523804032655143,
      "learning_rate": 2.053167674638533e-06,
      "loss": 0.1207,
      "step": 24324
    },
    {
      "epoch": 0.7096388354046327,
      "grad_norm": 0.6336614375024203,
      "learning_rate": 2.0527860243900898e-06,
      "loss": 0.0878,
      "step": 24325
    },
    {
      "epoch": 0.7096680086352762,
      "grad_norm": 0.7924964057217228,
      "learning_rate": 2.0524044004536716e-06,
      "loss": 0.1415,
      "step": 24326
    },
    {
      "epoch": 0.7096971818659198,
      "grad_norm": 0.8762698703750451,
      "learning_rate": 2.052022802832682e-06,
      "loss": 0.1052,
      "step": 24327
    },
    {
      "epoch": 0.7097263550965633,
      "grad_norm": 0.9481505755848734,
      "learning_rate": 2.0516412315305282e-06,
      "loss": 0.1204,
      "step": 24328
    },
    {
      "epoch": 0.709755528327207,
      "grad_norm": 0.9267284542420775,
      "learning_rate": 2.0512596865506195e-06,
      "loss": 0.1297,
      "step": 24329
    },
    {
      "epoch": 0.7097847015578506,
      "grad_norm": 0.8690038560467187,
      "learning_rate": 2.05087816789636e-06,
      "loss": 0.1129,
      "step": 24330
    },
    {
      "epoch": 0.7098138747884941,
      "grad_norm": 0.9277653152803282,
      "learning_rate": 2.0504966755711547e-06,
      "loss": 0.107,
      "step": 24331
    },
    {
      "epoch": 0.7098430480191377,
      "grad_norm": 0.9933305147356886,
      "learning_rate": 2.0501152095784105e-06,
      "loss": 0.1085,
      "step": 24332
    },
    {
      "epoch": 0.7098722212497812,
      "grad_norm": 0.9288477974599171,
      "learning_rate": 2.049733769921536e-06,
      "loss": 0.1298,
      "step": 24333
    },
    {
      "epoch": 0.7099013944804248,
      "grad_norm": 1.02337141711138,
      "learning_rate": 2.0493523566039334e-06,
      "loss": 0.1251,
      "step": 24334
    },
    {
      "epoch": 0.7099305677110683,
      "grad_norm": 1.1745951293250398,
      "learning_rate": 2.0489709696290073e-06,
      "loss": 0.1173,
      "step": 24335
    },
    {
      "epoch": 0.7099597409417119,
      "grad_norm": 0.8583992001256617,
      "learning_rate": 2.0485896090001657e-06,
      "loss": 0.1343,
      "step": 24336
    },
    {
      "epoch": 0.7099889141723554,
      "grad_norm": 0.8578304036689427,
      "learning_rate": 2.0482082747208092e-06,
      "loss": 0.1042,
      "step": 24337
    },
    {
      "epoch": 0.710018087402999,
      "grad_norm": 0.942450149665433,
      "learning_rate": 2.0478269667943453e-06,
      "loss": 0.1128,
      "step": 24338
    },
    {
      "epoch": 0.7100472606336425,
      "grad_norm": 0.9497845138690882,
      "learning_rate": 2.047445685224179e-06,
      "loss": 0.1183,
      "step": 24339
    },
    {
      "epoch": 0.7100764338642861,
      "grad_norm": 0.7537585600108339,
      "learning_rate": 2.047064430013713e-06,
      "loss": 0.1251,
      "step": 24340
    },
    {
      "epoch": 0.7101056070949296,
      "grad_norm": 0.9194084478200095,
      "learning_rate": 2.0466832011663486e-06,
      "loss": 0.1167,
      "step": 24341
    },
    {
      "epoch": 0.7101347803255732,
      "grad_norm": 0.7379825674920547,
      "learning_rate": 2.0463019986854932e-06,
      "loss": 0.1152,
      "step": 24342
    },
    {
      "epoch": 0.7101639535562169,
      "grad_norm": 0.8357722724183001,
      "learning_rate": 2.045920822574547e-06,
      "loss": 0.1121,
      "step": 24343
    },
    {
      "epoch": 0.7101931267868604,
      "grad_norm": 0.9264669869312903,
      "learning_rate": 2.0455396728369165e-06,
      "loss": 0.1283,
      "step": 24344
    },
    {
      "epoch": 0.710222300017504,
      "grad_norm": 0.8289058552466958,
      "learning_rate": 2.045158549476e-06,
      "loss": 0.1166,
      "step": 24345
    },
    {
      "epoch": 0.7102514732481475,
      "grad_norm": 0.8389492003646455,
      "learning_rate": 2.0447774524952054e-06,
      "loss": 0.1097,
      "step": 24346
    },
    {
      "epoch": 0.7102806464787911,
      "grad_norm": 0.7369558637489452,
      "learning_rate": 2.0443963818979318e-06,
      "loss": 0.1299,
      "step": 24347
    },
    {
      "epoch": 0.7103098197094346,
      "grad_norm": 1.345947586898109,
      "learning_rate": 2.0440153376875797e-06,
      "loss": 0.1277,
      "step": 24348
    },
    {
      "epoch": 0.7103389929400782,
      "grad_norm": 0.8749608023462779,
      "learning_rate": 2.0436343198675535e-06,
      "loss": 0.1122,
      "step": 24349
    },
    {
      "epoch": 0.7103681661707217,
      "grad_norm": 0.9801531819234112,
      "learning_rate": 2.0432533284412556e-06,
      "loss": 0.1079,
      "step": 24350
    },
    {
      "epoch": 0.7103973394013653,
      "grad_norm": 0.7246153212649536,
      "learning_rate": 2.0428723634120864e-06,
      "loss": 0.1288,
      "step": 24351
    },
    {
      "epoch": 0.7104265126320088,
      "grad_norm": 0.8065126575482403,
      "learning_rate": 2.042491424783445e-06,
      "loss": 0.0999,
      "step": 24352
    },
    {
      "epoch": 0.7104556858626524,
      "grad_norm": 0.8847770818441456,
      "learning_rate": 2.042110512558736e-06,
      "loss": 0.1155,
      "step": 24353
    },
    {
      "epoch": 0.710484859093296,
      "grad_norm": 0.86288922139475,
      "learning_rate": 2.0417296267413562e-06,
      "loss": 0.1134,
      "step": 24354
    },
    {
      "epoch": 0.7105140323239395,
      "grad_norm": 1.185462512963098,
      "learning_rate": 2.0413487673347083e-06,
      "loss": 0.1066,
      "step": 24355
    },
    {
      "epoch": 0.7105432055545832,
      "grad_norm": 1.3018141769592924,
      "learning_rate": 2.040967934342194e-06,
      "loss": 0.1435,
      "step": 24356
    },
    {
      "epoch": 0.7105723787852267,
      "grad_norm": 0.8400628496943888,
      "learning_rate": 2.040587127767212e-06,
      "loss": 0.1224,
      "step": 24357
    },
    {
      "epoch": 0.7106015520158703,
      "grad_norm": 0.8008670181168767,
      "learning_rate": 2.0402063476131593e-06,
      "loss": 0.1306,
      "step": 24358
    },
    {
      "epoch": 0.7106307252465138,
      "grad_norm": 0.8951122619476052,
      "learning_rate": 2.03982559388344e-06,
      "loss": 0.1312,
      "step": 24359
    },
    {
      "epoch": 0.7106598984771574,
      "grad_norm": 1.170453836271604,
      "learning_rate": 2.039444866581451e-06,
      "loss": 0.1099,
      "step": 24360
    },
    {
      "epoch": 0.7106890717078009,
      "grad_norm": 1.05398976968565,
      "learning_rate": 2.03906416571059e-06,
      "loss": 0.1231,
      "step": 24361
    },
    {
      "epoch": 0.7107182449384445,
      "grad_norm": 4.280590207084984,
      "learning_rate": 2.0386834912742566e-06,
      "loss": 0.1252,
      "step": 24362
    },
    {
      "epoch": 0.710747418169088,
      "grad_norm": 0.8509599858629096,
      "learning_rate": 2.0383028432758522e-06,
      "loss": 0.1259,
      "step": 24363
    },
    {
      "epoch": 0.7107765913997316,
      "grad_norm": 1.0586904553300105,
      "learning_rate": 2.037922221718773e-06,
      "loss": 0.1291,
      "step": 24364
    },
    {
      "epoch": 0.7108057646303751,
      "grad_norm": 0.7578885581554284,
      "learning_rate": 2.037541626606416e-06,
      "loss": 0.1159,
      "step": 24365
    },
    {
      "epoch": 0.7108349378610187,
      "grad_norm": 0.9950357619484919,
      "learning_rate": 2.037161057942179e-06,
      "loss": 0.125,
      "step": 24366
    },
    {
      "epoch": 0.7108641110916623,
      "grad_norm": 0.938517775451267,
      "learning_rate": 2.036780515729463e-06,
      "loss": 0.1073,
      "step": 24367
    },
    {
      "epoch": 0.7108932843223058,
      "grad_norm": 1.1184516395218789,
      "learning_rate": 2.0363999999716618e-06,
      "loss": 0.128,
      "step": 24368
    },
    {
      "epoch": 0.7109224575529494,
      "grad_norm": 0.9023110909870798,
      "learning_rate": 2.036019510672175e-06,
      "loss": 0.1273,
      "step": 24369
    },
    {
      "epoch": 0.710951630783593,
      "grad_norm": 1.4001399121659068,
      "learning_rate": 2.035639047834399e-06,
      "loss": 0.1337,
      "step": 24370
    },
    {
      "epoch": 0.7109808040142366,
      "grad_norm": 2.0301668528336987,
      "learning_rate": 2.035258611461728e-06,
      "loss": 0.1075,
      "step": 24371
    },
    {
      "epoch": 0.7110099772448801,
      "grad_norm": 1.1384594601914078,
      "learning_rate": 2.03487820155756e-06,
      "loss": 0.1136,
      "step": 24372
    },
    {
      "epoch": 0.7110391504755237,
      "grad_norm": 1.107004423559062,
      "learning_rate": 2.034497818125294e-06,
      "loss": 0.1233,
      "step": 24373
    },
    {
      "epoch": 0.7110683237061672,
      "grad_norm": 1.0133925683935907,
      "learning_rate": 2.0341174611683235e-06,
      "loss": 0.13,
      "step": 24374
    },
    {
      "epoch": 0.7110974969368108,
      "grad_norm": 0.7104769885495591,
      "learning_rate": 2.033737130690042e-06,
      "loss": 0.0905,
      "step": 24375
    },
    {
      "epoch": 0.7111266701674543,
      "grad_norm": 1.422098025882749,
      "learning_rate": 2.0333568266938498e-06,
      "loss": 0.1442,
      "step": 24376
    },
    {
      "epoch": 0.7111558433980979,
      "grad_norm": 1.0058471267758462,
      "learning_rate": 2.032976549183139e-06,
      "loss": 0.116,
      "step": 24377
    },
    {
      "epoch": 0.7111850166287415,
      "grad_norm": 0.927592664479417,
      "learning_rate": 2.0325962981613036e-06,
      "loss": 0.1098,
      "step": 24378
    },
    {
      "epoch": 0.711214189859385,
      "grad_norm": 0.7931636033481047,
      "learning_rate": 2.0322160736317404e-06,
      "loss": 0.0929,
      "step": 24379
    },
    {
      "epoch": 0.7112433630900286,
      "grad_norm": 0.9806693334105764,
      "learning_rate": 2.031835875597845e-06,
      "loss": 0.1099,
      "step": 24380
    },
    {
      "epoch": 0.7112725363206721,
      "grad_norm": 1.0011884145644832,
      "learning_rate": 2.0314557040630106e-06,
      "loss": 0.1041,
      "step": 24381
    },
    {
      "epoch": 0.7113017095513157,
      "grad_norm": 0.9600760848975869,
      "learning_rate": 2.031075559030629e-06,
      "loss": 0.1219,
      "step": 24382
    },
    {
      "epoch": 0.7113308827819593,
      "grad_norm": 0.8918457468674876,
      "learning_rate": 2.0306954405040984e-06,
      "loss": 0.1084,
      "step": 24383
    },
    {
      "epoch": 0.7113600560126029,
      "grad_norm": 0.9725798074427667,
      "learning_rate": 2.0303153484868077e-06,
      "loss": 0.1124,
      "step": 24384
    },
    {
      "epoch": 0.7113892292432464,
      "grad_norm": 0.9163828993242817,
      "learning_rate": 2.0299352829821535e-06,
      "loss": 0.1243,
      "step": 24385
    },
    {
      "epoch": 0.71141840247389,
      "grad_norm": 1.2597823766770704,
      "learning_rate": 2.029555243993529e-06,
      "loss": 0.1184,
      "step": 24386
    },
    {
      "epoch": 0.7114475757045335,
      "grad_norm": 1.1218957050688485,
      "learning_rate": 2.029175231524326e-06,
      "loss": 0.1266,
      "step": 24387
    },
    {
      "epoch": 0.7114767489351771,
      "grad_norm": 0.8709065580069354,
      "learning_rate": 2.0287952455779365e-06,
      "loss": 0.1238,
      "step": 24388
    },
    {
      "epoch": 0.7115059221658206,
      "grad_norm": 0.8367507895732899,
      "learning_rate": 2.028415286157755e-06,
      "loss": 0.0999,
      "step": 24389
    },
    {
      "epoch": 0.7115350953964642,
      "grad_norm": 0.9890103427532823,
      "learning_rate": 2.0280353532671704e-06,
      "loss": 0.1289,
      "step": 24390
    },
    {
      "epoch": 0.7115642686271078,
      "grad_norm": 1.0479322528274433,
      "learning_rate": 2.0276554469095787e-06,
      "loss": 0.1257,
      "step": 24391
    },
    {
      "epoch": 0.7115934418577513,
      "grad_norm": 0.9110123973017815,
      "learning_rate": 2.027275567088368e-06,
      "loss": 0.0999,
      "step": 24392
    },
    {
      "epoch": 0.7116226150883949,
      "grad_norm": 0.8516766976858033,
      "learning_rate": 2.0268957138069336e-06,
      "loss": 0.1203,
      "step": 24393
    },
    {
      "epoch": 0.7116517883190384,
      "grad_norm": 0.8787287052162154,
      "learning_rate": 2.0265158870686636e-06,
      "loss": 0.1271,
      "step": 24394
    },
    {
      "epoch": 0.711680961549682,
      "grad_norm": 1.320506184430548,
      "learning_rate": 2.0261360868769487e-06,
      "loss": 0.1239,
      "step": 24395
    },
    {
      "epoch": 0.7117101347803255,
      "grad_norm": 0.9819746947924424,
      "learning_rate": 2.0257563132351808e-06,
      "loss": 0.1338,
      "step": 24396
    },
    {
      "epoch": 0.7117393080109692,
      "grad_norm": 0.8328015881292921,
      "learning_rate": 2.0253765661467523e-06,
      "loss": 0.1228,
      "step": 24397
    },
    {
      "epoch": 0.7117684812416127,
      "grad_norm": 0.9631663464689367,
      "learning_rate": 2.0249968456150497e-06,
      "loss": 0.1373,
      "step": 24398
    },
    {
      "epoch": 0.7117976544722563,
      "grad_norm": 1.0840115522703666,
      "learning_rate": 2.024617151643467e-06,
      "loss": 0.099,
      "step": 24399
    },
    {
      "epoch": 0.7118268277028998,
      "grad_norm": 0.7550602826241688,
      "learning_rate": 2.024237484235392e-06,
      "loss": 0.0898,
      "step": 24400
    },
    {
      "epoch": 0.7118560009335434,
      "grad_norm": 0.8204516668833315,
      "learning_rate": 2.023857843394213e-06,
      "loss": 0.1115,
      "step": 24401
    },
    {
      "epoch": 0.711885174164187,
      "grad_norm": 0.9812200065073052,
      "learning_rate": 2.0234782291233207e-06,
      "loss": 0.1138,
      "step": 24402
    },
    {
      "epoch": 0.7119143473948305,
      "grad_norm": 1.069486808913791,
      "learning_rate": 2.0230986414261056e-06,
      "loss": 0.1037,
      "step": 24403
    },
    {
      "epoch": 0.7119435206254741,
      "grad_norm": 0.7081987164093422,
      "learning_rate": 2.0227190803059554e-06,
      "loss": 0.1047,
      "step": 24404
    },
    {
      "epoch": 0.7119726938561176,
      "grad_norm": 0.6843058317568871,
      "learning_rate": 2.0223395457662572e-06,
      "loss": 0.1349,
      "step": 24405
    },
    {
      "epoch": 0.7120018670867612,
      "grad_norm": 0.8905275066612361,
      "learning_rate": 2.0219600378104014e-06,
      "loss": 0.1478,
      "step": 24406
    },
    {
      "epoch": 0.7120310403174047,
      "grad_norm": 0.8933715644007169,
      "learning_rate": 2.021580556441776e-06,
      "loss": 0.0965,
      "step": 24407
    },
    {
      "epoch": 0.7120602135480483,
      "grad_norm": 1.0223465465187809,
      "learning_rate": 2.0212011016637667e-06,
      "loss": 0.1127,
      "step": 24408
    },
    {
      "epoch": 0.7120893867786918,
      "grad_norm": 0.7702065574061616,
      "learning_rate": 2.0208216734797632e-06,
      "loss": 0.1162,
      "step": 24409
    },
    {
      "epoch": 0.7121185600093355,
      "grad_norm": 0.9735199034492721,
      "learning_rate": 2.0204422718931538e-06,
      "loss": 0.1361,
      "step": 24410
    },
    {
      "epoch": 0.712147733239979,
      "grad_norm": 0.8188490262918654,
      "learning_rate": 2.0200628969073248e-06,
      "loss": 0.1074,
      "step": 24411
    },
    {
      "epoch": 0.7121769064706226,
      "grad_norm": 1.2201197741130494,
      "learning_rate": 2.019683548525661e-06,
      "loss": 0.1255,
      "step": 24412
    },
    {
      "epoch": 0.7122060797012661,
      "grad_norm": 1.057590408778701,
      "learning_rate": 2.0193042267515526e-06,
      "loss": 0.1107,
      "step": 24413
    },
    {
      "epoch": 0.7122352529319097,
      "grad_norm": 0.7202570838943055,
      "learning_rate": 2.018924931588383e-06,
      "loss": 0.1212,
      "step": 24414
    },
    {
      "epoch": 0.7122644261625533,
      "grad_norm": 0.7679363626552997,
      "learning_rate": 2.01854566303954e-06,
      "loss": 0.1191,
      "step": 24415
    },
    {
      "epoch": 0.7122935993931968,
      "grad_norm": 1.1375598628888894,
      "learning_rate": 2.0181664211084114e-06,
      "loss": 0.1163,
      "step": 24416
    },
    {
      "epoch": 0.7123227726238404,
      "grad_norm": 0.903098200513465,
      "learning_rate": 2.017787205798381e-06,
      "loss": 0.1203,
      "step": 24417
    },
    {
      "epoch": 0.7123519458544839,
      "grad_norm": 0.7710524434318854,
      "learning_rate": 2.017408017112833e-06,
      "loss": 0.1228,
      "step": 24418
    },
    {
      "epoch": 0.7123811190851275,
      "grad_norm": 0.8126255961949865,
      "learning_rate": 2.017028855055156e-06,
      "loss": 0.1241,
      "step": 24419
    },
    {
      "epoch": 0.712410292315771,
      "grad_norm": 0.8710504505193271,
      "learning_rate": 2.016649719628731e-06,
      "loss": 0.1165,
      "step": 24420
    },
    {
      "epoch": 0.7124394655464146,
      "grad_norm": 0.7745623945296772,
      "learning_rate": 2.0162706108369473e-06,
      "loss": 0.1196,
      "step": 24421
    },
    {
      "epoch": 0.7124686387770581,
      "grad_norm": 0.9979151301807482,
      "learning_rate": 2.0158915286831852e-06,
      "loss": 0.1055,
      "step": 24422
    },
    {
      "epoch": 0.7124978120077017,
      "grad_norm": 0.7711457160494226,
      "learning_rate": 2.0155124731708337e-06,
      "loss": 0.1141,
      "step": 24423
    },
    {
      "epoch": 0.7125269852383453,
      "grad_norm": 0.771775854309958,
      "learning_rate": 2.015133444303274e-06,
      "loss": 0.1099,
      "step": 24424
    },
    {
      "epoch": 0.7125561584689889,
      "grad_norm": 0.9915515612916951,
      "learning_rate": 2.0147544420838883e-06,
      "loss": 0.1063,
      "step": 24425
    },
    {
      "epoch": 0.7125853316996325,
      "grad_norm": 0.7671812323739688,
      "learning_rate": 2.014375466516062e-06,
      "loss": 0.1266,
      "step": 24426
    },
    {
      "epoch": 0.712614504930276,
      "grad_norm": 0.9770055202806369,
      "learning_rate": 2.013996517603181e-06,
      "loss": 0.1341,
      "step": 24427
    },
    {
      "epoch": 0.7126436781609196,
      "grad_norm": 0.8835667304460261,
      "learning_rate": 2.013617595348625e-06,
      "loss": 0.1451,
      "step": 24428
    },
    {
      "epoch": 0.7126728513915631,
      "grad_norm": 0.7354664119934039,
      "learning_rate": 2.0132386997557795e-06,
      "loss": 0.1238,
      "step": 24429
    },
    {
      "epoch": 0.7127020246222067,
      "grad_norm": 0.7179745594529715,
      "learning_rate": 2.0128598308280255e-06,
      "loss": 0.1088,
      "step": 24430
    },
    {
      "epoch": 0.7127311978528502,
      "grad_norm": 0.866216764674149,
      "learning_rate": 2.0124809885687448e-06,
      "loss": 0.1171,
      "step": 24431
    },
    {
      "epoch": 0.7127603710834938,
      "grad_norm": 0.8510728509096759,
      "learning_rate": 2.0121021729813207e-06,
      "loss": 0.133,
      "step": 24432
    },
    {
      "epoch": 0.7127895443141373,
      "grad_norm": 0.7179885008171474,
      "learning_rate": 2.0117233840691364e-06,
      "loss": 0.1385,
      "step": 24433
    },
    {
      "epoch": 0.7128187175447809,
      "grad_norm": 0.7350504815158918,
      "learning_rate": 2.0113446218355727e-06,
      "loss": 0.1302,
      "step": 24434
    },
    {
      "epoch": 0.7128478907754244,
      "grad_norm": 1.4719421673535458,
      "learning_rate": 2.0109658862840085e-06,
      "loss": 0.1232,
      "step": 24435
    },
    {
      "epoch": 0.712877064006068,
      "grad_norm": 0.8215421131912853,
      "learning_rate": 2.0105871774178293e-06,
      "loss": 0.1176,
      "step": 24436
    },
    {
      "epoch": 0.7129062372367116,
      "grad_norm": 0.8400742032577411,
      "learning_rate": 2.0102084952404145e-06,
      "loss": 0.1281,
      "step": 24437
    },
    {
      "epoch": 0.7129354104673552,
      "grad_norm": 0.9365317718177184,
      "learning_rate": 2.0098298397551423e-06,
      "loss": 0.1291,
      "step": 24438
    },
    {
      "epoch": 0.7129645836979988,
      "grad_norm": 0.964290388631854,
      "learning_rate": 2.009451210965396e-06,
      "loss": 0.1121,
      "step": 24439
    },
    {
      "epoch": 0.7129937569286423,
      "grad_norm": 1.1698157614872702,
      "learning_rate": 2.0090726088745566e-06,
      "loss": 0.1408,
      "step": 24440
    },
    {
      "epoch": 0.7130229301592859,
      "grad_norm": 0.8204052766706701,
      "learning_rate": 2.008694033486003e-06,
      "loss": 0.1213,
      "step": 24441
    },
    {
      "epoch": 0.7130521033899294,
      "grad_norm": 0.7812560378061456,
      "learning_rate": 2.008315484803114e-06,
      "loss": 0.1138,
      "step": 24442
    },
    {
      "epoch": 0.713081276620573,
      "grad_norm": 1.079505207040131,
      "learning_rate": 2.007936962829271e-06,
      "loss": 0.1454,
      "step": 24443
    },
    {
      "epoch": 0.7131104498512165,
      "grad_norm": 0.8291730172821995,
      "learning_rate": 2.0075584675678516e-06,
      "loss": 0.1129,
      "step": 24444
    },
    {
      "epoch": 0.7131396230818601,
      "grad_norm": 0.7556905008613477,
      "learning_rate": 2.007179999022235e-06,
      "loss": 0.1177,
      "step": 24445
    },
    {
      "epoch": 0.7131687963125036,
      "grad_norm": 1.0318833643097356,
      "learning_rate": 2.006801557195803e-06,
      "loss": 0.1074,
      "step": 24446
    },
    {
      "epoch": 0.7131979695431472,
      "grad_norm": 0.9405996013697249,
      "learning_rate": 2.006423142091933e-06,
      "loss": 0.1127,
      "step": 24447
    },
    {
      "epoch": 0.7132271427737907,
      "grad_norm": 0.9335895698959623,
      "learning_rate": 2.006044753714e-06,
      "loss": 0.1317,
      "step": 24448
    },
    {
      "epoch": 0.7132563160044343,
      "grad_norm": 1.1095474684342954,
      "learning_rate": 2.0056663920653865e-06,
      "loss": 0.1236,
      "step": 24449
    },
    {
      "epoch": 0.7132854892350778,
      "grad_norm": 0.7760666738870904,
      "learning_rate": 2.0052880571494665e-06,
      "loss": 0.1319,
      "step": 24450
    },
    {
      "epoch": 0.7133146624657215,
      "grad_norm": 0.7959525494247813,
      "learning_rate": 2.004909748969622e-06,
      "loss": 0.129,
      "step": 24451
    },
    {
      "epoch": 0.7133438356963651,
      "grad_norm": 1.0227122749939923,
      "learning_rate": 2.0045314675292265e-06,
      "loss": 0.119,
      "step": 24452
    },
    {
      "epoch": 0.7133730089270086,
      "grad_norm": 0.8146023359839762,
      "learning_rate": 2.004153212831661e-06,
      "loss": 0.1148,
      "step": 24453
    },
    {
      "epoch": 0.7134021821576522,
      "grad_norm": 1.1375692905926735,
      "learning_rate": 2.0037749848803002e-06,
      "loss": 0.1268,
      "step": 24454
    },
    {
      "epoch": 0.7134313553882957,
      "grad_norm": 0.8212560295520828,
      "learning_rate": 2.0033967836785196e-06,
      "loss": 0.1198,
      "step": 24455
    },
    {
      "epoch": 0.7134605286189393,
      "grad_norm": 0.9104696570081723,
      "learning_rate": 2.0030186092296965e-06,
      "loss": 0.1333,
      "step": 24456
    },
    {
      "epoch": 0.7134897018495828,
      "grad_norm": 0.9347454139472186,
      "learning_rate": 2.00264046153721e-06,
      "loss": 0.1143,
      "step": 24457
    },
    {
      "epoch": 0.7135188750802264,
      "grad_norm": 0.6949505851193986,
      "learning_rate": 2.002262340604432e-06,
      "loss": 0.1094,
      "step": 24458
    },
    {
      "epoch": 0.7135480483108699,
      "grad_norm": 0.7705963383300264,
      "learning_rate": 2.0018842464347427e-06,
      "loss": 0.1431,
      "step": 24459
    },
    {
      "epoch": 0.7135772215415135,
      "grad_norm": 0.9104706678332688,
      "learning_rate": 2.001506179031514e-06,
      "loss": 0.1031,
      "step": 24460
    },
    {
      "epoch": 0.713606394772157,
      "grad_norm": 0.7063606372601169,
      "learning_rate": 2.001128138398121e-06,
      "loss": 0.116,
      "step": 24461
    },
    {
      "epoch": 0.7136355680028006,
      "grad_norm": 0.7953551910751111,
      "learning_rate": 2.0007501245379408e-06,
      "loss": 0.1087,
      "step": 24462
    },
    {
      "epoch": 0.7136647412334441,
      "grad_norm": 1.077359935229099,
      "learning_rate": 2.000372137454349e-06,
      "loss": 0.138,
      "step": 24463
    },
    {
      "epoch": 0.7136939144640878,
      "grad_norm": 0.9097983110699532,
      "learning_rate": 1.999994177150718e-06,
      "loss": 0.1165,
      "step": 24464
    },
    {
      "epoch": 0.7137230876947314,
      "grad_norm": 0.7102176656054635,
      "learning_rate": 1.9996162436304217e-06,
      "loss": 0.1092,
      "step": 24465
    },
    {
      "epoch": 0.7137522609253749,
      "grad_norm": 0.8545017317221432,
      "learning_rate": 1.9992383368968364e-06,
      "loss": 0.1377,
      "step": 24466
    },
    {
      "epoch": 0.7137814341560185,
      "grad_norm": 0.9044676141278017,
      "learning_rate": 1.9988604569533353e-06,
      "loss": 0.1191,
      "step": 24467
    },
    {
      "epoch": 0.713810607386662,
      "grad_norm": 0.7449057819921429,
      "learning_rate": 1.99848260380329e-06,
      "loss": 0.121,
      "step": 24468
    },
    {
      "epoch": 0.7138397806173056,
      "grad_norm": 0.8174339871375383,
      "learning_rate": 1.9981047774500755e-06,
      "loss": 0.1368,
      "step": 24469
    },
    {
      "epoch": 0.7138689538479491,
      "grad_norm": 0.8317875323554248,
      "learning_rate": 1.9977269778970666e-06,
      "loss": 0.1111,
      "step": 24470
    },
    {
      "epoch": 0.7138981270785927,
      "grad_norm": 0.864772193549524,
      "learning_rate": 1.9973492051476345e-06,
      "loss": 0.0925,
      "step": 24471
    },
    {
      "epoch": 0.7139273003092362,
      "grad_norm": 0.719659647315192,
      "learning_rate": 1.9969714592051506e-06,
      "loss": 0.1008,
      "step": 24472
    },
    {
      "epoch": 0.7139564735398798,
      "grad_norm": 0.884086106888888,
      "learning_rate": 1.9965937400729895e-06,
      "loss": 0.1215,
      "step": 24473
    },
    {
      "epoch": 0.7139856467705233,
      "grad_norm": 1.1009363270742796,
      "learning_rate": 1.996216047754521e-06,
      "loss": 0.1207,
      "step": 24474
    },
    {
      "epoch": 0.7140148200011669,
      "grad_norm": 0.8531369406368245,
      "learning_rate": 1.995838382253119e-06,
      "loss": 0.0985,
      "step": 24475
    },
    {
      "epoch": 0.7140439932318104,
      "grad_norm": 0.7103027521649443,
      "learning_rate": 1.995460743572156e-06,
      "loss": 0.1192,
      "step": 24476
    },
    {
      "epoch": 0.714073166462454,
      "grad_norm": 1.0738885162982494,
      "learning_rate": 1.995083131715003e-06,
      "loss": 0.1364,
      "step": 24477
    },
    {
      "epoch": 0.7141023396930977,
      "grad_norm": 0.9695200049495182,
      "learning_rate": 1.9947055466850283e-06,
      "loss": 0.1333,
      "step": 24478
    },
    {
      "epoch": 0.7141315129237412,
      "grad_norm": 1.0123868722289286,
      "learning_rate": 1.9943279884856065e-06,
      "loss": 0.1164,
      "step": 24479
    },
    {
      "epoch": 0.7141606861543848,
      "grad_norm": 1.0769255112161285,
      "learning_rate": 1.9939504571201055e-06,
      "loss": 0.1268,
      "step": 24480
    },
    {
      "epoch": 0.7141898593850283,
      "grad_norm": 0.9194611986223226,
      "learning_rate": 1.993572952591899e-06,
      "loss": 0.1328,
      "step": 24481
    },
    {
      "epoch": 0.7142190326156719,
      "grad_norm": 0.8962777665479411,
      "learning_rate": 1.9931954749043535e-06,
      "loss": 0.1041,
      "step": 24482
    },
    {
      "epoch": 0.7142482058463154,
      "grad_norm": 0.7625310960114333,
      "learning_rate": 1.992818024060843e-06,
      "loss": 0.1178,
      "step": 24483
    },
    {
      "epoch": 0.714277379076959,
      "grad_norm": 1.265964283956046,
      "learning_rate": 1.9924406000647354e-06,
      "loss": 0.125,
      "step": 24484
    },
    {
      "epoch": 0.7143065523076025,
      "grad_norm": 1.0880981977655597,
      "learning_rate": 1.992063202919398e-06,
      "loss": 0.1163,
      "step": 24485
    },
    {
      "epoch": 0.7143357255382461,
      "grad_norm": 0.8285814688788601,
      "learning_rate": 1.991685832628202e-06,
      "loss": 0.1187,
      "step": 24486
    },
    {
      "epoch": 0.7143648987688896,
      "grad_norm": 0.9923470221096321,
      "learning_rate": 1.9913084891945195e-06,
      "loss": 0.1306,
      "step": 24487
    },
    {
      "epoch": 0.7143940719995332,
      "grad_norm": 0.997270190992129,
      "learning_rate": 1.9909311726217144e-06,
      "loss": 0.1234,
      "step": 24488
    },
    {
      "epoch": 0.7144232452301768,
      "grad_norm": 0.8188236531698319,
      "learning_rate": 1.9905538829131594e-06,
      "loss": 0.1136,
      "step": 24489
    },
    {
      "epoch": 0.7144524184608203,
      "grad_norm": 0.6162367141874275,
      "learning_rate": 1.9901766200722205e-06,
      "loss": 0.1076,
      "step": 24490
    },
    {
      "epoch": 0.7144815916914639,
      "grad_norm": 1.061775721054025,
      "learning_rate": 1.9897993841022643e-06,
      "loss": 0.1173,
      "step": 24491
    },
    {
      "epoch": 0.7145107649221075,
      "grad_norm": 0.8772880924671139,
      "learning_rate": 1.989422175006661e-06,
      "loss": 0.1603,
      "step": 24492
    },
    {
      "epoch": 0.7145399381527511,
      "grad_norm": 0.9203970716723464,
      "learning_rate": 1.9890449927887796e-06,
      "loss": 0.103,
      "step": 24493
    },
    {
      "epoch": 0.7145691113833946,
      "grad_norm": 0.798478354769862,
      "learning_rate": 1.988667837451986e-06,
      "loss": 0.1119,
      "step": 24494
    },
    {
      "epoch": 0.7145982846140382,
      "grad_norm": 0.9197447757365986,
      "learning_rate": 1.9882907089996453e-06,
      "loss": 0.1174,
      "step": 24495
    },
    {
      "epoch": 0.7146274578446817,
      "grad_norm": 0.6913238297298403,
      "learning_rate": 1.9879136074351276e-06,
      "loss": 0.1149,
      "step": 24496
    },
    {
      "epoch": 0.7146566310753253,
      "grad_norm": 0.762813617857346,
      "learning_rate": 1.987536532761798e-06,
      "loss": 0.126,
      "step": 24497
    },
    {
      "epoch": 0.7146858043059688,
      "grad_norm": 0.6810020343943598,
      "learning_rate": 1.9871594849830213e-06,
      "loss": 0.104,
      "step": 24498
    },
    {
      "epoch": 0.7147149775366124,
      "grad_norm": 0.6555628971500574,
      "learning_rate": 1.986782464102166e-06,
      "loss": 0.1173,
      "step": 24499
    },
    {
      "epoch": 0.714744150767256,
      "grad_norm": 1.1312181162398898,
      "learning_rate": 1.9864054701225986e-06,
      "loss": 0.1133,
      "step": 24500
    },
    {
      "epoch": 0.7147733239978995,
      "grad_norm": 0.8451512766943347,
      "learning_rate": 1.9860285030476844e-06,
      "loss": 0.1326,
      "step": 24501
    },
    {
      "epoch": 0.714802497228543,
      "grad_norm": 0.9255673831474458,
      "learning_rate": 1.9856515628807865e-06,
      "loss": 0.1262,
      "step": 24502
    },
    {
      "epoch": 0.7148316704591866,
      "grad_norm": 0.6826310324274596,
      "learning_rate": 1.9852746496252735e-06,
      "loss": 0.1007,
      "step": 24503
    },
    {
      "epoch": 0.7148608436898302,
      "grad_norm": 0.7154138159018919,
      "learning_rate": 1.984897763284507e-06,
      "loss": 0.1248,
      "step": 24504
    },
    {
      "epoch": 0.7148900169204738,
      "grad_norm": 0.856043848945111,
      "learning_rate": 1.984520903861853e-06,
      "loss": 0.1311,
      "step": 24505
    },
    {
      "epoch": 0.7149191901511174,
      "grad_norm": 0.9118751512671261,
      "learning_rate": 1.984144071360679e-06,
      "loss": 0.1547,
      "step": 24506
    },
    {
      "epoch": 0.7149483633817609,
      "grad_norm": 0.8120519707589329,
      "learning_rate": 1.9837672657843467e-06,
      "loss": 0.1119,
      "step": 24507
    },
    {
      "epoch": 0.7149775366124045,
      "grad_norm": 0.9660033861049498,
      "learning_rate": 1.983390487136218e-06,
      "loss": 0.1214,
      "step": 24508
    },
    {
      "epoch": 0.715006709843048,
      "grad_norm": 0.7301469264130491,
      "learning_rate": 1.983013735419661e-06,
      "loss": 0.1246,
      "step": 24509
    },
    {
      "epoch": 0.7150358830736916,
      "grad_norm": 0.9168246822075329,
      "learning_rate": 1.982637010638035e-06,
      "loss": 0.1148,
      "step": 24510
    },
    {
      "epoch": 0.7150650563043351,
      "grad_norm": 0.802518765472926,
      "learning_rate": 1.9822603127947076e-06,
      "loss": 0.1095,
      "step": 24511
    },
    {
      "epoch": 0.7150942295349787,
      "grad_norm": 1.0253697786433456,
      "learning_rate": 1.981883641893038e-06,
      "loss": 0.1368,
      "step": 24512
    },
    {
      "epoch": 0.7151234027656223,
      "grad_norm": 1.0496533173020024,
      "learning_rate": 1.9815069979363927e-06,
      "loss": 0.1242,
      "step": 24513
    },
    {
      "epoch": 0.7151525759962658,
      "grad_norm": 0.9436918956143182,
      "learning_rate": 1.9811303809281318e-06,
      "loss": 0.1204,
      "step": 24514
    },
    {
      "epoch": 0.7151817492269094,
      "grad_norm": 0.9034584311781312,
      "learning_rate": 1.980753790871617e-06,
      "loss": 0.1138,
      "step": 24515
    },
    {
      "epoch": 0.7152109224575529,
      "grad_norm": 0.9629977658340991,
      "learning_rate": 1.980377227770211e-06,
      "loss": 0.1262,
      "step": 24516
    },
    {
      "epoch": 0.7152400956881965,
      "grad_norm": 0.7637325551091692,
      "learning_rate": 1.9800006916272785e-06,
      "loss": 0.1073,
      "step": 24517
    },
    {
      "epoch": 0.71526926891884,
      "grad_norm": 0.9062947721927214,
      "learning_rate": 1.979624182446177e-06,
      "loss": 0.1225,
      "step": 24518
    },
    {
      "epoch": 0.7152984421494837,
      "grad_norm": 0.9542482483793993,
      "learning_rate": 1.9792477002302713e-06,
      "loss": 0.1096,
      "step": 24519
    },
    {
      "epoch": 0.7153276153801272,
      "grad_norm": 0.9360330193100261,
      "learning_rate": 1.9788712449829213e-06,
      "loss": 0.1276,
      "step": 24520
    },
    {
      "epoch": 0.7153567886107708,
      "grad_norm": 1.2684726042405354,
      "learning_rate": 1.9784948167074856e-06,
      "loss": 0.1171,
      "step": 24521
    },
    {
      "epoch": 0.7153859618414143,
      "grad_norm": 1.0361858156709687,
      "learning_rate": 1.9781184154073273e-06,
      "loss": 0.1395,
      "step": 24522
    },
    {
      "epoch": 0.7154151350720579,
      "grad_norm": 0.8714331582704156,
      "learning_rate": 1.977742041085808e-06,
      "loss": 0.1205,
      "step": 24523
    },
    {
      "epoch": 0.7154443083027014,
      "grad_norm": 1.257545422964162,
      "learning_rate": 1.9773656937462867e-06,
      "loss": 0.1198,
      "step": 24524
    },
    {
      "epoch": 0.715473481533345,
      "grad_norm": 0.8290693970217324,
      "learning_rate": 1.97698937339212e-06,
      "loss": 0.0947,
      "step": 24525
    },
    {
      "epoch": 0.7155026547639886,
      "grad_norm": 0.7808933215197137,
      "learning_rate": 1.976613080026673e-06,
      "loss": 0.1198,
      "step": 24526
    },
    {
      "epoch": 0.7155318279946321,
      "grad_norm": 0.9400499470621698,
      "learning_rate": 1.976236813653303e-06,
      "loss": 0.1228,
      "step": 24527
    },
    {
      "epoch": 0.7155610012252757,
      "grad_norm": 0.8092859321774526,
      "learning_rate": 1.9758605742753665e-06,
      "loss": 0.0983,
      "step": 24528
    },
    {
      "epoch": 0.7155901744559192,
      "grad_norm": 0.9741125357720664,
      "learning_rate": 1.9754843618962255e-06,
      "loss": 0.1192,
      "step": 24529
    },
    {
      "epoch": 0.7156193476865628,
      "grad_norm": 0.9429728273169273,
      "learning_rate": 1.975108176519239e-06,
      "loss": 0.1304,
      "step": 24530
    },
    {
      "epoch": 0.7156485209172063,
      "grad_norm": 0.8955526795595488,
      "learning_rate": 1.974732018147766e-06,
      "loss": 0.1215,
      "step": 24531
    },
    {
      "epoch": 0.71567769414785,
      "grad_norm": 0.8555186048254568,
      "learning_rate": 1.9743558867851605e-06,
      "loss": 0.0976,
      "step": 24532
    },
    {
      "epoch": 0.7157068673784935,
      "grad_norm": 0.9543932338643378,
      "learning_rate": 1.973979782434785e-06,
      "loss": 0.0959,
      "step": 24533
    },
    {
      "epoch": 0.7157360406091371,
      "grad_norm": 0.9182165873944451,
      "learning_rate": 1.9736037050999946e-06,
      "loss": 0.1214,
      "step": 24534
    },
    {
      "epoch": 0.7157652138397806,
      "grad_norm": 0.8839100903731244,
      "learning_rate": 1.9732276547841473e-06,
      "loss": 0.1114,
      "step": 24535
    },
    {
      "epoch": 0.7157943870704242,
      "grad_norm": 0.783510492837181,
      "learning_rate": 1.9728516314906034e-06,
      "loss": 0.1316,
      "step": 24536
    },
    {
      "epoch": 0.7158235603010678,
      "grad_norm": 0.9539275579482954,
      "learning_rate": 1.9724756352227163e-06,
      "loss": 0.1344,
      "step": 24537
    },
    {
      "epoch": 0.7158527335317113,
      "grad_norm": 0.9102475346754038,
      "learning_rate": 1.9720996659838433e-06,
      "loss": 0.1017,
      "step": 24538
    },
    {
      "epoch": 0.7158819067623549,
      "grad_norm": 1.0290131903244208,
      "learning_rate": 1.9717237237773428e-06,
      "loss": 0.1271,
      "step": 24539
    },
    {
      "epoch": 0.7159110799929984,
      "grad_norm": 0.8750834969039144,
      "learning_rate": 1.9713478086065686e-06,
      "loss": 0.1263,
      "step": 24540
    },
    {
      "epoch": 0.715940253223642,
      "grad_norm": 0.7595404203221637,
      "learning_rate": 1.97097192047488e-06,
      "loss": 0.1269,
      "step": 24541
    },
    {
      "epoch": 0.7159694264542855,
      "grad_norm": 1.070364503939263,
      "learning_rate": 1.9705960593856287e-06,
      "loss": 0.1346,
      "step": 24542
    },
    {
      "epoch": 0.7159985996849291,
      "grad_norm": 0.7964632149951953,
      "learning_rate": 1.970220225342175e-06,
      "loss": 0.1214,
      "step": 24543
    },
    {
      "epoch": 0.7160277729155726,
      "grad_norm": 0.7370001324735711,
      "learning_rate": 1.9698444183478715e-06,
      "loss": 0.1262,
      "step": 24544
    },
    {
      "epoch": 0.7160569461462162,
      "grad_norm": 0.9609390321102597,
      "learning_rate": 1.9694686384060726e-06,
      "loss": 0.1458,
      "step": 24545
    },
    {
      "epoch": 0.7160861193768598,
      "grad_norm": 0.6317432156243418,
      "learning_rate": 1.969092885520133e-06,
      "loss": 0.1063,
      "step": 24546
    },
    {
      "epoch": 0.7161152926075034,
      "grad_norm": 0.7703462693873755,
      "learning_rate": 1.9687171596934112e-06,
      "loss": 0.1112,
      "step": 24547
    },
    {
      "epoch": 0.716144465838147,
      "grad_norm": 0.7059559100543018,
      "learning_rate": 1.9683414609292573e-06,
      "loss": 0.1171,
      "step": 24548
    },
    {
      "epoch": 0.7161736390687905,
      "grad_norm": 0.718228407163729,
      "learning_rate": 1.967965789231028e-06,
      "loss": 0.1234,
      "step": 24549
    },
    {
      "epoch": 0.716202812299434,
      "grad_norm": 0.7392834156275169,
      "learning_rate": 1.967590144602077e-06,
      "loss": 0.0972,
      "step": 24550
    },
    {
      "epoch": 0.7162319855300776,
      "grad_norm": 0.966034325521619,
      "learning_rate": 1.9672145270457553e-06,
      "loss": 0.1226,
      "step": 24551
    },
    {
      "epoch": 0.7162611587607212,
      "grad_norm": 1.193308368363112,
      "learning_rate": 1.966838936565419e-06,
      "loss": 0.1279,
      "step": 24552
    },
    {
      "epoch": 0.7162903319913647,
      "grad_norm": 0.6950364685412184,
      "learning_rate": 1.9664633731644215e-06,
      "loss": 0.0993,
      "step": 24553
    },
    {
      "epoch": 0.7163195052220083,
      "grad_norm": 0.7244842209202247,
      "learning_rate": 1.9660878368461156e-06,
      "loss": 0.0979,
      "step": 24554
    },
    {
      "epoch": 0.7163486784526518,
      "grad_norm": 0.8063818433393074,
      "learning_rate": 1.9657123276138507e-06,
      "loss": 0.1345,
      "step": 24555
    },
    {
      "epoch": 0.7163778516832954,
      "grad_norm": 0.8561936995955228,
      "learning_rate": 1.9653368454709844e-06,
      "loss": 0.1138,
      "step": 24556
    },
    {
      "epoch": 0.7164070249139389,
      "grad_norm": 0.8185377889627363,
      "learning_rate": 1.9649613904208637e-06,
      "loss": 0.0989,
      "step": 24557
    },
    {
      "epoch": 0.7164361981445825,
      "grad_norm": 0.7543619490448702,
      "learning_rate": 1.9645859624668455e-06,
      "loss": 0.1232,
      "step": 24558
    },
    {
      "epoch": 0.7164653713752261,
      "grad_norm": 0.8433671728047157,
      "learning_rate": 1.9642105616122768e-06,
      "loss": 0.1097,
      "step": 24559
    },
    {
      "epoch": 0.7164945446058697,
      "grad_norm": 0.8634624804529792,
      "learning_rate": 1.963835187860514e-06,
      "loss": 0.1122,
      "step": 24560
    },
    {
      "epoch": 0.7165237178365133,
      "grad_norm": 0.7328743190271523,
      "learning_rate": 1.9634598412149056e-06,
      "loss": 0.1302,
      "step": 24561
    },
    {
      "epoch": 0.7165528910671568,
      "grad_norm": 0.936673912964834,
      "learning_rate": 1.9630845216788016e-06,
      "loss": 0.1269,
      "step": 24562
    },
    {
      "epoch": 0.7165820642978004,
      "grad_norm": 0.8894157851527578,
      "learning_rate": 1.9627092292555534e-06,
      "loss": 0.0939,
      "step": 24563
    },
    {
      "epoch": 0.7166112375284439,
      "grad_norm": 1.090818452881439,
      "learning_rate": 1.9623339639485133e-06,
      "loss": 0.1186,
      "step": 24564
    },
    {
      "epoch": 0.7166404107590875,
      "grad_norm": 0.9359231326597849,
      "learning_rate": 1.9619587257610296e-06,
      "loss": 0.1198,
      "step": 24565
    },
    {
      "epoch": 0.716669583989731,
      "grad_norm": 0.7439177195500215,
      "learning_rate": 1.9615835146964547e-06,
      "loss": 0.1407,
      "step": 24566
    },
    {
      "epoch": 0.7166987572203746,
      "grad_norm": 0.9931004330733285,
      "learning_rate": 1.961208330758137e-06,
      "loss": 0.1209,
      "step": 24567
    },
    {
      "epoch": 0.7167279304510181,
      "grad_norm": 1.208322531175956,
      "learning_rate": 1.960833173949424e-06,
      "loss": 0.1027,
      "step": 24568
    },
    {
      "epoch": 0.7167571036816617,
      "grad_norm": 0.8527797878275589,
      "learning_rate": 1.960458044273667e-06,
      "loss": 0.1251,
      "step": 24569
    },
    {
      "epoch": 0.7167862769123052,
      "grad_norm": 0.8364728856212879,
      "learning_rate": 1.9600829417342166e-06,
      "loss": 0.1251,
      "step": 24570
    },
    {
      "epoch": 0.7168154501429488,
      "grad_norm": 0.8511367927807031,
      "learning_rate": 1.95970786633442e-06,
      "loss": 0.1095,
      "step": 24571
    },
    {
      "epoch": 0.7168446233735923,
      "grad_norm": 0.7868164798833528,
      "learning_rate": 1.959332818077624e-06,
      "loss": 0.1273,
      "step": 24572
    },
    {
      "epoch": 0.716873796604236,
      "grad_norm": 0.8839613641074617,
      "learning_rate": 1.9589577969671808e-06,
      "loss": 0.1394,
      "step": 24573
    },
    {
      "epoch": 0.7169029698348796,
      "grad_norm": 1.0469856369584865,
      "learning_rate": 1.958582803006436e-06,
      "loss": 0.1357,
      "step": 24574
    },
    {
      "epoch": 0.7169321430655231,
      "grad_norm": 0.7026496730411657,
      "learning_rate": 1.9582078361987345e-06,
      "loss": 0.1099,
      "step": 24575
    },
    {
      "epoch": 0.7169613162961667,
      "grad_norm": 0.7384995100826249,
      "learning_rate": 1.9578328965474306e-06,
      "loss": 0.1227,
      "step": 24576
    },
    {
      "epoch": 0.7169904895268102,
      "grad_norm": 0.7931694308950297,
      "learning_rate": 1.957457984055869e-06,
      "loss": 0.1175,
      "step": 24577
    },
    {
      "epoch": 0.7170196627574538,
      "grad_norm": 0.6955838590790396,
      "learning_rate": 1.9570830987273944e-06,
      "loss": 0.1035,
      "step": 24578
    },
    {
      "epoch": 0.7170488359880973,
      "grad_norm": 0.9837980017332323,
      "learning_rate": 1.9567082405653565e-06,
      "loss": 0.115,
      "step": 24579
    },
    {
      "epoch": 0.7170780092187409,
      "grad_norm": 0.7276555376558514,
      "learning_rate": 1.956333409573102e-06,
      "loss": 0.1079,
      "step": 24580
    },
    {
      "epoch": 0.7171071824493844,
      "grad_norm": 0.7667172904919755,
      "learning_rate": 1.9559586057539737e-06,
      "loss": 0.1113,
      "step": 24581
    },
    {
      "epoch": 0.717136355680028,
      "grad_norm": 0.7961658916482846,
      "learning_rate": 1.9555838291113205e-06,
      "loss": 0.1291,
      "step": 24582
    },
    {
      "epoch": 0.7171655289106715,
      "grad_norm": 0.785833715893844,
      "learning_rate": 1.9552090796484896e-06,
      "loss": 0.1193,
      "step": 24583
    },
    {
      "epoch": 0.7171947021413151,
      "grad_norm": 0.8239789626906368,
      "learning_rate": 1.9548343573688256e-06,
      "loss": 0.1407,
      "step": 24584
    },
    {
      "epoch": 0.7172238753719586,
      "grad_norm": 0.9487799334267942,
      "learning_rate": 1.9544596622756716e-06,
      "loss": 0.1259,
      "step": 24585
    },
    {
      "epoch": 0.7172530486026023,
      "grad_norm": 0.7663403839748532,
      "learning_rate": 1.954084994372376e-06,
      "loss": 0.1075,
      "step": 24586
    },
    {
      "epoch": 0.7172822218332459,
      "grad_norm": 0.7928594910962723,
      "learning_rate": 1.9537103536622813e-06,
      "loss": 0.1141,
      "step": 24587
    },
    {
      "epoch": 0.7173113950638894,
      "grad_norm": 0.9657934542018983,
      "learning_rate": 1.9533357401487352e-06,
      "loss": 0.1147,
      "step": 24588
    },
    {
      "epoch": 0.717340568294533,
      "grad_norm": 0.9545381189212526,
      "learning_rate": 1.9529611538350785e-06,
      "loss": 0.1211,
      "step": 24589
    },
    {
      "epoch": 0.7173697415251765,
      "grad_norm": 0.8322622904427825,
      "learning_rate": 1.9525865947246587e-06,
      "loss": 0.1509,
      "step": 24590
    },
    {
      "epoch": 0.7173989147558201,
      "grad_norm": 0.9111697633194339,
      "learning_rate": 1.9522120628208186e-06,
      "loss": 0.1383,
      "step": 24591
    },
    {
      "epoch": 0.7174280879864636,
      "grad_norm": 0.8057984566038064,
      "learning_rate": 1.9518375581268993e-06,
      "loss": 0.1187,
      "step": 24592
    },
    {
      "epoch": 0.7174572612171072,
      "grad_norm": 0.8861541754385086,
      "learning_rate": 1.951463080646247e-06,
      "loss": 0.115,
      "step": 24593
    },
    {
      "epoch": 0.7174864344477507,
      "grad_norm": 0.8192781769631182,
      "learning_rate": 1.951088630382206e-06,
      "loss": 0.1174,
      "step": 24594
    },
    {
      "epoch": 0.7175156076783943,
      "grad_norm": 0.9404181893206632,
      "learning_rate": 1.9507142073381167e-06,
      "loss": 0.124,
      "step": 24595
    },
    {
      "epoch": 0.7175447809090378,
      "grad_norm": 2.2864494159241726,
      "learning_rate": 1.950339811517325e-06,
      "loss": 0.1146,
      "step": 24596
    },
    {
      "epoch": 0.7175739541396814,
      "grad_norm": 0.8838459596196421,
      "learning_rate": 1.949965442923171e-06,
      "loss": 0.1373,
      "step": 24597
    },
    {
      "epoch": 0.717603127370325,
      "grad_norm": 0.7264295874041378,
      "learning_rate": 1.9495911015589957e-06,
      "loss": 0.1183,
      "step": 24598
    },
    {
      "epoch": 0.7176323006009685,
      "grad_norm": 0.7934864666564094,
      "learning_rate": 1.9492167874281425e-06,
      "loss": 0.1234,
      "step": 24599
    },
    {
      "epoch": 0.7176614738316122,
      "grad_norm": 0.8892097528522984,
      "learning_rate": 1.9488425005339555e-06,
      "loss": 0.1149,
      "step": 24600
    },
    {
      "epoch": 0.7176906470622557,
      "grad_norm": 1.0318295023072517,
      "learning_rate": 1.948468240879775e-06,
      "loss": 0.141,
      "step": 24601
    },
    {
      "epoch": 0.7177198202928993,
      "grad_norm": 0.8342628317211979,
      "learning_rate": 1.9480940084689394e-06,
      "loss": 0.1156,
      "step": 24602
    },
    {
      "epoch": 0.7177489935235428,
      "grad_norm": 0.9779654918887197,
      "learning_rate": 1.9477198033047933e-06,
      "loss": 0.104,
      "step": 24603
    },
    {
      "epoch": 0.7177781667541864,
      "grad_norm": 0.6842079680824421,
      "learning_rate": 1.9473456253906764e-06,
      "loss": 0.0947,
      "step": 24604
    },
    {
      "epoch": 0.7178073399848299,
      "grad_norm": 0.9837934333206318,
      "learning_rate": 1.946971474729926e-06,
      "loss": 0.1245,
      "step": 24605
    },
    {
      "epoch": 0.7178365132154735,
      "grad_norm": 0.7094418965261642,
      "learning_rate": 1.946597351325888e-06,
      "loss": 0.1154,
      "step": 24606
    },
    {
      "epoch": 0.717865686446117,
      "grad_norm": 0.8102633184500629,
      "learning_rate": 1.9462232551819006e-06,
      "loss": 0.0963,
      "step": 24607
    },
    {
      "epoch": 0.7178948596767606,
      "grad_norm": 1.2047205682955549,
      "learning_rate": 1.9458491863013006e-06,
      "loss": 0.1071,
      "step": 24608
    },
    {
      "epoch": 0.7179240329074041,
      "grad_norm": 0.6767291762863835,
      "learning_rate": 1.9454751446874328e-06,
      "loss": 0.1197,
      "step": 24609
    },
    {
      "epoch": 0.7179532061380477,
      "grad_norm": 0.790391055486654,
      "learning_rate": 1.945101130343633e-06,
      "loss": 0.1211,
      "step": 24610
    },
    {
      "epoch": 0.7179823793686912,
      "grad_norm": 0.905758586230062,
      "learning_rate": 1.944727143273239e-06,
      "loss": 0.1238,
      "step": 24611
    },
    {
      "epoch": 0.7180115525993348,
      "grad_norm": 0.6817967211946586,
      "learning_rate": 1.9443531834795927e-06,
      "loss": 0.1193,
      "step": 24612
    },
    {
      "epoch": 0.7180407258299785,
      "grad_norm": 1.153489879461069,
      "learning_rate": 1.943979250966033e-06,
      "loss": 0.1255,
      "step": 24613
    },
    {
      "epoch": 0.718069899060622,
      "grad_norm": 0.914770188982417,
      "learning_rate": 1.943605345735897e-06,
      "loss": 0.1097,
      "step": 24614
    },
    {
      "epoch": 0.7180990722912656,
      "grad_norm": 0.6866580799115839,
      "learning_rate": 1.9432314677925207e-06,
      "loss": 0.1216,
      "step": 24615
    },
    {
      "epoch": 0.7181282455219091,
      "grad_norm": 0.722579575940004,
      "learning_rate": 1.9428576171392462e-06,
      "loss": 0.1432,
      "step": 24616
    },
    {
      "epoch": 0.7181574187525527,
      "grad_norm": 0.937653794165693,
      "learning_rate": 1.942483793779407e-06,
      "loss": 0.118,
      "step": 24617
    },
    {
      "epoch": 0.7181865919831962,
      "grad_norm": 0.8842048175150861,
      "learning_rate": 1.942109997716345e-06,
      "loss": 0.1082,
      "step": 24618
    },
    {
      "epoch": 0.7182157652138398,
      "grad_norm": 0.8019699131399003,
      "learning_rate": 1.9417362289533933e-06,
      "loss": 0.1352,
      "step": 24619
    },
    {
      "epoch": 0.7182449384444833,
      "grad_norm": 0.9522965798772958,
      "learning_rate": 1.9413624874938915e-06,
      "loss": 0.1234,
      "step": 24620
    },
    {
      "epoch": 0.7182741116751269,
      "grad_norm": 0.9299597553798609,
      "learning_rate": 1.940988773341176e-06,
      "loss": 0.1119,
      "step": 24621
    },
    {
      "epoch": 0.7183032849057704,
      "grad_norm": 0.8737066950837651,
      "learning_rate": 1.94061508649858e-06,
      "loss": 0.1155,
      "step": 24622
    },
    {
      "epoch": 0.718332458136414,
      "grad_norm": 0.7935505559490547,
      "learning_rate": 1.9402414269694425e-06,
      "loss": 0.1489,
      "step": 24623
    },
    {
      "epoch": 0.7183616313670576,
      "grad_norm": 0.8908221747073154,
      "learning_rate": 1.939867794757101e-06,
      "loss": 0.1266,
      "step": 24624
    },
    {
      "epoch": 0.7183908045977011,
      "grad_norm": 0.7495130405297756,
      "learning_rate": 1.9394941898648874e-06,
      "loss": 0.11,
      "step": 24625
    },
    {
      "epoch": 0.7184199778283447,
      "grad_norm": 0.8056942599242546,
      "learning_rate": 1.939120612296141e-06,
      "loss": 0.1098,
      "step": 24626
    },
    {
      "epoch": 0.7184491510589883,
      "grad_norm": 0.856716002032613,
      "learning_rate": 1.938747062054195e-06,
      "loss": 0.1222,
      "step": 24627
    },
    {
      "epoch": 0.7184783242896319,
      "grad_norm": 0.8408169293756518,
      "learning_rate": 1.9383735391423826e-06,
      "loss": 0.104,
      "step": 24628
    },
    {
      "epoch": 0.7185074975202754,
      "grad_norm": 1.0431592999174712,
      "learning_rate": 1.9380000435640407e-06,
      "loss": 0.109,
      "step": 24629
    },
    {
      "epoch": 0.718536670750919,
      "grad_norm": 1.024265358311274,
      "learning_rate": 1.9376265753225047e-06,
      "loss": 0.1316,
      "step": 24630
    },
    {
      "epoch": 0.7185658439815625,
      "grad_norm": 0.9089588791649065,
      "learning_rate": 1.9372531344211076e-06,
      "loss": 0.1205,
      "step": 24631
    },
    {
      "epoch": 0.7185950172122061,
      "grad_norm": 0.9693610113332035,
      "learning_rate": 1.9368797208631822e-06,
      "loss": 0.1135,
      "step": 24632
    },
    {
      "epoch": 0.7186241904428496,
      "grad_norm": 0.8042025428336325,
      "learning_rate": 1.9365063346520645e-06,
      "loss": 0.1305,
      "step": 24633
    },
    {
      "epoch": 0.7186533636734932,
      "grad_norm": 0.8853523697634652,
      "learning_rate": 1.9361329757910875e-06,
      "loss": 0.1296,
      "step": 24634
    },
    {
      "epoch": 0.7186825369041367,
      "grad_norm": 0.9365344251950367,
      "learning_rate": 1.93575964428358e-06,
      "loss": 0.1328,
      "step": 24635
    },
    {
      "epoch": 0.7187117101347803,
      "grad_norm": 0.7650189822371252,
      "learning_rate": 1.9353863401328827e-06,
      "loss": 0.1083,
      "step": 24636
    },
    {
      "epoch": 0.7187408833654239,
      "grad_norm": 0.9849746651651492,
      "learning_rate": 1.9350130633423247e-06,
      "loss": 0.1307,
      "step": 24637
    },
    {
      "epoch": 0.7187700565960674,
      "grad_norm": 0.7478866575294362,
      "learning_rate": 1.934639813915236e-06,
      "loss": 0.1339,
      "step": 24638
    },
    {
      "epoch": 0.718799229826711,
      "grad_norm": 0.8851299734756334,
      "learning_rate": 1.9342665918549534e-06,
      "loss": 0.1089,
      "step": 24639
    },
    {
      "epoch": 0.7188284030573546,
      "grad_norm": 0.9144544786870371,
      "learning_rate": 1.933893397164807e-06,
      "loss": 0.1196,
      "step": 24640
    },
    {
      "epoch": 0.7188575762879982,
      "grad_norm": 0.9876098518976142,
      "learning_rate": 1.9335202298481267e-06,
      "loss": 0.1251,
      "step": 24641
    },
    {
      "epoch": 0.7188867495186417,
      "grad_norm": 0.7706470829293179,
      "learning_rate": 1.9331470899082457e-06,
      "loss": 0.1144,
      "step": 24642
    },
    {
      "epoch": 0.7189159227492853,
      "grad_norm": 0.7820956399378031,
      "learning_rate": 1.9327739773484968e-06,
      "loss": 0.1186,
      "step": 24643
    },
    {
      "epoch": 0.7189450959799288,
      "grad_norm": 0.8181877304881421,
      "learning_rate": 1.93240089217221e-06,
      "loss": 0.121,
      "step": 24644
    },
    {
      "epoch": 0.7189742692105724,
      "grad_norm": 1.3578256533319701,
      "learning_rate": 1.932027834382714e-06,
      "loss": 0.1276,
      "step": 24645
    },
    {
      "epoch": 0.719003442441216,
      "grad_norm": 1.3414013371998352,
      "learning_rate": 1.9316548039833423e-06,
      "loss": 0.1142,
      "step": 24646
    },
    {
      "epoch": 0.7190326156718595,
      "grad_norm": 0.7576906087842425,
      "learning_rate": 1.9312818009774227e-06,
      "loss": 0.1113,
      "step": 24647
    },
    {
      "epoch": 0.719061788902503,
      "grad_norm": 0.8616321200133674,
      "learning_rate": 1.9309088253682884e-06,
      "loss": 0.1269,
      "step": 24648
    },
    {
      "epoch": 0.7190909621331466,
      "grad_norm": 0.9510041875764456,
      "learning_rate": 1.930535877159265e-06,
      "loss": 0.1167,
      "step": 24649
    },
    {
      "epoch": 0.7191201353637902,
      "grad_norm": 1.021867022805468,
      "learning_rate": 1.930162956353687e-06,
      "loss": 0.1108,
      "step": 24650
    },
    {
      "epoch": 0.7191493085944337,
      "grad_norm": 0.9265718907821453,
      "learning_rate": 1.9297900629548817e-06,
      "loss": 0.1138,
      "step": 24651
    },
    {
      "epoch": 0.7191784818250773,
      "grad_norm": 0.8233226303401939,
      "learning_rate": 1.9294171969661756e-06,
      "loss": 0.1268,
      "step": 24652
    },
    {
      "epoch": 0.7192076550557208,
      "grad_norm": 0.8893054989634243,
      "learning_rate": 1.9290443583908996e-06,
      "loss": 0.1181,
      "step": 24653
    },
    {
      "epoch": 0.7192368282863645,
      "grad_norm": 0.8629398360858149,
      "learning_rate": 1.928671547232384e-06,
      "loss": 0.1029,
      "step": 24654
    },
    {
      "epoch": 0.719266001517008,
      "grad_norm": 0.880249060721745,
      "learning_rate": 1.928298763493954e-06,
      "loss": 0.1289,
      "step": 24655
    },
    {
      "epoch": 0.7192951747476516,
      "grad_norm": 1.0290716867981546,
      "learning_rate": 1.927926007178942e-06,
      "loss": 0.1135,
      "step": 24656
    },
    {
      "epoch": 0.7193243479782951,
      "grad_norm": 0.9821125362305515,
      "learning_rate": 1.9275532782906726e-06,
      "loss": 0.0944,
      "step": 24657
    },
    {
      "epoch": 0.7193535212089387,
      "grad_norm": 0.9376056462310045,
      "learning_rate": 1.927180576832472e-06,
      "loss": 0.1052,
      "step": 24658
    },
    {
      "epoch": 0.7193826944395822,
      "grad_norm": 0.9270168486362149,
      "learning_rate": 1.9268079028076705e-06,
      "loss": 0.1325,
      "step": 24659
    },
    {
      "epoch": 0.7194118676702258,
      "grad_norm": 0.758389496752336,
      "learning_rate": 1.9264352562195953e-06,
      "loss": 0.1186,
      "step": 24660
    },
    {
      "epoch": 0.7194410409008694,
      "grad_norm": 0.836732520175048,
      "learning_rate": 1.926062637071573e-06,
      "loss": 0.1135,
      "step": 24661
    },
    {
      "epoch": 0.7194702141315129,
      "grad_norm": 0.8483575778597005,
      "learning_rate": 1.9256900453669273e-06,
      "loss": 0.1073,
      "step": 24662
    },
    {
      "epoch": 0.7194993873621565,
      "grad_norm": 0.8408754717403444,
      "learning_rate": 1.9253174811089892e-06,
      "loss": 0.1134,
      "step": 24663
    },
    {
      "epoch": 0.7195285605928,
      "grad_norm": 1.1023176611870964,
      "learning_rate": 1.9249449443010825e-06,
      "loss": 0.1243,
      "step": 24664
    },
    {
      "epoch": 0.7195577338234436,
      "grad_norm": 0.917530972983815,
      "learning_rate": 1.92457243494653e-06,
      "loss": 0.1195,
      "step": 24665
    },
    {
      "epoch": 0.7195869070540871,
      "grad_norm": 0.8137738459398199,
      "learning_rate": 1.9241999530486636e-06,
      "loss": 0.1069,
      "step": 24666
    },
    {
      "epoch": 0.7196160802847308,
      "grad_norm": 1.033795804264537,
      "learning_rate": 1.923827498610806e-06,
      "loss": 0.1276,
      "step": 24667
    },
    {
      "epoch": 0.7196452535153743,
      "grad_norm": 0.8122321928286242,
      "learning_rate": 1.923455071636281e-06,
      "loss": 0.1468,
      "step": 24668
    },
    {
      "epoch": 0.7196744267460179,
      "grad_norm": 1.1422027589284771,
      "learning_rate": 1.923082672128416e-06,
      "loss": 0.1115,
      "step": 24669
    },
    {
      "epoch": 0.7197035999766614,
      "grad_norm": 1.1093760544730045,
      "learning_rate": 1.9227103000905346e-06,
      "loss": 0.1183,
      "step": 24670
    },
    {
      "epoch": 0.719732773207305,
      "grad_norm": 0.862616174265635,
      "learning_rate": 1.9223379555259587e-06,
      "loss": 0.1089,
      "step": 24671
    },
    {
      "epoch": 0.7197619464379486,
      "grad_norm": 0.8495067077110474,
      "learning_rate": 1.921965638438015e-06,
      "loss": 0.1222,
      "step": 24672
    },
    {
      "epoch": 0.7197911196685921,
      "grad_norm": 1.1431146322021712,
      "learning_rate": 1.921593348830029e-06,
      "loss": 0.1173,
      "step": 24673
    },
    {
      "epoch": 0.7198202928992357,
      "grad_norm": 0.8670236887396516,
      "learning_rate": 1.9212210867053235e-06,
      "loss": 0.1199,
      "step": 24674
    },
    {
      "epoch": 0.7198494661298792,
      "grad_norm": 0.8445353815309623,
      "learning_rate": 1.9208488520672185e-06,
      "loss": 0.0979,
      "step": 24675
    },
    {
      "epoch": 0.7198786393605228,
      "grad_norm": 0.9907716916234535,
      "learning_rate": 1.9204766449190422e-06,
      "loss": 0.1297,
      "step": 24676
    },
    {
      "epoch": 0.7199078125911663,
      "grad_norm": 1.2213455095106163,
      "learning_rate": 1.9201044652641134e-06,
      "loss": 0.1272,
      "step": 24677
    },
    {
      "epoch": 0.7199369858218099,
      "grad_norm": 0.8837624913930062,
      "learning_rate": 1.9197323131057582e-06,
      "loss": 0.1103,
      "step": 24678
    },
    {
      "epoch": 0.7199661590524534,
      "grad_norm": 0.8819692157390737,
      "learning_rate": 1.9193601884472963e-06,
      "loss": 0.1219,
      "step": 24679
    },
    {
      "epoch": 0.719995332283097,
      "grad_norm": 0.9645443741095989,
      "learning_rate": 1.918988091292052e-06,
      "loss": 0.0995,
      "step": 24680
    },
    {
      "epoch": 0.7200245055137406,
      "grad_norm": 1.0528319556078618,
      "learning_rate": 1.9186160216433475e-06,
      "loss": 0.0999,
      "step": 24681
    },
    {
      "epoch": 0.7200536787443842,
      "grad_norm": 1.3322366515643702,
      "learning_rate": 1.9182439795045014e-06,
      "loss": 0.1169,
      "step": 24682
    },
    {
      "epoch": 0.7200828519750277,
      "grad_norm": 0.7331645887220264,
      "learning_rate": 1.917871964878838e-06,
      "loss": 0.1282,
      "step": 24683
    },
    {
      "epoch": 0.7201120252056713,
      "grad_norm": 0.8474656686504487,
      "learning_rate": 1.917499977769679e-06,
      "loss": 0.1031,
      "step": 24684
    },
    {
      "epoch": 0.7201411984363149,
      "grad_norm": 1.2961017605312743,
      "learning_rate": 1.9171280181803427e-06,
      "loss": 0.1291,
      "step": 24685
    },
    {
      "epoch": 0.7201703716669584,
      "grad_norm": 0.9402362561328396,
      "learning_rate": 1.916756086114153e-06,
      "loss": 0.111,
      "step": 24686
    },
    {
      "epoch": 0.720199544897602,
      "grad_norm": 1.0759894057376236,
      "learning_rate": 1.9163841815744295e-06,
      "loss": 0.1293,
      "step": 24687
    },
    {
      "epoch": 0.7202287181282455,
      "grad_norm": 0.9344755495602592,
      "learning_rate": 1.91601230456449e-06,
      "loss": 0.1148,
      "step": 24688
    },
    {
      "epoch": 0.7202578913588891,
      "grad_norm": 1.008159646040027,
      "learning_rate": 1.9156404550876563e-06,
      "loss": 0.119,
      "step": 24689
    },
    {
      "epoch": 0.7202870645895326,
      "grad_norm": 1.2464510090837626,
      "learning_rate": 1.9152686331472505e-06,
      "loss": 0.1168,
      "step": 24690
    },
    {
      "epoch": 0.7203162378201762,
      "grad_norm": 1.0474997117594915,
      "learning_rate": 1.9148968387465895e-06,
      "loss": 0.1237,
      "step": 24691
    },
    {
      "epoch": 0.7203454110508197,
      "grad_norm": 0.7978159082470053,
      "learning_rate": 1.914525071888991e-06,
      "loss": 0.1018,
      "step": 24692
    },
    {
      "epoch": 0.7203745842814633,
      "grad_norm": 0.77938685141896,
      "learning_rate": 1.9141533325777785e-06,
      "loss": 0.122,
      "step": 24693
    },
    {
      "epoch": 0.720403757512107,
      "grad_norm": 0.7419460825517156,
      "learning_rate": 1.913781620816268e-06,
      "loss": 0.1304,
      "step": 24694
    },
    {
      "epoch": 0.7204329307427505,
      "grad_norm": 1.1909270497883744,
      "learning_rate": 1.913409936607775e-06,
      "loss": 0.1239,
      "step": 24695
    },
    {
      "epoch": 0.720462103973394,
      "grad_norm": 0.7898081197878707,
      "learning_rate": 1.9130382799556253e-06,
      "loss": 0.1277,
      "step": 24696
    },
    {
      "epoch": 0.7204912772040376,
      "grad_norm": 0.7746590602597376,
      "learning_rate": 1.9126666508631324e-06,
      "loss": 0.115,
      "step": 24697
    },
    {
      "epoch": 0.7205204504346812,
      "grad_norm": 0.9502854095795547,
      "learning_rate": 1.912295049333613e-06,
      "loss": 0.1323,
      "step": 24698
    },
    {
      "epoch": 0.7205496236653247,
      "grad_norm": 1.0002000160621727,
      "learning_rate": 1.911923475370388e-06,
      "loss": 0.128,
      "step": 24699
    },
    {
      "epoch": 0.7205787968959683,
      "grad_norm": 0.8516728307827025,
      "learning_rate": 1.911551928976773e-06,
      "loss": 0.1122,
      "step": 24700
    },
    {
      "epoch": 0.7206079701266118,
      "grad_norm": 0.7974658657158188,
      "learning_rate": 1.911180410156083e-06,
      "loss": 0.1325,
      "step": 24701
    },
    {
      "epoch": 0.7206371433572554,
      "grad_norm": 0.8218616731306487,
      "learning_rate": 1.9108089189116374e-06,
      "loss": 0.1095,
      "step": 24702
    },
    {
      "epoch": 0.7206663165878989,
      "grad_norm": 0.771458787265972,
      "learning_rate": 1.9104374552467542e-06,
      "loss": 0.1242,
      "step": 24703
    },
    {
      "epoch": 0.7206954898185425,
      "grad_norm": 0.9622223656294758,
      "learning_rate": 1.910066019164748e-06,
      "loss": 0.1117,
      "step": 24704
    },
    {
      "epoch": 0.720724663049186,
      "grad_norm": 0.8817943316492531,
      "learning_rate": 1.9096946106689322e-06,
      "loss": 0.1244,
      "step": 24705
    },
    {
      "epoch": 0.7207538362798296,
      "grad_norm": 0.8791277836520458,
      "learning_rate": 1.9093232297626278e-06,
      "loss": 0.14,
      "step": 24706
    },
    {
      "epoch": 0.7207830095104731,
      "grad_norm": 0.868448183477267,
      "learning_rate": 1.9089518764491453e-06,
      "loss": 0.0991,
      "step": 24707
    },
    {
      "epoch": 0.7208121827411168,
      "grad_norm": 0.821549394991177,
      "learning_rate": 1.908580550731805e-06,
      "loss": 0.1266,
      "step": 24708
    },
    {
      "epoch": 0.7208413559717604,
      "grad_norm": 0.7550515482338176,
      "learning_rate": 1.9082092526139175e-06,
      "loss": 0.1109,
      "step": 24709
    },
    {
      "epoch": 0.7208705292024039,
      "grad_norm": 0.8146217101684364,
      "learning_rate": 1.9078379820988014e-06,
      "loss": 0.0992,
      "step": 24710
    },
    {
      "epoch": 0.7208997024330475,
      "grad_norm": 0.9098167480942209,
      "learning_rate": 1.9074667391897694e-06,
      "loss": 0.1129,
      "step": 24711
    },
    {
      "epoch": 0.720928875663691,
      "grad_norm": 0.8360465489386156,
      "learning_rate": 1.9070955238901352e-06,
      "loss": 0.1102,
      "step": 24712
    },
    {
      "epoch": 0.7209580488943346,
      "grad_norm": 1.012354929219495,
      "learning_rate": 1.9067243362032128e-06,
      "loss": 0.144,
      "step": 24713
    },
    {
      "epoch": 0.7209872221249781,
      "grad_norm": 0.6986973803491306,
      "learning_rate": 1.9063531761323195e-06,
      "loss": 0.1103,
      "step": 24714
    },
    {
      "epoch": 0.7210163953556217,
      "grad_norm": 0.7190303083356872,
      "learning_rate": 1.9059820436807646e-06,
      "loss": 0.127,
      "step": 24715
    },
    {
      "epoch": 0.7210455685862652,
      "grad_norm": 0.7472901284349369,
      "learning_rate": 1.9056109388518652e-06,
      "loss": 0.1149,
      "step": 24716
    },
    {
      "epoch": 0.7210747418169088,
      "grad_norm": 0.8931698248751406,
      "learning_rate": 1.9052398616489325e-06,
      "loss": 0.1285,
      "step": 24717
    },
    {
      "epoch": 0.7211039150475523,
      "grad_norm": 0.6280847754916922,
      "learning_rate": 1.9048688120752785e-06,
      "loss": 0.0837,
      "step": 24718
    },
    {
      "epoch": 0.7211330882781959,
      "grad_norm": 0.664066802237107,
      "learning_rate": 1.904497790134216e-06,
      "loss": 0.129,
      "step": 24719
    },
    {
      "epoch": 0.7211622615088394,
      "grad_norm": 0.9312753488511102,
      "learning_rate": 1.9041267958290604e-06,
      "loss": 0.1249,
      "step": 24720
    },
    {
      "epoch": 0.721191434739483,
      "grad_norm": 0.9114255991829319,
      "learning_rate": 1.9037558291631215e-06,
      "loss": 0.098,
      "step": 24721
    },
    {
      "epoch": 0.7212206079701267,
      "grad_norm": 0.8471499709460484,
      "learning_rate": 1.9033848901397101e-06,
      "loss": 0.1275,
      "step": 24722
    },
    {
      "epoch": 0.7212497812007702,
      "grad_norm": 0.8323807539702123,
      "learning_rate": 1.9030139787621405e-06,
      "loss": 0.1179,
      "step": 24723
    },
    {
      "epoch": 0.7212789544314138,
      "grad_norm": 0.9121685995045828,
      "learning_rate": 1.9026430950337227e-06,
      "loss": 0.1273,
      "step": 24724
    },
    {
      "epoch": 0.7213081276620573,
      "grad_norm": 0.8911803084946347,
      "learning_rate": 1.9022722389577648e-06,
      "loss": 0.143,
      "step": 24725
    },
    {
      "epoch": 0.7213373008927009,
      "grad_norm": 0.8695092308868242,
      "learning_rate": 1.9019014105375843e-06,
      "loss": 0.1317,
      "step": 24726
    },
    {
      "epoch": 0.7213664741233444,
      "grad_norm": 1.005277111469028,
      "learning_rate": 1.9015306097764885e-06,
      "loss": 0.1261,
      "step": 24727
    },
    {
      "epoch": 0.721395647353988,
      "grad_norm": 0.9282153258894128,
      "learning_rate": 1.9011598366777855e-06,
      "loss": 0.1347,
      "step": 24728
    },
    {
      "epoch": 0.7214248205846315,
      "grad_norm": 0.823270135603786,
      "learning_rate": 1.9007890912447902e-06,
      "loss": 0.1142,
      "step": 24729
    },
    {
      "epoch": 0.7214539938152751,
      "grad_norm": 0.7616597909380265,
      "learning_rate": 1.9004183734808097e-06,
      "loss": 0.1229,
      "step": 24730
    },
    {
      "epoch": 0.7214831670459186,
      "grad_norm": 1.1750734034252135,
      "learning_rate": 1.9000476833891518e-06,
      "loss": 0.1339,
      "step": 24731
    },
    {
      "epoch": 0.7215123402765622,
      "grad_norm": 1.0496333424439988,
      "learning_rate": 1.8996770209731291e-06,
      "loss": 0.1049,
      "step": 24732
    },
    {
      "epoch": 0.7215415135072057,
      "grad_norm": 0.8141086163389445,
      "learning_rate": 1.8993063862360512e-06,
      "loss": 0.1375,
      "step": 24733
    },
    {
      "epoch": 0.7215706867378493,
      "grad_norm": 0.8109456699197971,
      "learning_rate": 1.8989357791812253e-06,
      "loss": 0.1074,
      "step": 24734
    },
    {
      "epoch": 0.721599859968493,
      "grad_norm": 0.9727899938713124,
      "learning_rate": 1.8985651998119592e-06,
      "loss": 0.114,
      "step": 24735
    },
    {
      "epoch": 0.7216290331991365,
      "grad_norm": 0.8227569421172481,
      "learning_rate": 1.8981946481315645e-06,
      "loss": 0.1009,
      "step": 24736
    },
    {
      "epoch": 0.7216582064297801,
      "grad_norm": 0.8011658588593519,
      "learning_rate": 1.8978241241433454e-06,
      "loss": 0.0992,
      "step": 24737
    },
    {
      "epoch": 0.7216873796604236,
      "grad_norm": 0.7985781412771426,
      "learning_rate": 1.8974536278506134e-06,
      "loss": 0.1265,
      "step": 24738
    },
    {
      "epoch": 0.7217165528910672,
      "grad_norm": 0.6669561995164679,
      "learning_rate": 1.8970831592566734e-06,
      "loss": 0.1188,
      "step": 24739
    },
    {
      "epoch": 0.7217457261217107,
      "grad_norm": 0.894483025862555,
      "learning_rate": 1.8967127183648365e-06,
      "loss": 0.1097,
      "step": 24740
    },
    {
      "epoch": 0.7217748993523543,
      "grad_norm": 0.9363555923795046,
      "learning_rate": 1.896342305178407e-06,
      "loss": 0.1145,
      "step": 24741
    },
    {
      "epoch": 0.7218040725829978,
      "grad_norm": 0.8364978455189405,
      "learning_rate": 1.8959719197006909e-06,
      "loss": 0.1218,
      "step": 24742
    },
    {
      "epoch": 0.7218332458136414,
      "grad_norm": 0.8276715893798426,
      "learning_rate": 1.8956015619349966e-06,
      "loss": 0.1287,
      "step": 24743
    },
    {
      "epoch": 0.7218624190442849,
      "grad_norm": 0.8775399174639029,
      "learning_rate": 1.8952312318846323e-06,
      "loss": 0.135,
      "step": 24744
    },
    {
      "epoch": 0.7218915922749285,
      "grad_norm": 1.222685766362629,
      "learning_rate": 1.8948609295529002e-06,
      "loss": 0.0928,
      "step": 24745
    },
    {
      "epoch": 0.721920765505572,
      "grad_norm": 0.961631466462116,
      "learning_rate": 1.8944906549431108e-06,
      "loss": 0.1305,
      "step": 24746
    },
    {
      "epoch": 0.7219499387362156,
      "grad_norm": 1.0119973346594153,
      "learning_rate": 1.8941204080585667e-06,
      "loss": 0.1436,
      "step": 24747
    },
    {
      "epoch": 0.7219791119668592,
      "grad_norm": 1.1602630861195822,
      "learning_rate": 1.8937501889025732e-06,
      "loss": 0.1256,
      "step": 24748
    },
    {
      "epoch": 0.7220082851975028,
      "grad_norm": 1.1980093307519621,
      "learning_rate": 1.893379997478436e-06,
      "loss": 0.1248,
      "step": 24749
    },
    {
      "epoch": 0.7220374584281464,
      "grad_norm": 1.202511537934108,
      "learning_rate": 1.8930098337894626e-06,
      "loss": 0.1243,
      "step": 24750
    },
    {
      "epoch": 0.7220666316587899,
      "grad_norm": 0.7734793074211808,
      "learning_rate": 1.8926396978389554e-06,
      "loss": 0.1225,
      "step": 24751
    },
    {
      "epoch": 0.7220958048894335,
      "grad_norm": 0.9730790432600112,
      "learning_rate": 1.892269589630218e-06,
      "loss": 0.1315,
      "step": 24752
    },
    {
      "epoch": 0.722124978120077,
      "grad_norm": 1.2765906400452278,
      "learning_rate": 1.891899509166557e-06,
      "loss": 0.1545,
      "step": 24753
    },
    {
      "epoch": 0.7221541513507206,
      "grad_norm": 0.9315206832768864,
      "learning_rate": 1.8915294564512737e-06,
      "loss": 0.1215,
      "step": 24754
    },
    {
      "epoch": 0.7221833245813641,
      "grad_norm": 0.745017946426986,
      "learning_rate": 1.8911594314876736e-06,
      "loss": 0.1117,
      "step": 24755
    },
    {
      "epoch": 0.7222124978120077,
      "grad_norm": 0.8528260442145464,
      "learning_rate": 1.8907894342790617e-06,
      "loss": 0.1519,
      "step": 24756
    },
    {
      "epoch": 0.7222416710426512,
      "grad_norm": 0.9805517312070635,
      "learning_rate": 1.8904194648287394e-06,
      "loss": 0.1131,
      "step": 24757
    },
    {
      "epoch": 0.7222708442732948,
      "grad_norm": 0.7800230015632025,
      "learning_rate": 1.8900495231400079e-06,
      "loss": 0.1179,
      "step": 24758
    },
    {
      "epoch": 0.7223000175039384,
      "grad_norm": 0.9897083552934886,
      "learning_rate": 1.8896796092161735e-06,
      "loss": 0.1299,
      "step": 24759
    },
    {
      "epoch": 0.7223291907345819,
      "grad_norm": 0.8757966254637451,
      "learning_rate": 1.8893097230605356e-06,
      "loss": 0.1204,
      "step": 24760
    },
    {
      "epoch": 0.7223583639652255,
      "grad_norm": 1.1702030094005131,
      "learning_rate": 1.888939864676399e-06,
      "loss": 0.1134,
      "step": 24761
    },
    {
      "epoch": 0.7223875371958691,
      "grad_norm": 0.8368577500639368,
      "learning_rate": 1.8885700340670638e-06,
      "loss": 0.12,
      "step": 24762
    },
    {
      "epoch": 0.7224167104265127,
      "grad_norm": 0.8328544580460618,
      "learning_rate": 1.8882002312358337e-06,
      "loss": 0.1248,
      "step": 24763
    },
    {
      "epoch": 0.7224458836571562,
      "grad_norm": 0.7411473191067168,
      "learning_rate": 1.8878304561860094e-06,
      "loss": 0.0925,
      "step": 24764
    },
    {
      "epoch": 0.7224750568877998,
      "grad_norm": 0.8998334911980462,
      "learning_rate": 1.8874607089208901e-06,
      "loss": 0.131,
      "step": 24765
    },
    {
      "epoch": 0.7225042301184433,
      "grad_norm": 0.8310207083478498,
      "learning_rate": 1.8870909894437783e-06,
      "loss": 0.1492,
      "step": 24766
    },
    {
      "epoch": 0.7225334033490869,
      "grad_norm": 0.6911958778317953,
      "learning_rate": 1.886721297757977e-06,
      "loss": 0.1091,
      "step": 24767
    },
    {
      "epoch": 0.7225625765797304,
      "grad_norm": 0.7323282250704054,
      "learning_rate": 1.8863516338667847e-06,
      "loss": 0.1067,
      "step": 24768
    },
    {
      "epoch": 0.722591749810374,
      "grad_norm": 0.7857957241548171,
      "learning_rate": 1.8859819977735e-06,
      "loss": 0.1111,
      "step": 24769
    },
    {
      "epoch": 0.7226209230410175,
      "grad_norm": 0.758324899343193,
      "learning_rate": 1.885612389481426e-06,
      "loss": 0.1268,
      "step": 24770
    },
    {
      "epoch": 0.7226500962716611,
      "grad_norm": 0.8682851454231227,
      "learning_rate": 1.885242808993862e-06,
      "loss": 0.1303,
      "step": 24771
    },
    {
      "epoch": 0.7226792695023047,
      "grad_norm": 0.7170521968542871,
      "learning_rate": 1.8848732563141026e-06,
      "loss": 0.1328,
      "step": 24772
    },
    {
      "epoch": 0.7227084427329482,
      "grad_norm": 0.945809200661344,
      "learning_rate": 1.8845037314454544e-06,
      "loss": 0.1194,
      "step": 24773
    },
    {
      "epoch": 0.7227376159635918,
      "grad_norm": 0.8845968746110616,
      "learning_rate": 1.8841342343912134e-06,
      "loss": 0.1156,
      "step": 24774
    },
    {
      "epoch": 0.7227667891942353,
      "grad_norm": 0.953148813227204,
      "learning_rate": 1.8837647651546765e-06,
      "loss": 0.1114,
      "step": 24775
    },
    {
      "epoch": 0.722795962424879,
      "grad_norm": 0.8157125894220421,
      "learning_rate": 1.8833953237391456e-06,
      "loss": 0.1212,
      "step": 24776
    },
    {
      "epoch": 0.7228251356555225,
      "grad_norm": 1.2872659325605247,
      "learning_rate": 1.883025910147917e-06,
      "loss": 0.1063,
      "step": 24777
    },
    {
      "epoch": 0.7228543088861661,
      "grad_norm": 0.9941306782651606,
      "learning_rate": 1.8826565243842877e-06,
      "loss": 0.1342,
      "step": 24778
    },
    {
      "epoch": 0.7228834821168096,
      "grad_norm": 0.8272135801291861,
      "learning_rate": 1.8822871664515562e-06,
      "loss": 0.1115,
      "step": 24779
    },
    {
      "epoch": 0.7229126553474532,
      "grad_norm": 0.8707805844517327,
      "learning_rate": 1.8819178363530226e-06,
      "loss": 0.101,
      "step": 24780
    },
    {
      "epoch": 0.7229418285780967,
      "grad_norm": 1.0051174270341248,
      "learning_rate": 1.8815485340919825e-06,
      "loss": 0.1169,
      "step": 24781
    },
    {
      "epoch": 0.7229710018087403,
      "grad_norm": 1.081533538448045,
      "learning_rate": 1.881179259671731e-06,
      "loss": 0.0972,
      "step": 24782
    },
    {
      "epoch": 0.7230001750393839,
      "grad_norm": 0.6500338650694056,
      "learning_rate": 1.8808100130955676e-06,
      "loss": 0.1131,
      "step": 24783
    },
    {
      "epoch": 0.7230293482700274,
      "grad_norm": 0.8756280400598867,
      "learning_rate": 1.8804407943667869e-06,
      "loss": 0.1173,
      "step": 24784
    },
    {
      "epoch": 0.723058521500671,
      "grad_norm": 0.861538997600829,
      "learning_rate": 1.880071603488685e-06,
      "loss": 0.0986,
      "step": 24785
    },
    {
      "epoch": 0.7230876947313145,
      "grad_norm": 0.740512192965018,
      "learning_rate": 1.879702440464562e-06,
      "loss": 0.1009,
      "step": 24786
    },
    {
      "epoch": 0.7231168679619581,
      "grad_norm": 0.9904485291951682,
      "learning_rate": 1.8793333052977098e-06,
      "loss": 0.1195,
      "step": 24787
    },
    {
      "epoch": 0.7231460411926016,
      "grad_norm": 0.8104199356771068,
      "learning_rate": 1.8789641979914237e-06,
      "loss": 0.1142,
      "step": 24788
    },
    {
      "epoch": 0.7231752144232453,
      "grad_norm": 0.7682817340857583,
      "learning_rate": 1.8785951185490014e-06,
      "loss": 0.1132,
      "step": 24789
    },
    {
      "epoch": 0.7232043876538888,
      "grad_norm": 0.8446815571771478,
      "learning_rate": 1.8782260669737357e-06,
      "loss": 0.1306,
      "step": 24790
    },
    {
      "epoch": 0.7232335608845324,
      "grad_norm": 0.7795864492754341,
      "learning_rate": 1.8778570432689236e-06,
      "loss": 0.1157,
      "step": 24791
    },
    {
      "epoch": 0.7232627341151759,
      "grad_norm": 0.7912001186866039,
      "learning_rate": 1.8774880474378571e-06,
      "loss": 0.1042,
      "step": 24792
    },
    {
      "epoch": 0.7232919073458195,
      "grad_norm": 1.1973824302252325,
      "learning_rate": 1.8771190794838333e-06,
      "loss": 0.1379,
      "step": 24793
    },
    {
      "epoch": 0.723321080576463,
      "grad_norm": 0.8313170598836324,
      "learning_rate": 1.876750139410145e-06,
      "loss": 0.1326,
      "step": 24794
    },
    {
      "epoch": 0.7233502538071066,
      "grad_norm": 0.7775042501154492,
      "learning_rate": 1.8763812272200843e-06,
      "loss": 0.1114,
      "step": 24795
    },
    {
      "epoch": 0.7233794270377502,
      "grad_norm": 0.9891694611844759,
      "learning_rate": 1.8760123429169464e-06,
      "loss": 0.119,
      "step": 24796
    },
    {
      "epoch": 0.7234086002683937,
      "grad_norm": 0.7944133919249249,
      "learning_rate": 1.8756434865040262e-06,
      "loss": 0.1205,
      "step": 24797
    },
    {
      "epoch": 0.7234377734990373,
      "grad_norm": 0.7436630045707892,
      "learning_rate": 1.8752746579846148e-06,
      "loss": 0.1104,
      "step": 24798
    },
    {
      "epoch": 0.7234669467296808,
      "grad_norm": 1.0096995984949833,
      "learning_rate": 1.8749058573620039e-06,
      "loss": 0.1255,
      "step": 24799
    },
    {
      "epoch": 0.7234961199603244,
      "grad_norm": 0.8741876356871757,
      "learning_rate": 1.8745370846394894e-06,
      "loss": 0.0964,
      "step": 24800
    },
    {
      "epoch": 0.7235252931909679,
      "grad_norm": 0.8314448827100319,
      "learning_rate": 1.8741683398203614e-06,
      "loss": 0.1074,
      "step": 24801
    },
    {
      "epoch": 0.7235544664216115,
      "grad_norm": 0.966429973558122,
      "learning_rate": 1.8737996229079086e-06,
      "loss": 0.1169,
      "step": 24802
    },
    {
      "epoch": 0.7235836396522551,
      "grad_norm": 0.8734627794263268,
      "learning_rate": 1.8734309339054308e-06,
      "loss": 0.1107,
      "step": 24803
    },
    {
      "epoch": 0.7236128128828987,
      "grad_norm": 1.170058015271319,
      "learning_rate": 1.8730622728162146e-06,
      "loss": 0.1326,
      "step": 24804
    },
    {
      "epoch": 0.7236419861135422,
      "grad_norm": 0.9210037414578498,
      "learning_rate": 1.8726936396435502e-06,
      "loss": 0.1201,
      "step": 24805
    },
    {
      "epoch": 0.7236711593441858,
      "grad_norm": 0.7295522152079452,
      "learning_rate": 1.8723250343907323e-06,
      "loss": 0.1044,
      "step": 24806
    },
    {
      "epoch": 0.7237003325748294,
      "grad_norm": 0.9642615316670107,
      "learning_rate": 1.8719564570610494e-06,
      "loss": 0.1201,
      "step": 24807
    },
    {
      "epoch": 0.7237295058054729,
      "grad_norm": 0.8333528157145845,
      "learning_rate": 1.8715879076577915e-06,
      "loss": 0.1115,
      "step": 24808
    },
    {
      "epoch": 0.7237586790361165,
      "grad_norm": 0.8918676910593354,
      "learning_rate": 1.8712193861842498e-06,
      "loss": 0.1096,
      "step": 24809
    },
    {
      "epoch": 0.72378785226676,
      "grad_norm": 0.9016907346872983,
      "learning_rate": 1.8708508926437157e-06,
      "loss": 0.1103,
      "step": 24810
    },
    {
      "epoch": 0.7238170254974036,
      "grad_norm": 1.0659294528626322,
      "learning_rate": 1.8704824270394783e-06,
      "loss": 0.1153,
      "step": 24811
    },
    {
      "epoch": 0.7238461987280471,
      "grad_norm": 0.9068302087451171,
      "learning_rate": 1.870113989374825e-06,
      "loss": 0.1364,
      "step": 24812
    },
    {
      "epoch": 0.7238753719586907,
      "grad_norm": 1.0306284501844045,
      "learning_rate": 1.8697455796530483e-06,
      "loss": 0.1066,
      "step": 24813
    },
    {
      "epoch": 0.7239045451893342,
      "grad_norm": 1.1067347553094529,
      "learning_rate": 1.8693771978774345e-06,
      "loss": 0.1244,
      "step": 24814
    },
    {
      "epoch": 0.7239337184199778,
      "grad_norm": 0.9523719170841769,
      "learning_rate": 1.8690088440512738e-06,
      "loss": 0.1223,
      "step": 24815
    },
    {
      "epoch": 0.7239628916506214,
      "grad_norm": 0.7094855379979579,
      "learning_rate": 1.8686405181778562e-06,
      "loss": 0.1037,
      "step": 24816
    },
    {
      "epoch": 0.723992064881265,
      "grad_norm": 1.1803413326087289,
      "learning_rate": 1.8682722202604681e-06,
      "loss": 0.1171,
      "step": 24817
    },
    {
      "epoch": 0.7240212381119085,
      "grad_norm": 1.0737906151829388,
      "learning_rate": 1.8679039503023972e-06,
      "loss": 0.1283,
      "step": 24818
    },
    {
      "epoch": 0.7240504113425521,
      "grad_norm": 0.8839486697134835,
      "learning_rate": 1.8675357083069328e-06,
      "loss": 0.1355,
      "step": 24819
    },
    {
      "epoch": 0.7240795845731957,
      "grad_norm": 0.9648421191721859,
      "learning_rate": 1.867167494277361e-06,
      "loss": 0.1232,
      "step": 24820
    },
    {
      "epoch": 0.7241087578038392,
      "grad_norm": 1.0422206619084138,
      "learning_rate": 1.8667993082169712e-06,
      "loss": 0.1416,
      "step": 24821
    },
    {
      "epoch": 0.7241379310344828,
      "grad_norm": 1.1206186102912703,
      "learning_rate": 1.8664311501290478e-06,
      "loss": 0.1203,
      "step": 24822
    },
    {
      "epoch": 0.7241671042651263,
      "grad_norm": 1.2156421710641572,
      "learning_rate": 1.8660630200168806e-06,
      "loss": 0.1068,
      "step": 24823
    },
    {
      "epoch": 0.7241962774957699,
      "grad_norm": 0.7844188174103284,
      "learning_rate": 1.8656949178837547e-06,
      "loss": 0.122,
      "step": 24824
    },
    {
      "epoch": 0.7242254507264134,
      "grad_norm": 0.707850413303378,
      "learning_rate": 1.8653268437329542e-06,
      "loss": 0.1119,
      "step": 24825
    },
    {
      "epoch": 0.724254623957057,
      "grad_norm": 1.2279871966228981,
      "learning_rate": 1.864958797567768e-06,
      "loss": 0.1229,
      "step": 24826
    },
    {
      "epoch": 0.7242837971877005,
      "grad_norm": 0.9589869772952597,
      "learning_rate": 1.8645907793914826e-06,
      "loss": 0.1132,
      "step": 24827
    },
    {
      "epoch": 0.7243129704183441,
      "grad_norm": 0.753126405914351,
      "learning_rate": 1.864222789207382e-06,
      "loss": 0.1301,
      "step": 24828
    },
    {
      "epoch": 0.7243421436489876,
      "grad_norm": 0.7513366561510253,
      "learning_rate": 1.8638548270187505e-06,
      "loss": 0.1166,
      "step": 24829
    },
    {
      "epoch": 0.7243713168796313,
      "grad_norm": 0.9634536537768936,
      "learning_rate": 1.8634868928288757e-06,
      "loss": 0.115,
      "step": 24830
    },
    {
      "epoch": 0.7244004901102749,
      "grad_norm": 0.8460884475997975,
      "learning_rate": 1.863118986641042e-06,
      "loss": 0.1171,
      "step": 24831
    },
    {
      "epoch": 0.7244296633409184,
      "grad_norm": 0.8555442129602252,
      "learning_rate": 1.8627511084585293e-06,
      "loss": 0.1067,
      "step": 24832
    },
    {
      "epoch": 0.724458836571562,
      "grad_norm": 0.7231277466632015,
      "learning_rate": 1.8623832582846291e-06,
      "loss": 0.1054,
      "step": 24833
    },
    {
      "epoch": 0.7244880098022055,
      "grad_norm": 1.2338736174024925,
      "learning_rate": 1.8620154361226218e-06,
      "loss": 0.1427,
      "step": 24834
    },
    {
      "epoch": 0.7245171830328491,
      "grad_norm": 0.8570689943214541,
      "learning_rate": 1.8616476419757907e-06,
      "loss": 0.0983,
      "step": 24835
    },
    {
      "epoch": 0.7245463562634926,
      "grad_norm": 0.7948866818502712,
      "learning_rate": 1.861279875847421e-06,
      "loss": 0.139,
      "step": 24836
    },
    {
      "epoch": 0.7245755294941362,
      "grad_norm": 0.8685926764892993,
      "learning_rate": 1.8609121377407963e-06,
      "loss": 0.1235,
      "step": 24837
    },
    {
      "epoch": 0.7246047027247797,
      "grad_norm": 0.8490860723033917,
      "learning_rate": 1.8605444276591961e-06,
      "loss": 0.1263,
      "step": 24838
    },
    {
      "epoch": 0.7246338759554233,
      "grad_norm": 0.9168573640964627,
      "learning_rate": 1.8601767456059062e-06,
      "loss": 0.1075,
      "step": 24839
    },
    {
      "epoch": 0.7246630491860668,
      "grad_norm": 0.7589705049956947,
      "learning_rate": 1.8598090915842105e-06,
      "loss": 0.1204,
      "step": 24840
    },
    {
      "epoch": 0.7246922224167104,
      "grad_norm": 0.7442359511864499,
      "learning_rate": 1.8594414655973898e-06,
      "loss": 0.1145,
      "step": 24841
    },
    {
      "epoch": 0.7247213956473539,
      "grad_norm": 0.8111783044886859,
      "learning_rate": 1.8590738676487242e-06,
      "loss": 0.1094,
      "step": 24842
    },
    {
      "epoch": 0.7247505688779976,
      "grad_norm": 1.3516789518929335,
      "learning_rate": 1.8587062977414987e-06,
      "loss": 0.1195,
      "step": 24843
    },
    {
      "epoch": 0.7247797421086412,
      "grad_norm": 0.7454359558463477,
      "learning_rate": 1.8583387558789916e-06,
      "loss": 0.1217,
      "step": 24844
    },
    {
      "epoch": 0.7248089153392847,
      "grad_norm": 0.8358274388093241,
      "learning_rate": 1.8579712420644869e-06,
      "loss": 0.1352,
      "step": 24845
    },
    {
      "epoch": 0.7248380885699283,
      "grad_norm": 0.6239342387495486,
      "learning_rate": 1.8576037563012662e-06,
      "loss": 0.0952,
      "step": 24846
    },
    {
      "epoch": 0.7248672618005718,
      "grad_norm": 0.7656401194383812,
      "learning_rate": 1.857236298592609e-06,
      "loss": 0.1604,
      "step": 24847
    },
    {
      "epoch": 0.7248964350312154,
      "grad_norm": 1.0440809395948463,
      "learning_rate": 1.856868868941794e-06,
      "loss": 0.1326,
      "step": 24848
    },
    {
      "epoch": 0.7249256082618589,
      "grad_norm": 0.8446142292096692,
      "learning_rate": 1.8565014673521053e-06,
      "loss": 0.1331,
      "step": 24849
    },
    {
      "epoch": 0.7249547814925025,
      "grad_norm": 0.814132853791011,
      "learning_rate": 1.8561340938268196e-06,
      "loss": 0.1386,
      "step": 24850
    },
    {
      "epoch": 0.724983954723146,
      "grad_norm": 0.7609068087208614,
      "learning_rate": 1.8557667483692193e-06,
      "loss": 0.1274,
      "step": 24851
    },
    {
      "epoch": 0.7250131279537896,
      "grad_norm": 0.9634910209977473,
      "learning_rate": 1.8553994309825818e-06,
      "loss": 0.1122,
      "step": 24852
    },
    {
      "epoch": 0.7250423011844331,
      "grad_norm": 0.7403786149102356,
      "learning_rate": 1.8550321416701888e-06,
      "loss": 0.114,
      "step": 24853
    },
    {
      "epoch": 0.7250714744150767,
      "grad_norm": 0.8961740815132433,
      "learning_rate": 1.8546648804353185e-06,
      "loss": 0.1115,
      "step": 24854
    },
    {
      "epoch": 0.7251006476457202,
      "grad_norm": 0.7385042438001875,
      "learning_rate": 1.8542976472812474e-06,
      "loss": 0.0985,
      "step": 24855
    },
    {
      "epoch": 0.7251298208763638,
      "grad_norm": 1.104591922664808,
      "learning_rate": 1.8539304422112558e-06,
      "loss": 0.1389,
      "step": 24856
    },
    {
      "epoch": 0.7251589941070075,
      "grad_norm": 0.6378269624568983,
      "learning_rate": 1.853563265228624e-06,
      "loss": 0.1044,
      "step": 24857
    },
    {
      "epoch": 0.725188167337651,
      "grad_norm": 0.8157868387104741,
      "learning_rate": 1.853196116336628e-06,
      "loss": 0.1358,
      "step": 24858
    },
    {
      "epoch": 0.7252173405682946,
      "grad_norm": 0.7862279870745205,
      "learning_rate": 1.8528289955385443e-06,
      "loss": 0.1112,
      "step": 24859
    },
    {
      "epoch": 0.7252465137989381,
      "grad_norm": 0.9385475456855338,
      "learning_rate": 1.8524619028376539e-06,
      "loss": 0.11,
      "step": 24860
    },
    {
      "epoch": 0.7252756870295817,
      "grad_norm": 0.7027755140495263,
      "learning_rate": 1.8520948382372323e-06,
      "loss": 0.1069,
      "step": 24861
    },
    {
      "epoch": 0.7253048602602252,
      "grad_norm": 0.8164494945298956,
      "learning_rate": 1.8517278017405532e-06,
      "loss": 0.125,
      "step": 24862
    },
    {
      "epoch": 0.7253340334908688,
      "grad_norm": 0.7517332609638138,
      "learning_rate": 1.8513607933508999e-06,
      "loss": 0.1097,
      "step": 24863
    },
    {
      "epoch": 0.7253632067215123,
      "grad_norm": 0.7674813511763025,
      "learning_rate": 1.8509938130715455e-06,
      "loss": 0.0931,
      "step": 24864
    },
    {
      "epoch": 0.7253923799521559,
      "grad_norm": 0.8343587845088293,
      "learning_rate": 1.8506268609057653e-06,
      "loss": 0.115,
      "step": 24865
    },
    {
      "epoch": 0.7254215531827994,
      "grad_norm": 0.7310635549617341,
      "learning_rate": 1.8502599368568387e-06,
      "loss": 0.1136,
      "step": 24866
    },
    {
      "epoch": 0.725450726413443,
      "grad_norm": 0.6595938915712233,
      "learning_rate": 1.8498930409280392e-06,
      "loss": 0.1036,
      "step": 24867
    },
    {
      "epoch": 0.7254798996440865,
      "grad_norm": 0.7563890394954799,
      "learning_rate": 1.8495261731226404e-06,
      "loss": 0.0987,
      "step": 24868
    },
    {
      "epoch": 0.7255090728747301,
      "grad_norm": 0.8098649358955136,
      "learning_rate": 1.8491593334439206e-06,
      "loss": 0.1129,
      "step": 24869
    },
    {
      "epoch": 0.7255382461053738,
      "grad_norm": 0.7967892081724012,
      "learning_rate": 1.8487925218951553e-06,
      "loss": 0.1011,
      "step": 24870
    },
    {
      "epoch": 0.7255674193360173,
      "grad_norm": 0.9593705090427759,
      "learning_rate": 1.8484257384796184e-06,
      "loss": 0.1015,
      "step": 24871
    },
    {
      "epoch": 0.7255965925666609,
      "grad_norm": 0.8414936894015588,
      "learning_rate": 1.8480589832005824e-06,
      "loss": 0.1047,
      "step": 24872
    },
    {
      "epoch": 0.7256257657973044,
      "grad_norm": 0.8877625900422822,
      "learning_rate": 1.8476922560613247e-06,
      "loss": 0.1357,
      "step": 24873
    },
    {
      "epoch": 0.725654939027948,
      "grad_norm": 0.9182396538995153,
      "learning_rate": 1.8473255570651167e-06,
      "loss": 0.1372,
      "step": 24874
    },
    {
      "epoch": 0.7256841122585915,
      "grad_norm": 0.7526035537212655,
      "learning_rate": 1.8469588862152338e-06,
      "loss": 0.1043,
      "step": 24875
    },
    {
      "epoch": 0.7257132854892351,
      "grad_norm": 0.7054903788913072,
      "learning_rate": 1.8465922435149502e-06,
      "loss": 0.1259,
      "step": 24876
    },
    {
      "epoch": 0.7257424587198786,
      "grad_norm": 0.8085560954450418,
      "learning_rate": 1.846225628967539e-06,
      "loss": 0.1112,
      "step": 24877
    },
    {
      "epoch": 0.7257716319505222,
      "grad_norm": 0.8898010958408633,
      "learning_rate": 1.8458590425762707e-06,
      "loss": 0.1112,
      "step": 24878
    },
    {
      "epoch": 0.7258008051811657,
      "grad_norm": 0.6961278063567853,
      "learning_rate": 1.8454924843444216e-06,
      "loss": 0.1043,
      "step": 24879
    },
    {
      "epoch": 0.7258299784118093,
      "grad_norm": 0.9587722189224249,
      "learning_rate": 1.8451259542752603e-06,
      "loss": 0.1149,
      "step": 24880
    },
    {
      "epoch": 0.7258591516424528,
      "grad_norm": 0.7762580689387995,
      "learning_rate": 1.8447594523720636e-06,
      "loss": 0.1154,
      "step": 24881
    },
    {
      "epoch": 0.7258883248730964,
      "grad_norm": 0.8285094579084039,
      "learning_rate": 1.8443929786380994e-06,
      "loss": 0.0861,
      "step": 24882
    },
    {
      "epoch": 0.72591749810374,
      "grad_norm": 0.7726023454221855,
      "learning_rate": 1.8440265330766432e-06,
      "loss": 0.1172,
      "step": 24883
    },
    {
      "epoch": 0.7259466713343836,
      "grad_norm": 0.7417302916526094,
      "learning_rate": 1.8436601156909645e-06,
      "loss": 0.1268,
      "step": 24884
    },
    {
      "epoch": 0.7259758445650272,
      "grad_norm": 0.8036061521060736,
      "learning_rate": 1.8432937264843338e-06,
      "loss": 0.1074,
      "step": 24885
    },
    {
      "epoch": 0.7260050177956707,
      "grad_norm": 0.8508839085544653,
      "learning_rate": 1.8429273654600221e-06,
      "loss": 0.1046,
      "step": 24886
    },
    {
      "epoch": 0.7260341910263143,
      "grad_norm": 0.8193173401699769,
      "learning_rate": 1.8425610326213034e-06,
      "loss": 0.1253,
      "step": 24887
    },
    {
      "epoch": 0.7260633642569578,
      "grad_norm": 0.7690229197874942,
      "learning_rate": 1.8421947279714464e-06,
      "loss": 0.1248,
      "step": 24888
    },
    {
      "epoch": 0.7260925374876014,
      "grad_norm": 0.8534059239823847,
      "learning_rate": 1.8418284515137192e-06,
      "loss": 0.0993,
      "step": 24889
    },
    {
      "epoch": 0.7261217107182449,
      "grad_norm": 1.0321327097637345,
      "learning_rate": 1.8414622032513952e-06,
      "loss": 0.1436,
      "step": 24890
    },
    {
      "epoch": 0.7261508839488885,
      "grad_norm": 0.7811226031964913,
      "learning_rate": 1.8410959831877423e-06,
      "loss": 0.1093,
      "step": 24891
    },
    {
      "epoch": 0.726180057179532,
      "grad_norm": 0.8800829918309522,
      "learning_rate": 1.8407297913260274e-06,
      "loss": 0.1217,
      "step": 24892
    },
    {
      "epoch": 0.7262092304101756,
      "grad_norm": 1.0248503224959675,
      "learning_rate": 1.8403636276695263e-06,
      "loss": 0.1079,
      "step": 24893
    },
    {
      "epoch": 0.7262384036408192,
      "grad_norm": 0.7391089423226811,
      "learning_rate": 1.8399974922215042e-06,
      "loss": 0.107,
      "step": 24894
    },
    {
      "epoch": 0.7262675768714627,
      "grad_norm": 1.0746904798867014,
      "learning_rate": 1.8396313849852281e-06,
      "loss": 0.1146,
      "step": 24895
    },
    {
      "epoch": 0.7262967501021063,
      "grad_norm": 0.8649387591485509,
      "learning_rate": 1.8392653059639709e-06,
      "loss": 0.116,
      "step": 24896
    },
    {
      "epoch": 0.7263259233327499,
      "grad_norm": 0.9121882526351213,
      "learning_rate": 1.838899255160998e-06,
      "loss": 0.1134,
      "step": 24897
    },
    {
      "epoch": 0.7263550965633935,
      "grad_norm": 0.965498288637953,
      "learning_rate": 1.838533232579577e-06,
      "loss": 0.1362,
      "step": 24898
    },
    {
      "epoch": 0.726384269794037,
      "grad_norm": 0.836395609570536,
      "learning_rate": 1.838167238222976e-06,
      "loss": 0.1197,
      "step": 24899
    },
    {
      "epoch": 0.7264134430246806,
      "grad_norm": 0.7118901604503297,
      "learning_rate": 1.8378012720944649e-06,
      "loss": 0.1145,
      "step": 24900
    },
    {
      "epoch": 0.7264426162553241,
      "grad_norm": 0.7555779815364517,
      "learning_rate": 1.837435334197309e-06,
      "loss": 0.1266,
      "step": 24901
    },
    {
      "epoch": 0.7264717894859677,
      "grad_norm": 0.9109027979813684,
      "learning_rate": 1.8370694245347736e-06,
      "loss": 0.1153,
      "step": 24902
    },
    {
      "epoch": 0.7265009627166112,
      "grad_norm": 1.0025817004775293,
      "learning_rate": 1.8367035431101293e-06,
      "loss": 0.1309,
      "step": 24903
    },
    {
      "epoch": 0.7265301359472548,
      "grad_norm": 0.8049345831601331,
      "learning_rate": 1.8363376899266394e-06,
      "loss": 0.1137,
      "step": 24904
    },
    {
      "epoch": 0.7265593091778983,
      "grad_norm": 0.8043646836958517,
      "learning_rate": 1.8359718649875708e-06,
      "loss": 0.1176,
      "step": 24905
    },
    {
      "epoch": 0.7265884824085419,
      "grad_norm": 1.1209706309388336,
      "learning_rate": 1.8356060682961918e-06,
      "loss": 0.1536,
      "step": 24906
    },
    {
      "epoch": 0.7266176556391855,
      "grad_norm": 0.7374726980267576,
      "learning_rate": 1.8352402998557667e-06,
      "loss": 0.1154,
      "step": 24907
    },
    {
      "epoch": 0.726646828869829,
      "grad_norm": 0.8924252687802675,
      "learning_rate": 1.834874559669559e-06,
      "loss": 0.0989,
      "step": 24908
    },
    {
      "epoch": 0.7266760021004726,
      "grad_norm": 0.830323596731376,
      "learning_rate": 1.8345088477408368e-06,
      "loss": 0.1395,
      "step": 24909
    },
    {
      "epoch": 0.7267051753311161,
      "grad_norm": 1.0699065525042704,
      "learning_rate": 1.834143164072863e-06,
      "loss": 0.1019,
      "step": 24910
    },
    {
      "epoch": 0.7267343485617598,
      "grad_norm": 1.0995388157216845,
      "learning_rate": 1.8337775086689047e-06,
      "loss": 0.1228,
      "step": 24911
    },
    {
      "epoch": 0.7267635217924033,
      "grad_norm": 0.9635517309064618,
      "learning_rate": 1.8334118815322233e-06,
      "loss": 0.1294,
      "step": 24912
    },
    {
      "epoch": 0.7267926950230469,
      "grad_norm": 1.1126481145869898,
      "learning_rate": 1.833046282666086e-06,
      "loss": 0.1157,
      "step": 24913
    },
    {
      "epoch": 0.7268218682536904,
      "grad_norm": 1.0521044133004638,
      "learning_rate": 1.832680712073756e-06,
      "loss": 0.1127,
      "step": 24914
    },
    {
      "epoch": 0.726851041484334,
      "grad_norm": 0.8842440041359714,
      "learning_rate": 1.8323151697584946e-06,
      "loss": 0.1122,
      "step": 24915
    },
    {
      "epoch": 0.7268802147149775,
      "grad_norm": 0.7412429925750073,
      "learning_rate": 1.8319496557235667e-06,
      "loss": 0.1134,
      "step": 24916
    },
    {
      "epoch": 0.7269093879456211,
      "grad_norm": 0.8617024744473915,
      "learning_rate": 1.8315841699722386e-06,
      "loss": 0.1233,
      "step": 24917
    },
    {
      "epoch": 0.7269385611762647,
      "grad_norm": 0.7391870650891543,
      "learning_rate": 1.8312187125077703e-06,
      "loss": 0.0984,
      "step": 24918
    },
    {
      "epoch": 0.7269677344069082,
      "grad_norm": 0.8149588234882889,
      "learning_rate": 1.830853283333423e-06,
      "loss": 0.1067,
      "step": 24919
    },
    {
      "epoch": 0.7269969076375518,
      "grad_norm": 0.9990044124623102,
      "learning_rate": 1.8304878824524625e-06,
      "loss": 0.1382,
      "step": 24920
    },
    {
      "epoch": 0.7270260808681953,
      "grad_norm": 0.9153443954209698,
      "learning_rate": 1.8301225098681502e-06,
      "loss": 0.091,
      "step": 24921
    },
    {
      "epoch": 0.7270552540988389,
      "grad_norm": 0.6446411753985442,
      "learning_rate": 1.8297571655837437e-06,
      "loss": 0.1143,
      "step": 24922
    },
    {
      "epoch": 0.7270844273294824,
      "grad_norm": 0.7362736806102024,
      "learning_rate": 1.829391849602512e-06,
      "loss": 0.0873,
      "step": 24923
    },
    {
      "epoch": 0.7271136005601261,
      "grad_norm": 0.8609760765727609,
      "learning_rate": 1.8290265619277125e-06,
      "loss": 0.1237,
      "step": 24924
    },
    {
      "epoch": 0.7271427737907696,
      "grad_norm": 1.6472467366255574,
      "learning_rate": 1.8286613025626054e-06,
      "loss": 0.112,
      "step": 24925
    },
    {
      "epoch": 0.7271719470214132,
      "grad_norm": 0.8878191010116272,
      "learning_rate": 1.8282960715104553e-06,
      "loss": 0.1197,
      "step": 24926
    },
    {
      "epoch": 0.7272011202520567,
      "grad_norm": 0.8555477765704576,
      "learning_rate": 1.82793086877452e-06,
      "loss": 0.1128,
      "step": 24927
    },
    {
      "epoch": 0.7272302934827003,
      "grad_norm": 0.848971547739859,
      "learning_rate": 1.8275656943580594e-06,
      "loss": 0.1244,
      "step": 24928
    },
    {
      "epoch": 0.7272594667133438,
      "grad_norm": 0.7995870031921165,
      "learning_rate": 1.8272005482643352e-06,
      "loss": 0.1204,
      "step": 24929
    },
    {
      "epoch": 0.7272886399439874,
      "grad_norm": 0.8450287379984195,
      "learning_rate": 1.8268354304966084e-06,
      "loss": 0.1249,
      "step": 24930
    },
    {
      "epoch": 0.727317813174631,
      "grad_norm": 0.8166409415337273,
      "learning_rate": 1.8264703410581375e-06,
      "loss": 0.1092,
      "step": 24931
    },
    {
      "epoch": 0.7273469864052745,
      "grad_norm": 0.8586100750549512,
      "learning_rate": 1.82610527995218e-06,
      "loss": 0.1089,
      "step": 24932
    },
    {
      "epoch": 0.7273761596359181,
      "grad_norm": 0.9073711672332692,
      "learning_rate": 1.8257402471819991e-06,
      "loss": 0.1217,
      "step": 24933
    },
    {
      "epoch": 0.7274053328665616,
      "grad_norm": 0.8195584608933435,
      "learning_rate": 1.8253752427508493e-06,
      "loss": 0.1119,
      "step": 24934
    },
    {
      "epoch": 0.7274345060972052,
      "grad_norm": 1.159609683019026,
      "learning_rate": 1.8250102666619917e-06,
      "loss": 0.0917,
      "step": 24935
    },
    {
      "epoch": 0.7274636793278487,
      "grad_norm": 0.7340335281291133,
      "learning_rate": 1.8246453189186857e-06,
      "loss": 0.1053,
      "step": 24936
    },
    {
      "epoch": 0.7274928525584923,
      "grad_norm": 0.855828491858628,
      "learning_rate": 1.8242803995241887e-06,
      "loss": 0.1448,
      "step": 24937
    },
    {
      "epoch": 0.7275220257891359,
      "grad_norm": 0.9658714695652126,
      "learning_rate": 1.8239155084817567e-06,
      "loss": 0.1476,
      "step": 24938
    },
    {
      "epoch": 0.7275511990197795,
      "grad_norm": 1.1787234210589748,
      "learning_rate": 1.8235506457946505e-06,
      "loss": 0.1184,
      "step": 24939
    },
    {
      "epoch": 0.727580372250423,
      "grad_norm": 0.7264636736826157,
      "learning_rate": 1.8231858114661238e-06,
      "loss": 0.1262,
      "step": 24940
    },
    {
      "epoch": 0.7276095454810666,
      "grad_norm": 1.0263366576540314,
      "learning_rate": 1.8228210054994377e-06,
      "loss": 0.109,
      "step": 24941
    },
    {
      "epoch": 0.7276387187117102,
      "grad_norm": 1.0271232640973176,
      "learning_rate": 1.8224562278978452e-06,
      "loss": 0.096,
      "step": 24942
    },
    {
      "epoch": 0.7276678919423537,
      "grad_norm": 0.8297968739124395,
      "learning_rate": 1.8220914786646071e-06,
      "loss": 0.0961,
      "step": 24943
    },
    {
      "epoch": 0.7276970651729973,
      "grad_norm": 0.8402880182245117,
      "learning_rate": 1.821726757802978e-06,
      "loss": 0.1192,
      "step": 24944
    },
    {
      "epoch": 0.7277262384036408,
      "grad_norm": 1.144508257056658,
      "learning_rate": 1.8213620653162111e-06,
      "loss": 0.1172,
      "step": 24945
    },
    {
      "epoch": 0.7277554116342844,
      "grad_norm": 1.160513505920864,
      "learning_rate": 1.8209974012075654e-06,
      "loss": 0.1118,
      "step": 24946
    },
    {
      "epoch": 0.7277845848649279,
      "grad_norm": 0.85734785080866,
      "learning_rate": 1.8206327654802975e-06,
      "loss": 0.1146,
      "step": 24947
    },
    {
      "epoch": 0.7278137580955715,
      "grad_norm": 0.8636575756890902,
      "learning_rate": 1.8202681581376614e-06,
      "loss": 0.116,
      "step": 24948
    },
    {
      "epoch": 0.727842931326215,
      "grad_norm": 1.0040967361817545,
      "learning_rate": 1.8199035791829105e-06,
      "loss": 0.1373,
      "step": 24949
    },
    {
      "epoch": 0.7278721045568586,
      "grad_norm": 1.1790709096440317,
      "learning_rate": 1.8195390286193027e-06,
      "loss": 0.1018,
      "step": 24950
    },
    {
      "epoch": 0.7279012777875022,
      "grad_norm": 0.9110015649735659,
      "learning_rate": 1.81917450645009e-06,
      "loss": 0.108,
      "step": 24951
    },
    {
      "epoch": 0.7279304510181458,
      "grad_norm": 0.8987094341283958,
      "learning_rate": 1.8188100126785273e-06,
      "loss": 0.1067,
      "step": 24952
    },
    {
      "epoch": 0.7279596242487893,
      "grad_norm": 0.9444427978391019,
      "learning_rate": 1.8184455473078717e-06,
      "loss": 0.114,
      "step": 24953
    },
    {
      "epoch": 0.7279887974794329,
      "grad_norm": 1.0161299164683342,
      "learning_rate": 1.8180811103413743e-06,
      "loss": 0.0994,
      "step": 24954
    },
    {
      "epoch": 0.7280179707100765,
      "grad_norm": 0.8559058581243981,
      "learning_rate": 1.8177167017822878e-06,
      "loss": 0.1099,
      "step": 24955
    },
    {
      "epoch": 0.72804714394072,
      "grad_norm": 0.9927475393473875,
      "learning_rate": 1.8173523216338685e-06,
      "loss": 0.1209,
      "step": 24956
    },
    {
      "epoch": 0.7280763171713636,
      "grad_norm": 0.8647802236615696,
      "learning_rate": 1.8169879698993665e-06,
      "loss": 0.1179,
      "step": 24957
    },
    {
      "epoch": 0.7281054904020071,
      "grad_norm": 0.7483838827480895,
      "learning_rate": 1.8166236465820375e-06,
      "loss": 0.1006,
      "step": 24958
    },
    {
      "epoch": 0.7281346636326507,
      "grad_norm": 1.0133208695063225,
      "learning_rate": 1.8162593516851308e-06,
      "loss": 0.1194,
      "step": 24959
    },
    {
      "epoch": 0.7281638368632942,
      "grad_norm": 0.8257053792932199,
      "learning_rate": 1.8158950852119024e-06,
      "loss": 0.1186,
      "step": 24960
    },
    {
      "epoch": 0.7281930100939378,
      "grad_norm": 0.7500533278285161,
      "learning_rate": 1.8155308471656024e-06,
      "loss": 0.1047,
      "step": 24961
    },
    {
      "epoch": 0.7282221833245813,
      "grad_norm": 1.0483414779193183,
      "learning_rate": 1.8151666375494815e-06,
      "loss": 0.1224,
      "step": 24962
    },
    {
      "epoch": 0.7282513565552249,
      "grad_norm": 0.8956709519972501,
      "learning_rate": 1.8148024563667926e-06,
      "loss": 0.1093,
      "step": 24963
    },
    {
      "epoch": 0.7282805297858684,
      "grad_norm": 1.3678983466767172,
      "learning_rate": 1.8144383036207886e-06,
      "loss": 0.1462,
      "step": 24964
    },
    {
      "epoch": 0.7283097030165121,
      "grad_norm": 0.762653630359437,
      "learning_rate": 1.8140741793147172e-06,
      "loss": 0.1057,
      "step": 24965
    },
    {
      "epoch": 0.7283388762471557,
      "grad_norm": 0.9833261476048476,
      "learning_rate": 1.8137100834518323e-06,
      "loss": 0.1346,
      "step": 24966
    },
    {
      "epoch": 0.7283680494777992,
      "grad_norm": 1.3145407068180195,
      "learning_rate": 1.8133460160353832e-06,
      "loss": 0.112,
      "step": 24967
    },
    {
      "epoch": 0.7283972227084428,
      "grad_norm": 1.1744594263215486,
      "learning_rate": 1.8129819770686192e-06,
      "loss": 0.0967,
      "step": 24968
    },
    {
      "epoch": 0.7284263959390863,
      "grad_norm": 0.6977637276046625,
      "learning_rate": 1.8126179665547905e-06,
      "loss": 0.1148,
      "step": 24969
    },
    {
      "epoch": 0.7284555691697299,
      "grad_norm": 0.8791605244855828,
      "learning_rate": 1.8122539844971498e-06,
      "loss": 0.1244,
      "step": 24970
    },
    {
      "epoch": 0.7284847424003734,
      "grad_norm": 1.3230451343526601,
      "learning_rate": 1.8118900308989446e-06,
      "loss": 0.1262,
      "step": 24971
    },
    {
      "epoch": 0.728513915631017,
      "grad_norm": 1.0614937945500669,
      "learning_rate": 1.8115261057634226e-06,
      "loss": 0.1222,
      "step": 24972
    },
    {
      "epoch": 0.7285430888616605,
      "grad_norm": 0.8326956579195125,
      "learning_rate": 1.8111622090938357e-06,
      "loss": 0.1358,
      "step": 24973
    },
    {
      "epoch": 0.7285722620923041,
      "grad_norm": 0.8717068756020058,
      "learning_rate": 1.8107983408934315e-06,
      "loss": 0.1004,
      "step": 24974
    },
    {
      "epoch": 0.7286014353229476,
      "grad_norm": 1.1216425684225206,
      "learning_rate": 1.8104345011654566e-06,
      "loss": 0.1105,
      "step": 24975
    },
    {
      "epoch": 0.7286306085535912,
      "grad_norm": 0.7841207142089922,
      "learning_rate": 1.810070689913161e-06,
      "loss": 0.1049,
      "step": 24976
    },
    {
      "epoch": 0.7286597817842347,
      "grad_norm": 0.8524723388647767,
      "learning_rate": 1.8097069071397943e-06,
      "loss": 0.1108,
      "step": 24977
    },
    {
      "epoch": 0.7286889550148783,
      "grad_norm": 0.9369938320213941,
      "learning_rate": 1.8093431528486034e-06,
      "loss": 0.137,
      "step": 24978
    },
    {
      "epoch": 0.728718128245522,
      "grad_norm": 0.8796896474443129,
      "learning_rate": 1.808979427042833e-06,
      "loss": 0.1249,
      "step": 24979
    },
    {
      "epoch": 0.7287473014761655,
      "grad_norm": 0.9817665157287899,
      "learning_rate": 1.8086157297257346e-06,
      "loss": 0.1179,
      "step": 24980
    },
    {
      "epoch": 0.7287764747068091,
      "grad_norm": 0.8483677780760432,
      "learning_rate": 1.808252060900551e-06,
      "loss": 0.1111,
      "step": 24981
    },
    {
      "epoch": 0.7288056479374526,
      "grad_norm": 0.7097324491120331,
      "learning_rate": 1.8078884205705311e-06,
      "loss": 0.1133,
      "step": 24982
    },
    {
      "epoch": 0.7288348211680962,
      "grad_norm": 0.7778382295973635,
      "learning_rate": 1.8075248087389236e-06,
      "loss": 0.124,
      "step": 24983
    },
    {
      "epoch": 0.7288639943987397,
      "grad_norm": 0.7890920754294309,
      "learning_rate": 1.8071612254089722e-06,
      "loss": 0.1055,
      "step": 24984
    },
    {
      "epoch": 0.7288931676293833,
      "grad_norm": 0.7835489966044812,
      "learning_rate": 1.8067976705839208e-06,
      "loss": 0.1204,
      "step": 24985
    },
    {
      "epoch": 0.7289223408600268,
      "grad_norm": 0.8632739473909581,
      "learning_rate": 1.8064341442670203e-06,
      "loss": 0.1078,
      "step": 24986
    },
    {
      "epoch": 0.7289515140906704,
      "grad_norm": 0.747518900822516,
      "learning_rate": 1.8060706464615108e-06,
      "loss": 0.1176,
      "step": 24987
    },
    {
      "epoch": 0.7289806873213139,
      "grad_norm": 0.9503703476024202,
      "learning_rate": 1.8057071771706424e-06,
      "loss": 0.129,
      "step": 24988
    },
    {
      "epoch": 0.7290098605519575,
      "grad_norm": 0.9969023318960142,
      "learning_rate": 1.8053437363976556e-06,
      "loss": 0.1269,
      "step": 24989
    },
    {
      "epoch": 0.729039033782601,
      "grad_norm": 0.8066353602916551,
      "learning_rate": 1.8049803241457996e-06,
      "loss": 0.1094,
      "step": 24990
    },
    {
      "epoch": 0.7290682070132446,
      "grad_norm": 0.923421178093087,
      "learning_rate": 1.8046169404183162e-06,
      "loss": 0.1116,
      "step": 24991
    },
    {
      "epoch": 0.7290973802438883,
      "grad_norm": 0.7958522972223832,
      "learning_rate": 1.8042535852184484e-06,
      "loss": 0.1343,
      "step": 24992
    },
    {
      "epoch": 0.7291265534745318,
      "grad_norm": 0.9977349501167543,
      "learning_rate": 1.8038902585494417e-06,
      "loss": 0.1024,
      "step": 24993
    },
    {
      "epoch": 0.7291557267051754,
      "grad_norm": 0.9675218438224169,
      "learning_rate": 1.803526960414541e-06,
      "loss": 0.133,
      "step": 24994
    },
    {
      "epoch": 0.7291848999358189,
      "grad_norm": 0.7603232056663293,
      "learning_rate": 1.8031636908169876e-06,
      "loss": 0.1067,
      "step": 24995
    },
    {
      "epoch": 0.7292140731664625,
      "grad_norm": 0.788021842468877,
      "learning_rate": 1.8028004497600265e-06,
      "loss": 0.1142,
      "step": 24996
    },
    {
      "epoch": 0.729243246397106,
      "grad_norm": 0.7482645623370769,
      "learning_rate": 1.8024372372469008e-06,
      "loss": 0.1343,
      "step": 24997
    },
    {
      "epoch": 0.7292724196277496,
      "grad_norm": 0.8244271398921885,
      "learning_rate": 1.8020740532808495e-06,
      "loss": 0.1108,
      "step": 24998
    },
    {
      "epoch": 0.7293015928583931,
      "grad_norm": 1.3435602789569605,
      "learning_rate": 1.8017108978651182e-06,
      "loss": 0.1211,
      "step": 24999
    },
    {
      "epoch": 0.7293307660890367,
      "grad_norm": 1.025693838787472,
      "learning_rate": 1.8013477710029498e-06,
      "loss": 0.1218,
      "step": 25000
    },
    {
      "epoch": 0.7293599393196802,
      "grad_norm": 0.7885644961583933,
      "learning_rate": 1.8009846726975849e-06,
      "loss": 0.1245,
      "step": 25001
    },
    {
      "epoch": 0.7293891125503238,
      "grad_norm": 0.8563145559061879,
      "learning_rate": 1.8006216029522638e-06,
      "loss": 0.1336,
      "step": 25002
    },
    {
      "epoch": 0.7294182857809673,
      "grad_norm": 0.7020424218793577,
      "learning_rate": 1.8002585617702313e-06,
      "loss": 0.1084,
      "step": 25003
    },
    {
      "epoch": 0.7294474590116109,
      "grad_norm": 1.0573065590986708,
      "learning_rate": 1.7998955491547254e-06,
      "loss": 0.1287,
      "step": 25004
    },
    {
      "epoch": 0.7294766322422545,
      "grad_norm": 0.7594585977249245,
      "learning_rate": 1.7995325651089873e-06,
      "loss": 0.1316,
      "step": 25005
    },
    {
      "epoch": 0.7295058054728981,
      "grad_norm": 0.7442154971434948,
      "learning_rate": 1.7991696096362582e-06,
      "loss": 0.1058,
      "step": 25006
    },
    {
      "epoch": 0.7295349787035417,
      "grad_norm": 0.914630533632953,
      "learning_rate": 1.7988066827397805e-06,
      "loss": 0.0873,
      "step": 25007
    },
    {
      "epoch": 0.7295641519341852,
      "grad_norm": 0.8939894431960431,
      "learning_rate": 1.7984437844227925e-06,
      "loss": 0.1189,
      "step": 25008
    },
    {
      "epoch": 0.7295933251648288,
      "grad_norm": 1.074739453093371,
      "learning_rate": 1.7980809146885325e-06,
      "loss": 0.1462,
      "step": 25009
    },
    {
      "epoch": 0.7296224983954723,
      "grad_norm": 0.7462093769197482,
      "learning_rate": 1.7977180735402433e-06,
      "loss": 0.1088,
      "step": 25010
    },
    {
      "epoch": 0.7296516716261159,
      "grad_norm": 0.9067292778305343,
      "learning_rate": 1.797355260981161e-06,
      "loss": 0.1046,
      "step": 25011
    },
    {
      "epoch": 0.7296808448567594,
      "grad_norm": 0.707973753350686,
      "learning_rate": 1.7969924770145264e-06,
      "loss": 0.1181,
      "step": 25012
    },
    {
      "epoch": 0.729710018087403,
      "grad_norm": 0.7722361338043253,
      "learning_rate": 1.79662972164358e-06,
      "loss": 0.0997,
      "step": 25013
    },
    {
      "epoch": 0.7297391913180465,
      "grad_norm": 0.8776022001151548,
      "learning_rate": 1.7962669948715594e-06,
      "loss": 0.1326,
      "step": 25014
    },
    {
      "epoch": 0.7297683645486901,
      "grad_norm": 0.9531958295800378,
      "learning_rate": 1.7959042967016998e-06,
      "loss": 0.1354,
      "step": 25015
    },
    {
      "epoch": 0.7297975377793336,
      "grad_norm": 0.9746917684306904,
      "learning_rate": 1.7955416271372438e-06,
      "loss": 0.1289,
      "step": 25016
    },
    {
      "epoch": 0.7298267110099772,
      "grad_norm": 0.9164394104439284,
      "learning_rate": 1.7951789861814251e-06,
      "loss": 0.0989,
      "step": 25017
    },
    {
      "epoch": 0.7298558842406208,
      "grad_norm": 0.727406514704139,
      "learning_rate": 1.7948163738374858e-06,
      "loss": 0.1026,
      "step": 25018
    },
    {
      "epoch": 0.7298850574712644,
      "grad_norm": 0.8699623298374827,
      "learning_rate": 1.7944537901086585e-06,
      "loss": 0.1146,
      "step": 25019
    },
    {
      "epoch": 0.729914230701908,
      "grad_norm": 0.847542047979384,
      "learning_rate": 1.7940912349981844e-06,
      "loss": 0.114,
      "step": 25020
    },
    {
      "epoch": 0.7299434039325515,
      "grad_norm": 0.8382347962971295,
      "learning_rate": 1.793728708509298e-06,
      "loss": 0.096,
      "step": 25021
    },
    {
      "epoch": 0.7299725771631951,
      "grad_norm": 0.6323190932166773,
      "learning_rate": 1.7933662106452349e-06,
      "loss": 0.1168,
      "step": 25022
    },
    {
      "epoch": 0.7300017503938386,
      "grad_norm": 0.7482440952761785,
      "learning_rate": 1.7930037414092333e-06,
      "loss": 0.1158,
      "step": 25023
    },
    {
      "epoch": 0.7300309236244822,
      "grad_norm": 0.8202781357756733,
      "learning_rate": 1.7926413008045296e-06,
      "loss": 0.1095,
      "step": 25024
    },
    {
      "epoch": 0.7300600968551257,
      "grad_norm": 0.7695006838706988,
      "learning_rate": 1.7922788888343574e-06,
      "loss": 0.135,
      "step": 25025
    },
    {
      "epoch": 0.7300892700857693,
      "grad_norm": 0.7794378186839271,
      "learning_rate": 1.7919165055019555e-06,
      "loss": 0.1222,
      "step": 25026
    },
    {
      "epoch": 0.7301184433164128,
      "grad_norm": 0.8003448262206493,
      "learning_rate": 1.7915541508105566e-06,
      "loss": 0.1027,
      "step": 25027
    },
    {
      "epoch": 0.7301476165470564,
      "grad_norm": 0.9064438954442364,
      "learning_rate": 1.7911918247633953e-06,
      "loss": 0.1165,
      "step": 25028
    },
    {
      "epoch": 0.7301767897777,
      "grad_norm": 0.8687801863595818,
      "learning_rate": 1.7908295273637066e-06,
      "loss": 0.099,
      "step": 25029
    },
    {
      "epoch": 0.7302059630083435,
      "grad_norm": 0.743202190253119,
      "learning_rate": 1.790467258614728e-06,
      "loss": 0.1135,
      "step": 25030
    },
    {
      "epoch": 0.7302351362389871,
      "grad_norm": 0.847381812584872,
      "learning_rate": 1.7901050185196916e-06,
      "loss": 0.1356,
      "step": 25031
    },
    {
      "epoch": 0.7302643094696306,
      "grad_norm": 0.8066910304199605,
      "learning_rate": 1.7897428070818295e-06,
      "loss": 0.1318,
      "step": 25032
    },
    {
      "epoch": 0.7302934827002743,
      "grad_norm": 0.6904417673296508,
      "learning_rate": 1.7893806243043794e-06,
      "loss": 0.1082,
      "step": 25033
    },
    {
      "epoch": 0.7303226559309178,
      "grad_norm": 0.8160857484736981,
      "learning_rate": 1.7890184701905723e-06,
      "loss": 0.1323,
      "step": 25034
    },
    {
      "epoch": 0.7303518291615614,
      "grad_norm": 0.8490542342399159,
      "learning_rate": 1.7886563447436394e-06,
      "loss": 0.1166,
      "step": 25035
    },
    {
      "epoch": 0.7303810023922049,
      "grad_norm": 0.9124067324639801,
      "learning_rate": 1.788294247966817e-06,
      "loss": 0.143,
      "step": 25036
    },
    {
      "epoch": 0.7304101756228485,
      "grad_norm": 1.24884392752143,
      "learning_rate": 1.7879321798633381e-06,
      "loss": 0.1181,
      "step": 25037
    },
    {
      "epoch": 0.730439348853492,
      "grad_norm": 0.7764460148314785,
      "learning_rate": 1.7875701404364337e-06,
      "loss": 0.095,
      "step": 25038
    },
    {
      "epoch": 0.7304685220841356,
      "grad_norm": 0.7370511598440967,
      "learning_rate": 1.787208129689335e-06,
      "loss": 0.0896,
      "step": 25039
    },
    {
      "epoch": 0.7304976953147791,
      "grad_norm": 0.8654637129815397,
      "learning_rate": 1.786846147625277e-06,
      "loss": 0.1231,
      "step": 25040
    },
    {
      "epoch": 0.7305268685454227,
      "grad_norm": 0.9567156856282536,
      "learning_rate": 1.7864841942474876e-06,
      "loss": 0.1301,
      "step": 25041
    },
    {
      "epoch": 0.7305560417760663,
      "grad_norm": 0.9964630297756714,
      "learning_rate": 1.7861222695592e-06,
      "loss": 0.111,
      "step": 25042
    },
    {
      "epoch": 0.7305852150067098,
      "grad_norm": 0.8372688229347758,
      "learning_rate": 1.7857603735636475e-06,
      "loss": 0.1111,
      "step": 25043
    },
    {
      "epoch": 0.7306143882373534,
      "grad_norm": 0.7720023441235836,
      "learning_rate": 1.7853985062640589e-06,
      "loss": 0.1134,
      "step": 25044
    },
    {
      "epoch": 0.7306435614679969,
      "grad_norm": 0.7857774481382417,
      "learning_rate": 1.7850366676636632e-06,
      "loss": 0.1057,
      "step": 25045
    },
    {
      "epoch": 0.7306727346986406,
      "grad_norm": 0.674889643651943,
      "learning_rate": 1.7846748577656947e-06,
      "loss": 0.106,
      "step": 25046
    },
    {
      "epoch": 0.7307019079292841,
      "grad_norm": 0.9857571154306463,
      "learning_rate": 1.7843130765733797e-06,
      "loss": 0.1297,
      "step": 25047
    },
    {
      "epoch": 0.7307310811599277,
      "grad_norm": 0.9845988416842765,
      "learning_rate": 1.7839513240899513e-06,
      "loss": 0.1473,
      "step": 25048
    },
    {
      "epoch": 0.7307602543905712,
      "grad_norm": 0.8661578830324631,
      "learning_rate": 1.7835896003186366e-06,
      "loss": 0.1024,
      "step": 25049
    },
    {
      "epoch": 0.7307894276212148,
      "grad_norm": 0.9033248694122342,
      "learning_rate": 1.7832279052626677e-06,
      "loss": 0.1505,
      "step": 25050
    },
    {
      "epoch": 0.7308186008518583,
      "grad_norm": 1.0295070321884365,
      "learning_rate": 1.7828662389252722e-06,
      "loss": 0.145,
      "step": 25051
    },
    {
      "epoch": 0.7308477740825019,
      "grad_norm": 0.8899622472839388,
      "learning_rate": 1.7825046013096769e-06,
      "loss": 0.1216,
      "step": 25052
    },
    {
      "epoch": 0.7308769473131455,
      "grad_norm": 1.0187479512197988,
      "learning_rate": 1.7821429924191125e-06,
      "loss": 0.1072,
      "step": 25053
    },
    {
      "epoch": 0.730906120543789,
      "grad_norm": 1.1032520339193246,
      "learning_rate": 1.781781412256809e-06,
      "loss": 0.1129,
      "step": 25054
    },
    {
      "epoch": 0.7309352937744326,
      "grad_norm": 0.9820851148701948,
      "learning_rate": 1.7814198608259931e-06,
      "loss": 0.1361,
      "step": 25055
    },
    {
      "epoch": 0.7309644670050761,
      "grad_norm": 0.7417339043079931,
      "learning_rate": 1.7810583381298902e-06,
      "loss": 0.0986,
      "step": 25056
    },
    {
      "epoch": 0.7309936402357197,
      "grad_norm": 0.9315353013191379,
      "learning_rate": 1.780696844171732e-06,
      "loss": 0.1075,
      "step": 25057
    },
    {
      "epoch": 0.7310228134663632,
      "grad_norm": 0.6483906621132459,
      "learning_rate": 1.7803353789547422e-06,
      "loss": 0.1147,
      "step": 25058
    },
    {
      "epoch": 0.7310519866970068,
      "grad_norm": 1.0000328173101294,
      "learning_rate": 1.7799739424821494e-06,
      "loss": 0.1553,
      "step": 25059
    },
    {
      "epoch": 0.7310811599276504,
      "grad_norm": 0.7642304727740475,
      "learning_rate": 1.7796125347571825e-06,
      "loss": 0.128,
      "step": 25060
    },
    {
      "epoch": 0.731110333158294,
      "grad_norm": 1.00476660559154,
      "learning_rate": 1.779251155783066e-06,
      "loss": 0.123,
      "step": 25061
    },
    {
      "epoch": 0.7311395063889375,
      "grad_norm": 0.818370657229027,
      "learning_rate": 1.7788898055630243e-06,
      "loss": 0.1273,
      "step": 25062
    },
    {
      "epoch": 0.7311686796195811,
      "grad_norm": 0.9591897965233913,
      "learning_rate": 1.7785284841002876e-06,
      "loss": 0.1256,
      "step": 25063
    },
    {
      "epoch": 0.7311978528502246,
      "grad_norm": 0.9170908665415384,
      "learning_rate": 1.7781671913980797e-06,
      "loss": 0.1303,
      "step": 25064
    },
    {
      "epoch": 0.7312270260808682,
      "grad_norm": 0.687266642683632,
      "learning_rate": 1.7778059274596237e-06,
      "loss": 0.1316,
      "step": 25065
    },
    {
      "epoch": 0.7312561993115118,
      "grad_norm": 1.069808344895264,
      "learning_rate": 1.7774446922881477e-06,
      "loss": 0.1159,
      "step": 25066
    },
    {
      "epoch": 0.7312853725421553,
      "grad_norm": 1.1827380260337113,
      "learning_rate": 1.7770834858868774e-06,
      "loss": 0.13,
      "step": 25067
    },
    {
      "epoch": 0.7313145457727989,
      "grad_norm": 0.8296872513374155,
      "learning_rate": 1.7767223082590368e-06,
      "loss": 0.0954,
      "step": 25068
    },
    {
      "epoch": 0.7313437190034424,
      "grad_norm": 1.013042974837889,
      "learning_rate": 1.7763611594078484e-06,
      "loss": 0.131,
      "step": 25069
    },
    {
      "epoch": 0.731372892234086,
      "grad_norm": 0.7654800282624646,
      "learning_rate": 1.7760000393365396e-06,
      "loss": 0.1373,
      "step": 25070
    },
    {
      "epoch": 0.7314020654647295,
      "grad_norm": 0.8389137047012237,
      "learning_rate": 1.775638948048331e-06,
      "loss": 0.099,
      "step": 25071
    },
    {
      "epoch": 0.7314312386953731,
      "grad_norm": 0.9852190149238672,
      "learning_rate": 1.7752778855464482e-06,
      "loss": 0.1134,
      "step": 25072
    },
    {
      "epoch": 0.7314604119260167,
      "grad_norm": 0.8183655474665511,
      "learning_rate": 1.7749168518341159e-06,
      "loss": 0.1088,
      "step": 25073
    },
    {
      "epoch": 0.7314895851566603,
      "grad_norm": 1.214453919438412,
      "learning_rate": 1.7745558469145563e-06,
      "loss": 0.1254,
      "step": 25074
    },
    {
      "epoch": 0.7315187583873038,
      "grad_norm": 0.9299908590710989,
      "learning_rate": 1.7741948707909906e-06,
      "loss": 0.1075,
      "step": 25075
    },
    {
      "epoch": 0.7315479316179474,
      "grad_norm": 0.7162708437097317,
      "learning_rate": 1.7738339234666453e-06,
      "loss": 0.118,
      "step": 25076
    },
    {
      "epoch": 0.731577104848591,
      "grad_norm": 0.9828499682152776,
      "learning_rate": 1.773473004944738e-06,
      "loss": 0.1276,
      "step": 25077
    },
    {
      "epoch": 0.7316062780792345,
      "grad_norm": 0.9106745842008812,
      "learning_rate": 1.7731121152284952e-06,
      "loss": 0.1032,
      "step": 25078
    },
    {
      "epoch": 0.7316354513098781,
      "grad_norm": 0.8463589980104531,
      "learning_rate": 1.7727512543211356e-06,
      "loss": 0.1061,
      "step": 25079
    },
    {
      "epoch": 0.7316646245405216,
      "grad_norm": 0.8227355149757087,
      "learning_rate": 1.7723904222258842e-06,
      "loss": 0.1241,
      "step": 25080
    },
    {
      "epoch": 0.7316937977711652,
      "grad_norm": 1.0125654110241649,
      "learning_rate": 1.7720296189459607e-06,
      "loss": 0.115,
      "step": 25081
    },
    {
      "epoch": 0.7317229710018087,
      "grad_norm": 0.8123603753577865,
      "learning_rate": 1.7716688444845841e-06,
      "loss": 0.1227,
      "step": 25082
    },
    {
      "epoch": 0.7317521442324523,
      "grad_norm": 1.2190760843158348,
      "learning_rate": 1.7713080988449783e-06,
      "loss": 0.148,
      "step": 25083
    },
    {
      "epoch": 0.7317813174630958,
      "grad_norm": 1.058355541610057,
      "learning_rate": 1.770947382030364e-06,
      "loss": 0.1319,
      "step": 25084
    },
    {
      "epoch": 0.7318104906937394,
      "grad_norm": 1.142991790627484,
      "learning_rate": 1.7705866940439604e-06,
      "loss": 0.1205,
      "step": 25085
    },
    {
      "epoch": 0.7318396639243829,
      "grad_norm": 0.9925538806801744,
      "learning_rate": 1.7702260348889865e-06,
      "loss": 0.1185,
      "step": 25086
    },
    {
      "epoch": 0.7318688371550266,
      "grad_norm": 0.9204263589105607,
      "learning_rate": 1.7698654045686654e-06,
      "loss": 0.1065,
      "step": 25087
    },
    {
      "epoch": 0.7318980103856702,
      "grad_norm": 1.01427959243275,
      "learning_rate": 1.7695048030862133e-06,
      "loss": 0.1213,
      "step": 25088
    },
    {
      "epoch": 0.7319271836163137,
      "grad_norm": 0.7468746509361103,
      "learning_rate": 1.7691442304448508e-06,
      "loss": 0.1171,
      "step": 25089
    },
    {
      "epoch": 0.7319563568469573,
      "grad_norm": 0.7073049353332799,
      "learning_rate": 1.7687836866477992e-06,
      "loss": 0.1103,
      "step": 25090
    },
    {
      "epoch": 0.7319855300776008,
      "grad_norm": 0.9058603297741257,
      "learning_rate": 1.7684231716982753e-06,
      "loss": 0.1062,
      "step": 25091
    },
    {
      "epoch": 0.7320147033082444,
      "grad_norm": 0.7907075778553803,
      "learning_rate": 1.7680626855994964e-06,
      "loss": 0.1273,
      "step": 25092
    },
    {
      "epoch": 0.7320438765388879,
      "grad_norm": 0.8180062151490706,
      "learning_rate": 1.7677022283546835e-06,
      "loss": 0.1212,
      "step": 25093
    },
    {
      "epoch": 0.7320730497695315,
      "grad_norm": 0.976544821222924,
      "learning_rate": 1.7673417999670538e-06,
      "loss": 0.1147,
      "step": 25094
    },
    {
      "epoch": 0.732102223000175,
      "grad_norm": 0.9447337914495483,
      "learning_rate": 1.7669814004398234e-06,
      "loss": 0.1204,
      "step": 25095
    },
    {
      "epoch": 0.7321313962308186,
      "grad_norm": 0.8780045298183542,
      "learning_rate": 1.766621029776211e-06,
      "loss": 0.123,
      "step": 25096
    },
    {
      "epoch": 0.7321605694614621,
      "grad_norm": 0.9278161234312494,
      "learning_rate": 1.7662606879794364e-06,
      "loss": 0.1117,
      "step": 25097
    },
    {
      "epoch": 0.7321897426921057,
      "grad_norm": 1.030305282599738,
      "learning_rate": 1.7659003750527137e-06,
      "loss": 0.1301,
      "step": 25098
    },
    {
      "epoch": 0.7322189159227492,
      "grad_norm": 0.93053434249989,
      "learning_rate": 1.7655400909992592e-06,
      "loss": 0.1201,
      "step": 25099
    },
    {
      "epoch": 0.7322480891533929,
      "grad_norm": 0.8230394303780576,
      "learning_rate": 1.765179835822292e-06,
      "loss": 0.1065,
      "step": 25100
    },
    {
      "epoch": 0.7322772623840365,
      "grad_norm": 0.7766468427595344,
      "learning_rate": 1.7648196095250252e-06,
      "loss": 0.1074,
      "step": 25101
    },
    {
      "epoch": 0.73230643561468,
      "grad_norm": 0.8429244286159766,
      "learning_rate": 1.7644594121106773e-06,
      "loss": 0.1082,
      "step": 25102
    },
    {
      "epoch": 0.7323356088453236,
      "grad_norm": 0.8269239382489441,
      "learning_rate": 1.7640992435824644e-06,
      "loss": 0.1158,
      "step": 25103
    },
    {
      "epoch": 0.7323647820759671,
      "grad_norm": 0.7961464558235928,
      "learning_rate": 1.7637391039436013e-06,
      "loss": 0.1565,
      "step": 25104
    },
    {
      "epoch": 0.7323939553066107,
      "grad_norm": 0.7701526148099882,
      "learning_rate": 1.7633789931973011e-06,
      "loss": 0.126,
      "step": 25105
    },
    {
      "epoch": 0.7324231285372542,
      "grad_norm": 0.9857449419649804,
      "learning_rate": 1.7630189113467827e-06,
      "loss": 0.1365,
      "step": 25106
    },
    {
      "epoch": 0.7324523017678978,
      "grad_norm": 0.8301893093873627,
      "learning_rate": 1.7626588583952564e-06,
      "loss": 0.1344,
      "step": 25107
    },
    {
      "epoch": 0.7324814749985413,
      "grad_norm": 0.8109699195045951,
      "learning_rate": 1.7622988343459412e-06,
      "loss": 0.1064,
      "step": 25108
    },
    {
      "epoch": 0.7325106482291849,
      "grad_norm": 1.0554056330856585,
      "learning_rate": 1.761938839202047e-06,
      "loss": 0.1366,
      "step": 25109
    },
    {
      "epoch": 0.7325398214598284,
      "grad_norm": 0.8565797345130763,
      "learning_rate": 1.761578872966792e-06,
      "loss": 0.1208,
      "step": 25110
    },
    {
      "epoch": 0.732568994690472,
      "grad_norm": 0.7180030160198854,
      "learning_rate": 1.7612189356433873e-06,
      "loss": 0.0986,
      "step": 25111
    },
    {
      "epoch": 0.7325981679211155,
      "grad_norm": 0.9382257106450259,
      "learning_rate": 1.7608590272350452e-06,
      "loss": 0.1468,
      "step": 25112
    },
    {
      "epoch": 0.7326273411517591,
      "grad_norm": 1.027195069618291,
      "learning_rate": 1.7604991477449806e-06,
      "loss": 0.1235,
      "step": 25113
    },
    {
      "epoch": 0.7326565143824028,
      "grad_norm": 0.7406530075175138,
      "learning_rate": 1.760139297176408e-06,
      "loss": 0.1103,
      "step": 25114
    },
    {
      "epoch": 0.7326856876130463,
      "grad_norm": 0.7793079944136352,
      "learning_rate": 1.7597794755325381e-06,
      "loss": 0.1247,
      "step": 25115
    },
    {
      "epoch": 0.7327148608436899,
      "grad_norm": 0.9175384882801038,
      "learning_rate": 1.7594196828165822e-06,
      "loss": 0.1104,
      "step": 25116
    },
    {
      "epoch": 0.7327440340743334,
      "grad_norm": 0.830111568881049,
      "learning_rate": 1.7590599190317553e-06,
      "loss": 0.1234,
      "step": 25117
    },
    {
      "epoch": 0.732773207304977,
      "grad_norm": 0.8619153060807219,
      "learning_rate": 1.7587001841812661e-06,
      "loss": 0.1198,
      "step": 25118
    },
    {
      "epoch": 0.7328023805356205,
      "grad_norm": 1.142988479214071,
      "learning_rate": 1.7583404782683278e-06,
      "loss": 0.1471,
      "step": 25119
    },
    {
      "epoch": 0.7328315537662641,
      "grad_norm": 0.8390633501782565,
      "learning_rate": 1.7579808012961535e-06,
      "loss": 0.1392,
      "step": 25120
    },
    {
      "epoch": 0.7328607269969076,
      "grad_norm": 0.8210486107886237,
      "learning_rate": 1.7576211532679526e-06,
      "loss": 0.1083,
      "step": 25121
    },
    {
      "epoch": 0.7328899002275512,
      "grad_norm": 1.0812884348190424,
      "learning_rate": 1.7572615341869348e-06,
      "loss": 0.1453,
      "step": 25122
    },
    {
      "epoch": 0.7329190734581947,
      "grad_norm": 1.1511242222907994,
      "learning_rate": 1.7569019440563134e-06,
      "loss": 0.113,
      "step": 25123
    },
    {
      "epoch": 0.7329482466888383,
      "grad_norm": 0.8570069548126423,
      "learning_rate": 1.7565423828792971e-06,
      "loss": 0.1043,
      "step": 25124
    },
    {
      "epoch": 0.7329774199194818,
      "grad_norm": 0.8266584965000707,
      "learning_rate": 1.7561828506590944e-06,
      "loss": 0.1092,
      "step": 25125
    },
    {
      "epoch": 0.7330065931501254,
      "grad_norm": 0.7430619098612392,
      "learning_rate": 1.7558233473989172e-06,
      "loss": 0.1058,
      "step": 25126
    },
    {
      "epoch": 0.7330357663807691,
      "grad_norm": 1.0697237572037825,
      "learning_rate": 1.7554638731019757e-06,
      "loss": 0.1212,
      "step": 25127
    },
    {
      "epoch": 0.7330649396114126,
      "grad_norm": 0.7909565495721145,
      "learning_rate": 1.755104427771479e-06,
      "loss": 0.0987,
      "step": 25128
    },
    {
      "epoch": 0.7330941128420562,
      "grad_norm": 0.8600045561286982,
      "learning_rate": 1.7547450114106335e-06,
      "loss": 0.1182,
      "step": 25129
    },
    {
      "epoch": 0.7331232860726997,
      "grad_norm": 0.7879225465652433,
      "learning_rate": 1.754385624022651e-06,
      "loss": 0.1266,
      "step": 25130
    },
    {
      "epoch": 0.7331524593033433,
      "grad_norm": 0.9075453655824288,
      "learning_rate": 1.7540262656107376e-06,
      "loss": 0.1237,
      "step": 25131
    },
    {
      "epoch": 0.7331816325339868,
      "grad_norm": 0.8839351725820979,
      "learning_rate": 1.7536669361781028e-06,
      "loss": 0.1029,
      "step": 25132
    },
    {
      "epoch": 0.7332108057646304,
      "grad_norm": 0.7682383820730142,
      "learning_rate": 1.753307635727956e-06,
      "loss": 0.1175,
      "step": 25133
    },
    {
      "epoch": 0.7332399789952739,
      "grad_norm": 0.7750322859507258,
      "learning_rate": 1.7529483642635042e-06,
      "loss": 0.1293,
      "step": 25134
    },
    {
      "epoch": 0.7332691522259175,
      "grad_norm": 1.0730597542982712,
      "learning_rate": 1.752589121787952e-06,
      "loss": 0.1144,
      "step": 25135
    },
    {
      "epoch": 0.733298325456561,
      "grad_norm": 1.0052931011938175,
      "learning_rate": 1.7522299083045109e-06,
      "loss": 0.1178,
      "step": 25136
    },
    {
      "epoch": 0.7333274986872046,
      "grad_norm": 0.7550743786223016,
      "learning_rate": 1.751870723816384e-06,
      "loss": 0.1164,
      "step": 25137
    },
    {
      "epoch": 0.7333566719178481,
      "grad_norm": 0.8236347357126722,
      "learning_rate": 1.7515115683267818e-06,
      "loss": 0.1176,
      "step": 25138
    },
    {
      "epoch": 0.7333858451484917,
      "grad_norm": 0.9729512647434295,
      "learning_rate": 1.751152441838907e-06,
      "loss": 0.1096,
      "step": 25139
    },
    {
      "epoch": 0.7334150183791353,
      "grad_norm": 0.8616297450036582,
      "learning_rate": 1.7507933443559694e-06,
      "loss": 0.1069,
      "step": 25140
    },
    {
      "epoch": 0.7334441916097789,
      "grad_norm": 0.8177603369689589,
      "learning_rate": 1.7504342758811732e-06,
      "loss": 0.1131,
      "step": 25141
    },
    {
      "epoch": 0.7334733648404225,
      "grad_norm": 0.7055999696259391,
      "learning_rate": 1.750075236417722e-06,
      "loss": 0.1174,
      "step": 25142
    },
    {
      "epoch": 0.733502538071066,
      "grad_norm": 0.8013556770362623,
      "learning_rate": 1.7497162259688238e-06,
      "loss": 0.1383,
      "step": 25143
    },
    {
      "epoch": 0.7335317113017096,
      "grad_norm": 0.7566002384298631,
      "learning_rate": 1.7493572445376845e-06,
      "loss": 0.1082,
      "step": 25144
    },
    {
      "epoch": 0.7335608845323531,
      "grad_norm": 0.7498020039586966,
      "learning_rate": 1.7489982921275077e-06,
      "loss": 0.1188,
      "step": 25145
    },
    {
      "epoch": 0.7335900577629967,
      "grad_norm": 0.7120447492160659,
      "learning_rate": 1.748639368741497e-06,
      "loss": 0.1182,
      "step": 25146
    },
    {
      "epoch": 0.7336192309936402,
      "grad_norm": 0.7978790389234697,
      "learning_rate": 1.748280474382859e-06,
      "loss": 0.1074,
      "step": 25147
    },
    {
      "epoch": 0.7336484042242838,
      "grad_norm": 0.8372600013761905,
      "learning_rate": 1.7479216090547952e-06,
      "loss": 0.1378,
      "step": 25148
    },
    {
      "epoch": 0.7336775774549273,
      "grad_norm": 0.7960443512718854,
      "learning_rate": 1.747562772760511e-06,
      "loss": 0.1008,
      "step": 25149
    },
    {
      "epoch": 0.7337067506855709,
      "grad_norm": 0.8518957134997182,
      "learning_rate": 1.7472039655032113e-06,
      "loss": 0.123,
      "step": 25150
    },
    {
      "epoch": 0.7337359239162144,
      "grad_norm": 0.835281475119563,
      "learning_rate": 1.7468451872860986e-06,
      "loss": 0.1102,
      "step": 25151
    },
    {
      "epoch": 0.733765097146858,
      "grad_norm": 0.8287815632191993,
      "learning_rate": 1.746486438112373e-06,
      "loss": 0.1209,
      "step": 25152
    },
    {
      "epoch": 0.7337942703775016,
      "grad_norm": 0.8533193339576118,
      "learning_rate": 1.746127717985242e-06,
      "loss": 0.0999,
      "step": 25153
    },
    {
      "epoch": 0.7338234436081452,
      "grad_norm": 0.6564936950655875,
      "learning_rate": 1.7457690269079047e-06,
      "loss": 0.0973,
      "step": 25154
    },
    {
      "epoch": 0.7338526168387888,
      "grad_norm": 1.1398355247475862,
      "learning_rate": 1.7454103648835656e-06,
      "loss": 0.1268,
      "step": 25155
    },
    {
      "epoch": 0.7338817900694323,
      "grad_norm": 1.463932068213724,
      "learning_rate": 1.7450517319154247e-06,
      "loss": 0.117,
      "step": 25156
    },
    {
      "epoch": 0.7339109633000759,
      "grad_norm": 1.0145696826910793,
      "learning_rate": 1.7446931280066865e-06,
      "loss": 0.1307,
      "step": 25157
    },
    {
      "epoch": 0.7339401365307194,
      "grad_norm": 1.158387489395097,
      "learning_rate": 1.7443345531605505e-06,
      "loss": 0.1153,
      "step": 25158
    },
    {
      "epoch": 0.733969309761363,
      "grad_norm": 1.115998650436188,
      "learning_rate": 1.743976007380217e-06,
      "loss": 0.1181,
      "step": 25159
    },
    {
      "epoch": 0.7339984829920065,
      "grad_norm": 0.8625697536330599,
      "learning_rate": 1.7436174906688886e-06,
      "loss": 0.1408,
      "step": 25160
    },
    {
      "epoch": 0.7340276562226501,
      "grad_norm": 0.8435931153180534,
      "learning_rate": 1.7432590030297674e-06,
      "loss": 0.1307,
      "step": 25161
    },
    {
      "epoch": 0.7340568294532936,
      "grad_norm": 1.1569062914021235,
      "learning_rate": 1.7429005444660508e-06,
      "loss": 0.1177,
      "step": 25162
    },
    {
      "epoch": 0.7340860026839372,
      "grad_norm": 1.1099160223533906,
      "learning_rate": 1.7425421149809424e-06,
      "loss": 0.1002,
      "step": 25163
    },
    {
      "epoch": 0.7341151759145808,
      "grad_norm": 1.2895727092868505,
      "learning_rate": 1.7421837145776399e-06,
      "loss": 0.1209,
      "step": 25164
    },
    {
      "epoch": 0.7341443491452243,
      "grad_norm": 0.7167036218626416,
      "learning_rate": 1.7418253432593423e-06,
      "loss": 0.1126,
      "step": 25165
    },
    {
      "epoch": 0.7341735223758679,
      "grad_norm": 0.7890719376869357,
      "learning_rate": 1.74146700102925e-06,
      "loss": 0.1005,
      "step": 25166
    },
    {
      "epoch": 0.7342026956065114,
      "grad_norm": 0.8660126753722375,
      "learning_rate": 1.741108687890564e-06,
      "loss": 0.1112,
      "step": 25167
    },
    {
      "epoch": 0.7342318688371551,
      "grad_norm": 1.0932106365105552,
      "learning_rate": 1.7407504038464818e-06,
      "loss": 0.1098,
      "step": 25168
    },
    {
      "epoch": 0.7342610420677986,
      "grad_norm": 0.9895097506666662,
      "learning_rate": 1.7403921489002008e-06,
      "loss": 0.128,
      "step": 25169
    },
    {
      "epoch": 0.7342902152984422,
      "grad_norm": 0.7495357288382063,
      "learning_rate": 1.7400339230549212e-06,
      "loss": 0.1228,
      "step": 25170
    },
    {
      "epoch": 0.7343193885290857,
      "grad_norm": 0.7804076536488669,
      "learning_rate": 1.7396757263138415e-06,
      "loss": 0.106,
      "step": 25171
    },
    {
      "epoch": 0.7343485617597293,
      "grad_norm": 0.8424890119681546,
      "learning_rate": 1.7393175586801564e-06,
      "loss": 0.1157,
      "step": 25172
    },
    {
      "epoch": 0.7343777349903728,
      "grad_norm": 0.9289119281859283,
      "learning_rate": 1.738959420157066e-06,
      "loss": 0.1095,
      "step": 25173
    },
    {
      "epoch": 0.7344069082210164,
      "grad_norm": 0.9027837895824324,
      "learning_rate": 1.738601310747769e-06,
      "loss": 0.1034,
      "step": 25174
    },
    {
      "epoch": 0.73443608145166,
      "grad_norm": 0.8428337517467128,
      "learning_rate": 1.7382432304554609e-06,
      "loss": 0.1128,
      "step": 25175
    },
    {
      "epoch": 0.7344652546823035,
      "grad_norm": 0.9047097875695138,
      "learning_rate": 1.7378851792833368e-06,
      "loss": 0.1119,
      "step": 25176
    },
    {
      "epoch": 0.7344944279129471,
      "grad_norm": 0.8795970607848003,
      "learning_rate": 1.737527157234597e-06,
      "loss": 0.1248,
      "step": 25177
    },
    {
      "epoch": 0.7345236011435906,
      "grad_norm": 0.7990268085180928,
      "learning_rate": 1.7371691643124338e-06,
      "loss": 0.1036,
      "step": 25178
    },
    {
      "epoch": 0.7345527743742342,
      "grad_norm": 0.793030213764276,
      "learning_rate": 1.736811200520046e-06,
      "loss": 0.1164,
      "step": 25179
    },
    {
      "epoch": 0.7345819476048777,
      "grad_norm": 1.0068930119539534,
      "learning_rate": 1.7364532658606304e-06,
      "loss": 0.1253,
      "step": 25180
    },
    {
      "epoch": 0.7346111208355214,
      "grad_norm": 0.800583865149948,
      "learning_rate": 1.736095360337381e-06,
      "loss": 0.1095,
      "step": 25181
    },
    {
      "epoch": 0.7346402940661649,
      "grad_norm": 1.045965267800114,
      "learning_rate": 1.7357374839534907e-06,
      "loss": 0.0936,
      "step": 25182
    },
    {
      "epoch": 0.7346694672968085,
      "grad_norm": 1.3238259145904048,
      "learning_rate": 1.7353796367121594e-06,
      "loss": 0.1062,
      "step": 25183
    },
    {
      "epoch": 0.734698640527452,
      "grad_norm": 0.8951687075971192,
      "learning_rate": 1.7350218186165774e-06,
      "loss": 0.129,
      "step": 25184
    },
    {
      "epoch": 0.7347278137580956,
      "grad_norm": 0.9953421898794453,
      "learning_rate": 1.7346640296699424e-06,
      "loss": 0.1017,
      "step": 25185
    },
    {
      "epoch": 0.7347569869887391,
      "grad_norm": 2.017025881446626,
      "learning_rate": 1.7343062698754465e-06,
      "loss": 0.1061,
      "step": 25186
    },
    {
      "epoch": 0.7347861602193827,
      "grad_norm": 0.6387944226869747,
      "learning_rate": 1.733948539236286e-06,
      "loss": 0.1083,
      "step": 25187
    },
    {
      "epoch": 0.7348153334500263,
      "grad_norm": 1.7999285802911165,
      "learning_rate": 1.7335908377556533e-06,
      "loss": 0.1064,
      "step": 25188
    },
    {
      "epoch": 0.7348445066806698,
      "grad_norm": 0.8051056740792877,
      "learning_rate": 1.73323316543674e-06,
      "loss": 0.1074,
      "step": 25189
    },
    {
      "epoch": 0.7348736799113134,
      "grad_norm": 0.7310949681442523,
      "learning_rate": 1.7328755222827414e-06,
      "loss": 0.1384,
      "step": 25190
    },
    {
      "epoch": 0.7349028531419569,
      "grad_norm": 0.836118745669304,
      "learning_rate": 1.732517908296852e-06,
      "loss": 0.1124,
      "step": 25191
    },
    {
      "epoch": 0.7349320263726005,
      "grad_norm": 0.8581381736726805,
      "learning_rate": 1.7321603234822608e-06,
      "loss": 0.1232,
      "step": 25192
    },
    {
      "epoch": 0.734961199603244,
      "grad_norm": 0.7946901559174058,
      "learning_rate": 1.7318027678421638e-06,
      "loss": 0.0923,
      "step": 25193
    },
    {
      "epoch": 0.7349903728338876,
      "grad_norm": 0.987180674950145,
      "learning_rate": 1.7314452413797517e-06,
      "loss": 0.1269,
      "step": 25194
    },
    {
      "epoch": 0.7350195460645312,
      "grad_norm": 0.8264194715410034,
      "learning_rate": 1.7310877440982144e-06,
      "loss": 0.1069,
      "step": 25195
    },
    {
      "epoch": 0.7350487192951748,
      "grad_norm": 0.6918364701282613,
      "learning_rate": 1.730730276000745e-06,
      "loss": 0.1106,
      "step": 25196
    },
    {
      "epoch": 0.7350778925258183,
      "grad_norm": 0.6190853723432631,
      "learning_rate": 1.7303728370905377e-06,
      "loss": 0.1043,
      "step": 25197
    },
    {
      "epoch": 0.7351070657564619,
      "grad_norm": 0.7734153098972022,
      "learning_rate": 1.7300154273707803e-06,
      "loss": 0.117,
      "step": 25198
    },
    {
      "epoch": 0.7351362389871055,
      "grad_norm": 1.5546059569207948,
      "learning_rate": 1.7296580468446638e-06,
      "loss": 0.1432,
      "step": 25199
    },
    {
      "epoch": 0.735165412217749,
      "grad_norm": 0.8826353310839861,
      "learning_rate": 1.7293006955153808e-06,
      "loss": 0.1054,
      "step": 25200
    },
    {
      "epoch": 0.7351945854483926,
      "grad_norm": 0.8863678540996224,
      "learning_rate": 1.7289433733861206e-06,
      "loss": 0.1189,
      "step": 25201
    },
    {
      "epoch": 0.7352237586790361,
      "grad_norm": 0.7047809478398082,
      "learning_rate": 1.7285860804600708e-06,
      "loss": 0.1327,
      "step": 25202
    },
    {
      "epoch": 0.7352529319096797,
      "grad_norm": 0.9617434252540195,
      "learning_rate": 1.7282288167404243e-06,
      "loss": 0.118,
      "step": 25203
    },
    {
      "epoch": 0.7352821051403232,
      "grad_norm": 0.6185126927262421,
      "learning_rate": 1.727871582230371e-06,
      "loss": 0.1262,
      "step": 25204
    },
    {
      "epoch": 0.7353112783709668,
      "grad_norm": 0.7054006349378011,
      "learning_rate": 1.7275143769330994e-06,
      "loss": 0.1045,
      "step": 25205
    },
    {
      "epoch": 0.7353404516016103,
      "grad_norm": 0.7796769073497872,
      "learning_rate": 1.7271572008517968e-06,
      "loss": 0.1218,
      "step": 25206
    },
    {
      "epoch": 0.7353696248322539,
      "grad_norm": 0.788207880874118,
      "learning_rate": 1.7268000539896545e-06,
      "loss": 0.1114,
      "step": 25207
    },
    {
      "epoch": 0.7353987980628975,
      "grad_norm": 0.8660248515564732,
      "learning_rate": 1.7264429363498587e-06,
      "loss": 0.1444,
      "step": 25208
    },
    {
      "epoch": 0.7354279712935411,
      "grad_norm": 0.7349303066376833,
      "learning_rate": 1.7260858479355986e-06,
      "loss": 0.1372,
      "step": 25209
    },
    {
      "epoch": 0.7354571445241846,
      "grad_norm": 0.6860352387906503,
      "learning_rate": 1.7257287887500645e-06,
      "loss": 0.1188,
      "step": 25210
    },
    {
      "epoch": 0.7354863177548282,
      "grad_norm": 0.8315110432364982,
      "learning_rate": 1.7253717587964419e-06,
      "loss": 0.0989,
      "step": 25211
    },
    {
      "epoch": 0.7355154909854718,
      "grad_norm": 0.8596638787874854,
      "learning_rate": 1.725014758077917e-06,
      "loss": 0.1108,
      "step": 25212
    },
    {
      "epoch": 0.7355446642161153,
      "grad_norm": 0.752770762716311,
      "learning_rate": 1.72465778659768e-06,
      "loss": 0.111,
      "step": 25213
    },
    {
      "epoch": 0.7355738374467589,
      "grad_norm": 0.6867116489178011,
      "learning_rate": 1.7243008443589148e-06,
      "loss": 0.1021,
      "step": 25214
    },
    {
      "epoch": 0.7356030106774024,
      "grad_norm": 0.7910658847762618,
      "learning_rate": 1.7239439313648115e-06,
      "loss": 0.098,
      "step": 25215
    },
    {
      "epoch": 0.735632183908046,
      "grad_norm": 0.7865668459400588,
      "learning_rate": 1.7235870476185528e-06,
      "loss": 0.1053,
      "step": 25216
    },
    {
      "epoch": 0.7356613571386895,
      "grad_norm": 0.7660660298399999,
      "learning_rate": 1.7232301931233287e-06,
      "loss": 0.113,
      "step": 25217
    },
    {
      "epoch": 0.7356905303693331,
      "grad_norm": 0.774868231792155,
      "learning_rate": 1.7228733678823234e-06,
      "loss": 0.1273,
      "step": 25218
    },
    {
      "epoch": 0.7357197035999766,
      "grad_norm": 0.6727820209920885,
      "learning_rate": 1.7225165718987203e-06,
      "loss": 0.1022,
      "step": 25219
    },
    {
      "epoch": 0.7357488768306202,
      "grad_norm": 0.8086784982776921,
      "learning_rate": 1.7221598051757066e-06,
      "loss": 0.1196,
      "step": 25220
    },
    {
      "epoch": 0.7357780500612637,
      "grad_norm": 0.9249791247127002,
      "learning_rate": 1.7218030677164698e-06,
      "loss": 0.1384,
      "step": 25221
    },
    {
      "epoch": 0.7358072232919074,
      "grad_norm": 0.7257236343971472,
      "learning_rate": 1.7214463595241909e-06,
      "loss": 0.1221,
      "step": 25222
    },
    {
      "epoch": 0.735836396522551,
      "grad_norm": 0.7856001053922799,
      "learning_rate": 1.7210896806020583e-06,
      "loss": 0.122,
      "step": 25223
    },
    {
      "epoch": 0.7358655697531945,
      "grad_norm": 1.2352753135807606,
      "learning_rate": 1.720733030953254e-06,
      "loss": 0.111,
      "step": 25224
    },
    {
      "epoch": 0.7358947429838381,
      "grad_norm": 0.8579676431534605,
      "learning_rate": 1.72037641058096e-06,
      "loss": 0.1101,
      "step": 25225
    },
    {
      "epoch": 0.7359239162144816,
      "grad_norm": 0.8398343325198167,
      "learning_rate": 1.7200198194883632e-06,
      "loss": 0.1289,
      "step": 25226
    },
    {
      "epoch": 0.7359530894451252,
      "grad_norm": 1.547312546179713,
      "learning_rate": 1.7196632576786481e-06,
      "loss": 0.1173,
      "step": 25227
    },
    {
      "epoch": 0.7359822626757687,
      "grad_norm": 0.9003318672602055,
      "learning_rate": 1.7193067251549966e-06,
      "loss": 0.1165,
      "step": 25228
    },
    {
      "epoch": 0.7360114359064123,
      "grad_norm": 0.8140376947371802,
      "learning_rate": 1.7189502219205894e-06,
      "loss": 0.1351,
      "step": 25229
    },
    {
      "epoch": 0.7360406091370558,
      "grad_norm": 0.8741857773815951,
      "learning_rate": 1.718593747978613e-06,
      "loss": 0.1243,
      "step": 25230
    },
    {
      "epoch": 0.7360697823676994,
      "grad_norm": 0.8652461061178748,
      "learning_rate": 1.7182373033322485e-06,
      "loss": 0.1204,
      "step": 25231
    },
    {
      "epoch": 0.7360989555983429,
      "grad_norm": 0.8595575707027153,
      "learning_rate": 1.7178808879846763e-06,
      "loss": 0.1228,
      "step": 25232
    },
    {
      "epoch": 0.7361281288289865,
      "grad_norm": 0.9282324690575567,
      "learning_rate": 1.7175245019390801e-06,
      "loss": 0.1098,
      "step": 25233
    },
    {
      "epoch": 0.73615730205963,
      "grad_norm": 0.8532936953628621,
      "learning_rate": 1.7171681451986428e-06,
      "loss": 0.139,
      "step": 25234
    },
    {
      "epoch": 0.7361864752902736,
      "grad_norm": 1.1356813078641073,
      "learning_rate": 1.716811817766545e-06,
      "loss": 0.1135,
      "step": 25235
    },
    {
      "epoch": 0.7362156485209173,
      "grad_norm": 1.1125982455851906,
      "learning_rate": 1.7164555196459659e-06,
      "loss": 0.1417,
      "step": 25236
    },
    {
      "epoch": 0.7362448217515608,
      "grad_norm": 0.8874185666031077,
      "learning_rate": 1.7160992508400892e-06,
      "loss": 0.1096,
      "step": 25237
    },
    {
      "epoch": 0.7362739949822044,
      "grad_norm": 1.101421033845138,
      "learning_rate": 1.7157430113520934e-06,
      "loss": 0.1236,
      "step": 25238
    },
    {
      "epoch": 0.7363031682128479,
      "grad_norm": 0.8578668617025079,
      "learning_rate": 1.71538680118516e-06,
      "loss": 0.0967,
      "step": 25239
    },
    {
      "epoch": 0.7363323414434915,
      "grad_norm": 1.02838406227728,
      "learning_rate": 1.7150306203424705e-06,
      "loss": 0.1108,
      "step": 25240
    },
    {
      "epoch": 0.736361514674135,
      "grad_norm": 1.0423443265487888,
      "learning_rate": 1.7146744688272033e-06,
      "loss": 0.1232,
      "step": 25241
    },
    {
      "epoch": 0.7363906879047786,
      "grad_norm": 0.7404149203167731,
      "learning_rate": 1.7143183466425366e-06,
      "loss": 0.1003,
      "step": 25242
    },
    {
      "epoch": 0.7364198611354221,
      "grad_norm": 0.8153272341392377,
      "learning_rate": 1.7139622537916533e-06,
      "loss": 0.0916,
      "step": 25243
    },
    {
      "epoch": 0.7364490343660657,
      "grad_norm": 0.8857841917298593,
      "learning_rate": 1.7136061902777286e-06,
      "loss": 0.1231,
      "step": 25244
    },
    {
      "epoch": 0.7364782075967092,
      "grad_norm": 0.9688457467265948,
      "learning_rate": 1.713250156103945e-06,
      "loss": 0.1233,
      "step": 25245
    },
    {
      "epoch": 0.7365073808273528,
      "grad_norm": 1.1345142642731583,
      "learning_rate": 1.7128941512734781e-06,
      "loss": 0.1212,
      "step": 25246
    },
    {
      "epoch": 0.7365365540579963,
      "grad_norm": 0.8563042071323728,
      "learning_rate": 1.7125381757895088e-06,
      "loss": 0.1213,
      "step": 25247
    },
    {
      "epoch": 0.7365657272886399,
      "grad_norm": 0.9739527917204357,
      "learning_rate": 1.7121822296552138e-06,
      "loss": 0.1225,
      "step": 25248
    },
    {
      "epoch": 0.7365949005192836,
      "grad_norm": 1.0025426988035975,
      "learning_rate": 1.7118263128737693e-06,
      "loss": 0.1434,
      "step": 25249
    },
    {
      "epoch": 0.7366240737499271,
      "grad_norm": 0.9172220347337707,
      "learning_rate": 1.7114704254483549e-06,
      "loss": 0.1053,
      "step": 25250
    },
    {
      "epoch": 0.7366532469805707,
      "grad_norm": 0.8207826382201381,
      "learning_rate": 1.7111145673821489e-06,
      "loss": 0.1275,
      "step": 25251
    },
    {
      "epoch": 0.7366824202112142,
      "grad_norm": 0.8933047248882343,
      "learning_rate": 1.7107587386783258e-06,
      "loss": 0.1207,
      "step": 25252
    },
    {
      "epoch": 0.7367115934418578,
      "grad_norm": 0.8904389300151773,
      "learning_rate": 1.7104029393400646e-06,
      "loss": 0.1452,
      "step": 25253
    },
    {
      "epoch": 0.7367407666725013,
      "grad_norm": 0.7367043057960646,
      "learning_rate": 1.7100471693705405e-06,
      "loss": 0.1029,
      "step": 25254
    },
    {
      "epoch": 0.7367699399031449,
      "grad_norm": 0.7648136268051936,
      "learning_rate": 1.7096914287729287e-06,
      "loss": 0.1084,
      "step": 25255
    },
    {
      "epoch": 0.7367991131337884,
      "grad_norm": 0.9378611376088117,
      "learning_rate": 1.709335717550406e-06,
      "loss": 0.1306,
      "step": 25256
    },
    {
      "epoch": 0.736828286364432,
      "grad_norm": 0.7537964662440101,
      "learning_rate": 1.7089800357061504e-06,
      "loss": 0.1039,
      "step": 25257
    },
    {
      "epoch": 0.7368574595950755,
      "grad_norm": 0.7520104198875354,
      "learning_rate": 1.7086243832433353e-06,
      "loss": 0.1193,
      "step": 25258
    },
    {
      "epoch": 0.7368866328257191,
      "grad_norm": 0.9008341568376169,
      "learning_rate": 1.7082687601651344e-06,
      "loss": 0.1153,
      "step": 25259
    },
    {
      "epoch": 0.7369158060563626,
      "grad_norm": 1.059608005032094,
      "learning_rate": 1.7079131664747256e-06,
      "loss": 0.1162,
      "step": 25260
    },
    {
      "epoch": 0.7369449792870062,
      "grad_norm": 0.7660511675565401,
      "learning_rate": 1.7075576021752826e-06,
      "loss": 0.1007,
      "step": 25261
    },
    {
      "epoch": 0.7369741525176497,
      "grad_norm": 0.6890316702609873,
      "learning_rate": 1.7072020672699775e-06,
      "loss": 0.1112,
      "step": 25262
    },
    {
      "epoch": 0.7370033257482934,
      "grad_norm": 0.9422067141317693,
      "learning_rate": 1.7068465617619861e-06,
      "loss": 0.1307,
      "step": 25263
    },
    {
      "epoch": 0.737032498978937,
      "grad_norm": 1.4778763290020256,
      "learning_rate": 1.7064910856544842e-06,
      "loss": 0.1211,
      "step": 25264
    },
    {
      "epoch": 0.7370616722095805,
      "grad_norm": 0.8505609361935126,
      "learning_rate": 1.7061356389506439e-06,
      "loss": 0.1218,
      "step": 25265
    },
    {
      "epoch": 0.7370908454402241,
      "grad_norm": 0.7946341945130069,
      "learning_rate": 1.7057802216536369e-06,
      "loss": 0.1034,
      "step": 25266
    },
    {
      "epoch": 0.7371200186708676,
      "grad_norm": 0.8677088750994095,
      "learning_rate": 1.7054248337666385e-06,
      "loss": 0.1366,
      "step": 25267
    },
    {
      "epoch": 0.7371491919015112,
      "grad_norm": 0.7919279927712066,
      "learning_rate": 1.7050694752928198e-06,
      "loss": 0.1092,
      "step": 25268
    },
    {
      "epoch": 0.7371783651321547,
      "grad_norm": 1.0481757496621407,
      "learning_rate": 1.7047141462353538e-06,
      "loss": 0.1315,
      "step": 25269
    },
    {
      "epoch": 0.7372075383627983,
      "grad_norm": 1.3248705217274135,
      "learning_rate": 1.7043588465974148e-06,
      "loss": 0.1469,
      "step": 25270
    },
    {
      "epoch": 0.7372367115934418,
      "grad_norm": 0.9862843422901132,
      "learning_rate": 1.7040035763821738e-06,
      "loss": 0.0943,
      "step": 25271
    },
    {
      "epoch": 0.7372658848240854,
      "grad_norm": 0.8694817549761541,
      "learning_rate": 1.7036483355928002e-06,
      "loss": 0.1223,
      "step": 25272
    },
    {
      "epoch": 0.737295058054729,
      "grad_norm": 0.8065121355796986,
      "learning_rate": 1.7032931242324691e-06,
      "loss": 0.1148,
      "step": 25273
    },
    {
      "epoch": 0.7373242312853725,
      "grad_norm": 1.054999863812303,
      "learning_rate": 1.7029379423043479e-06,
      "loss": 0.1084,
      "step": 25274
    },
    {
      "epoch": 0.737353404516016,
      "grad_norm": 0.9127816940580294,
      "learning_rate": 1.7025827898116115e-06,
      "loss": 0.1153,
      "step": 25275
    },
    {
      "epoch": 0.7373825777466597,
      "grad_norm": 0.9339764236338165,
      "learning_rate": 1.7022276667574272e-06,
      "loss": 0.1155,
      "step": 25276
    },
    {
      "epoch": 0.7374117509773033,
      "grad_norm": 0.9194037018513296,
      "learning_rate": 1.7018725731449692e-06,
      "loss": 0.1329,
      "step": 25277
    },
    {
      "epoch": 0.7374409242079468,
      "grad_norm": 1.0288101664648228,
      "learning_rate": 1.701517508977405e-06,
      "loss": 0.1047,
      "step": 25278
    },
    {
      "epoch": 0.7374700974385904,
      "grad_norm": 0.8035757981797365,
      "learning_rate": 1.7011624742579037e-06,
      "loss": 0.1256,
      "step": 25279
    },
    {
      "epoch": 0.7374992706692339,
      "grad_norm": 1.0381347895455237,
      "learning_rate": 1.7008074689896359e-06,
      "loss": 0.1134,
      "step": 25280
    },
    {
      "epoch": 0.7375284438998775,
      "grad_norm": 0.9140663289613524,
      "learning_rate": 1.7004524931757733e-06,
      "loss": 0.1438,
      "step": 25281
    },
    {
      "epoch": 0.737557617130521,
      "grad_norm": 1.1388058220901165,
      "learning_rate": 1.700097546819482e-06,
      "loss": 0.1339,
      "step": 25282
    },
    {
      "epoch": 0.7375867903611646,
      "grad_norm": 0.744424852543238,
      "learning_rate": 1.6997426299239327e-06,
      "loss": 0.111,
      "step": 25283
    },
    {
      "epoch": 0.7376159635918081,
      "grad_norm": 0.6704914742716379,
      "learning_rate": 1.6993877424922945e-06,
      "loss": 0.1108,
      "step": 25284
    },
    {
      "epoch": 0.7376451368224517,
      "grad_norm": 0.7887579127255342,
      "learning_rate": 1.699032884527732e-06,
      "loss": 0.1444,
      "step": 25285
    },
    {
      "epoch": 0.7376743100530953,
      "grad_norm": 1.2477765701714825,
      "learning_rate": 1.6986780560334165e-06,
      "loss": 0.0984,
      "step": 25286
    },
    {
      "epoch": 0.7377034832837388,
      "grad_norm": 1.09505236161158,
      "learning_rate": 1.698323257012517e-06,
      "loss": 0.1425,
      "step": 25287
    },
    {
      "epoch": 0.7377326565143824,
      "grad_norm": 1.1090470372073882,
      "learning_rate": 1.6979684874681983e-06,
      "loss": 0.1025,
      "step": 25288
    },
    {
      "epoch": 0.7377618297450259,
      "grad_norm": 0.7474682054854008,
      "learning_rate": 1.697613747403628e-06,
      "loss": 0.0979,
      "step": 25289
    },
    {
      "epoch": 0.7377910029756696,
      "grad_norm": 0.9958266496411424,
      "learning_rate": 1.6972590368219755e-06,
      "loss": 0.122,
      "step": 25290
    },
    {
      "epoch": 0.7378201762063131,
      "grad_norm": 1.4041187903991246,
      "learning_rate": 1.6969043557264053e-06,
      "loss": 0.1197,
      "step": 25291
    },
    {
      "epoch": 0.7378493494369567,
      "grad_norm": 0.873790759024061,
      "learning_rate": 1.6965497041200829e-06,
      "loss": 0.1109,
      "step": 25292
    },
    {
      "epoch": 0.7378785226676002,
      "grad_norm": 0.7870163084088219,
      "learning_rate": 1.6961950820061767e-06,
      "loss": 0.105,
      "step": 25293
    },
    {
      "epoch": 0.7379076958982438,
      "grad_norm": 0.8080892165558088,
      "learning_rate": 1.6958404893878534e-06,
      "loss": 0.1,
      "step": 25294
    },
    {
      "epoch": 0.7379368691288873,
      "grad_norm": 0.9422983683712554,
      "learning_rate": 1.6954859262682777e-06,
      "loss": 0.1259,
      "step": 25295
    },
    {
      "epoch": 0.7379660423595309,
      "grad_norm": 0.8727555335092521,
      "learning_rate": 1.6951313926506124e-06,
      "loss": 0.1172,
      "step": 25296
    },
    {
      "epoch": 0.7379952155901744,
      "grad_norm": 0.7652748839371435,
      "learning_rate": 1.6947768885380278e-06,
      "loss": 0.1222,
      "step": 25297
    },
    {
      "epoch": 0.738024388820818,
      "grad_norm": 1.0887679184307082,
      "learning_rate": 1.6944224139336835e-06,
      "loss": 0.1234,
      "step": 25298
    },
    {
      "epoch": 0.7380535620514616,
      "grad_norm": 1.2673148003698989,
      "learning_rate": 1.6940679688407474e-06,
      "loss": 0.1361,
      "step": 25299
    },
    {
      "epoch": 0.7380827352821051,
      "grad_norm": 0.8633692029700694,
      "learning_rate": 1.6937135532623849e-06,
      "loss": 0.1088,
      "step": 25300
    },
    {
      "epoch": 0.7381119085127487,
      "grad_norm": 0.7747075654055182,
      "learning_rate": 1.6933591672017585e-06,
      "loss": 0.1455,
      "step": 25301
    },
    {
      "epoch": 0.7381410817433922,
      "grad_norm": 0.677240042499947,
      "learning_rate": 1.69300481066203e-06,
      "loss": 0.1236,
      "step": 25302
    },
    {
      "epoch": 0.7381702549740359,
      "grad_norm": 0.9029406886127779,
      "learning_rate": 1.692650483646367e-06,
      "loss": 0.1178,
      "step": 25303
    },
    {
      "epoch": 0.7381994282046794,
      "grad_norm": 0.7303228048236673,
      "learning_rate": 1.6922961861579295e-06,
      "loss": 0.1231,
      "step": 25304
    },
    {
      "epoch": 0.738228601435323,
      "grad_norm": 1.0257079461165068,
      "learning_rate": 1.6919419181998835e-06,
      "loss": 0.12,
      "step": 25305
    },
    {
      "epoch": 0.7382577746659665,
      "grad_norm": 0.826062206882135,
      "learning_rate": 1.691587679775389e-06,
      "loss": 0.0988,
      "step": 25306
    },
    {
      "epoch": 0.7382869478966101,
      "grad_norm": 1.0108898120413579,
      "learning_rate": 1.6912334708876116e-06,
      "loss": 0.1151,
      "step": 25307
    },
    {
      "epoch": 0.7383161211272536,
      "grad_norm": 0.9036026203197851,
      "learning_rate": 1.6908792915397115e-06,
      "loss": 0.1466,
      "step": 25308
    },
    {
      "epoch": 0.7383452943578972,
      "grad_norm": 0.770493745646177,
      "learning_rate": 1.6905251417348496e-06,
      "loss": 0.1222,
      "step": 25309
    },
    {
      "epoch": 0.7383744675885408,
      "grad_norm": 1.0709504522731002,
      "learning_rate": 1.690171021476189e-06,
      "loss": 0.1103,
      "step": 25310
    },
    {
      "epoch": 0.7384036408191843,
      "grad_norm": 0.7943863876776105,
      "learning_rate": 1.6898169307668932e-06,
      "loss": 0.1019,
      "step": 25311
    },
    {
      "epoch": 0.7384328140498279,
      "grad_norm": 0.9259974212396024,
      "learning_rate": 1.6894628696101201e-06,
      "loss": 0.1235,
      "step": 25312
    },
    {
      "epoch": 0.7384619872804714,
      "grad_norm": 1.07402123444298,
      "learning_rate": 1.6891088380090342e-06,
      "loss": 0.1198,
      "step": 25313
    },
    {
      "epoch": 0.738491160511115,
      "grad_norm": 0.7311273303407267,
      "learning_rate": 1.6887548359667939e-06,
      "loss": 0.1168,
      "step": 25314
    },
    {
      "epoch": 0.7385203337417585,
      "grad_norm": 0.8746853956529446,
      "learning_rate": 1.6884008634865584e-06,
      "loss": 0.1197,
      "step": 25315
    },
    {
      "epoch": 0.7385495069724021,
      "grad_norm": 0.9412831713663902,
      "learning_rate": 1.6880469205714888e-06,
      "loss": 0.1156,
      "step": 25316
    },
    {
      "epoch": 0.7385786802030457,
      "grad_norm": 0.7666779693899957,
      "learning_rate": 1.6876930072247482e-06,
      "loss": 0.1141,
      "step": 25317
    },
    {
      "epoch": 0.7386078534336893,
      "grad_norm": 0.8145189765663631,
      "learning_rate": 1.6873391234494936e-06,
      "loss": 0.1234,
      "step": 25318
    },
    {
      "epoch": 0.7386370266643328,
      "grad_norm": 1.2152461541244233,
      "learning_rate": 1.6869852692488826e-06,
      "loss": 0.1284,
      "step": 25319
    },
    {
      "epoch": 0.7386661998949764,
      "grad_norm": 1.1502248798022294,
      "learning_rate": 1.6866314446260778e-06,
      "loss": 0.1454,
      "step": 25320
    },
    {
      "epoch": 0.73869537312562,
      "grad_norm": 1.0078035979676785,
      "learning_rate": 1.6862776495842365e-06,
      "loss": 0.1252,
      "step": 25321
    },
    {
      "epoch": 0.7387245463562635,
      "grad_norm": 0.8911224581332606,
      "learning_rate": 1.6859238841265157e-06,
      "loss": 0.1189,
      "step": 25322
    },
    {
      "epoch": 0.738753719586907,
      "grad_norm": 0.7859438445214656,
      "learning_rate": 1.6855701482560754e-06,
      "loss": 0.1257,
      "step": 25323
    },
    {
      "epoch": 0.7387828928175506,
      "grad_norm": 0.8271070160879815,
      "learning_rate": 1.6852164419760752e-06,
      "loss": 0.1133,
      "step": 25324
    },
    {
      "epoch": 0.7388120660481942,
      "grad_norm": 1.007895153492105,
      "learning_rate": 1.6848627652896716e-06,
      "loss": 0.1056,
      "step": 25325
    },
    {
      "epoch": 0.7388412392788377,
      "grad_norm": 0.9271857457001312,
      "learning_rate": 1.6845091182000196e-06,
      "loss": 0.1399,
      "step": 25326
    },
    {
      "epoch": 0.7388704125094813,
      "grad_norm": 0.7724191013336708,
      "learning_rate": 1.6841555007102806e-06,
      "loss": 0.1216,
      "step": 25327
    },
    {
      "epoch": 0.7388995857401248,
      "grad_norm": 0.7769602395806887,
      "learning_rate": 1.6838019128236083e-06,
      "loss": 0.0879,
      "step": 25328
    },
    {
      "epoch": 0.7389287589707684,
      "grad_norm": 0.9950594960387527,
      "learning_rate": 1.6834483545431606e-06,
      "loss": 0.1168,
      "step": 25329
    },
    {
      "epoch": 0.738957932201412,
      "grad_norm": 0.8284338149531952,
      "learning_rate": 1.6830948258720964e-06,
      "loss": 0.1219,
      "step": 25330
    },
    {
      "epoch": 0.7389871054320556,
      "grad_norm": 0.7465268207472059,
      "learning_rate": 1.6827413268135694e-06,
      "loss": 0.1082,
      "step": 25331
    },
    {
      "epoch": 0.7390162786626991,
      "grad_norm": 0.9532600911478566,
      "learning_rate": 1.6823878573707341e-06,
      "loss": 0.1024,
      "step": 25332
    },
    {
      "epoch": 0.7390454518933427,
      "grad_norm": 1.2348503494756145,
      "learning_rate": 1.6820344175467502e-06,
      "loss": 0.1254,
      "step": 25333
    },
    {
      "epoch": 0.7390746251239863,
      "grad_norm": 0.8048644288093839,
      "learning_rate": 1.6816810073447686e-06,
      "loss": 0.1249,
      "step": 25334
    },
    {
      "epoch": 0.7391037983546298,
      "grad_norm": 0.7463363458690977,
      "learning_rate": 1.681327626767949e-06,
      "loss": 0.1013,
      "step": 25335
    },
    {
      "epoch": 0.7391329715852734,
      "grad_norm": 0.8456477130553874,
      "learning_rate": 1.6809742758194426e-06,
      "loss": 0.113,
      "step": 25336
    },
    {
      "epoch": 0.7391621448159169,
      "grad_norm": 0.9659166131898201,
      "learning_rate": 1.680620954502407e-06,
      "loss": 0.1072,
      "step": 25337
    },
    {
      "epoch": 0.7391913180465605,
      "grad_norm": 0.8320748594693623,
      "learning_rate": 1.6802676628199948e-06,
      "loss": 0.1112,
      "step": 25338
    },
    {
      "epoch": 0.739220491277204,
      "grad_norm": 1.0040201288794541,
      "learning_rate": 1.6799144007753576e-06,
      "loss": 0.1024,
      "step": 25339
    },
    {
      "epoch": 0.7392496645078476,
      "grad_norm": 0.8786702076063105,
      "learning_rate": 1.6795611683716555e-06,
      "loss": 0.0968,
      "step": 25340
    },
    {
      "epoch": 0.7392788377384911,
      "grad_norm": 0.8722965065660602,
      "learning_rate": 1.679207965612038e-06,
      "loss": 0.0985,
      "step": 25341
    },
    {
      "epoch": 0.7393080109691347,
      "grad_norm": 0.8566522122966621,
      "learning_rate": 1.6788547924996578e-06,
      "loss": 0.1287,
      "step": 25342
    },
    {
      "epoch": 0.7393371841997782,
      "grad_norm": 0.8130467367070658,
      "learning_rate": 1.678501649037671e-06,
      "loss": 0.1162,
      "step": 25343
    },
    {
      "epoch": 0.7393663574304219,
      "grad_norm": 0.8761452407573056,
      "learning_rate": 1.6781485352292281e-06,
      "loss": 0.1175,
      "step": 25344
    },
    {
      "epoch": 0.7393955306610654,
      "grad_norm": 1.2940274165400936,
      "learning_rate": 1.6777954510774808e-06,
      "loss": 0.1206,
      "step": 25345
    },
    {
      "epoch": 0.739424703891709,
      "grad_norm": 1.0570273790806137,
      "learning_rate": 1.6774423965855823e-06,
      "loss": 0.1351,
      "step": 25346
    },
    {
      "epoch": 0.7394538771223526,
      "grad_norm": 0.9344682524090563,
      "learning_rate": 1.6770893717566872e-06,
      "loss": 0.111,
      "step": 25347
    },
    {
      "epoch": 0.7394830503529961,
      "grad_norm": 1.2793674111916882,
      "learning_rate": 1.6767363765939444e-06,
      "loss": 0.136,
      "step": 25348
    },
    {
      "epoch": 0.7395122235836397,
      "grad_norm": 1.0608057392286048,
      "learning_rate": 1.6763834111005039e-06,
      "loss": 0.1162,
      "step": 25349
    },
    {
      "epoch": 0.7395413968142832,
      "grad_norm": 0.7445686660391058,
      "learning_rate": 1.6760304752795215e-06,
      "loss": 0.1417,
      "step": 25350
    },
    {
      "epoch": 0.7395705700449268,
      "grad_norm": 0.9735687822159999,
      "learning_rate": 1.675677569134143e-06,
      "loss": 0.1121,
      "step": 25351
    },
    {
      "epoch": 0.7395997432755703,
      "grad_norm": 1.0102545417041977,
      "learning_rate": 1.6753246926675237e-06,
      "loss": 0.1304,
      "step": 25352
    },
    {
      "epoch": 0.7396289165062139,
      "grad_norm": 1.3639402008031616,
      "learning_rate": 1.6749718458828102e-06,
      "loss": 0.1373,
      "step": 25353
    },
    {
      "epoch": 0.7396580897368574,
      "grad_norm": 0.8620142432377433,
      "learning_rate": 1.6746190287831559e-06,
      "loss": 0.1103,
      "step": 25354
    },
    {
      "epoch": 0.739687262967501,
      "grad_norm": 1.014938079064612,
      "learning_rate": 1.6742662413717092e-06,
      "loss": 0.1222,
      "step": 25355
    },
    {
      "epoch": 0.7397164361981445,
      "grad_norm": 0.8723643333151515,
      "learning_rate": 1.673913483651618e-06,
      "loss": 0.1183,
      "step": 25356
    },
    {
      "epoch": 0.7397456094287882,
      "grad_norm": 0.8299990124103854,
      "learning_rate": 1.673560755626033e-06,
      "loss": 0.1075,
      "step": 25357
    },
    {
      "epoch": 0.7397747826594318,
      "grad_norm": 0.9832267034035805,
      "learning_rate": 1.6732080572981052e-06,
      "loss": 0.1156,
      "step": 25358
    },
    {
      "epoch": 0.7398039558900753,
      "grad_norm": 1.3896315058880548,
      "learning_rate": 1.6728553886709798e-06,
      "loss": 0.1327,
      "step": 25359
    },
    {
      "epoch": 0.7398331291207189,
      "grad_norm": 0.8507942262110857,
      "learning_rate": 1.6725027497478092e-06,
      "loss": 0.0972,
      "step": 25360
    },
    {
      "epoch": 0.7398623023513624,
      "grad_norm": 0.9933799142356851,
      "learning_rate": 1.6721501405317398e-06,
      "loss": 0.1407,
      "step": 25361
    },
    {
      "epoch": 0.739891475582006,
      "grad_norm": 1.0911925315744393,
      "learning_rate": 1.6717975610259175e-06,
      "loss": 0.1361,
      "step": 25362
    },
    {
      "epoch": 0.7399206488126495,
      "grad_norm": 0.9913835876354764,
      "learning_rate": 1.6714450112334924e-06,
      "loss": 0.1185,
      "step": 25363
    },
    {
      "epoch": 0.7399498220432931,
      "grad_norm": 0.9459508176023983,
      "learning_rate": 1.671092491157613e-06,
      "loss": 0.0895,
      "step": 25364
    },
    {
      "epoch": 0.7399789952739366,
      "grad_norm": 1.0171833360019018,
      "learning_rate": 1.6707400008014257e-06,
      "loss": 0.1016,
      "step": 25365
    },
    {
      "epoch": 0.7400081685045802,
      "grad_norm": 0.9135241923001208,
      "learning_rate": 1.6703875401680747e-06,
      "loss": 0.1179,
      "step": 25366
    },
    {
      "epoch": 0.7400373417352237,
      "grad_norm": 0.7377216085161417,
      "learning_rate": 1.670035109260711e-06,
      "loss": 0.1096,
      "step": 25367
    },
    {
      "epoch": 0.7400665149658673,
      "grad_norm": 0.781221612603949,
      "learning_rate": 1.6696827080824784e-06,
      "loss": 0.094,
      "step": 25368
    },
    {
      "epoch": 0.7400956881965108,
      "grad_norm": 1.2490721073710185,
      "learning_rate": 1.6693303366365205e-06,
      "loss": 0.1285,
      "step": 25369
    },
    {
      "epoch": 0.7401248614271544,
      "grad_norm": 1.140595970412751,
      "learning_rate": 1.6689779949259894e-06,
      "loss": 0.1423,
      "step": 25370
    },
    {
      "epoch": 0.740154034657798,
      "grad_norm": 1.1134970041962298,
      "learning_rate": 1.6686256829540282e-06,
      "loss": 0.1228,
      "step": 25371
    },
    {
      "epoch": 0.7401832078884416,
      "grad_norm": 0.9452682178795438,
      "learning_rate": 1.6682734007237793e-06,
      "loss": 0.1272,
      "step": 25372
    },
    {
      "epoch": 0.7402123811190852,
      "grad_norm": 1.035749661831755,
      "learning_rate": 1.6679211482383923e-06,
      "loss": 0.0964,
      "step": 25373
    },
    {
      "epoch": 0.7402415543497287,
      "grad_norm": 0.7226682253429818,
      "learning_rate": 1.6675689255010098e-06,
      "loss": 0.1007,
      "step": 25374
    },
    {
      "epoch": 0.7402707275803723,
      "grad_norm": 0.8096881255591155,
      "learning_rate": 1.6672167325147741e-06,
      "loss": 0.1082,
      "step": 25375
    },
    {
      "epoch": 0.7402999008110158,
      "grad_norm": 0.897310494971551,
      "learning_rate": 1.6668645692828323e-06,
      "loss": 0.0941,
      "step": 25376
    },
    {
      "epoch": 0.7403290740416594,
      "grad_norm": 0.9521364435395179,
      "learning_rate": 1.6665124358083296e-06,
      "loss": 0.1282,
      "step": 25377
    },
    {
      "epoch": 0.7403582472723029,
      "grad_norm": 1.2353152986653075,
      "learning_rate": 1.666160332094408e-06,
      "loss": 0.1183,
      "step": 25378
    },
    {
      "epoch": 0.7403874205029465,
      "grad_norm": 0.7693406591470277,
      "learning_rate": 1.6658082581442098e-06,
      "loss": 0.1218,
      "step": 25379
    },
    {
      "epoch": 0.74041659373359,
      "grad_norm": 0.7983395794475866,
      "learning_rate": 1.665456213960881e-06,
      "loss": 0.1289,
      "step": 25380
    },
    {
      "epoch": 0.7404457669642336,
      "grad_norm": 0.8501129599977921,
      "learning_rate": 1.6651041995475613e-06,
      "loss": 0.1125,
      "step": 25381
    },
    {
      "epoch": 0.7404749401948771,
      "grad_norm": 0.8171686588634324,
      "learning_rate": 1.664752214907397e-06,
      "loss": 0.1608,
      "step": 25382
    },
    {
      "epoch": 0.7405041134255207,
      "grad_norm": 0.927952847881401,
      "learning_rate": 1.6644002600435267e-06,
      "loss": 0.1194,
      "step": 25383
    },
    {
      "epoch": 0.7405332866561644,
      "grad_norm": 0.8322235908395166,
      "learning_rate": 1.664048334959097e-06,
      "loss": 0.1007,
      "step": 25384
    },
    {
      "epoch": 0.7405624598868079,
      "grad_norm": 0.7438857772698875,
      "learning_rate": 1.663696439657247e-06,
      "loss": 0.105,
      "step": 25385
    },
    {
      "epoch": 0.7405916331174515,
      "grad_norm": 0.7538778802375633,
      "learning_rate": 1.6633445741411169e-06,
      "loss": 0.1027,
      "step": 25386
    },
    {
      "epoch": 0.740620806348095,
      "grad_norm": 1.1904370590106432,
      "learning_rate": 1.66299273841385e-06,
      "loss": 0.1211,
      "step": 25387
    },
    {
      "epoch": 0.7406499795787386,
      "grad_norm": 1.5398770808674112,
      "learning_rate": 1.662640932478589e-06,
      "loss": 0.1079,
      "step": 25388
    },
    {
      "epoch": 0.7406791528093821,
      "grad_norm": 0.8759431557643031,
      "learning_rate": 1.6622891563384714e-06,
      "loss": 0.1098,
      "step": 25389
    },
    {
      "epoch": 0.7407083260400257,
      "grad_norm": 0.7902504275915255,
      "learning_rate": 1.6619374099966412e-06,
      "loss": 0.1224,
      "step": 25390
    },
    {
      "epoch": 0.7407374992706692,
      "grad_norm": 1.162438744624882,
      "learning_rate": 1.661585693456237e-06,
      "loss": 0.1273,
      "step": 25391
    },
    {
      "epoch": 0.7407666725013128,
      "grad_norm": 0.9169285785607442,
      "learning_rate": 1.6612340067203968e-06,
      "loss": 0.1168,
      "step": 25392
    },
    {
      "epoch": 0.7407958457319563,
      "grad_norm": 0.8393633412940993,
      "learning_rate": 1.6608823497922626e-06,
      "loss": 0.1108,
      "step": 25393
    },
    {
      "epoch": 0.7408250189625999,
      "grad_norm": 0.6990912682692665,
      "learning_rate": 1.6605307226749757e-06,
      "loss": 0.1095,
      "step": 25394
    },
    {
      "epoch": 0.7408541921932434,
      "grad_norm": 0.9879222376101539,
      "learning_rate": 1.6601791253716725e-06,
      "loss": 0.1415,
      "step": 25395
    },
    {
      "epoch": 0.740883365423887,
      "grad_norm": 1.1348303097250978,
      "learning_rate": 1.6598275578854917e-06,
      "loss": 0.1076,
      "step": 25396
    },
    {
      "epoch": 0.7409125386545306,
      "grad_norm": 0.9411588160906919,
      "learning_rate": 1.6594760202195749e-06,
      "loss": 0.1228,
      "step": 25397
    },
    {
      "epoch": 0.7409417118851742,
      "grad_norm": 0.8093755643714603,
      "learning_rate": 1.6591245123770583e-06,
      "loss": 0.1162,
      "step": 25398
    },
    {
      "epoch": 0.7409708851158178,
      "grad_norm": 1.0155090857890545,
      "learning_rate": 1.6587730343610776e-06,
      "loss": 0.1288,
      "step": 25399
    },
    {
      "epoch": 0.7410000583464613,
      "grad_norm": 0.8846906618267024,
      "learning_rate": 1.6584215861747766e-06,
      "loss": 0.1101,
      "step": 25400
    },
    {
      "epoch": 0.7410292315771049,
      "grad_norm": 0.7678375805109864,
      "learning_rate": 1.65807016782129e-06,
      "loss": 0.1123,
      "step": 25401
    },
    {
      "epoch": 0.7410584048077484,
      "grad_norm": 0.7614403773250501,
      "learning_rate": 1.6577187793037535e-06,
      "loss": 0.1152,
      "step": 25402
    },
    {
      "epoch": 0.741087578038392,
      "grad_norm": 0.7680447139282883,
      "learning_rate": 1.6573674206253077e-06,
      "loss": 0.1033,
      "step": 25403
    },
    {
      "epoch": 0.7411167512690355,
      "grad_norm": 0.7959803953095608,
      "learning_rate": 1.6570160917890876e-06,
      "loss": 0.1067,
      "step": 25404
    },
    {
      "epoch": 0.7411459244996791,
      "grad_norm": 0.86153092292329,
      "learning_rate": 1.6566647927982283e-06,
      "loss": 0.1366,
      "step": 25405
    },
    {
      "epoch": 0.7411750977303226,
      "grad_norm": 0.8970047267704228,
      "learning_rate": 1.6563135236558675e-06,
      "loss": 0.1234,
      "step": 25406
    },
    {
      "epoch": 0.7412042709609662,
      "grad_norm": 0.8237904485101333,
      "learning_rate": 1.6559622843651429e-06,
      "loss": 0.1032,
      "step": 25407
    },
    {
      "epoch": 0.7412334441916097,
      "grad_norm": 0.7105008058025912,
      "learning_rate": 1.6556110749291888e-06,
      "loss": 0.1236,
      "step": 25408
    },
    {
      "epoch": 0.7412626174222533,
      "grad_norm": 0.7860115769909971,
      "learning_rate": 1.655259895351139e-06,
      "loss": 0.1072,
      "step": 25409
    },
    {
      "epoch": 0.7412917906528969,
      "grad_norm": 0.8804442229356237,
      "learning_rate": 1.6549087456341317e-06,
      "loss": 0.1257,
      "step": 25410
    },
    {
      "epoch": 0.7413209638835405,
      "grad_norm": 0.8105582031140881,
      "learning_rate": 1.6545576257812995e-06,
      "loss": 0.1246,
      "step": 25411
    },
    {
      "epoch": 0.7413501371141841,
      "grad_norm": 0.789937401807501,
      "learning_rate": 1.6542065357957793e-06,
      "loss": 0.1296,
      "step": 25412
    },
    {
      "epoch": 0.7413793103448276,
      "grad_norm": 1.0434908734170527,
      "learning_rate": 1.6538554756807035e-06,
      "loss": 0.151,
      "step": 25413
    },
    {
      "epoch": 0.7414084835754712,
      "grad_norm": 0.8006444351043412,
      "learning_rate": 1.653504445439208e-06,
      "loss": 0.1096,
      "step": 25414
    },
    {
      "epoch": 0.7414376568061147,
      "grad_norm": 0.838615604229827,
      "learning_rate": 1.6531534450744268e-06,
      "loss": 0.1339,
      "step": 25415
    },
    {
      "epoch": 0.7414668300367583,
      "grad_norm": 0.8090596873840712,
      "learning_rate": 1.6528024745894904e-06,
      "loss": 0.1156,
      "step": 25416
    },
    {
      "epoch": 0.7414960032674018,
      "grad_norm": 1.1246619977451124,
      "learning_rate": 1.6524515339875346e-06,
      "loss": 0.1187,
      "step": 25417
    },
    {
      "epoch": 0.7415251764980454,
      "grad_norm": 0.7490263473434614,
      "learning_rate": 1.6521006232716941e-06,
      "loss": 0.1124,
      "step": 25418
    },
    {
      "epoch": 0.7415543497286889,
      "grad_norm": 0.7352819684613954,
      "learning_rate": 1.6517497424450985e-06,
      "loss": 0.15,
      "step": 25419
    },
    {
      "epoch": 0.7415835229593325,
      "grad_norm": 0.8687632301078961,
      "learning_rate": 1.6513988915108836e-06,
      "loss": 0.1202,
      "step": 25420
    },
    {
      "epoch": 0.741612696189976,
      "grad_norm": 0.7663999610481497,
      "learning_rate": 1.6510480704721798e-06,
      "loss": 0.119,
      "step": 25421
    },
    {
      "epoch": 0.7416418694206196,
      "grad_norm": 0.7455157376620623,
      "learning_rate": 1.650697279332118e-06,
      "loss": 0.1533,
      "step": 25422
    },
    {
      "epoch": 0.7416710426512632,
      "grad_norm": 0.7097885563794665,
      "learning_rate": 1.650346518093831e-06,
      "loss": 0.1073,
      "step": 25423
    },
    {
      "epoch": 0.7417002158819067,
      "grad_norm": 0.8284290309907514,
      "learning_rate": 1.6499957867604527e-06,
      "loss": 0.1112,
      "step": 25424
    },
    {
      "epoch": 0.7417293891125504,
      "grad_norm": 0.7962707894633176,
      "learning_rate": 1.649645085335112e-06,
      "loss": 0.1107,
      "step": 25425
    },
    {
      "epoch": 0.7417585623431939,
      "grad_norm": 0.7968001948841206,
      "learning_rate": 1.6492944138209382e-06,
      "loss": 0.1284,
      "step": 25426
    },
    {
      "epoch": 0.7417877355738375,
      "grad_norm": 1.2010669149237727,
      "learning_rate": 1.648943772221066e-06,
      "loss": 0.1276,
      "step": 25427
    },
    {
      "epoch": 0.741816908804481,
      "grad_norm": 0.7628615958201201,
      "learning_rate": 1.648593160538624e-06,
      "loss": 0.0869,
      "step": 25428
    },
    {
      "epoch": 0.7418460820351246,
      "grad_norm": 0.6660997680511695,
      "learning_rate": 1.6482425787767392e-06,
      "loss": 0.0993,
      "step": 25429
    },
    {
      "epoch": 0.7418752552657681,
      "grad_norm": 0.9096575575987993,
      "learning_rate": 1.6478920269385472e-06,
      "loss": 0.1365,
      "step": 25430
    },
    {
      "epoch": 0.7419044284964117,
      "grad_norm": 1.1179123581038652,
      "learning_rate": 1.6475415050271754e-06,
      "loss": 0.1183,
      "step": 25431
    },
    {
      "epoch": 0.7419336017270552,
      "grad_norm": 0.8847006034413464,
      "learning_rate": 1.6471910130457508e-06,
      "loss": 0.1199,
      "step": 25432
    },
    {
      "epoch": 0.7419627749576988,
      "grad_norm": 0.9165676425464232,
      "learning_rate": 1.646840550997406e-06,
      "loss": 0.123,
      "step": 25433
    },
    {
      "epoch": 0.7419919481883424,
      "grad_norm": 1.2480362274106354,
      "learning_rate": 1.6464901188852684e-06,
      "loss": 0.1234,
      "step": 25434
    },
    {
      "epoch": 0.7420211214189859,
      "grad_norm": 0.7311462670928583,
      "learning_rate": 1.646139716712465e-06,
      "loss": 0.0972,
      "step": 25435
    },
    {
      "epoch": 0.7420502946496295,
      "grad_norm": 1.0648678979346429,
      "learning_rate": 1.6457893444821255e-06,
      "loss": 0.1195,
      "step": 25436
    },
    {
      "epoch": 0.742079467880273,
      "grad_norm": 0.9334327651337558,
      "learning_rate": 1.6454390021973798e-06,
      "loss": 0.1074,
      "step": 25437
    },
    {
      "epoch": 0.7421086411109167,
      "grad_norm": 0.7848374413560427,
      "learning_rate": 1.6450886898613538e-06,
      "loss": 0.1266,
      "step": 25438
    },
    {
      "epoch": 0.7421378143415602,
      "grad_norm": 0.9621393073657919,
      "learning_rate": 1.6447384074771732e-06,
      "loss": 0.1237,
      "step": 25439
    },
    {
      "epoch": 0.7421669875722038,
      "grad_norm": 0.9061550032611203,
      "learning_rate": 1.644388155047969e-06,
      "loss": 0.1102,
      "step": 25440
    },
    {
      "epoch": 0.7421961608028473,
      "grad_norm": 1.186165421768891,
      "learning_rate": 1.6440379325768646e-06,
      "loss": 0.1064,
      "step": 25441
    },
    {
      "epoch": 0.7422253340334909,
      "grad_norm": 1.1411524256563175,
      "learning_rate": 1.6436877400669904e-06,
      "loss": 0.1158,
      "step": 25442
    },
    {
      "epoch": 0.7422545072641344,
      "grad_norm": 0.9063056563284617,
      "learning_rate": 1.643337577521469e-06,
      "loss": 0.0974,
      "step": 25443
    },
    {
      "epoch": 0.742283680494778,
      "grad_norm": 0.9133675605781577,
      "learning_rate": 1.6429874449434297e-06,
      "loss": 0.1212,
      "step": 25444
    },
    {
      "epoch": 0.7423128537254216,
      "grad_norm": 1.1041187593131603,
      "learning_rate": 1.6426373423359975e-06,
      "loss": 0.1201,
      "step": 25445
    },
    {
      "epoch": 0.7423420269560651,
      "grad_norm": 1.051387351379852,
      "learning_rate": 1.6422872697022958e-06,
      "loss": 0.1025,
      "step": 25446
    },
    {
      "epoch": 0.7423712001867087,
      "grad_norm": 0.9285985697730978,
      "learning_rate": 1.641937227045452e-06,
      "loss": 0.1158,
      "step": 25447
    },
    {
      "epoch": 0.7424003734173522,
      "grad_norm": 1.0951366644053775,
      "learning_rate": 1.6415872143685924e-06,
      "loss": 0.1423,
      "step": 25448
    },
    {
      "epoch": 0.7424295466479958,
      "grad_norm": 1.001333933093759,
      "learning_rate": 1.6412372316748387e-06,
      "loss": 0.1102,
      "step": 25449
    },
    {
      "epoch": 0.7424587198786393,
      "grad_norm": 0.8529908056140938,
      "learning_rate": 1.640887278967319e-06,
      "loss": 0.1353,
      "step": 25450
    },
    {
      "epoch": 0.7424878931092829,
      "grad_norm": 0.6451206073086674,
      "learning_rate": 1.6405373562491562e-06,
      "loss": 0.1205,
      "step": 25451
    },
    {
      "epoch": 0.7425170663399265,
      "grad_norm": 0.7512592341763321,
      "learning_rate": 1.6401874635234716e-06,
      "loss": 0.1365,
      "step": 25452
    },
    {
      "epoch": 0.7425462395705701,
      "grad_norm": 0.6750821858785784,
      "learning_rate": 1.6398376007933914e-06,
      "loss": 0.1057,
      "step": 25453
    },
    {
      "epoch": 0.7425754128012136,
      "grad_norm": 0.8788521283989128,
      "learning_rate": 1.6394877680620407e-06,
      "loss": 0.1235,
      "step": 25454
    },
    {
      "epoch": 0.7426045860318572,
      "grad_norm": 0.6389843363690554,
      "learning_rate": 1.6391379653325412e-06,
      "loss": 0.1023,
      "step": 25455
    },
    {
      "epoch": 0.7426337592625007,
      "grad_norm": 0.7921985564656653,
      "learning_rate": 1.638788192608014e-06,
      "loss": 0.1208,
      "step": 25456
    },
    {
      "epoch": 0.7426629324931443,
      "grad_norm": 0.8358770450717694,
      "learning_rate": 1.6384384498915844e-06,
      "loss": 0.1136,
      "step": 25457
    },
    {
      "epoch": 0.7426921057237879,
      "grad_norm": 0.6269270825396777,
      "learning_rate": 1.6380887371863747e-06,
      "loss": 0.1143,
      "step": 25458
    },
    {
      "epoch": 0.7427212789544314,
      "grad_norm": 0.7772857358708563,
      "learning_rate": 1.6377390544955024e-06,
      "loss": 0.1121,
      "step": 25459
    },
    {
      "epoch": 0.742750452185075,
      "grad_norm": 0.8009850369783482,
      "learning_rate": 1.6373894018220971e-06,
      "loss": 0.1038,
      "step": 25460
    },
    {
      "epoch": 0.7427796254157185,
      "grad_norm": 0.9487626611399563,
      "learning_rate": 1.637039779169276e-06,
      "loss": 0.1236,
      "step": 25461
    },
    {
      "epoch": 0.7428087986463621,
      "grad_norm": 0.7575777363808675,
      "learning_rate": 1.6366901865401592e-06,
      "loss": 0.1101,
      "step": 25462
    },
    {
      "epoch": 0.7428379718770056,
      "grad_norm": 0.8093174230273426,
      "learning_rate": 1.6363406239378715e-06,
      "loss": 0.1219,
      "step": 25463
    },
    {
      "epoch": 0.7428671451076492,
      "grad_norm": 0.8523740167678296,
      "learning_rate": 1.6359910913655314e-06,
      "loss": 0.1119,
      "step": 25464
    },
    {
      "epoch": 0.7428963183382927,
      "grad_norm": 0.8158904565755344,
      "learning_rate": 1.6356415888262583e-06,
      "loss": 0.1242,
      "step": 25465
    },
    {
      "epoch": 0.7429254915689364,
      "grad_norm": 0.7557731574314885,
      "learning_rate": 1.6352921163231738e-06,
      "loss": 0.1212,
      "step": 25466
    },
    {
      "epoch": 0.74295466479958,
      "grad_norm": 1.5021850666971923,
      "learning_rate": 1.6349426738594e-06,
      "loss": 0.1005,
      "step": 25467
    },
    {
      "epoch": 0.7429838380302235,
      "grad_norm": 0.8435779966046582,
      "learning_rate": 1.634593261438055e-06,
      "loss": 0.1078,
      "step": 25468
    },
    {
      "epoch": 0.743013011260867,
      "grad_norm": 0.6486910102729132,
      "learning_rate": 1.6342438790622556e-06,
      "loss": 0.1117,
      "step": 25469
    },
    {
      "epoch": 0.7430421844915106,
      "grad_norm": 0.7197599091949821,
      "learning_rate": 1.6338945267351253e-06,
      "loss": 0.1015,
      "step": 25470
    },
    {
      "epoch": 0.7430713577221542,
      "grad_norm": 0.9144436898866133,
      "learning_rate": 1.6335452044597794e-06,
      "loss": 0.1023,
      "step": 25471
    },
    {
      "epoch": 0.7431005309527977,
      "grad_norm": 0.6483596145456543,
      "learning_rate": 1.6331959122393405e-06,
      "loss": 0.1117,
      "step": 25472
    },
    {
      "epoch": 0.7431297041834413,
      "grad_norm": 0.787331992253388,
      "learning_rate": 1.6328466500769225e-06,
      "loss": 0.1115,
      "step": 25473
    },
    {
      "epoch": 0.7431588774140848,
      "grad_norm": 0.9774850073862337,
      "learning_rate": 1.6324974179756476e-06,
      "loss": 0.1122,
      "step": 25474
    },
    {
      "epoch": 0.7431880506447284,
      "grad_norm": 0.7852460671945108,
      "learning_rate": 1.6321482159386314e-06,
      "loss": 0.1164,
      "step": 25475
    },
    {
      "epoch": 0.7432172238753719,
      "grad_norm": 0.8141108891769548,
      "learning_rate": 1.6317990439689913e-06,
      "loss": 0.1035,
      "step": 25476
    },
    {
      "epoch": 0.7432463971060155,
      "grad_norm": 0.8439670860992345,
      "learning_rate": 1.6314499020698444e-06,
      "loss": 0.1224,
      "step": 25477
    },
    {
      "epoch": 0.743275570336659,
      "grad_norm": 0.8902089647834072,
      "learning_rate": 1.631100790244311e-06,
      "loss": 0.109,
      "step": 25478
    },
    {
      "epoch": 0.7433047435673027,
      "grad_norm": 0.9622996545312251,
      "learning_rate": 1.6307517084955033e-06,
      "loss": 0.1022,
      "step": 25479
    },
    {
      "epoch": 0.7433339167979462,
      "grad_norm": 0.8769423630598164,
      "learning_rate": 1.630402656826542e-06,
      "loss": 0.1164,
      "step": 25480
    },
    {
      "epoch": 0.7433630900285898,
      "grad_norm": 1.1854201881829816,
      "learning_rate": 1.630053635240541e-06,
      "loss": 0.1146,
      "step": 25481
    },
    {
      "epoch": 0.7433922632592334,
      "grad_norm": 0.8432987016520562,
      "learning_rate": 1.6297046437406156e-06,
      "loss": 0.1022,
      "step": 25482
    },
    {
      "epoch": 0.7434214364898769,
      "grad_norm": 0.7889299292369527,
      "learning_rate": 1.6293556823298823e-06,
      "loss": 0.133,
      "step": 25483
    },
    {
      "epoch": 0.7434506097205205,
      "grad_norm": 1.0117205907326565,
      "learning_rate": 1.6290067510114583e-06,
      "loss": 0.1201,
      "step": 25484
    },
    {
      "epoch": 0.743479782951164,
      "grad_norm": 1.0412832235517318,
      "learning_rate": 1.6286578497884575e-06,
      "loss": 0.098,
      "step": 25485
    },
    {
      "epoch": 0.7435089561818076,
      "grad_norm": 0.8620017663603526,
      "learning_rate": 1.6283089786639933e-06,
      "loss": 0.1286,
      "step": 25486
    },
    {
      "epoch": 0.7435381294124511,
      "grad_norm": 1.133428864430782,
      "learning_rate": 1.627960137641183e-06,
      "loss": 0.1316,
      "step": 25487
    },
    {
      "epoch": 0.7435673026430947,
      "grad_norm": 0.7748872761268861,
      "learning_rate": 1.6276113267231392e-06,
      "loss": 0.1254,
      "step": 25488
    },
    {
      "epoch": 0.7435964758737382,
      "grad_norm": 0.8168004763587532,
      "learning_rate": 1.6272625459129737e-06,
      "loss": 0.1147,
      "step": 25489
    },
    {
      "epoch": 0.7436256491043818,
      "grad_norm": 0.7334192510394804,
      "learning_rate": 1.6269137952138064e-06,
      "loss": 0.1153,
      "step": 25490
    },
    {
      "epoch": 0.7436548223350253,
      "grad_norm": 1.0478577222605305,
      "learning_rate": 1.626565074628747e-06,
      "loss": 0.134,
      "step": 25491
    },
    {
      "epoch": 0.7436839955656689,
      "grad_norm": 0.8121206573306156,
      "learning_rate": 1.626216384160908e-06,
      "loss": 0.1189,
      "step": 25492
    },
    {
      "epoch": 0.7437131687963126,
      "grad_norm": 0.9527340094945739,
      "learning_rate": 1.6258677238134052e-06,
      "loss": 0.1017,
      "step": 25493
    },
    {
      "epoch": 0.7437423420269561,
      "grad_norm": 0.8191193835618129,
      "learning_rate": 1.62551909358935e-06,
      "loss": 0.1368,
      "step": 25494
    },
    {
      "epoch": 0.7437715152575997,
      "grad_norm": 0.8564678401155142,
      "learning_rate": 1.6251704934918533e-06,
      "loss": 0.0997,
      "step": 25495
    },
    {
      "epoch": 0.7438006884882432,
      "grad_norm": 0.7826307777950089,
      "learning_rate": 1.6248219235240287e-06,
      "loss": 0.1035,
      "step": 25496
    },
    {
      "epoch": 0.7438298617188868,
      "grad_norm": 0.8216581390653788,
      "learning_rate": 1.6244733836889897e-06,
      "loss": 0.1015,
      "step": 25497
    },
    {
      "epoch": 0.7438590349495303,
      "grad_norm": 0.7795015813151074,
      "learning_rate": 1.6241248739898469e-06,
      "loss": 0.1241,
      "step": 25498
    },
    {
      "epoch": 0.7438882081801739,
      "grad_norm": 0.9220823397389473,
      "learning_rate": 1.623776394429709e-06,
      "loss": 0.1162,
      "step": 25499
    },
    {
      "epoch": 0.7439173814108174,
      "grad_norm": 0.8041320740082133,
      "learning_rate": 1.6234279450116918e-06,
      "loss": 0.0977,
      "step": 25500
    },
    {
      "epoch": 0.743946554641461,
      "grad_norm": 0.7047822171205932,
      "learning_rate": 1.6230795257389021e-06,
      "loss": 0.1311,
      "step": 25501
    },
    {
      "epoch": 0.7439757278721045,
      "grad_norm": 0.7572621591783532,
      "learning_rate": 1.6227311366144538e-06,
      "loss": 0.1121,
      "step": 25502
    },
    {
      "epoch": 0.7440049011027481,
      "grad_norm": 1.0272799798139245,
      "learning_rate": 1.622382777641454e-06,
      "loss": 0.1148,
      "step": 25503
    },
    {
      "epoch": 0.7440340743333916,
      "grad_norm": 0.8133294697702453,
      "learning_rate": 1.622034448823016e-06,
      "loss": 0.1005,
      "step": 25504
    },
    {
      "epoch": 0.7440632475640352,
      "grad_norm": 0.8817510751531433,
      "learning_rate": 1.6216861501622483e-06,
      "loss": 0.1212,
      "step": 25505
    },
    {
      "epoch": 0.7440924207946789,
      "grad_norm": 1.0316321980202487,
      "learning_rate": 1.6213378816622583e-06,
      "loss": 0.1438,
      "step": 25506
    },
    {
      "epoch": 0.7441215940253224,
      "grad_norm": 0.7901698333667843,
      "learning_rate": 1.6209896433261573e-06,
      "loss": 0.1144,
      "step": 25507
    },
    {
      "epoch": 0.744150767255966,
      "grad_norm": 0.7283455912549498,
      "learning_rate": 1.620641435157056e-06,
      "loss": 0.1242,
      "step": 25508
    },
    {
      "epoch": 0.7441799404866095,
      "grad_norm": 1.124404937433288,
      "learning_rate": 1.6202932571580593e-06,
      "loss": 0.1234,
      "step": 25509
    },
    {
      "epoch": 0.7442091137172531,
      "grad_norm": 0.741831345451023,
      "learning_rate": 1.6199451093322794e-06,
      "loss": 0.09,
      "step": 25510
    },
    {
      "epoch": 0.7442382869478966,
      "grad_norm": 0.8475788042988227,
      "learning_rate": 1.6195969916828224e-06,
      "loss": 0.0986,
      "step": 25511
    },
    {
      "epoch": 0.7442674601785402,
      "grad_norm": 1.1727542967840356,
      "learning_rate": 1.619248904212795e-06,
      "loss": 0.1177,
      "step": 25512
    },
    {
      "epoch": 0.7442966334091837,
      "grad_norm": 0.963286441570922,
      "learning_rate": 1.6189008469253064e-06,
      "loss": 0.1264,
      "step": 25513
    },
    {
      "epoch": 0.7443258066398273,
      "grad_norm": 1.0430121824220553,
      "learning_rate": 1.6185528198234656e-06,
      "loss": 0.1297,
      "step": 25514
    },
    {
      "epoch": 0.7443549798704708,
      "grad_norm": 0.8351046293034777,
      "learning_rate": 1.6182048229103774e-06,
      "loss": 0.1172,
      "step": 25515
    },
    {
      "epoch": 0.7443841531011144,
      "grad_norm": 0.8808831723939586,
      "learning_rate": 1.6178568561891484e-06,
      "loss": 0.1146,
      "step": 25516
    },
    {
      "epoch": 0.7444133263317579,
      "grad_norm": 1.0297276449556383,
      "learning_rate": 1.6175089196628874e-06,
      "loss": 0.1053,
      "step": 25517
    },
    {
      "epoch": 0.7444424995624015,
      "grad_norm": 0.7246997990728504,
      "learning_rate": 1.6171610133346992e-06,
      "loss": 0.1078,
      "step": 25518
    },
    {
      "epoch": 0.744471672793045,
      "grad_norm": 0.8487178867770659,
      "learning_rate": 1.6168131372076868e-06,
      "loss": 0.1335,
      "step": 25519
    },
    {
      "epoch": 0.7445008460236887,
      "grad_norm": 1.2413324814821267,
      "learning_rate": 1.616465291284962e-06,
      "loss": 0.1137,
      "step": 25520
    },
    {
      "epoch": 0.7445300192543323,
      "grad_norm": 0.8236342336085557,
      "learning_rate": 1.616117475569628e-06,
      "loss": 0.1131,
      "step": 25521
    },
    {
      "epoch": 0.7445591924849758,
      "grad_norm": 0.903432158000206,
      "learning_rate": 1.6157696900647874e-06,
      "loss": 0.1234,
      "step": 25522
    },
    {
      "epoch": 0.7445883657156194,
      "grad_norm": 1.0895261908075224,
      "learning_rate": 1.6154219347735484e-06,
      "loss": 0.1453,
      "step": 25523
    },
    {
      "epoch": 0.7446175389462629,
      "grad_norm": 1.2394685697188887,
      "learning_rate": 1.6150742096990151e-06,
      "loss": 0.127,
      "step": 25524
    },
    {
      "epoch": 0.7446467121769065,
      "grad_norm": 0.7495244324670081,
      "learning_rate": 1.6147265148442892e-06,
      "loss": 0.1084,
      "step": 25525
    },
    {
      "epoch": 0.74467588540755,
      "grad_norm": 0.6213348733752562,
      "learning_rate": 1.6143788502124768e-06,
      "loss": 0.1036,
      "step": 25526
    },
    {
      "epoch": 0.7447050586381936,
      "grad_norm": 0.8996843840594385,
      "learning_rate": 1.6140312158066834e-06,
      "loss": 0.1032,
      "step": 25527
    },
    {
      "epoch": 0.7447342318688371,
      "grad_norm": 1.1597407651426115,
      "learning_rate": 1.6136836116300109e-06,
      "loss": 0.1166,
      "step": 25528
    },
    {
      "epoch": 0.7447634050994807,
      "grad_norm": 0.6980797666475563,
      "learning_rate": 1.6133360376855616e-06,
      "loss": 0.1098,
      "step": 25529
    },
    {
      "epoch": 0.7447925783301242,
      "grad_norm": 0.6870208505510624,
      "learning_rate": 1.6129884939764396e-06,
      "loss": 0.0947,
      "step": 25530
    },
    {
      "epoch": 0.7448217515607678,
      "grad_norm": 0.7926473302234334,
      "learning_rate": 1.6126409805057492e-06,
      "loss": 0.1198,
      "step": 25531
    },
    {
      "epoch": 0.7448509247914114,
      "grad_norm": 0.9145850364045315,
      "learning_rate": 1.6122934972765914e-06,
      "loss": 0.1071,
      "step": 25532
    },
    {
      "epoch": 0.744880098022055,
      "grad_norm": 1.2818810250205608,
      "learning_rate": 1.611946044292067e-06,
      "loss": 0.1108,
      "step": 25533
    },
    {
      "epoch": 0.7449092712526986,
      "grad_norm": 0.8039091127731487,
      "learning_rate": 1.6115986215552808e-06,
      "loss": 0.098,
      "step": 25534
    },
    {
      "epoch": 0.7449384444833421,
      "grad_norm": 0.8527214430764477,
      "learning_rate": 1.6112512290693338e-06,
      "loss": 0.1416,
      "step": 25535
    },
    {
      "epoch": 0.7449676177139857,
      "grad_norm": 0.970098336114009,
      "learning_rate": 1.6109038668373234e-06,
      "loss": 0.0991,
      "step": 25536
    },
    {
      "epoch": 0.7449967909446292,
      "grad_norm": 1.1187396466934256,
      "learning_rate": 1.6105565348623574e-06,
      "loss": 0.1327,
      "step": 25537
    },
    {
      "epoch": 0.7450259641752728,
      "grad_norm": 0.9311686004770586,
      "learning_rate": 1.6102092331475339e-06,
      "loss": 0.1037,
      "step": 25538
    },
    {
      "epoch": 0.7450551374059163,
      "grad_norm": 0.8785398287443141,
      "learning_rate": 1.609861961695951e-06,
      "loss": 0.0931,
      "step": 25539
    },
    {
      "epoch": 0.7450843106365599,
      "grad_norm": 0.8494143320744586,
      "learning_rate": 1.609514720510713e-06,
      "loss": 0.106,
      "step": 25540
    },
    {
      "epoch": 0.7451134838672034,
      "grad_norm": 0.9415105827703377,
      "learning_rate": 1.6091675095949189e-06,
      "loss": 0.112,
      "step": 25541
    },
    {
      "epoch": 0.745142657097847,
      "grad_norm": 0.8763081132105256,
      "learning_rate": 1.6088203289516652e-06,
      "loss": 0.1271,
      "step": 25542
    },
    {
      "epoch": 0.7451718303284905,
      "grad_norm": 0.8929431481682392,
      "learning_rate": 1.6084731785840547e-06,
      "loss": 0.116,
      "step": 25543
    },
    {
      "epoch": 0.7452010035591341,
      "grad_norm": 0.9028324909614509,
      "learning_rate": 1.6081260584951875e-06,
      "loss": 0.1381,
      "step": 25544
    },
    {
      "epoch": 0.7452301767897777,
      "grad_norm": 0.9750611600946603,
      "learning_rate": 1.6077789686881611e-06,
      "loss": 0.114,
      "step": 25545
    },
    {
      "epoch": 0.7452593500204212,
      "grad_norm": 0.8078006404430004,
      "learning_rate": 1.6074319091660723e-06,
      "loss": 0.1155,
      "step": 25546
    },
    {
      "epoch": 0.7452885232510649,
      "grad_norm": 1.018055942403003,
      "learning_rate": 1.6070848799320237e-06,
      "loss": 0.1269,
      "step": 25547
    },
    {
      "epoch": 0.7453176964817084,
      "grad_norm": 0.9232982422290381,
      "learning_rate": 1.6067378809891094e-06,
      "loss": 0.1179,
      "step": 25548
    },
    {
      "epoch": 0.745346869712352,
      "grad_norm": 1.1525596094515442,
      "learning_rate": 1.6063909123404298e-06,
      "loss": 0.131,
      "step": 25549
    },
    {
      "epoch": 0.7453760429429955,
      "grad_norm": 1.0587325693696445,
      "learning_rate": 1.6060439739890832e-06,
      "loss": 0.1091,
      "step": 25550
    },
    {
      "epoch": 0.7454052161736391,
      "grad_norm": 0.8698500911476206,
      "learning_rate": 1.6056970659381654e-06,
      "loss": 0.1147,
      "step": 25551
    },
    {
      "epoch": 0.7454343894042826,
      "grad_norm": 1.3338528907672536,
      "learning_rate": 1.6053501881907728e-06,
      "loss": 0.1243,
      "step": 25552
    },
    {
      "epoch": 0.7454635626349262,
      "grad_norm": 0.9939976643404548,
      "learning_rate": 1.6050033407500048e-06,
      "loss": 0.1241,
      "step": 25553
    },
    {
      "epoch": 0.7454927358655697,
      "grad_norm": 0.9490224930223939,
      "learning_rate": 1.6046565236189554e-06,
      "loss": 0.1121,
      "step": 25554
    },
    {
      "epoch": 0.7455219090962133,
      "grad_norm": 0.7675865964998464,
      "learning_rate": 1.6043097368007233e-06,
      "loss": 0.1245,
      "step": 25555
    },
    {
      "epoch": 0.7455510823268569,
      "grad_norm": 1.0211646104043877,
      "learning_rate": 1.6039629802984014e-06,
      "loss": 0.1104,
      "step": 25556
    },
    {
      "epoch": 0.7455802555575004,
      "grad_norm": 0.8369056220121306,
      "learning_rate": 1.603616254115089e-06,
      "loss": 0.1307,
      "step": 25557
    },
    {
      "epoch": 0.745609428788144,
      "grad_norm": 0.9513548401552328,
      "learning_rate": 1.6032695582538798e-06,
      "loss": 0.1043,
      "step": 25558
    },
    {
      "epoch": 0.7456386020187875,
      "grad_norm": 1.072861385359351,
      "learning_rate": 1.602922892717868e-06,
      "loss": 0.1342,
      "step": 25559
    },
    {
      "epoch": 0.7456677752494312,
      "grad_norm": 0.9539497317221474,
      "learning_rate": 1.602576257510149e-06,
      "loss": 0.1468,
      "step": 25560
    },
    {
      "epoch": 0.7456969484800747,
      "grad_norm": 0.6985725803914287,
      "learning_rate": 1.6022296526338204e-06,
      "loss": 0.0915,
      "step": 25561
    },
    {
      "epoch": 0.7457261217107183,
      "grad_norm": 1.3062122090407413,
      "learning_rate": 1.6018830780919741e-06,
      "loss": 0.1317,
      "step": 25562
    },
    {
      "epoch": 0.7457552949413618,
      "grad_norm": 1.0300371415741765,
      "learning_rate": 1.6015365338877025e-06,
      "loss": 0.1248,
      "step": 25563
    },
    {
      "epoch": 0.7457844681720054,
      "grad_norm": 0.8176422147947936,
      "learning_rate": 1.6011900200241038e-06,
      "loss": 0.0952,
      "step": 25564
    },
    {
      "epoch": 0.7458136414026489,
      "grad_norm": 0.8474100415831087,
      "learning_rate": 1.6008435365042685e-06,
      "loss": 0.1243,
      "step": 25565
    },
    {
      "epoch": 0.7458428146332925,
      "grad_norm": 5.006150667434528,
      "learning_rate": 1.6004970833312878e-06,
      "loss": 0.1086,
      "step": 25566
    },
    {
      "epoch": 0.745871987863936,
      "grad_norm": 0.8356199022025703,
      "learning_rate": 1.6001506605082605e-06,
      "loss": 0.1128,
      "step": 25567
    },
    {
      "epoch": 0.7459011610945796,
      "grad_norm": 0.8197559960530519,
      "learning_rate": 1.599804268038277e-06,
      "loss": 0.1076,
      "step": 25568
    },
    {
      "epoch": 0.7459303343252232,
      "grad_norm": 0.9229738169506888,
      "learning_rate": 1.5994579059244276e-06,
      "loss": 0.1304,
      "step": 25569
    },
    {
      "epoch": 0.7459595075558667,
      "grad_norm": 1.0226081550306922,
      "learning_rate": 1.5991115741698076e-06,
      "loss": 0.1314,
      "step": 25570
    },
    {
      "epoch": 0.7459886807865103,
      "grad_norm": 1.2285554130538987,
      "learning_rate": 1.5987652727775077e-06,
      "loss": 0.1086,
      "step": 25571
    },
    {
      "epoch": 0.7460178540171538,
      "grad_norm": 1.0954519130263571,
      "learning_rate": 1.598419001750618e-06,
      "loss": 0.1275,
      "step": 25572
    },
    {
      "epoch": 0.7460470272477974,
      "grad_norm": 0.8074544591816742,
      "learning_rate": 1.5980727610922315e-06,
      "loss": 0.133,
      "step": 25573
    },
    {
      "epoch": 0.746076200478441,
      "grad_norm": 1.2046636037278546,
      "learning_rate": 1.5977265508054408e-06,
      "loss": 0.1346,
      "step": 25574
    },
    {
      "epoch": 0.7461053737090846,
      "grad_norm": 0.8894811971962874,
      "learning_rate": 1.5973803708933355e-06,
      "loss": 0.1246,
      "step": 25575
    },
    {
      "epoch": 0.7461345469397281,
      "grad_norm": 0.988885625330251,
      "learning_rate": 1.597034221359004e-06,
      "loss": 0.1141,
      "step": 25576
    },
    {
      "epoch": 0.7461637201703717,
      "grad_norm": 0.9923304779624527,
      "learning_rate": 1.5966881022055403e-06,
      "loss": 0.1147,
      "step": 25577
    },
    {
      "epoch": 0.7461928934010152,
      "grad_norm": 1.0562825232150244,
      "learning_rate": 1.5963420134360313e-06,
      "loss": 0.1013,
      "step": 25578
    },
    {
      "epoch": 0.7462220666316588,
      "grad_norm": 0.9596395482609146,
      "learning_rate": 1.5959959550535682e-06,
      "loss": 0.119,
      "step": 25579
    },
    {
      "epoch": 0.7462512398623024,
      "grad_norm": 0.7517602565914026,
      "learning_rate": 1.595649927061242e-06,
      "loss": 0.1417,
      "step": 25580
    },
    {
      "epoch": 0.7462804130929459,
      "grad_norm": 1.026186386798158,
      "learning_rate": 1.595303929462141e-06,
      "loss": 0.1385,
      "step": 25581
    },
    {
      "epoch": 0.7463095863235895,
      "grad_norm": 0.862758481281522,
      "learning_rate": 1.594957962259352e-06,
      "loss": 0.119,
      "step": 25582
    },
    {
      "epoch": 0.746338759554233,
      "grad_norm": 0.7700798815742126,
      "learning_rate": 1.5946120254559666e-06,
      "loss": 0.1496,
      "step": 25583
    },
    {
      "epoch": 0.7463679327848766,
      "grad_norm": 0.8386103680514339,
      "learning_rate": 1.5942661190550713e-06,
      "loss": 0.1278,
      "step": 25584
    },
    {
      "epoch": 0.7463971060155201,
      "grad_norm": 1.0830627699934756,
      "learning_rate": 1.5939202430597562e-06,
      "loss": 0.1225,
      "step": 25585
    },
    {
      "epoch": 0.7464262792461637,
      "grad_norm": 0.9011135878300403,
      "learning_rate": 1.5935743974731065e-06,
      "loss": 0.125,
      "step": 25586
    },
    {
      "epoch": 0.7464554524768073,
      "grad_norm": 0.8177723284523348,
      "learning_rate": 1.593228582298213e-06,
      "loss": 0.1296,
      "step": 25587
    },
    {
      "epoch": 0.7464846257074509,
      "grad_norm": 0.7812770830191983,
      "learning_rate": 1.5928827975381617e-06,
      "loss": 0.1113,
      "step": 25588
    },
    {
      "epoch": 0.7465137989380944,
      "grad_norm": 1.1012562085984332,
      "learning_rate": 1.5925370431960373e-06,
      "loss": 0.085,
      "step": 25589
    },
    {
      "epoch": 0.746542972168738,
      "grad_norm": 0.7157208521702754,
      "learning_rate": 1.5921913192749288e-06,
      "loss": 0.1181,
      "step": 25590
    },
    {
      "epoch": 0.7465721453993815,
      "grad_norm": 0.6344133581036481,
      "learning_rate": 1.5918456257779248e-06,
      "loss": 0.1038,
      "step": 25591
    },
    {
      "epoch": 0.7466013186300251,
      "grad_norm": 0.8730673364442029,
      "learning_rate": 1.5914999627081096e-06,
      "loss": 0.1163,
      "step": 25592
    },
    {
      "epoch": 0.7466304918606687,
      "grad_norm": 1.073997424960767,
      "learning_rate": 1.5911543300685667e-06,
      "loss": 0.099,
      "step": 25593
    },
    {
      "epoch": 0.7466596650913122,
      "grad_norm": 0.8947140763807463,
      "learning_rate": 1.5908087278623863e-06,
      "loss": 0.1311,
      "step": 25594
    },
    {
      "epoch": 0.7466888383219558,
      "grad_norm": 0.6886352924022845,
      "learning_rate": 1.5904631560926515e-06,
      "loss": 0.1223,
      "step": 25595
    },
    {
      "epoch": 0.7467180115525993,
      "grad_norm": 0.8302032427528294,
      "learning_rate": 1.5901176147624448e-06,
      "loss": 0.0969,
      "step": 25596
    },
    {
      "epoch": 0.7467471847832429,
      "grad_norm": 0.7117401002308471,
      "learning_rate": 1.589772103874857e-06,
      "loss": 0.1056,
      "step": 25597
    },
    {
      "epoch": 0.7467763580138864,
      "grad_norm": 0.9721643354323618,
      "learning_rate": 1.5894266234329697e-06,
      "loss": 0.1454,
      "step": 25598
    },
    {
      "epoch": 0.74680553124453,
      "grad_norm": 0.8414748072462078,
      "learning_rate": 1.5890811734398659e-06,
      "loss": 0.1261,
      "step": 25599
    },
    {
      "epoch": 0.7468347044751735,
      "grad_norm": 0.8829117922438163,
      "learning_rate": 1.588735753898633e-06,
      "loss": 0.1059,
      "step": 25600
    },
    {
      "epoch": 0.7468638777058172,
      "grad_norm": 1.0462696314783562,
      "learning_rate": 1.5883903648123528e-06,
      "loss": 0.1199,
      "step": 25601
    },
    {
      "epoch": 0.7468930509364607,
      "grad_norm": 0.6695386273924907,
      "learning_rate": 1.588045006184107e-06,
      "loss": 0.1214,
      "step": 25602
    },
    {
      "epoch": 0.7469222241671043,
      "grad_norm": 1.2591927558180418,
      "learning_rate": 1.5876996780169813e-06,
      "loss": 0.1095,
      "step": 25603
    },
    {
      "epoch": 0.7469513973977479,
      "grad_norm": 0.8860847365222658,
      "learning_rate": 1.5873543803140594e-06,
      "loss": 0.1179,
      "step": 25604
    },
    {
      "epoch": 0.7469805706283914,
      "grad_norm": 0.9533108212734227,
      "learning_rate": 1.5870091130784237e-06,
      "loss": 0.128,
      "step": 25605
    },
    {
      "epoch": 0.747009743859035,
      "grad_norm": 0.9225178779363759,
      "learning_rate": 1.5866638763131536e-06,
      "loss": 0.1128,
      "step": 25606
    },
    {
      "epoch": 0.7470389170896785,
      "grad_norm": 0.8778929331649101,
      "learning_rate": 1.5863186700213356e-06,
      "loss": 0.1409,
      "step": 25607
    },
    {
      "epoch": 0.7470680903203221,
      "grad_norm": 0.8500698613801462,
      "learning_rate": 1.5859734942060479e-06,
      "loss": 0.1463,
      "step": 25608
    },
    {
      "epoch": 0.7470972635509656,
      "grad_norm": 0.916773226968268,
      "learning_rate": 1.5856283488703738e-06,
      "loss": 0.1208,
      "step": 25609
    },
    {
      "epoch": 0.7471264367816092,
      "grad_norm": 1.3881675992267661,
      "learning_rate": 1.5852832340173962e-06,
      "loss": 0.108,
      "step": 25610
    },
    {
      "epoch": 0.7471556100122527,
      "grad_norm": 0.9720055053802819,
      "learning_rate": 1.5849381496501948e-06,
      "loss": 0.1109,
      "step": 25611
    },
    {
      "epoch": 0.7471847832428963,
      "grad_norm": 0.7714399454441949,
      "learning_rate": 1.5845930957718491e-06,
      "loss": 0.1191,
      "step": 25612
    },
    {
      "epoch": 0.7472139564735398,
      "grad_norm": 0.9274973907083488,
      "learning_rate": 1.584248072385442e-06,
      "loss": 0.1281,
      "step": 25613
    },
    {
      "epoch": 0.7472431297041835,
      "grad_norm": 0.853891497627841,
      "learning_rate": 1.5839030794940513e-06,
      "loss": 0.1084,
      "step": 25614
    },
    {
      "epoch": 0.747272302934827,
      "grad_norm": 0.7458355945066203,
      "learning_rate": 1.5835581171007603e-06,
      "loss": 0.1211,
      "step": 25615
    },
    {
      "epoch": 0.7473014761654706,
      "grad_norm": 0.8022379384011737,
      "learning_rate": 1.5832131852086452e-06,
      "loss": 0.117,
      "step": 25616
    },
    {
      "epoch": 0.7473306493961142,
      "grad_norm": 0.8887473459433449,
      "learning_rate": 1.5828682838207882e-06,
      "loss": 0.1104,
      "step": 25617
    },
    {
      "epoch": 0.7473598226267577,
      "grad_norm": 0.7825986872831625,
      "learning_rate": 1.5825234129402679e-06,
      "loss": 0.1281,
      "step": 25618
    },
    {
      "epoch": 0.7473889958574013,
      "grad_norm": 0.7345869986556904,
      "learning_rate": 1.582178572570161e-06,
      "loss": 0.1173,
      "step": 25619
    },
    {
      "epoch": 0.7474181690880448,
      "grad_norm": 0.820942982376986,
      "learning_rate": 1.5818337627135477e-06,
      "loss": 0.1277,
      "step": 25620
    },
    {
      "epoch": 0.7474473423186884,
      "grad_norm": 0.9539420149368407,
      "learning_rate": 1.5814889833735087e-06,
      "loss": 0.1124,
      "step": 25621
    },
    {
      "epoch": 0.7474765155493319,
      "grad_norm": 0.8295040468938645,
      "learning_rate": 1.5811442345531197e-06,
      "loss": 0.1097,
      "step": 25622
    },
    {
      "epoch": 0.7475056887799755,
      "grad_norm": 0.7998331738142896,
      "learning_rate": 1.5807995162554574e-06,
      "loss": 0.1343,
      "step": 25623
    },
    {
      "epoch": 0.747534862010619,
      "grad_norm": 0.9058265781022713,
      "learning_rate": 1.5804548284836018e-06,
      "loss": 0.1434,
      "step": 25624
    },
    {
      "epoch": 0.7475640352412626,
      "grad_norm": 0.77370724980713,
      "learning_rate": 1.5801101712406296e-06,
      "loss": 0.1069,
      "step": 25625
    },
    {
      "epoch": 0.7475932084719061,
      "grad_norm": 0.8491998827047683,
      "learning_rate": 1.5797655445296146e-06,
      "loss": 0.1051,
      "step": 25626
    },
    {
      "epoch": 0.7476223817025497,
      "grad_norm": 0.9163458204771122,
      "learning_rate": 1.5794209483536388e-06,
      "loss": 0.1179,
      "step": 25627
    },
    {
      "epoch": 0.7476515549331934,
      "grad_norm": 0.9323202469232902,
      "learning_rate": 1.5790763827157769e-06,
      "loss": 0.1285,
      "step": 25628
    },
    {
      "epoch": 0.7476807281638369,
      "grad_norm": 0.8657285290055365,
      "learning_rate": 1.5787318476191021e-06,
      "loss": 0.1163,
      "step": 25629
    },
    {
      "epoch": 0.7477099013944805,
      "grad_norm": 0.874499824021466,
      "learning_rate": 1.5783873430666947e-06,
      "loss": 0.1033,
      "step": 25630
    },
    {
      "epoch": 0.747739074625124,
      "grad_norm": 0.8748384166736571,
      "learning_rate": 1.5780428690616284e-06,
      "loss": 0.1395,
      "step": 25631
    },
    {
      "epoch": 0.7477682478557676,
      "grad_norm": 0.7851887220845484,
      "learning_rate": 1.5776984256069767e-06,
      "loss": 0.1055,
      "step": 25632
    },
    {
      "epoch": 0.7477974210864111,
      "grad_norm": 0.9042595817353926,
      "learning_rate": 1.5773540127058162e-06,
      "loss": 0.1001,
      "step": 25633
    },
    {
      "epoch": 0.7478265943170547,
      "grad_norm": 0.8458277829155152,
      "learning_rate": 1.5770096303612243e-06,
      "loss": 0.1104,
      "step": 25634
    },
    {
      "epoch": 0.7478557675476982,
      "grad_norm": 0.8950348281535193,
      "learning_rate": 1.5766652785762726e-06,
      "loss": 0.1269,
      "step": 25635
    },
    {
      "epoch": 0.7478849407783418,
      "grad_norm": 0.7225497063346766,
      "learning_rate": 1.576320957354035e-06,
      "loss": 0.1233,
      "step": 25636
    },
    {
      "epoch": 0.7479141140089853,
      "grad_norm": 0.8940911506486378,
      "learning_rate": 1.5759766666975878e-06,
      "loss": 0.1108,
      "step": 25637
    },
    {
      "epoch": 0.7479432872396289,
      "grad_norm": 0.8380670318959027,
      "learning_rate": 1.575632406610002e-06,
      "loss": 0.1129,
      "step": 25638
    },
    {
      "epoch": 0.7479724604702724,
      "grad_norm": 0.7181155540416648,
      "learning_rate": 1.5752881770943529e-06,
      "loss": 0.0956,
      "step": 25639
    },
    {
      "epoch": 0.748001633700916,
      "grad_norm": 0.8029626984678675,
      "learning_rate": 1.5749439781537145e-06,
      "loss": 0.1446,
      "step": 25640
    },
    {
      "epoch": 0.7480308069315597,
      "grad_norm": 1.0508857609416549,
      "learning_rate": 1.574599809791159e-06,
      "loss": 0.1097,
      "step": 25641
    },
    {
      "epoch": 0.7480599801622032,
      "grad_norm": 0.8246555096083829,
      "learning_rate": 1.5742556720097574e-06,
      "loss": 0.0896,
      "step": 25642
    },
    {
      "epoch": 0.7480891533928468,
      "grad_norm": 0.8795579743206051,
      "learning_rate": 1.5739115648125846e-06,
      "loss": 0.107,
      "step": 25643
    },
    {
      "epoch": 0.7481183266234903,
      "grad_norm": 0.8127137789150932,
      "learning_rate": 1.5735674882027097e-06,
      "loss": 0.1179,
      "step": 25644
    },
    {
      "epoch": 0.7481474998541339,
      "grad_norm": 0.8657455104022886,
      "learning_rate": 1.5732234421832083e-06,
      "loss": 0.1151,
      "step": 25645
    },
    {
      "epoch": 0.7481766730847774,
      "grad_norm": 0.984091213798382,
      "learning_rate": 1.5728794267571478e-06,
      "loss": 0.1207,
      "step": 25646
    },
    {
      "epoch": 0.748205846315421,
      "grad_norm": 0.8555390115961721,
      "learning_rate": 1.5725354419276039e-06,
      "loss": 0.1071,
      "step": 25647
    },
    {
      "epoch": 0.7482350195460645,
      "grad_norm": 1.1362405378720326,
      "learning_rate": 1.5721914876976452e-06,
      "loss": 0.1254,
      "step": 25648
    },
    {
      "epoch": 0.7482641927767081,
      "grad_norm": 0.9078234450730663,
      "learning_rate": 1.5718475640703407e-06,
      "loss": 0.1286,
      "step": 25649
    },
    {
      "epoch": 0.7482933660073516,
      "grad_norm": 1.012314769564317,
      "learning_rate": 1.571503671048763e-06,
      "loss": 0.1131,
      "step": 25650
    },
    {
      "epoch": 0.7483225392379952,
      "grad_norm": 0.8669448799297004,
      "learning_rate": 1.5711598086359837e-06,
      "loss": 0.1178,
      "step": 25651
    },
    {
      "epoch": 0.7483517124686387,
      "grad_norm": 0.9417444921510744,
      "learning_rate": 1.5708159768350711e-06,
      "loss": 0.1068,
      "step": 25652
    },
    {
      "epoch": 0.7483808856992823,
      "grad_norm": 0.9311394405412246,
      "learning_rate": 1.5704721756490932e-06,
      "loss": 0.1192,
      "step": 25653
    },
    {
      "epoch": 0.7484100589299258,
      "grad_norm": 0.7286011907726347,
      "learning_rate": 1.5701284050811227e-06,
      "loss": 0.1141,
      "step": 25654
    },
    {
      "epoch": 0.7484392321605695,
      "grad_norm": 0.988775257715098,
      "learning_rate": 1.569784665134227e-06,
      "loss": 0.1033,
      "step": 25655
    },
    {
      "epoch": 0.7484684053912131,
      "grad_norm": 0.9231658020629819,
      "learning_rate": 1.5694409558114715e-06,
      "loss": 0.1128,
      "step": 25656
    },
    {
      "epoch": 0.7484975786218566,
      "grad_norm": 0.8887093770242535,
      "learning_rate": 1.5690972771159318e-06,
      "loss": 0.1257,
      "step": 25657
    },
    {
      "epoch": 0.7485267518525002,
      "grad_norm": 1.3122384141373964,
      "learning_rate": 1.5687536290506722e-06,
      "loss": 0.1231,
      "step": 25658
    },
    {
      "epoch": 0.7485559250831437,
      "grad_norm": 1.0793584893273471,
      "learning_rate": 1.56841001161876e-06,
      "loss": 0.116,
      "step": 25659
    },
    {
      "epoch": 0.7485850983137873,
      "grad_norm": 0.9791206413428024,
      "learning_rate": 1.5680664248232652e-06,
      "loss": 0.1075,
      "step": 25660
    },
    {
      "epoch": 0.7486142715444308,
      "grad_norm": 0.7584541340528008,
      "learning_rate": 1.567722868667254e-06,
      "loss": 0.1076,
      "step": 25661
    },
    {
      "epoch": 0.7486434447750744,
      "grad_norm": 0.8887738686445517,
      "learning_rate": 1.5673793431537925e-06,
      "loss": 0.1331,
      "step": 25662
    },
    {
      "epoch": 0.7486726180057179,
      "grad_norm": 1.8058045291080915,
      "learning_rate": 1.5670358482859488e-06,
      "loss": 0.1393,
      "step": 25663
    },
    {
      "epoch": 0.7487017912363615,
      "grad_norm": 0.9396069158149576,
      "learning_rate": 1.5666923840667907e-06,
      "loss": 0.1128,
      "step": 25664
    },
    {
      "epoch": 0.748730964467005,
      "grad_norm": 0.852240701152601,
      "learning_rate": 1.566348950499384e-06,
      "loss": 0.0937,
      "step": 25665
    },
    {
      "epoch": 0.7487601376976486,
      "grad_norm": 0.9996120030112183,
      "learning_rate": 1.5660055475867918e-06,
      "loss": 0.1355,
      "step": 25666
    },
    {
      "epoch": 0.7487893109282922,
      "grad_norm": 0.8031682577824013,
      "learning_rate": 1.5656621753320844e-06,
      "loss": 0.1069,
      "step": 25667
    },
    {
      "epoch": 0.7488184841589358,
      "grad_norm": 1.2529655951038805,
      "learning_rate": 1.5653188337383236e-06,
      "loss": 0.1258,
      "step": 25668
    },
    {
      "epoch": 0.7488476573895794,
      "grad_norm": 0.8544354484814337,
      "learning_rate": 1.5649755228085766e-06,
      "loss": 0.1299,
      "step": 25669
    },
    {
      "epoch": 0.7488768306202229,
      "grad_norm": 0.9718943308170146,
      "learning_rate": 1.5646322425459092e-06,
      "loss": 0.1348,
      "step": 25670
    },
    {
      "epoch": 0.7489060038508665,
      "grad_norm": 0.8862346063142209,
      "learning_rate": 1.5642889929533856e-06,
      "loss": 0.1291,
      "step": 25671
    },
    {
      "epoch": 0.74893517708151,
      "grad_norm": 0.9019518537476776,
      "learning_rate": 1.5639457740340674e-06,
      "loss": 0.1293,
      "step": 25672
    },
    {
      "epoch": 0.7489643503121536,
      "grad_norm": 0.7627082783371806,
      "learning_rate": 1.563602585791023e-06,
      "loss": 0.1244,
      "step": 25673
    },
    {
      "epoch": 0.7489935235427971,
      "grad_norm": 0.9860020227765464,
      "learning_rate": 1.5632594282273129e-06,
      "loss": 0.1335,
      "step": 25674
    },
    {
      "epoch": 0.7490226967734407,
      "grad_norm": 1.3573526095780761,
      "learning_rate": 1.5629163013460041e-06,
      "loss": 0.1172,
      "step": 25675
    },
    {
      "epoch": 0.7490518700040842,
      "grad_norm": 0.7911529622078647,
      "learning_rate": 1.5625732051501558e-06,
      "loss": 0.1169,
      "step": 25676
    },
    {
      "epoch": 0.7490810432347278,
      "grad_norm": 0.954631176425323,
      "learning_rate": 1.5622301396428351e-06,
      "loss": 0.1208,
      "step": 25677
    },
    {
      "epoch": 0.7491102164653713,
      "grad_norm": 0.7905542458713434,
      "learning_rate": 1.5618871048271034e-06,
      "loss": 0.1021,
      "step": 25678
    },
    {
      "epoch": 0.7491393896960149,
      "grad_norm": 0.9653716620721325,
      "learning_rate": 1.5615441007060211e-06,
      "loss": 0.1327,
      "step": 25679
    },
    {
      "epoch": 0.7491685629266585,
      "grad_norm": 0.735935900526844,
      "learning_rate": 1.561201127282652e-06,
      "loss": 0.1096,
      "step": 25680
    },
    {
      "epoch": 0.749197736157302,
      "grad_norm": 0.7418271668979863,
      "learning_rate": 1.5608581845600606e-06,
      "loss": 0.1068,
      "step": 25681
    },
    {
      "epoch": 0.7492269093879457,
      "grad_norm": 1.1632935520368335,
      "learning_rate": 1.5605152725413058e-06,
      "loss": 0.1155,
      "step": 25682
    },
    {
      "epoch": 0.7492560826185892,
      "grad_norm": 0.8550446078428814,
      "learning_rate": 1.5601723912294481e-06,
      "loss": 0.1392,
      "step": 25683
    },
    {
      "epoch": 0.7492852558492328,
      "grad_norm": 0.8621473842462445,
      "learning_rate": 1.5598295406275516e-06,
      "loss": 0.1305,
      "step": 25684
    },
    {
      "epoch": 0.7493144290798763,
      "grad_norm": 0.937678709069431,
      "learning_rate": 1.559486720738676e-06,
      "loss": 0.1012,
      "step": 25685
    },
    {
      "epoch": 0.7493436023105199,
      "grad_norm": 0.7971498399200088,
      "learning_rate": 1.5591439315658786e-06,
      "loss": 0.13,
      "step": 25686
    },
    {
      "epoch": 0.7493727755411634,
      "grad_norm": 0.741866315596296,
      "learning_rate": 1.5588011731122254e-06,
      "loss": 0.1231,
      "step": 25687
    },
    {
      "epoch": 0.749401948771807,
      "grad_norm": 1.1522685624220508,
      "learning_rate": 1.5584584453807738e-06,
      "loss": 0.1401,
      "step": 25688
    },
    {
      "epoch": 0.7494311220024505,
      "grad_norm": 0.938849643434187,
      "learning_rate": 1.5581157483745824e-06,
      "loss": 0.1219,
      "step": 25689
    },
    {
      "epoch": 0.7494602952330941,
      "grad_norm": 0.9111289872918175,
      "learning_rate": 1.5577730820967135e-06,
      "loss": 0.1136,
      "step": 25690
    },
    {
      "epoch": 0.7494894684637377,
      "grad_norm": 0.8241711797103661,
      "learning_rate": 1.557430446550225e-06,
      "loss": 0.1123,
      "step": 25691
    },
    {
      "epoch": 0.7495186416943812,
      "grad_norm": 0.7043010184822692,
      "learning_rate": 1.557087841738174e-06,
      "loss": 0.1126,
      "step": 25692
    },
    {
      "epoch": 0.7495478149250248,
      "grad_norm": 0.8381720276354171,
      "learning_rate": 1.5567452676636207e-06,
      "loss": 0.1051,
      "step": 25693
    },
    {
      "epoch": 0.7495769881556683,
      "grad_norm": 1.084801681471802,
      "learning_rate": 1.5564027243296254e-06,
      "loss": 0.1097,
      "step": 25694
    },
    {
      "epoch": 0.749606161386312,
      "grad_norm": 1.0063538727901777,
      "learning_rate": 1.5560602117392442e-06,
      "loss": 0.1121,
      "step": 25695
    },
    {
      "epoch": 0.7496353346169555,
      "grad_norm": 0.8374868438912568,
      "learning_rate": 1.5557177298955339e-06,
      "loss": 0.1241,
      "step": 25696
    },
    {
      "epoch": 0.7496645078475991,
      "grad_norm": 0.9112579423054664,
      "learning_rate": 1.5553752788015552e-06,
      "loss": 0.1159,
      "step": 25697
    },
    {
      "epoch": 0.7496936810782426,
      "grad_norm": 0.8229293561206907,
      "learning_rate": 1.5550328584603619e-06,
      "loss": 0.1293,
      "step": 25698
    },
    {
      "epoch": 0.7497228543088862,
      "grad_norm": 0.991546629013683,
      "learning_rate": 1.554690468875013e-06,
      "loss": 0.1296,
      "step": 25699
    },
    {
      "epoch": 0.7497520275395297,
      "grad_norm": 0.9612341527354766,
      "learning_rate": 1.5543481100485669e-06,
      "loss": 0.1025,
      "step": 25700
    },
    {
      "epoch": 0.7497812007701733,
      "grad_norm": 0.8886960342689915,
      "learning_rate": 1.5540057819840782e-06,
      "loss": 0.1311,
      "step": 25701
    },
    {
      "epoch": 0.7498103740008168,
      "grad_norm": 0.8294574979464563,
      "learning_rate": 1.5536634846846016e-06,
      "loss": 0.1189,
      "step": 25702
    },
    {
      "epoch": 0.7498395472314604,
      "grad_norm": 0.9520830625817014,
      "learning_rate": 1.553321218153196e-06,
      "loss": 0.1046,
      "step": 25703
    },
    {
      "epoch": 0.749868720462104,
      "grad_norm": 0.775330468701103,
      "learning_rate": 1.5529789823929149e-06,
      "loss": 0.1211,
      "step": 25704
    },
    {
      "epoch": 0.7498978936927475,
      "grad_norm": 0.6679047702139579,
      "learning_rate": 1.5526367774068158e-06,
      "loss": 0.1128,
      "step": 25705
    },
    {
      "epoch": 0.7499270669233911,
      "grad_norm": 1.1116795057968818,
      "learning_rate": 1.5522946031979507e-06,
      "loss": 0.1336,
      "step": 25706
    },
    {
      "epoch": 0.7499562401540346,
      "grad_norm": 0.878106243220114,
      "learning_rate": 1.551952459769378e-06,
      "loss": 0.1107,
      "step": 25707
    },
    {
      "epoch": 0.7499854133846782,
      "grad_norm": 0.7496572643796967,
      "learning_rate": 1.5516103471241512e-06,
      "loss": 0.115,
      "step": 25708
    },
    {
      "epoch": 0.7500145866153218,
      "grad_norm": 0.8909588369443133,
      "learning_rate": 1.5512682652653221e-06,
      "loss": 0.1185,
      "step": 25709
    },
    {
      "epoch": 0.7500437598459654,
      "grad_norm": 0.9451354533796887,
      "learning_rate": 1.5509262141959463e-06,
      "loss": 0.1342,
      "step": 25710
    },
    {
      "epoch": 0.7500729330766089,
      "grad_norm": 0.9042477030205243,
      "learning_rate": 1.5505841939190796e-06,
      "loss": 0.1175,
      "step": 25711
    },
    {
      "epoch": 0.7501021063072525,
      "grad_norm": 0.8046544980615903,
      "learning_rate": 1.5502422044377741e-06,
      "loss": 0.1111,
      "step": 25712
    },
    {
      "epoch": 0.750131279537896,
      "grad_norm": 0.6873544487419347,
      "learning_rate": 1.549900245755081e-06,
      "loss": 0.1093,
      "step": 25713
    },
    {
      "epoch": 0.7501604527685396,
      "grad_norm": 0.9247468006234043,
      "learning_rate": 1.5495583178740563e-06,
      "loss": 0.1243,
      "step": 25714
    },
    {
      "epoch": 0.7501896259991832,
      "grad_norm": 0.8559041359883738,
      "learning_rate": 1.5492164207977517e-06,
      "loss": 0.1234,
      "step": 25715
    },
    {
      "epoch": 0.7502187992298267,
      "grad_norm": 0.9897421291896847,
      "learning_rate": 1.5488745545292155e-06,
      "loss": 0.1109,
      "step": 25716
    },
    {
      "epoch": 0.7502479724604703,
      "grad_norm": 0.8266209911558882,
      "learning_rate": 1.5485327190715066e-06,
      "loss": 0.1304,
      "step": 25717
    },
    {
      "epoch": 0.7502771456911138,
      "grad_norm": 0.7300099003378026,
      "learning_rate": 1.548190914427674e-06,
      "loss": 0.1014,
      "step": 25718
    },
    {
      "epoch": 0.7503063189217574,
      "grad_norm": 0.7944459331619189,
      "learning_rate": 1.5478491406007672e-06,
      "loss": 0.0927,
      "step": 25719
    },
    {
      "epoch": 0.7503354921524009,
      "grad_norm": 1.4416304824765536,
      "learning_rate": 1.5475073975938409e-06,
      "loss": 0.1291,
      "step": 25720
    },
    {
      "epoch": 0.7503646653830445,
      "grad_norm": 0.7870133310338016,
      "learning_rate": 1.5471656854099437e-06,
      "loss": 0.1003,
      "step": 25721
    },
    {
      "epoch": 0.750393838613688,
      "grad_norm": 0.770019211641465,
      "learning_rate": 1.546824004052126e-06,
      "loss": 0.122,
      "step": 25722
    },
    {
      "epoch": 0.7504230118443317,
      "grad_norm": 0.8051129295730975,
      "learning_rate": 1.546482353523439e-06,
      "loss": 0.1153,
      "step": 25723
    },
    {
      "epoch": 0.7504521850749752,
      "grad_norm": 0.7747533309883834,
      "learning_rate": 1.5461407338269351e-06,
      "loss": 0.1142,
      "step": 25724
    },
    {
      "epoch": 0.7504813583056188,
      "grad_norm": 0.8472187954208377,
      "learning_rate": 1.5457991449656618e-06,
      "loss": 0.1003,
      "step": 25725
    },
    {
      "epoch": 0.7505105315362623,
      "grad_norm": 1.1004646868941497,
      "learning_rate": 1.545457586942668e-06,
      "loss": 0.1133,
      "step": 25726
    },
    {
      "epoch": 0.7505397047669059,
      "grad_norm": 0.8664687944865257,
      "learning_rate": 1.5451160597610038e-06,
      "loss": 0.1044,
      "step": 25727
    },
    {
      "epoch": 0.7505688779975495,
      "grad_norm": 0.8657255289319268,
      "learning_rate": 1.5447745634237204e-06,
      "loss": 0.125,
      "step": 25728
    },
    {
      "epoch": 0.750598051228193,
      "grad_norm": 0.7561478075010376,
      "learning_rate": 1.5444330979338634e-06,
      "loss": 0.1212,
      "step": 25729
    },
    {
      "epoch": 0.7506272244588366,
      "grad_norm": 0.8906009988923281,
      "learning_rate": 1.544091663294484e-06,
      "loss": 0.1494,
      "step": 25730
    },
    {
      "epoch": 0.7506563976894801,
      "grad_norm": 0.9576505702480632,
      "learning_rate": 1.5437502595086295e-06,
      "loss": 0.1094,
      "step": 25731
    },
    {
      "epoch": 0.7506855709201237,
      "grad_norm": 0.9966472602789167,
      "learning_rate": 1.5434088865793461e-06,
      "loss": 0.1083,
      "step": 25732
    },
    {
      "epoch": 0.7507147441507672,
      "grad_norm": 0.9673156502653935,
      "learning_rate": 1.5430675445096827e-06,
      "loss": 0.1268,
      "step": 25733
    },
    {
      "epoch": 0.7507439173814108,
      "grad_norm": 0.6494572180029597,
      "learning_rate": 1.5427262333026894e-06,
      "loss": 0.1242,
      "step": 25734
    },
    {
      "epoch": 0.7507730906120543,
      "grad_norm": 0.8430400269354585,
      "learning_rate": 1.5423849529614098e-06,
      "loss": 0.1096,
      "step": 25735
    },
    {
      "epoch": 0.750802263842698,
      "grad_norm": 0.8645013085778063,
      "learning_rate": 1.5420437034888914e-06,
      "loss": 0.1364,
      "step": 25736
    },
    {
      "epoch": 0.7508314370733415,
      "grad_norm": 0.688901586949705,
      "learning_rate": 1.5417024848881823e-06,
      "loss": 0.0997,
      "step": 25737
    },
    {
      "epoch": 0.7508606103039851,
      "grad_norm": 0.7500581859235532,
      "learning_rate": 1.5413612971623282e-06,
      "loss": 0.117,
      "step": 25738
    },
    {
      "epoch": 0.7508897835346287,
      "grad_norm": 1.000258172728012,
      "learning_rate": 1.5410201403143726e-06,
      "loss": 0.1068,
      "step": 25739
    },
    {
      "epoch": 0.7509189567652722,
      "grad_norm": 0.7286092268553375,
      "learning_rate": 1.5406790143473644e-06,
      "loss": 0.1053,
      "step": 25740
    },
    {
      "epoch": 0.7509481299959158,
      "grad_norm": 0.744143623975935,
      "learning_rate": 1.5403379192643491e-06,
      "loss": 0.1085,
      "step": 25741
    },
    {
      "epoch": 0.7509773032265593,
      "grad_norm": 0.9229720704294662,
      "learning_rate": 1.5399968550683708e-06,
      "loss": 0.1222,
      "step": 25742
    },
    {
      "epoch": 0.7510064764572029,
      "grad_norm": 1.0041125224633975,
      "learning_rate": 1.5396558217624734e-06,
      "loss": 0.1154,
      "step": 25743
    },
    {
      "epoch": 0.7510356496878464,
      "grad_norm": 0.8483922300350779,
      "learning_rate": 1.5393148193497042e-06,
      "loss": 0.1477,
      "step": 25744
    },
    {
      "epoch": 0.75106482291849,
      "grad_norm": 0.8912519006605253,
      "learning_rate": 1.538973847833105e-06,
      "loss": 0.1295,
      "step": 25745
    },
    {
      "epoch": 0.7510939961491335,
      "grad_norm": 0.8236529472409622,
      "learning_rate": 1.5386329072157209e-06,
      "loss": 0.1028,
      "step": 25746
    },
    {
      "epoch": 0.7511231693797771,
      "grad_norm": 1.0366472976575478,
      "learning_rate": 1.5382919975005971e-06,
      "loss": 0.1516,
      "step": 25747
    },
    {
      "epoch": 0.7511523426104206,
      "grad_norm": 0.8515181377093403,
      "learning_rate": 1.5379511186907764e-06,
      "loss": 0.1157,
      "step": 25748
    },
    {
      "epoch": 0.7511815158410642,
      "grad_norm": 2.0746256570564485,
      "learning_rate": 1.5376102707893e-06,
      "loss": 0.1113,
      "step": 25749
    },
    {
      "epoch": 0.7512106890717078,
      "grad_norm": 0.9266128271380434,
      "learning_rate": 1.5372694537992138e-06,
      "loss": 0.1159,
      "step": 25750
    },
    {
      "epoch": 0.7512398623023514,
      "grad_norm": 1.0543881817697456,
      "learning_rate": 1.536928667723558e-06,
      "loss": 0.1202,
      "step": 25751
    },
    {
      "epoch": 0.751269035532995,
      "grad_norm": 0.7582579627222545,
      "learning_rate": 1.5365879125653776e-06,
      "loss": 0.1232,
      "step": 25752
    },
    {
      "epoch": 0.7512982087636385,
      "grad_norm": 0.9280920830413915,
      "learning_rate": 1.5362471883277125e-06,
      "loss": 0.123,
      "step": 25753
    },
    {
      "epoch": 0.7513273819942821,
      "grad_norm": 0.8118954715549233,
      "learning_rate": 1.5359064950136065e-06,
      "loss": 0.1095,
      "step": 25754
    },
    {
      "epoch": 0.7513565552249256,
      "grad_norm": 0.8382301200004377,
      "learning_rate": 1.5355658326261008e-06,
      "loss": 0.1275,
      "step": 25755
    },
    {
      "epoch": 0.7513857284555692,
      "grad_norm": 0.9030350085480212,
      "learning_rate": 1.5352252011682351e-06,
      "loss": 0.1191,
      "step": 25756
    },
    {
      "epoch": 0.7514149016862127,
      "grad_norm": 0.8534362217001674,
      "learning_rate": 1.5348846006430513e-06,
      "loss": 0.1097,
      "step": 25757
    },
    {
      "epoch": 0.7514440749168563,
      "grad_norm": 1.0283059911674362,
      "learning_rate": 1.534544031053592e-06,
      "loss": 0.1158,
      "step": 25758
    },
    {
      "epoch": 0.7514732481474998,
      "grad_norm": 0.8814878203165918,
      "learning_rate": 1.5342034924028948e-06,
      "loss": 0.1497,
      "step": 25759
    },
    {
      "epoch": 0.7515024213781434,
      "grad_norm": 0.9472910776939061,
      "learning_rate": 1.5338629846940033e-06,
      "loss": 0.1358,
      "step": 25760
    },
    {
      "epoch": 0.7515315946087869,
      "grad_norm": 0.934545581378259,
      "learning_rate": 1.533522507929956e-06,
      "loss": 0.1311,
      "step": 25761
    },
    {
      "epoch": 0.7515607678394305,
      "grad_norm": 3.804019459227602,
      "learning_rate": 1.53318206211379e-06,
      "loss": 0.1388,
      "step": 25762
    },
    {
      "epoch": 0.7515899410700742,
      "grad_norm": 0.9187604451123866,
      "learning_rate": 1.532841647248547e-06,
      "loss": 0.114,
      "step": 25763
    },
    {
      "epoch": 0.7516191143007177,
      "grad_norm": 0.9454141071279012,
      "learning_rate": 1.5325012633372677e-06,
      "loss": 0.1076,
      "step": 25764
    },
    {
      "epoch": 0.7516482875313613,
      "grad_norm": 1.022386653222536,
      "learning_rate": 1.53216091038299e-06,
      "loss": 0.1071,
      "step": 25765
    },
    {
      "epoch": 0.7516774607620048,
      "grad_norm": 0.8616648625056953,
      "learning_rate": 1.5318205883887494e-06,
      "loss": 0.1164,
      "step": 25766
    },
    {
      "epoch": 0.7517066339926484,
      "grad_norm": 0.9621778674013503,
      "learning_rate": 1.5314802973575888e-06,
      "loss": 0.1135,
      "step": 25767
    },
    {
      "epoch": 0.7517358072232919,
      "grad_norm": 0.62209175496305,
      "learning_rate": 1.531140037292544e-06,
      "loss": 0.1051,
      "step": 25768
    },
    {
      "epoch": 0.7517649804539355,
      "grad_norm": 0.8775747135041679,
      "learning_rate": 1.5307998081966507e-06,
      "loss": 0.1156,
      "step": 25769
    },
    {
      "epoch": 0.751794153684579,
      "grad_norm": 1.1942064510018175,
      "learning_rate": 1.530459610072949e-06,
      "loss": 0.1232,
      "step": 25770
    },
    {
      "epoch": 0.7518233269152226,
      "grad_norm": 0.8855733617127285,
      "learning_rate": 1.5301194429244776e-06,
      "loss": 0.1081,
      "step": 25771
    },
    {
      "epoch": 0.7518525001458661,
      "grad_norm": 0.7607391829487901,
      "learning_rate": 1.529779306754271e-06,
      "loss": 0.1179,
      "step": 25772
    },
    {
      "epoch": 0.7518816733765097,
      "grad_norm": 0.8636723525629825,
      "learning_rate": 1.5294392015653648e-06,
      "loss": 0.1143,
      "step": 25773
    },
    {
      "epoch": 0.7519108466071532,
      "grad_norm": 0.8722640448327522,
      "learning_rate": 1.5290991273607986e-06,
      "loss": 0.1219,
      "step": 25774
    },
    {
      "epoch": 0.7519400198377968,
      "grad_norm": 1.0854114211235604,
      "learning_rate": 1.5287590841436056e-06,
      "loss": 0.1253,
      "step": 25775
    },
    {
      "epoch": 0.7519691930684403,
      "grad_norm": 0.793304124044723,
      "learning_rate": 1.5284190719168224e-06,
      "loss": 0.1192,
      "step": 25776
    },
    {
      "epoch": 0.751998366299084,
      "grad_norm": 0.7337154705818196,
      "learning_rate": 1.5280790906834863e-06,
      "loss": 0.1179,
      "step": 25777
    },
    {
      "epoch": 0.7520275395297276,
      "grad_norm": 0.871914108863201,
      "learning_rate": 1.527739140446632e-06,
      "loss": 0.1141,
      "step": 25778
    },
    {
      "epoch": 0.7520567127603711,
      "grad_norm": 0.8699708569000806,
      "learning_rate": 1.527399221209292e-06,
      "loss": 0.1256,
      "step": 25779
    },
    {
      "epoch": 0.7520858859910147,
      "grad_norm": 0.8517358303980148,
      "learning_rate": 1.5270593329745036e-06,
      "loss": 0.1174,
      "step": 25780
    },
    {
      "epoch": 0.7521150592216582,
      "grad_norm": 1.2603983556341987,
      "learning_rate": 1.5267194757452996e-06,
      "loss": 0.1224,
      "step": 25781
    },
    {
      "epoch": 0.7521442324523018,
      "grad_norm": 1.229955663058453,
      "learning_rate": 1.5263796495247162e-06,
      "loss": 0.1174,
      "step": 25782
    },
    {
      "epoch": 0.7521734056829453,
      "grad_norm": 0.8220658004708723,
      "learning_rate": 1.5260398543157851e-06,
      "loss": 0.1135,
      "step": 25783
    },
    {
      "epoch": 0.7522025789135889,
      "grad_norm": 0.8753495724032763,
      "learning_rate": 1.5257000901215418e-06,
      "loss": 0.1361,
      "step": 25784
    },
    {
      "epoch": 0.7522317521442324,
      "grad_norm": 1.0133299363899728,
      "learning_rate": 1.5253603569450192e-06,
      "loss": 0.1067,
      "step": 25785
    },
    {
      "epoch": 0.752260925374876,
      "grad_norm": 1.0725323846787793,
      "learning_rate": 1.5250206547892477e-06,
      "loss": 0.1084,
      "step": 25786
    },
    {
      "epoch": 0.7522900986055195,
      "grad_norm": 0.7877331615597004,
      "learning_rate": 1.524680983657263e-06,
      "loss": 0.1073,
      "step": 25787
    },
    {
      "epoch": 0.7523192718361631,
      "grad_norm": 0.8543230161109976,
      "learning_rate": 1.5243413435520977e-06,
      "loss": 0.1302,
      "step": 25788
    },
    {
      "epoch": 0.7523484450668066,
      "grad_norm": 0.8261126604003302,
      "learning_rate": 1.5240017344767837e-06,
      "loss": 0.1082,
      "step": 25789
    },
    {
      "epoch": 0.7523776182974503,
      "grad_norm": 1.011447016471589,
      "learning_rate": 1.5236621564343507e-06,
      "loss": 0.1374,
      "step": 25790
    },
    {
      "epoch": 0.7524067915280939,
      "grad_norm": 1.0970751413172217,
      "learning_rate": 1.5233226094278336e-06,
      "loss": 0.1168,
      "step": 25791
    },
    {
      "epoch": 0.7524359647587374,
      "grad_norm": 1.154937269989941,
      "learning_rate": 1.5229830934602608e-06,
      "loss": 0.1066,
      "step": 25792
    },
    {
      "epoch": 0.752465137989381,
      "grad_norm": 0.7369734811166199,
      "learning_rate": 1.5226436085346646e-06,
      "loss": 0.0936,
      "step": 25793
    },
    {
      "epoch": 0.7524943112200245,
      "grad_norm": 0.8012708710065065,
      "learning_rate": 1.5223041546540778e-06,
      "loss": 0.1172,
      "step": 25794
    },
    {
      "epoch": 0.7525234844506681,
      "grad_norm": 0.8504496220103561,
      "learning_rate": 1.5219647318215297e-06,
      "loss": 0.1302,
      "step": 25795
    },
    {
      "epoch": 0.7525526576813116,
      "grad_norm": 1.3388398262619507,
      "learning_rate": 1.5216253400400483e-06,
      "loss": 0.102,
      "step": 25796
    },
    {
      "epoch": 0.7525818309119552,
      "grad_norm": 0.8398517099374087,
      "learning_rate": 1.5212859793126672e-06,
      "loss": 0.1097,
      "step": 25797
    },
    {
      "epoch": 0.7526110041425987,
      "grad_norm": 0.7939681099286744,
      "learning_rate": 1.5209466496424146e-06,
      "loss": 0.1333,
      "step": 25798
    },
    {
      "epoch": 0.7526401773732423,
      "grad_norm": 0.7374726875389844,
      "learning_rate": 1.5206073510323177e-06,
      "loss": 0.1291,
      "step": 25799
    },
    {
      "epoch": 0.7526693506038858,
      "grad_norm": 0.8541748450938101,
      "learning_rate": 1.5202680834854084e-06,
      "loss": 0.1097,
      "step": 25800
    },
    {
      "epoch": 0.7526985238345294,
      "grad_norm": 0.7624552554381788,
      "learning_rate": 1.5199288470047163e-06,
      "loss": 0.1375,
      "step": 25801
    },
    {
      "epoch": 0.752727697065173,
      "grad_norm": 0.8605363225941227,
      "learning_rate": 1.5195896415932687e-06,
      "loss": 0.1114,
      "step": 25802
    },
    {
      "epoch": 0.7527568702958165,
      "grad_norm": 0.7502923124699972,
      "learning_rate": 1.5192504672540919e-06,
      "loss": 0.0925,
      "step": 25803
    },
    {
      "epoch": 0.7527860435264602,
      "grad_norm": 0.8045466862669897,
      "learning_rate": 1.5189113239902182e-06,
      "loss": 0.1117,
      "step": 25804
    },
    {
      "epoch": 0.7528152167571037,
      "grad_norm": 1.120178268443537,
      "learning_rate": 1.5185722118046714e-06,
      "loss": 0.1147,
      "step": 25805
    },
    {
      "epoch": 0.7528443899877473,
      "grad_norm": 0.9345635236915236,
      "learning_rate": 1.518233130700481e-06,
      "loss": 0.1303,
      "step": 25806
    },
    {
      "epoch": 0.7528735632183908,
      "grad_norm": 0.7488181350558006,
      "learning_rate": 1.5178940806806753e-06,
      "loss": 0.1096,
      "step": 25807
    },
    {
      "epoch": 0.7529027364490344,
      "grad_norm": 0.7496632544713565,
      "learning_rate": 1.5175550617482804e-06,
      "loss": 0.1092,
      "step": 25808
    },
    {
      "epoch": 0.7529319096796779,
      "grad_norm": 0.7321865320211671,
      "learning_rate": 1.5172160739063208e-06,
      "loss": 0.1195,
      "step": 25809
    },
    {
      "epoch": 0.7529610829103215,
      "grad_norm": 0.9067910773689672,
      "learning_rate": 1.516877117157826e-06,
      "loss": 0.1236,
      "step": 25810
    },
    {
      "epoch": 0.752990256140965,
      "grad_norm": 0.8273544180677533,
      "learning_rate": 1.5165381915058196e-06,
      "loss": 0.1087,
      "step": 25811
    },
    {
      "epoch": 0.7530194293716086,
      "grad_norm": 0.7558130969418564,
      "learning_rate": 1.51619929695333e-06,
      "loss": 0.1203,
      "step": 25812
    },
    {
      "epoch": 0.7530486026022521,
      "grad_norm": 0.8595675943393801,
      "learning_rate": 1.51586043350338e-06,
      "loss": 0.1267,
      "step": 25813
    },
    {
      "epoch": 0.7530777758328957,
      "grad_norm": 0.6977803243866763,
      "learning_rate": 1.5155216011589979e-06,
      "loss": 0.0972,
      "step": 25814
    },
    {
      "epoch": 0.7531069490635393,
      "grad_norm": 0.8183157120963988,
      "learning_rate": 1.5151827999232071e-06,
      "loss": 0.1192,
      "step": 25815
    },
    {
      "epoch": 0.7531361222941828,
      "grad_norm": 0.9025714857647811,
      "learning_rate": 1.5148440297990308e-06,
      "loss": 0.1492,
      "step": 25816
    },
    {
      "epoch": 0.7531652955248265,
      "grad_norm": 2.137719978205146,
      "learning_rate": 1.5145052907894946e-06,
      "loss": 0.1206,
      "step": 25817
    },
    {
      "epoch": 0.75319446875547,
      "grad_norm": 0.7518584121211901,
      "learning_rate": 1.5141665828976253e-06,
      "loss": 0.1065,
      "step": 25818
    },
    {
      "epoch": 0.7532236419861136,
      "grad_norm": 0.8352424914910912,
      "learning_rate": 1.5138279061264445e-06,
      "loss": 0.11,
      "step": 25819
    },
    {
      "epoch": 0.7532528152167571,
      "grad_norm": 0.9322873060983801,
      "learning_rate": 1.5134892604789743e-06,
      "loss": 0.108,
      "step": 25820
    },
    {
      "epoch": 0.7532819884474007,
      "grad_norm": 1.1939548122290728,
      "learning_rate": 1.5131506459582412e-06,
      "loss": 0.1193,
      "step": 25821
    },
    {
      "epoch": 0.7533111616780442,
      "grad_norm": 0.7629191767789621,
      "learning_rate": 1.5128120625672648e-06,
      "loss": 0.1042,
      "step": 25822
    },
    {
      "epoch": 0.7533403349086878,
      "grad_norm": 0.8692873201149913,
      "learning_rate": 1.5124735103090704e-06,
      "loss": 0.1052,
      "step": 25823
    },
    {
      "epoch": 0.7533695081393313,
      "grad_norm": 0.934424415248695,
      "learning_rate": 1.5121349891866815e-06,
      "loss": 0.1018,
      "step": 25824
    },
    {
      "epoch": 0.7533986813699749,
      "grad_norm": 1.0896673051291843,
      "learning_rate": 1.5117964992031187e-06,
      "loss": 0.1441,
      "step": 25825
    },
    {
      "epoch": 0.7534278546006185,
      "grad_norm": 1.206064428837,
      "learning_rate": 1.5114580403614022e-06,
      "loss": 0.1235,
      "step": 25826
    },
    {
      "epoch": 0.753457027831262,
      "grad_norm": 0.8417440204406492,
      "learning_rate": 1.5111196126645573e-06,
      "loss": 0.112,
      "step": 25827
    },
    {
      "epoch": 0.7534862010619056,
      "grad_norm": 0.8429514644846525,
      "learning_rate": 1.5107812161156037e-06,
      "loss": 0.1006,
      "step": 25828
    },
    {
      "epoch": 0.7535153742925491,
      "grad_norm": 0.6798431981677711,
      "learning_rate": 1.5104428507175612e-06,
      "loss": 0.125,
      "step": 25829
    },
    {
      "epoch": 0.7535445475231927,
      "grad_norm": 0.786217281828536,
      "learning_rate": 1.5101045164734512e-06,
      "loss": 0.146,
      "step": 25830
    },
    {
      "epoch": 0.7535737207538363,
      "grad_norm": 0.909765805922423,
      "learning_rate": 1.5097662133862973e-06,
      "loss": 0.132,
      "step": 25831
    },
    {
      "epoch": 0.7536028939844799,
      "grad_norm": 0.8993856071037685,
      "learning_rate": 1.5094279414591168e-06,
      "loss": 0.1107,
      "step": 25832
    },
    {
      "epoch": 0.7536320672151234,
      "grad_norm": 0.8117467983951188,
      "learning_rate": 1.509089700694929e-06,
      "loss": 0.1085,
      "step": 25833
    },
    {
      "epoch": 0.753661240445767,
      "grad_norm": 0.8235728459135495,
      "learning_rate": 1.5087514910967572e-06,
      "loss": 0.1259,
      "step": 25834
    },
    {
      "epoch": 0.7536904136764105,
      "grad_norm": 0.8557785771479315,
      "learning_rate": 1.508413312667616e-06,
      "loss": 0.1109,
      "step": 25835
    },
    {
      "epoch": 0.7537195869070541,
      "grad_norm": 0.8349318552744862,
      "learning_rate": 1.508075165410528e-06,
      "loss": 0.116,
      "step": 25836
    },
    {
      "epoch": 0.7537487601376976,
      "grad_norm": 0.8397457214782929,
      "learning_rate": 1.5077370493285126e-06,
      "loss": 0.1194,
      "step": 25837
    },
    {
      "epoch": 0.7537779333683412,
      "grad_norm": 0.7883389338674813,
      "learning_rate": 1.5073989644245873e-06,
      "loss": 0.1275,
      "step": 25838
    },
    {
      "epoch": 0.7538071065989848,
      "grad_norm": 0.8064327679277885,
      "learning_rate": 1.5070609107017687e-06,
      "loss": 0.1026,
      "step": 25839
    },
    {
      "epoch": 0.7538362798296283,
      "grad_norm": 0.8328954087145269,
      "learning_rate": 1.506722888163078e-06,
      "loss": 0.1176,
      "step": 25840
    },
    {
      "epoch": 0.7538654530602719,
      "grad_norm": 0.918903002541291,
      "learning_rate": 1.5063848968115297e-06,
      "loss": 0.1199,
      "step": 25841
    },
    {
      "epoch": 0.7538946262909154,
      "grad_norm": 0.9218694076987992,
      "learning_rate": 1.506046936650145e-06,
      "loss": 0.116,
      "step": 25842
    },
    {
      "epoch": 0.753923799521559,
      "grad_norm": 0.8553981890892228,
      "learning_rate": 1.5057090076819375e-06,
      "loss": 0.0949,
      "step": 25843
    },
    {
      "epoch": 0.7539529727522026,
      "grad_norm": 0.8016816482886957,
      "learning_rate": 1.5053711099099272e-06,
      "loss": 0.1019,
      "step": 25844
    },
    {
      "epoch": 0.7539821459828462,
      "grad_norm": 0.8313417626973173,
      "learning_rate": 1.5050332433371295e-06,
      "loss": 0.1027,
      "step": 25845
    },
    {
      "epoch": 0.7540113192134897,
      "grad_norm": 0.8996554314666295,
      "learning_rate": 1.5046954079665588e-06,
      "loss": 0.0942,
      "step": 25846
    },
    {
      "epoch": 0.7540404924441333,
      "grad_norm": 0.8516424226755879,
      "learning_rate": 1.5043576038012337e-06,
      "loss": 0.1277,
      "step": 25847
    },
    {
      "epoch": 0.7540696656747768,
      "grad_norm": 0.9941469091678935,
      "learning_rate": 1.5040198308441707e-06,
      "loss": 0.1181,
      "step": 25848
    },
    {
      "epoch": 0.7540988389054204,
      "grad_norm": 0.8230225930256071,
      "learning_rate": 1.503682089098384e-06,
      "loss": 0.1065,
      "step": 25849
    },
    {
      "epoch": 0.754128012136064,
      "grad_norm": 0.9212499227895967,
      "learning_rate": 1.5033443785668873e-06,
      "loss": 0.1049,
      "step": 25850
    },
    {
      "epoch": 0.7541571853667075,
      "grad_norm": 0.9520060923931164,
      "learning_rate": 1.5030066992526993e-06,
      "loss": 0.1368,
      "step": 25851
    },
    {
      "epoch": 0.7541863585973511,
      "grad_norm": 0.8640732809284455,
      "learning_rate": 1.502669051158831e-06,
      "loss": 0.1013,
      "step": 25852
    },
    {
      "epoch": 0.7542155318279946,
      "grad_norm": 0.9303947086138226,
      "learning_rate": 1.5023314342882984e-06,
      "loss": 0.1161,
      "step": 25853
    },
    {
      "epoch": 0.7542447050586382,
      "grad_norm": 0.8011869651117773,
      "learning_rate": 1.5019938486441172e-06,
      "loss": 0.1266,
      "step": 25854
    },
    {
      "epoch": 0.7542738782892817,
      "grad_norm": 0.9567966624041573,
      "learning_rate": 1.5016562942293e-06,
      "loss": 0.1017,
      "step": 25855
    },
    {
      "epoch": 0.7543030515199253,
      "grad_norm": 0.8704921474973745,
      "learning_rate": 1.5013187710468584e-06,
      "loss": 0.1069,
      "step": 25856
    },
    {
      "epoch": 0.7543322247505688,
      "grad_norm": 0.7797878699950223,
      "learning_rate": 1.5009812790998096e-06,
      "loss": 0.0988,
      "step": 25857
    },
    {
      "epoch": 0.7543613979812125,
      "grad_norm": 0.8203057622573032,
      "learning_rate": 1.500643818391165e-06,
      "loss": 0.1409,
      "step": 25858
    },
    {
      "epoch": 0.754390571211856,
      "grad_norm": 0.951587561368629,
      "learning_rate": 1.500306388923935e-06,
      "loss": 0.1073,
      "step": 25859
    },
    {
      "epoch": 0.7544197444424996,
      "grad_norm": 1.061516934732331,
      "learning_rate": 1.4999689907011338e-06,
      "loss": 0.1158,
      "step": 25860
    },
    {
      "epoch": 0.7544489176731431,
      "grad_norm": 0.9483004185178417,
      "learning_rate": 1.4996316237257758e-06,
      "loss": 0.11,
      "step": 25861
    },
    {
      "epoch": 0.7544780909037867,
      "grad_norm": 0.7892898201827739,
      "learning_rate": 1.4992942880008716e-06,
      "loss": 0.1019,
      "step": 25862
    },
    {
      "epoch": 0.7545072641344303,
      "grad_norm": 0.8486407833114757,
      "learning_rate": 1.4989569835294298e-06,
      "loss": 0.1114,
      "step": 25863
    },
    {
      "epoch": 0.7545364373650738,
      "grad_norm": 0.8545793243946734,
      "learning_rate": 1.4986197103144661e-06,
      "loss": 0.111,
      "step": 25864
    },
    {
      "epoch": 0.7545656105957174,
      "grad_norm": 0.9917273192168473,
      "learning_rate": 1.4982824683589887e-06,
      "loss": 0.1285,
      "step": 25865
    },
    {
      "epoch": 0.7545947838263609,
      "grad_norm": 1.0136428433058013,
      "learning_rate": 1.4979452576660091e-06,
      "loss": 0.1201,
      "step": 25866
    },
    {
      "epoch": 0.7546239570570045,
      "grad_norm": 1.142683193062564,
      "learning_rate": 1.4976080782385399e-06,
      "loss": 0.0963,
      "step": 25867
    },
    {
      "epoch": 0.754653130287648,
      "grad_norm": 1.0490554723344994,
      "learning_rate": 1.4972709300795896e-06,
      "loss": 0.1179,
      "step": 25868
    },
    {
      "epoch": 0.7546823035182916,
      "grad_norm": 1.0838232578951599,
      "learning_rate": 1.4969338131921667e-06,
      "loss": 0.1063,
      "step": 25869
    },
    {
      "epoch": 0.7547114767489351,
      "grad_norm": 0.8307847314099764,
      "learning_rate": 1.4965967275792842e-06,
      "loss": 0.1102,
      "step": 25870
    },
    {
      "epoch": 0.7547406499795788,
      "grad_norm": 1.338595340756564,
      "learning_rate": 1.4962596732439484e-06,
      "loss": 0.115,
      "step": 25871
    },
    {
      "epoch": 0.7547698232102223,
      "grad_norm": 1.0822023601561124,
      "learning_rate": 1.495922650189171e-06,
      "loss": 0.1166,
      "step": 25872
    },
    {
      "epoch": 0.7547989964408659,
      "grad_norm": 0.7975328518759391,
      "learning_rate": 1.4955856584179584e-06,
      "loss": 0.1385,
      "step": 25873
    },
    {
      "epoch": 0.7548281696715095,
      "grad_norm": 0.7821845278332362,
      "learning_rate": 1.495248697933322e-06,
      "loss": 0.1126,
      "step": 25874
    },
    {
      "epoch": 0.754857342902153,
      "grad_norm": 0.7082873143847945,
      "learning_rate": 1.4949117687382686e-06,
      "loss": 0.101,
      "step": 25875
    },
    {
      "epoch": 0.7548865161327966,
      "grad_norm": 0.7938271501070359,
      "learning_rate": 1.4945748708358044e-06,
      "loss": 0.1053,
      "step": 25876
    },
    {
      "epoch": 0.7549156893634401,
      "grad_norm": 0.7773730416861138,
      "learning_rate": 1.4942380042289388e-06,
      "loss": 0.1171,
      "step": 25877
    },
    {
      "epoch": 0.7549448625940837,
      "grad_norm": 1.0313441772972234,
      "learning_rate": 1.4939011689206812e-06,
      "loss": 0.1002,
      "step": 25878
    },
    {
      "epoch": 0.7549740358247272,
      "grad_norm": 0.818432201028089,
      "learning_rate": 1.493564364914037e-06,
      "loss": 0.1287,
      "step": 25879
    },
    {
      "epoch": 0.7550032090553708,
      "grad_norm": 0.9182194321868798,
      "learning_rate": 1.4932275922120116e-06,
      "loss": 0.1269,
      "step": 25880
    },
    {
      "epoch": 0.7550323822860143,
      "grad_norm": 0.8024847123486553,
      "learning_rate": 1.4928908508176148e-06,
      "loss": 0.1031,
      "step": 25881
    },
    {
      "epoch": 0.7550615555166579,
      "grad_norm": 1.0148061125312728,
      "learning_rate": 1.4925541407338511e-06,
      "loss": 0.1176,
      "step": 25882
    },
    {
      "epoch": 0.7550907287473014,
      "grad_norm": 0.783349701504457,
      "learning_rate": 1.4922174619637236e-06,
      "loss": 0.1023,
      "step": 25883
    },
    {
      "epoch": 0.755119901977945,
      "grad_norm": 0.7475352753241996,
      "learning_rate": 1.4918808145102443e-06,
      "loss": 0.1452,
      "step": 25884
    },
    {
      "epoch": 0.7551490752085886,
      "grad_norm": 0.8235592086340293,
      "learning_rate": 1.4915441983764156e-06,
      "loss": 0.1081,
      "step": 25885
    },
    {
      "epoch": 0.7551782484392322,
      "grad_norm": 1.0793896475449292,
      "learning_rate": 1.4912076135652414e-06,
      "loss": 0.1188,
      "step": 25886
    },
    {
      "epoch": 0.7552074216698758,
      "grad_norm": 0.8527877192086889,
      "learning_rate": 1.4908710600797293e-06,
      "loss": 0.1166,
      "step": 25887
    },
    {
      "epoch": 0.7552365949005193,
      "grad_norm": 1.025990503369907,
      "learning_rate": 1.490534537922883e-06,
      "loss": 0.1279,
      "step": 25888
    },
    {
      "epoch": 0.7552657681311629,
      "grad_norm": 0.7099068887493515,
      "learning_rate": 1.4901980470977046e-06,
      "loss": 0.1035,
      "step": 25889
    },
    {
      "epoch": 0.7552949413618064,
      "grad_norm": 0.6294984277144369,
      "learning_rate": 1.4898615876072002e-06,
      "loss": 0.1263,
      "step": 25890
    },
    {
      "epoch": 0.75532411459245,
      "grad_norm": 1.0830088936709952,
      "learning_rate": 1.4895251594543758e-06,
      "loss": 0.1024,
      "step": 25891
    },
    {
      "epoch": 0.7553532878230935,
      "grad_norm": 0.879881989210489,
      "learning_rate": 1.4891887626422324e-06,
      "loss": 0.108,
      "step": 25892
    },
    {
      "epoch": 0.7553824610537371,
      "grad_norm": 0.8406366078337004,
      "learning_rate": 1.4888523971737716e-06,
      "loss": 0.1211,
      "step": 25893
    },
    {
      "epoch": 0.7554116342843806,
      "grad_norm": 0.7158975523182622,
      "learning_rate": 1.4885160630520008e-06,
      "loss": 0.1065,
      "step": 25894
    },
    {
      "epoch": 0.7554408075150242,
      "grad_norm": 0.9213164725454261,
      "learning_rate": 1.488179760279918e-06,
      "loss": 0.1197,
      "step": 25895
    },
    {
      "epoch": 0.7554699807456677,
      "grad_norm": 2.0983595262270427,
      "learning_rate": 1.4878434888605287e-06,
      "loss": 0.1069,
      "step": 25896
    },
    {
      "epoch": 0.7554991539763113,
      "grad_norm": 0.9421713784874025,
      "learning_rate": 1.4875072487968356e-06,
      "loss": 0.1069,
      "step": 25897
    },
    {
      "epoch": 0.755528327206955,
      "grad_norm": 1.0693569782422778,
      "learning_rate": 1.4871710400918388e-06,
      "loss": 0.1099,
      "step": 25898
    },
    {
      "epoch": 0.7555575004375985,
      "grad_norm": 0.8511918821329131,
      "learning_rate": 1.4868348627485397e-06,
      "loss": 0.1015,
      "step": 25899
    },
    {
      "epoch": 0.7555866736682421,
      "grad_norm": 0.6910395200974965,
      "learning_rate": 1.4864987167699414e-06,
      "loss": 0.0939,
      "step": 25900
    },
    {
      "epoch": 0.7556158468988856,
      "grad_norm": 1.0408751846096094,
      "learning_rate": 1.486162602159042e-06,
      "loss": 0.1056,
      "step": 25901
    },
    {
      "epoch": 0.7556450201295292,
      "grad_norm": 0.7004601690901944,
      "learning_rate": 1.485826518918846e-06,
      "loss": 0.1044,
      "step": 25902
    },
    {
      "epoch": 0.7556741933601727,
      "grad_norm": 1.0082745657940422,
      "learning_rate": 1.4854904670523496e-06,
      "loss": 0.1255,
      "step": 25903
    },
    {
      "epoch": 0.7557033665908163,
      "grad_norm": 0.8292697853817166,
      "learning_rate": 1.485154446562558e-06,
      "loss": 0.1157,
      "step": 25904
    },
    {
      "epoch": 0.7557325398214598,
      "grad_norm": 0.8205664785554989,
      "learning_rate": 1.4848184574524677e-06,
      "loss": 0.1225,
      "step": 25905
    },
    {
      "epoch": 0.7557617130521034,
      "grad_norm": 1.1649608083028422,
      "learning_rate": 1.4844824997250779e-06,
      "loss": 0.1262,
      "step": 25906
    },
    {
      "epoch": 0.7557908862827469,
      "grad_norm": 1.0107865491889776,
      "learning_rate": 1.4841465733833887e-06,
      "loss": 0.1155,
      "step": 25907
    },
    {
      "epoch": 0.7558200595133905,
      "grad_norm": 0.7543127553830585,
      "learning_rate": 1.4838106784304012e-06,
      "loss": 0.1002,
      "step": 25908
    },
    {
      "epoch": 0.755849232744034,
      "grad_norm": 0.9196363893053334,
      "learning_rate": 1.483474814869113e-06,
      "loss": 0.1158,
      "step": 25909
    },
    {
      "epoch": 0.7558784059746776,
      "grad_norm": 5.341613293993254,
      "learning_rate": 1.4831389827025206e-06,
      "loss": 0.1007,
      "step": 25910
    },
    {
      "epoch": 0.7559075792053211,
      "grad_norm": 0.9742554329408882,
      "learning_rate": 1.4828031819336254e-06,
      "loss": 0.12,
      "step": 25911
    },
    {
      "epoch": 0.7559367524359648,
      "grad_norm": 0.7953343134436859,
      "learning_rate": 1.4824674125654232e-06,
      "loss": 0.1218,
      "step": 25912
    },
    {
      "epoch": 0.7559659256666084,
      "grad_norm": 0.8716245200533763,
      "learning_rate": 1.4821316746009096e-06,
      "loss": 0.1056,
      "step": 25913
    },
    {
      "epoch": 0.7559950988972519,
      "grad_norm": 0.9618030116541052,
      "learning_rate": 1.4817959680430876e-06,
      "loss": 0.088,
      "step": 25914
    },
    {
      "epoch": 0.7560242721278955,
      "grad_norm": 0.8652819971298802,
      "learning_rate": 1.4814602928949512e-06,
      "loss": 0.1179,
      "step": 25915
    },
    {
      "epoch": 0.756053445358539,
      "grad_norm": 0.8854919138178449,
      "learning_rate": 1.4811246491594961e-06,
      "loss": 0.1302,
      "step": 25916
    },
    {
      "epoch": 0.7560826185891826,
      "grad_norm": 0.8830455795101136,
      "learning_rate": 1.4807890368397215e-06,
      "loss": 0.0977,
      "step": 25917
    },
    {
      "epoch": 0.7561117918198261,
      "grad_norm": 0.9216145671200741,
      "learning_rate": 1.4804534559386208e-06,
      "loss": 0.1076,
      "step": 25918
    },
    {
      "epoch": 0.7561409650504697,
      "grad_norm": 0.8000667135237715,
      "learning_rate": 1.480117906459193e-06,
      "loss": 0.1256,
      "step": 25919
    },
    {
      "epoch": 0.7561701382811132,
      "grad_norm": 0.9024547885912507,
      "learning_rate": 1.4797823884044303e-06,
      "loss": 0.1058,
      "step": 25920
    },
    {
      "epoch": 0.7561993115117568,
      "grad_norm": 0.7338654652081568,
      "learning_rate": 1.4794469017773327e-06,
      "loss": 0.1022,
      "step": 25921
    },
    {
      "epoch": 0.7562284847424003,
      "grad_norm": 0.9438720342464741,
      "learning_rate": 1.479111446580892e-06,
      "loss": 0.1317,
      "step": 25922
    },
    {
      "epoch": 0.7562576579730439,
      "grad_norm": 0.8413841232525172,
      "learning_rate": 1.4787760228181019e-06,
      "loss": 0.1132,
      "step": 25923
    },
    {
      "epoch": 0.7562868312036874,
      "grad_norm": 0.9747519063979391,
      "learning_rate": 1.4784406304919596e-06,
      "loss": 0.1201,
      "step": 25924
    },
    {
      "epoch": 0.7563160044343311,
      "grad_norm": 0.9040490081693991,
      "learning_rate": 1.4781052696054598e-06,
      "loss": 0.0901,
      "step": 25925
    },
    {
      "epoch": 0.7563451776649747,
      "grad_norm": 0.8826366885308079,
      "learning_rate": 1.477769940161594e-06,
      "loss": 0.1268,
      "step": 25926
    },
    {
      "epoch": 0.7563743508956182,
      "grad_norm": 0.9184658452872857,
      "learning_rate": 1.477434642163359e-06,
      "loss": 0.1015,
      "step": 25927
    },
    {
      "epoch": 0.7564035241262618,
      "grad_norm": 1.0181008044302222,
      "learning_rate": 1.4770993756137465e-06,
      "loss": 0.1212,
      "step": 25928
    },
    {
      "epoch": 0.7564326973569053,
      "grad_norm": 0.9308072159314915,
      "learning_rate": 1.4767641405157485e-06,
      "loss": 0.1025,
      "step": 25929
    },
    {
      "epoch": 0.7564618705875489,
      "grad_norm": 0.8788407820851127,
      "learning_rate": 1.476428936872359e-06,
      "loss": 0.121,
      "step": 25930
    },
    {
      "epoch": 0.7564910438181924,
      "grad_norm": 1.0523315667493367,
      "learning_rate": 1.4760937646865718e-06,
      "loss": 0.1288,
      "step": 25931
    },
    {
      "epoch": 0.756520217048836,
      "grad_norm": 0.806332663983592,
      "learning_rate": 1.475758623961379e-06,
      "loss": 0.1102,
      "step": 25932
    },
    {
      "epoch": 0.7565493902794795,
      "grad_norm": 0.7671064953869315,
      "learning_rate": 1.4754235146997704e-06,
      "loss": 0.1155,
      "step": 25933
    },
    {
      "epoch": 0.7565785635101231,
      "grad_norm": 1.0927368440905616,
      "learning_rate": 1.4750884369047403e-06,
      "loss": 0.1172,
      "step": 25934
    },
    {
      "epoch": 0.7566077367407666,
      "grad_norm": 0.8445207583002338,
      "learning_rate": 1.4747533905792794e-06,
      "loss": 0.1021,
      "step": 25935
    },
    {
      "epoch": 0.7566369099714102,
      "grad_norm": 0.6645077128838399,
      "learning_rate": 1.474418375726377e-06,
      "loss": 0.0965,
      "step": 25936
    },
    {
      "epoch": 0.7566660832020538,
      "grad_norm": 0.9671532615444046,
      "learning_rate": 1.4740833923490262e-06,
      "loss": 0.1181,
      "step": 25937
    },
    {
      "epoch": 0.7566952564326973,
      "grad_norm": 0.9902850061524677,
      "learning_rate": 1.4737484404502178e-06,
      "loss": 0.1432,
      "step": 25938
    },
    {
      "epoch": 0.756724429663341,
      "grad_norm": 1.0415156810652546,
      "learning_rate": 1.4734135200329425e-06,
      "loss": 0.0995,
      "step": 25939
    },
    {
      "epoch": 0.7567536028939845,
      "grad_norm": 0.8475533043562927,
      "learning_rate": 1.473078631100187e-06,
      "loss": 0.1335,
      "step": 25940
    },
    {
      "epoch": 0.7567827761246281,
      "grad_norm": 0.8573282963470593,
      "learning_rate": 1.472743773654946e-06,
      "loss": 0.1102,
      "step": 25941
    },
    {
      "epoch": 0.7568119493552716,
      "grad_norm": 0.9241080034264542,
      "learning_rate": 1.4724089477002047e-06,
      "loss": 0.1218,
      "step": 25942
    },
    {
      "epoch": 0.7568411225859152,
      "grad_norm": 0.9145944415032274,
      "learning_rate": 1.4720741532389537e-06,
      "loss": 0.105,
      "step": 25943
    },
    {
      "epoch": 0.7568702958165587,
      "grad_norm": 0.8711946635114528,
      "learning_rate": 1.4717393902741845e-06,
      "loss": 0.1223,
      "step": 25944
    },
    {
      "epoch": 0.7568994690472023,
      "grad_norm": 0.9409995913225305,
      "learning_rate": 1.4714046588088838e-06,
      "loss": 0.1271,
      "step": 25945
    },
    {
      "epoch": 0.7569286422778458,
      "grad_norm": 0.75000581011883,
      "learning_rate": 1.4710699588460382e-06,
      "loss": 0.115,
      "step": 25946
    },
    {
      "epoch": 0.7569578155084894,
      "grad_norm": 0.8327671343984938,
      "learning_rate": 1.4707352903886395e-06,
      "loss": 0.1,
      "step": 25947
    },
    {
      "epoch": 0.756986988739133,
      "grad_norm": 0.8913646526156245,
      "learning_rate": 1.4704006534396714e-06,
      "loss": 0.1186,
      "step": 25948
    },
    {
      "epoch": 0.7570161619697765,
      "grad_norm": 1.0481898711688422,
      "learning_rate": 1.4700660480021263e-06,
      "loss": 0.1023,
      "step": 25949
    },
    {
      "epoch": 0.75704533520042,
      "grad_norm": 0.8521890379544377,
      "learning_rate": 1.4697314740789864e-06,
      "loss": 0.1009,
      "step": 25950
    },
    {
      "epoch": 0.7570745084310636,
      "grad_norm": 1.126152932194546,
      "learning_rate": 1.4693969316732426e-06,
      "loss": 0.138,
      "step": 25951
    },
    {
      "epoch": 0.7571036816617073,
      "grad_norm": 0.9845809426788137,
      "learning_rate": 1.4690624207878807e-06,
      "loss": 0.1306,
      "step": 25952
    },
    {
      "epoch": 0.7571328548923508,
      "grad_norm": 0.673575236417537,
      "learning_rate": 1.4687279414258848e-06,
      "loss": 0.1197,
      "step": 25953
    },
    {
      "epoch": 0.7571620281229944,
      "grad_norm": 0.9049576524294846,
      "learning_rate": 1.4683934935902428e-06,
      "loss": 0.104,
      "step": 25954
    },
    {
      "epoch": 0.7571912013536379,
      "grad_norm": 0.7645593047819226,
      "learning_rate": 1.4680590772839427e-06,
      "loss": 0.1081,
      "step": 25955
    },
    {
      "epoch": 0.7572203745842815,
      "grad_norm": 0.8204293226318649,
      "learning_rate": 1.4677246925099659e-06,
      "loss": 0.1153,
      "step": 25956
    },
    {
      "epoch": 0.757249547814925,
      "grad_norm": 0.8188709015858062,
      "learning_rate": 1.4673903392713018e-06,
      "loss": 0.1447,
      "step": 25957
    },
    {
      "epoch": 0.7572787210455686,
      "grad_norm": 1.2373567565441503,
      "learning_rate": 1.4670560175709331e-06,
      "loss": 0.1197,
      "step": 25958
    },
    {
      "epoch": 0.7573078942762121,
      "grad_norm": 0.8767735977746076,
      "learning_rate": 1.4667217274118433e-06,
      "loss": 0.1269,
      "step": 25959
    },
    {
      "epoch": 0.7573370675068557,
      "grad_norm": 0.9925801970461513,
      "learning_rate": 1.4663874687970187e-06,
      "loss": 0.1212,
      "step": 25960
    },
    {
      "epoch": 0.7573662407374993,
      "grad_norm": 0.6686022034013344,
      "learning_rate": 1.4660532417294448e-06,
      "loss": 0.108,
      "step": 25961
    },
    {
      "epoch": 0.7573954139681428,
      "grad_norm": 2.0859127547442027,
      "learning_rate": 1.4657190462121035e-06,
      "loss": 0.1171,
      "step": 25962
    },
    {
      "epoch": 0.7574245871987864,
      "grad_norm": 0.8538955615247369,
      "learning_rate": 1.4653848822479778e-06,
      "loss": 0.1372,
      "step": 25963
    },
    {
      "epoch": 0.7574537604294299,
      "grad_norm": 1.1753289851422752,
      "learning_rate": 1.4650507498400535e-06,
      "loss": 0.0862,
      "step": 25964
    },
    {
      "epoch": 0.7574829336600735,
      "grad_norm": 1.1818555683389145,
      "learning_rate": 1.4647166489913123e-06,
      "loss": 0.1168,
      "step": 25965
    },
    {
      "epoch": 0.7575121068907171,
      "grad_norm": 0.9780284514575832,
      "learning_rate": 1.4643825797047351e-06,
      "loss": 0.1097,
      "step": 25966
    },
    {
      "epoch": 0.7575412801213607,
      "grad_norm": 1.0640706978670855,
      "learning_rate": 1.4640485419833062e-06,
      "loss": 0.119,
      "step": 25967
    },
    {
      "epoch": 0.7575704533520042,
      "grad_norm": 1.1239686902375026,
      "learning_rate": 1.4637145358300099e-06,
      "loss": 0.1337,
      "step": 25968
    },
    {
      "epoch": 0.7575996265826478,
      "grad_norm": 1.3655347668875975,
      "learning_rate": 1.463380561247826e-06,
      "loss": 0.1145,
      "step": 25969
    },
    {
      "epoch": 0.7576287998132913,
      "grad_norm": 1.1733687009818943,
      "learning_rate": 1.463046618239734e-06,
      "loss": 0.1179,
      "step": 25970
    },
    {
      "epoch": 0.7576579730439349,
      "grad_norm": 0.7321282465144936,
      "learning_rate": 1.4627127068087194e-06,
      "loss": 0.0879,
      "step": 25971
    },
    {
      "epoch": 0.7576871462745784,
      "grad_norm": 0.9933471409592995,
      "learning_rate": 1.4623788269577594e-06,
      "loss": 0.1125,
      "step": 25972
    },
    {
      "epoch": 0.757716319505222,
      "grad_norm": 1.2773186342760419,
      "learning_rate": 1.4620449786898372e-06,
      "loss": 0.1381,
      "step": 25973
    },
    {
      "epoch": 0.7577454927358656,
      "grad_norm": 1.1663542132844367,
      "learning_rate": 1.4617111620079343e-06,
      "loss": 0.1387,
      "step": 25974
    },
    {
      "epoch": 0.7577746659665091,
      "grad_norm": 0.9684096729095881,
      "learning_rate": 1.4613773769150298e-06,
      "loss": 0.0976,
      "step": 25975
    },
    {
      "epoch": 0.7578038391971527,
      "grad_norm": 0.7706560538170121,
      "learning_rate": 1.4610436234141013e-06,
      "loss": 0.1261,
      "step": 25976
    },
    {
      "epoch": 0.7578330124277962,
      "grad_norm": 1.1786850203145411,
      "learning_rate": 1.4607099015081322e-06,
      "loss": 0.1148,
      "step": 25977
    },
    {
      "epoch": 0.7578621856584398,
      "grad_norm": 1.230491814231743,
      "learning_rate": 1.4603762112000986e-06,
      "loss": 0.1199,
      "step": 25978
    },
    {
      "epoch": 0.7578913588890833,
      "grad_norm": 1.0535222003526965,
      "learning_rate": 1.460042552492983e-06,
      "loss": 0.127,
      "step": 25979
    },
    {
      "epoch": 0.757920532119727,
      "grad_norm": 1.018164078482079,
      "learning_rate": 1.4597089253897606e-06,
      "loss": 0.1464,
      "step": 25980
    },
    {
      "epoch": 0.7579497053503705,
      "grad_norm": 0.8576248454750078,
      "learning_rate": 1.4593753298934132e-06,
      "loss": 0.1143,
      "step": 25981
    },
    {
      "epoch": 0.7579788785810141,
      "grad_norm": 1.2795461807170105,
      "learning_rate": 1.4590417660069177e-06,
      "loss": 0.1107,
      "step": 25982
    },
    {
      "epoch": 0.7580080518116576,
      "grad_norm": 0.9777373110980944,
      "learning_rate": 1.4587082337332508e-06,
      "loss": 0.1176,
      "step": 25983
    },
    {
      "epoch": 0.7580372250423012,
      "grad_norm": 0.9614068004197822,
      "learning_rate": 1.458374733075391e-06,
      "loss": 0.1158,
      "step": 25984
    },
    {
      "epoch": 0.7580663982729448,
      "grad_norm": 0.8765415290918716,
      "learning_rate": 1.4580412640363185e-06,
      "loss": 0.1083,
      "step": 25985
    },
    {
      "epoch": 0.7580955715035883,
      "grad_norm": 0.8348287583351599,
      "learning_rate": 1.4577078266190058e-06,
      "loss": 0.1151,
      "step": 25986
    },
    {
      "epoch": 0.7581247447342319,
      "grad_norm": 0.950245424025854,
      "learning_rate": 1.4573744208264335e-06,
      "loss": 0.117,
      "step": 25987
    },
    {
      "epoch": 0.7581539179648754,
      "grad_norm": 1.1097815798448516,
      "learning_rate": 1.457041046661577e-06,
      "loss": 0.1294,
      "step": 25988
    },
    {
      "epoch": 0.758183091195519,
      "grad_norm": 1.009146907616732,
      "learning_rate": 1.4567077041274109e-06,
      "loss": 0.1116,
      "step": 25989
    },
    {
      "epoch": 0.7582122644261625,
      "grad_norm": 0.7586790868249903,
      "learning_rate": 1.456374393226912e-06,
      "loss": 0.1038,
      "step": 25990
    },
    {
      "epoch": 0.7582414376568061,
      "grad_norm": 0.7418242568597122,
      "learning_rate": 1.4560411139630581e-06,
      "loss": 0.1274,
      "step": 25991
    },
    {
      "epoch": 0.7582706108874496,
      "grad_norm": 1.084858512902555,
      "learning_rate": 1.4557078663388236e-06,
      "loss": 0.0986,
      "step": 25992
    },
    {
      "epoch": 0.7582997841180933,
      "grad_norm": 0.858504223295019,
      "learning_rate": 1.4553746503571813e-06,
      "loss": 0.1144,
      "step": 25993
    },
    {
      "epoch": 0.7583289573487368,
      "grad_norm": 0.7787597298270245,
      "learning_rate": 1.4550414660211099e-06,
      "loss": 0.1033,
      "step": 25994
    },
    {
      "epoch": 0.7583581305793804,
      "grad_norm": 0.8932275680825315,
      "learning_rate": 1.4547083133335821e-06,
      "loss": 0.1131,
      "step": 25995
    },
    {
      "epoch": 0.758387303810024,
      "grad_norm": 1.0437860675415536,
      "learning_rate": 1.45437519229757e-06,
      "loss": 0.0972,
      "step": 25996
    },
    {
      "epoch": 0.7584164770406675,
      "grad_norm": 0.7378942739734735,
      "learning_rate": 1.45404210291605e-06,
      "loss": 0.1121,
      "step": 25997
    },
    {
      "epoch": 0.7584456502713111,
      "grad_norm": 0.6903590941643238,
      "learning_rate": 1.4537090451919972e-06,
      "loss": 0.1082,
      "step": 25998
    },
    {
      "epoch": 0.7584748235019546,
      "grad_norm": 0.940410991770605,
      "learning_rate": 1.4533760191283836e-06,
      "loss": 0.1243,
      "step": 25999
    },
    {
      "epoch": 0.7585039967325982,
      "grad_norm": 0.8713184786511823,
      "learning_rate": 1.4530430247281808e-06,
      "loss": 0.1172,
      "step": 26000
    },
    {
      "epoch": 0.7585331699632417,
      "grad_norm": 0.7118485771160761,
      "learning_rate": 1.4527100619943646e-06,
      "loss": 0.1337,
      "step": 26001
    },
    {
      "epoch": 0.7585623431938853,
      "grad_norm": 0.8045119603049272,
      "learning_rate": 1.4523771309299044e-06,
      "loss": 0.1185,
      "step": 26002
    },
    {
      "epoch": 0.7585915164245288,
      "grad_norm": 0.8168851311481075,
      "learning_rate": 1.452044231537774e-06,
      "loss": 0.1107,
      "step": 26003
    },
    {
      "epoch": 0.7586206896551724,
      "grad_norm": 0.6164216694250991,
      "learning_rate": 1.451711363820948e-06,
      "loss": 0.1195,
      "step": 26004
    },
    {
      "epoch": 0.7586498628858159,
      "grad_norm": 0.9144872541790582,
      "learning_rate": 1.4513785277823956e-06,
      "loss": 0.1109,
      "step": 26005
    },
    {
      "epoch": 0.7586790361164595,
      "grad_norm": 0.9360306194839095,
      "learning_rate": 1.4510457234250868e-06,
      "loss": 0.1358,
      "step": 26006
    },
    {
      "epoch": 0.7587082093471031,
      "grad_norm": 0.7041663597927578,
      "learning_rate": 1.4507129507519968e-06,
      "loss": 0.1001,
      "step": 26007
    },
    {
      "epoch": 0.7587373825777467,
      "grad_norm": 0.7837777907709031,
      "learning_rate": 1.4503802097660918e-06,
      "loss": 0.114,
      "step": 26008
    },
    {
      "epoch": 0.7587665558083903,
      "grad_norm": 0.7362928846800872,
      "learning_rate": 1.4500475004703475e-06,
      "loss": 0.1038,
      "step": 26009
    },
    {
      "epoch": 0.7587957290390338,
      "grad_norm": 0.8596231315156331,
      "learning_rate": 1.4497148228677294e-06,
      "loss": 0.1175,
      "step": 26010
    },
    {
      "epoch": 0.7588249022696774,
      "grad_norm": 1.0259867411899775,
      "learning_rate": 1.4493821769612115e-06,
      "loss": 0.1158,
      "step": 26011
    },
    {
      "epoch": 0.7588540755003209,
      "grad_norm": 0.9072027808247067,
      "learning_rate": 1.4490495627537621e-06,
      "loss": 0.1118,
      "step": 26012
    },
    {
      "epoch": 0.7588832487309645,
      "grad_norm": 0.8457106723844688,
      "learning_rate": 1.4487169802483485e-06,
      "loss": 0.1051,
      "step": 26013
    },
    {
      "epoch": 0.758912421961608,
      "grad_norm": 1.1286016343179337,
      "learning_rate": 1.4483844294479427e-06,
      "loss": 0.1337,
      "step": 26014
    },
    {
      "epoch": 0.7589415951922516,
      "grad_norm": 0.8435348531995076,
      "learning_rate": 1.4480519103555141e-06,
      "loss": 0.1367,
      "step": 26015
    },
    {
      "epoch": 0.7589707684228951,
      "grad_norm": 0.8806963037281516,
      "learning_rate": 1.4477194229740282e-06,
      "loss": 0.1012,
      "step": 26016
    },
    {
      "epoch": 0.7589999416535387,
      "grad_norm": 1.1095409650591022,
      "learning_rate": 1.4473869673064573e-06,
      "loss": 0.1215,
      "step": 26017
    },
    {
      "epoch": 0.7590291148841822,
      "grad_norm": 0.8571719788990452,
      "learning_rate": 1.4470545433557676e-06,
      "loss": 0.1168,
      "step": 26018
    },
    {
      "epoch": 0.7590582881148258,
      "grad_norm": 0.8549046547853687,
      "learning_rate": 1.4467221511249247e-06,
      "loss": 0.1042,
      "step": 26019
    },
    {
      "epoch": 0.7590874613454694,
      "grad_norm": 0.8946800367869224,
      "learning_rate": 1.4463897906168984e-06,
      "loss": 0.1172,
      "step": 26020
    },
    {
      "epoch": 0.759116634576113,
      "grad_norm": 0.9367240916529788,
      "learning_rate": 1.4460574618346573e-06,
      "loss": 0.1137,
      "step": 26021
    },
    {
      "epoch": 0.7591458078067566,
      "grad_norm": 1.0603338690555317,
      "learning_rate": 1.445725164781167e-06,
      "loss": 0.1113,
      "step": 26022
    },
    {
      "epoch": 0.7591749810374001,
      "grad_norm": 0.7919956411995522,
      "learning_rate": 1.4453928994593925e-06,
      "loss": 0.1035,
      "step": 26023
    },
    {
      "epoch": 0.7592041542680437,
      "grad_norm": 1.0548655536326177,
      "learning_rate": 1.4450606658723026e-06,
      "loss": 0.1153,
      "step": 26024
    },
    {
      "epoch": 0.7592333274986872,
      "grad_norm": 0.8492669261576317,
      "learning_rate": 1.4447284640228631e-06,
      "loss": 0.0978,
      "step": 26025
    },
    {
      "epoch": 0.7592625007293308,
      "grad_norm": 0.8709168049423043,
      "learning_rate": 1.4443962939140372e-06,
      "loss": 0.1065,
      "step": 26026
    },
    {
      "epoch": 0.7592916739599743,
      "grad_norm": 1.3811960156659688,
      "learning_rate": 1.4440641555487922e-06,
      "loss": 0.1065,
      "step": 26027
    },
    {
      "epoch": 0.7593208471906179,
      "grad_norm": 0.7200139465267126,
      "learning_rate": 1.4437320489300954e-06,
      "loss": 0.1045,
      "step": 26028
    },
    {
      "epoch": 0.7593500204212614,
      "grad_norm": 0.7628794682821637,
      "learning_rate": 1.44339997406091e-06,
      "loss": 0.1129,
      "step": 26029
    },
    {
      "epoch": 0.759379193651905,
      "grad_norm": 0.9482328257842885,
      "learning_rate": 1.4430679309441992e-06,
      "loss": 0.1548,
      "step": 26030
    },
    {
      "epoch": 0.7594083668825485,
      "grad_norm": 0.9370533620990203,
      "learning_rate": 1.44273591958293e-06,
      "loss": 0.1201,
      "step": 26031
    },
    {
      "epoch": 0.7594375401131921,
      "grad_norm": 0.9843232443690725,
      "learning_rate": 1.4424039399800639e-06,
      "loss": 0.1229,
      "step": 26032
    },
    {
      "epoch": 0.7594667133438356,
      "grad_norm": 0.891275018099884,
      "learning_rate": 1.442071992138566e-06,
      "loss": 0.1253,
      "step": 26033
    },
    {
      "epoch": 0.7594958865744793,
      "grad_norm": 1.0782847967864844,
      "learning_rate": 1.441740076061402e-06,
      "loss": 0.1184,
      "step": 26034
    },
    {
      "epoch": 0.7595250598051229,
      "grad_norm": 1.1422747124755679,
      "learning_rate": 1.4414081917515328e-06,
      "loss": 0.1218,
      "step": 26035
    },
    {
      "epoch": 0.7595542330357664,
      "grad_norm": 1.0143969155664316,
      "learning_rate": 1.4410763392119203e-06,
      "loss": 0.1212,
      "step": 26036
    },
    {
      "epoch": 0.75958340626641,
      "grad_norm": 1.0085085582756732,
      "learning_rate": 1.4407445184455304e-06,
      "loss": 0.113,
      "step": 26037
    },
    {
      "epoch": 0.7596125794970535,
      "grad_norm": 0.8049804432728066,
      "learning_rate": 1.4404127294553216e-06,
      "loss": 0.1162,
      "step": 26038
    },
    {
      "epoch": 0.7596417527276971,
      "grad_norm": 0.7588235740314043,
      "learning_rate": 1.4400809722442604e-06,
      "loss": 0.1194,
      "step": 26039
    },
    {
      "epoch": 0.7596709259583406,
      "grad_norm": 0.7968265456666715,
      "learning_rate": 1.4397492468153047e-06,
      "loss": 0.1227,
      "step": 26040
    },
    {
      "epoch": 0.7597000991889842,
      "grad_norm": 0.9157507802234769,
      "learning_rate": 1.4394175531714193e-06,
      "loss": 0.1268,
      "step": 26041
    },
    {
      "epoch": 0.7597292724196277,
      "grad_norm": 0.872768238942069,
      "learning_rate": 1.4390858913155641e-06,
      "loss": 0.0965,
      "step": 26042
    },
    {
      "epoch": 0.7597584456502713,
      "grad_norm": 0.8289127647144127,
      "learning_rate": 1.4387542612506983e-06,
      "loss": 0.0954,
      "step": 26043
    },
    {
      "epoch": 0.7597876188809148,
      "grad_norm": 0.7870445360928183,
      "learning_rate": 1.438422662979785e-06,
      "loss": 0.1166,
      "step": 26044
    },
    {
      "epoch": 0.7598167921115584,
      "grad_norm": 0.910924733280388,
      "learning_rate": 1.4380910965057843e-06,
      "loss": 0.1299,
      "step": 26045
    },
    {
      "epoch": 0.759845965342202,
      "grad_norm": 0.8015310873760055,
      "learning_rate": 1.4377595618316552e-06,
      "loss": 0.1259,
      "step": 26046
    },
    {
      "epoch": 0.7598751385728456,
      "grad_norm": 0.7435053511577032,
      "learning_rate": 1.4374280589603602e-06,
      "loss": 0.1202,
      "step": 26047
    },
    {
      "epoch": 0.7599043118034892,
      "grad_norm": 0.9469440455160251,
      "learning_rate": 1.4370965878948562e-06,
      "loss": 0.1072,
      "step": 26048
    },
    {
      "epoch": 0.7599334850341327,
      "grad_norm": 0.8816852366894563,
      "learning_rate": 1.4367651486381023e-06,
      "loss": 0.1254,
      "step": 26049
    },
    {
      "epoch": 0.7599626582647763,
      "grad_norm": 1.0915406489172184,
      "learning_rate": 1.4364337411930585e-06,
      "loss": 0.1094,
      "step": 26050
    },
    {
      "epoch": 0.7599918314954198,
      "grad_norm": 0.7775839955869801,
      "learning_rate": 1.436102365562685e-06,
      "loss": 0.1438,
      "step": 26051
    },
    {
      "epoch": 0.7600210047260634,
      "grad_norm": 0.7853566644503953,
      "learning_rate": 1.4357710217499387e-06,
      "loss": 0.1302,
      "step": 26052
    },
    {
      "epoch": 0.7600501779567069,
      "grad_norm": 0.9019594927138638,
      "learning_rate": 1.4354397097577766e-06,
      "loss": 0.1342,
      "step": 26053
    },
    {
      "epoch": 0.7600793511873505,
      "grad_norm": 0.6598718319418905,
      "learning_rate": 1.4351084295891593e-06,
      "loss": 0.1028,
      "step": 26054
    },
    {
      "epoch": 0.760108524417994,
      "grad_norm": 0.7998989603001021,
      "learning_rate": 1.4347771812470428e-06,
      "loss": 0.1326,
      "step": 26055
    },
    {
      "epoch": 0.7601376976486376,
      "grad_norm": 0.8621933089397833,
      "learning_rate": 1.4344459647343833e-06,
      "loss": 0.1265,
      "step": 26056
    },
    {
      "epoch": 0.7601668708792811,
      "grad_norm": 0.8460505298845999,
      "learning_rate": 1.4341147800541387e-06,
      "loss": 0.1094,
      "step": 26057
    },
    {
      "epoch": 0.7601960441099247,
      "grad_norm": 0.8597389125328023,
      "learning_rate": 1.4337836272092681e-06,
      "loss": 0.1244,
      "step": 26058
    },
    {
      "epoch": 0.7602252173405682,
      "grad_norm": 0.821636293537273,
      "learning_rate": 1.4334525062027255e-06,
      "loss": 0.1313,
      "step": 26059
    },
    {
      "epoch": 0.7602543905712118,
      "grad_norm": 1.0433881299768766,
      "learning_rate": 1.4331214170374663e-06,
      "loss": 0.1067,
      "step": 26060
    },
    {
      "epoch": 0.7602835638018555,
      "grad_norm": 2.0221918569399744,
      "learning_rate": 1.4327903597164488e-06,
      "loss": 0.1075,
      "step": 26061
    },
    {
      "epoch": 0.760312737032499,
      "grad_norm": 0.7565484371987905,
      "learning_rate": 1.4324593342426264e-06,
      "loss": 0.1275,
      "step": 26062
    },
    {
      "epoch": 0.7603419102631426,
      "grad_norm": 0.9417510453568073,
      "learning_rate": 1.432128340618955e-06,
      "loss": 0.1323,
      "step": 26063
    },
    {
      "epoch": 0.7603710834937861,
      "grad_norm": 0.8785801625610209,
      "learning_rate": 1.4317973788483914e-06,
      "loss": 0.101,
      "step": 26064
    },
    {
      "epoch": 0.7604002567244297,
      "grad_norm": 0.7417185558525325,
      "learning_rate": 1.4314664489338892e-06,
      "loss": 0.1231,
      "step": 26065
    },
    {
      "epoch": 0.7604294299550732,
      "grad_norm": 0.8351879040678396,
      "learning_rate": 1.4311355508784015e-06,
      "loss": 0.1048,
      "step": 26066
    },
    {
      "epoch": 0.7604586031857168,
      "grad_norm": 0.8431731966399241,
      "learning_rate": 1.430804684684885e-06,
      "loss": 0.1249,
      "step": 26067
    },
    {
      "epoch": 0.7604877764163603,
      "grad_norm": 1.4833114662917593,
      "learning_rate": 1.4304738503562903e-06,
      "loss": 0.1123,
      "step": 26068
    },
    {
      "epoch": 0.7605169496470039,
      "grad_norm": 0.8581745216150858,
      "learning_rate": 1.4301430478955748e-06,
      "loss": 0.129,
      "step": 26069
    },
    {
      "epoch": 0.7605461228776474,
      "grad_norm": 1.0792194494872442,
      "learning_rate": 1.4298122773056883e-06,
      "loss": 0.1123,
      "step": 26070
    },
    {
      "epoch": 0.760575296108291,
      "grad_norm": 0.8004529914549448,
      "learning_rate": 1.4294815385895872e-06,
      "loss": 0.1073,
      "step": 26071
    },
    {
      "epoch": 0.7606044693389346,
      "grad_norm": 0.6705396840924204,
      "learning_rate": 1.4291508317502229e-06,
      "loss": 0.1075,
      "step": 26072
    },
    {
      "epoch": 0.7606336425695781,
      "grad_norm": 0.9491198281198915,
      "learning_rate": 1.4288201567905452e-06,
      "loss": 0.125,
      "step": 26073
    },
    {
      "epoch": 0.7606628158002218,
      "grad_norm": 0.6802741828152761,
      "learning_rate": 1.4284895137135091e-06,
      "loss": 0.1154,
      "step": 26074
    },
    {
      "epoch": 0.7606919890308653,
      "grad_norm": 0.6897548118106798,
      "learning_rate": 1.4281589025220676e-06,
      "loss": 0.0956,
      "step": 26075
    },
    {
      "epoch": 0.7607211622615089,
      "grad_norm": 0.5562101397590327,
      "learning_rate": 1.4278283232191692e-06,
      "loss": 0.1185,
      "step": 26076
    },
    {
      "epoch": 0.7607503354921524,
      "grad_norm": 0.697138522827796,
      "learning_rate": 1.4274977758077685e-06,
      "loss": 0.0986,
      "step": 26077
    },
    {
      "epoch": 0.760779508722796,
      "grad_norm": 0.7319436734846092,
      "learning_rate": 1.4271672602908143e-06,
      "loss": 0.1113,
      "step": 26078
    },
    {
      "epoch": 0.7608086819534395,
      "grad_norm": 0.7363816138287833,
      "learning_rate": 1.4268367766712571e-06,
      "loss": 0.094,
      "step": 26079
    },
    {
      "epoch": 0.7608378551840831,
      "grad_norm": 0.9144220524949574,
      "learning_rate": 1.4265063249520478e-06,
      "loss": 0.1191,
      "step": 26080
    },
    {
      "epoch": 0.7608670284147266,
      "grad_norm": 0.8609360798205258,
      "learning_rate": 1.4261759051361378e-06,
      "loss": 0.1045,
      "step": 26081
    },
    {
      "epoch": 0.7608962016453702,
      "grad_norm": 0.7758677831773033,
      "learning_rate": 1.4258455172264774e-06,
      "loss": 0.1203,
      "step": 26082
    },
    {
      "epoch": 0.7609253748760137,
      "grad_norm": 0.6705278719730049,
      "learning_rate": 1.4255151612260127e-06,
      "loss": 0.1273,
      "step": 26083
    },
    {
      "epoch": 0.7609545481066573,
      "grad_norm": 0.8855828532746887,
      "learning_rate": 1.425184837137697e-06,
      "loss": 0.1223,
      "step": 26084
    },
    {
      "epoch": 0.7609837213373009,
      "grad_norm": 0.8729962093532602,
      "learning_rate": 1.4248545449644778e-06,
      "loss": 0.1048,
      "step": 26085
    },
    {
      "epoch": 0.7610128945679444,
      "grad_norm": 0.9834331543670914,
      "learning_rate": 1.424524284709302e-06,
      "loss": 0.1099,
      "step": 26086
    },
    {
      "epoch": 0.761042067798588,
      "grad_norm": 1.1227771963032696,
      "learning_rate": 1.4241940563751205e-06,
      "loss": 0.1089,
      "step": 26087
    },
    {
      "epoch": 0.7610712410292316,
      "grad_norm": 0.9042592500315241,
      "learning_rate": 1.4238638599648818e-06,
      "loss": 0.1044,
      "step": 26088
    },
    {
      "epoch": 0.7611004142598752,
      "grad_norm": 0.9398773073336264,
      "learning_rate": 1.423533695481533e-06,
      "loss": 0.0945,
      "step": 26089
    },
    {
      "epoch": 0.7611295874905187,
      "grad_norm": 0.9616544174256167,
      "learning_rate": 1.4232035629280199e-06,
      "loss": 0.1295,
      "step": 26090
    },
    {
      "epoch": 0.7611587607211623,
      "grad_norm": 1.0352716786918337,
      "learning_rate": 1.4228734623072932e-06,
      "loss": 0.1224,
      "step": 26091
    },
    {
      "epoch": 0.7611879339518058,
      "grad_norm": 0.9028573316249922,
      "learning_rate": 1.422543393622297e-06,
      "loss": 0.1108,
      "step": 26092
    },
    {
      "epoch": 0.7612171071824494,
      "grad_norm": 1.06193886836678,
      "learning_rate": 1.4222133568759793e-06,
      "loss": 0.1204,
      "step": 26093
    },
    {
      "epoch": 0.761246280413093,
      "grad_norm": 0.894203663634131,
      "learning_rate": 1.4218833520712876e-06,
      "loss": 0.1339,
      "step": 26094
    },
    {
      "epoch": 0.7612754536437365,
      "grad_norm": 0.9934211483158438,
      "learning_rate": 1.421553379211168e-06,
      "loss": 0.1122,
      "step": 26095
    },
    {
      "epoch": 0.76130462687438,
      "grad_norm": 0.8794428270048518,
      "learning_rate": 1.4212234382985634e-06,
      "loss": 0.1033,
      "step": 26096
    },
    {
      "epoch": 0.7613338001050236,
      "grad_norm": 0.8888271444596492,
      "learning_rate": 1.420893529336424e-06,
      "loss": 0.103,
      "step": 26097
    },
    {
      "epoch": 0.7613629733356672,
      "grad_norm": 0.8720925799092015,
      "learning_rate": 1.4205636523276907e-06,
      "loss": 0.1202,
      "step": 26098
    },
    {
      "epoch": 0.7613921465663107,
      "grad_norm": 0.9711674969153876,
      "learning_rate": 1.4202338072753119e-06,
      "loss": 0.1303,
      "step": 26099
    },
    {
      "epoch": 0.7614213197969543,
      "grad_norm": 1.0092013053378361,
      "learning_rate": 1.4199039941822296e-06,
      "loss": 0.1049,
      "step": 26100
    },
    {
      "epoch": 0.7614504930275979,
      "grad_norm": 1.0326288842659024,
      "learning_rate": 1.4195742130513917e-06,
      "loss": 0.1304,
      "step": 26101
    },
    {
      "epoch": 0.7614796662582415,
      "grad_norm": 1.290525021354441,
      "learning_rate": 1.4192444638857406e-06,
      "loss": 0.1092,
      "step": 26102
    },
    {
      "epoch": 0.761508839488885,
      "grad_norm": 0.7499833600199455,
      "learning_rate": 1.418914746688218e-06,
      "loss": 0.1038,
      "step": 26103
    },
    {
      "epoch": 0.7615380127195286,
      "grad_norm": 0.7940921307230743,
      "learning_rate": 1.4185850614617702e-06,
      "loss": 0.1243,
      "step": 26104
    },
    {
      "epoch": 0.7615671859501721,
      "grad_norm": 0.890027375851292,
      "learning_rate": 1.4182554082093413e-06,
      "loss": 0.1245,
      "step": 26105
    },
    {
      "epoch": 0.7615963591808157,
      "grad_norm": 1.0072383997810619,
      "learning_rate": 1.417925786933872e-06,
      "loss": 0.1012,
      "step": 26106
    },
    {
      "epoch": 0.7616255324114592,
      "grad_norm": 0.8988512233934777,
      "learning_rate": 1.4175961976383074e-06,
      "loss": 0.1075,
      "step": 26107
    },
    {
      "epoch": 0.7616547056421028,
      "grad_norm": 0.7775804860429172,
      "learning_rate": 1.4172666403255885e-06,
      "loss": 0.122,
      "step": 26108
    },
    {
      "epoch": 0.7616838788727464,
      "grad_norm": 0.8146670210731684,
      "learning_rate": 1.4169371149986566e-06,
      "loss": 0.1181,
      "step": 26109
    },
    {
      "epoch": 0.7617130521033899,
      "grad_norm": 0.8593160795739001,
      "learning_rate": 1.4166076216604546e-06,
      "loss": 0.1273,
      "step": 26110
    },
    {
      "epoch": 0.7617422253340335,
      "grad_norm": 0.7258672718667234,
      "learning_rate": 1.416278160313926e-06,
      "loss": 0.1052,
      "step": 26111
    },
    {
      "epoch": 0.761771398564677,
      "grad_norm": 0.8639625016813878,
      "learning_rate": 1.41594873096201e-06,
      "loss": 0.129,
      "step": 26112
    },
    {
      "epoch": 0.7618005717953206,
      "grad_norm": 1.0549937706213535,
      "learning_rate": 1.4156193336076468e-06,
      "loss": 0.1188,
      "step": 26113
    },
    {
      "epoch": 0.7618297450259641,
      "grad_norm": 1.040744735760747,
      "learning_rate": 1.4152899682537807e-06,
      "loss": 0.1311,
      "step": 26114
    },
    {
      "epoch": 0.7618589182566078,
      "grad_norm": 0.800371634445398,
      "learning_rate": 1.4149606349033479e-06,
      "loss": 0.1102,
      "step": 26115
    },
    {
      "epoch": 0.7618880914872513,
      "grad_norm": 0.7326289348566162,
      "learning_rate": 1.414631333559292e-06,
      "loss": 0.1107,
      "step": 26116
    },
    {
      "epoch": 0.7619172647178949,
      "grad_norm": 0.8358216723861759,
      "learning_rate": 1.4143020642245508e-06,
      "loss": 0.1244,
      "step": 26117
    },
    {
      "epoch": 0.7619464379485384,
      "grad_norm": 0.9032544770366628,
      "learning_rate": 1.4139728269020658e-06,
      "loss": 0.1041,
      "step": 26118
    },
    {
      "epoch": 0.761975611179182,
      "grad_norm": 0.6647824916904694,
      "learning_rate": 1.4136436215947758e-06,
      "loss": 0.1187,
      "step": 26119
    },
    {
      "epoch": 0.7620047844098256,
      "grad_norm": 0.7611100393768583,
      "learning_rate": 1.4133144483056177e-06,
      "loss": 0.1216,
      "step": 26120
    },
    {
      "epoch": 0.7620339576404691,
      "grad_norm": 1.0394359126634016,
      "learning_rate": 1.412985307037532e-06,
      "loss": 0.1087,
      "step": 26121
    },
    {
      "epoch": 0.7620631308711127,
      "grad_norm": 0.8332488684432571,
      "learning_rate": 1.4126561977934588e-06,
      "loss": 0.1305,
      "step": 26122
    },
    {
      "epoch": 0.7620923041017562,
      "grad_norm": 0.8175459595351123,
      "learning_rate": 1.4123271205763328e-06,
      "loss": 0.1103,
      "step": 26123
    },
    {
      "epoch": 0.7621214773323998,
      "grad_norm": 1.0622581782031035,
      "learning_rate": 1.4119980753890961e-06,
      "loss": 0.0998,
      "step": 26124
    },
    {
      "epoch": 0.7621506505630433,
      "grad_norm": 1.350676129465357,
      "learning_rate": 1.4116690622346834e-06,
      "loss": 0.115,
      "step": 26125
    },
    {
      "epoch": 0.7621798237936869,
      "grad_norm": 1.1058608444791878,
      "learning_rate": 1.411340081116031e-06,
      "loss": 0.1075,
      "step": 26126
    },
    {
      "epoch": 0.7622089970243304,
      "grad_norm": 0.8742520958084282,
      "learning_rate": 1.4110111320360782e-06,
      "loss": 0.1241,
      "step": 26127
    },
    {
      "epoch": 0.7622381702549741,
      "grad_norm": 0.8807076242303162,
      "learning_rate": 1.4106822149977628e-06,
      "loss": 0.1218,
      "step": 26128
    },
    {
      "epoch": 0.7622673434856176,
      "grad_norm": 0.7028763989526833,
      "learning_rate": 1.4103533300040196e-06,
      "loss": 0.1246,
      "step": 26129
    },
    {
      "epoch": 0.7622965167162612,
      "grad_norm": 0.7250147832036411,
      "learning_rate": 1.4100244770577831e-06,
      "loss": 0.1214,
      "step": 26130
    },
    {
      "epoch": 0.7623256899469047,
      "grad_norm": 0.8747872299612623,
      "learning_rate": 1.4096956561619929e-06,
      "loss": 0.1208,
      "step": 26131
    },
    {
      "epoch": 0.7623548631775483,
      "grad_norm": 0.6920845028611138,
      "learning_rate": 1.4093668673195832e-06,
      "loss": 0.0886,
      "step": 26132
    },
    {
      "epoch": 0.7623840364081919,
      "grad_norm": 0.8411257337544789,
      "learning_rate": 1.409038110533485e-06,
      "loss": 0.1176,
      "step": 26133
    },
    {
      "epoch": 0.7624132096388354,
      "grad_norm": 0.7044509770592748,
      "learning_rate": 1.408709385806641e-06,
      "loss": 0.1193,
      "step": 26134
    },
    {
      "epoch": 0.762442382869479,
      "grad_norm": 0.9000777091653247,
      "learning_rate": 1.4083806931419825e-06,
      "loss": 0.1249,
      "step": 26135
    },
    {
      "epoch": 0.7624715561001225,
      "grad_norm": 0.773039298613744,
      "learning_rate": 1.4080520325424418e-06,
      "loss": 0.1076,
      "step": 26136
    },
    {
      "epoch": 0.7625007293307661,
      "grad_norm": 0.718514893172037,
      "learning_rate": 1.4077234040109567e-06,
      "loss": 0.1214,
      "step": 26137
    },
    {
      "epoch": 0.7625299025614096,
      "grad_norm": 0.7763429287032169,
      "learning_rate": 1.4073948075504596e-06,
      "loss": 0.1298,
      "step": 26138
    },
    {
      "epoch": 0.7625590757920532,
      "grad_norm": 1.1534295868138604,
      "learning_rate": 1.4070662431638821e-06,
      "loss": 0.122,
      "step": 26139
    },
    {
      "epoch": 0.7625882490226967,
      "grad_norm": 0.8624455969386104,
      "learning_rate": 1.4067377108541597e-06,
      "loss": 0.1488,
      "step": 26140
    },
    {
      "epoch": 0.7626174222533403,
      "grad_norm": 0.8243022362839859,
      "learning_rate": 1.4064092106242272e-06,
      "loss": 0.1213,
      "step": 26141
    },
    {
      "epoch": 0.762646595483984,
      "grad_norm": 0.769343615825338,
      "learning_rate": 1.406080742477015e-06,
      "loss": 0.1135,
      "step": 26142
    },
    {
      "epoch": 0.7626757687146275,
      "grad_norm": 0.9565018495201091,
      "learning_rate": 1.4057523064154544e-06,
      "loss": 0.1172,
      "step": 26143
    },
    {
      "epoch": 0.762704941945271,
      "grad_norm": 1.4047610873709226,
      "learning_rate": 1.405423902442481e-06,
      "loss": 0.1457,
      "step": 26144
    },
    {
      "epoch": 0.7627341151759146,
      "grad_norm": 0.9720275556686869,
      "learning_rate": 1.4050955305610232e-06,
      "loss": 0.1271,
      "step": 26145
    },
    {
      "epoch": 0.7627632884065582,
      "grad_norm": 0.7835991442333221,
      "learning_rate": 1.4047671907740156e-06,
      "loss": 0.0847,
      "step": 26146
    },
    {
      "epoch": 0.7627924616372017,
      "grad_norm": 0.9013144996617115,
      "learning_rate": 1.4044388830843875e-06,
      "loss": 0.1313,
      "step": 26147
    },
    {
      "epoch": 0.7628216348678453,
      "grad_norm": 0.8317579576376288,
      "learning_rate": 1.4041106074950716e-06,
      "loss": 0.0963,
      "step": 26148
    },
    {
      "epoch": 0.7628508080984888,
      "grad_norm": 0.9615326176543755,
      "learning_rate": 1.4037823640089982e-06,
      "loss": 0.1088,
      "step": 26149
    },
    {
      "epoch": 0.7628799813291324,
      "grad_norm": 0.9889612993697408,
      "learning_rate": 1.4034541526290957e-06,
      "loss": 0.1356,
      "step": 26150
    },
    {
      "epoch": 0.7629091545597759,
      "grad_norm": 0.8351460427530856,
      "learning_rate": 1.4031259733582958e-06,
      "loss": 0.1057,
      "step": 26151
    },
    {
      "epoch": 0.7629383277904195,
      "grad_norm": 0.897135120743724,
      "learning_rate": 1.4027978261995301e-06,
      "loss": 0.1043,
      "step": 26152
    },
    {
      "epoch": 0.762967501021063,
      "grad_norm": 0.9586903416993737,
      "learning_rate": 1.4024697111557251e-06,
      "loss": 0.1102,
      "step": 26153
    },
    {
      "epoch": 0.7629966742517066,
      "grad_norm": 0.7091006457430841,
      "learning_rate": 1.4021416282298133e-06,
      "loss": 0.1033,
      "step": 26154
    },
    {
      "epoch": 0.7630258474823503,
      "grad_norm": 0.8564627396767824,
      "learning_rate": 1.401813577424722e-06,
      "loss": 0.1457,
      "step": 26155
    },
    {
      "epoch": 0.7630550207129938,
      "grad_norm": 0.8665696067842101,
      "learning_rate": 1.401485558743379e-06,
      "loss": 0.1177,
      "step": 26156
    },
    {
      "epoch": 0.7630841939436374,
      "grad_norm": 1.0068283095317692,
      "learning_rate": 1.401157572188714e-06,
      "loss": 0.134,
      "step": 26157
    },
    {
      "epoch": 0.7631133671742809,
      "grad_norm": 0.8907497856235018,
      "learning_rate": 1.4008296177636565e-06,
      "loss": 0.1269,
      "step": 26158
    },
    {
      "epoch": 0.7631425404049245,
      "grad_norm": 0.9406625381336449,
      "learning_rate": 1.4005016954711325e-06,
      "loss": 0.0952,
      "step": 26159
    },
    {
      "epoch": 0.763171713635568,
      "grad_norm": 1.0340193349784765,
      "learning_rate": 1.400173805314069e-06,
      "loss": 0.1187,
      "step": 26160
    },
    {
      "epoch": 0.7632008868662116,
      "grad_norm": 0.8797648399153376,
      "learning_rate": 1.3998459472953956e-06,
      "loss": 0.1209,
      "step": 26161
    },
    {
      "epoch": 0.7632300600968551,
      "grad_norm": 0.8892336228707008,
      "learning_rate": 1.3995181214180386e-06,
      "loss": 0.1182,
      "step": 26162
    },
    {
      "epoch": 0.7632592333274987,
      "grad_norm": 0.7893949456538327,
      "learning_rate": 1.399190327684921e-06,
      "loss": 0.1184,
      "step": 26163
    },
    {
      "epoch": 0.7632884065581422,
      "grad_norm": 0.9609933664140798,
      "learning_rate": 1.3988625660989758e-06,
      "loss": 0.0968,
      "step": 26164
    },
    {
      "epoch": 0.7633175797887858,
      "grad_norm": 0.7529623815427332,
      "learning_rate": 1.3985348366631258e-06,
      "loss": 0.0961,
      "step": 26165
    },
    {
      "epoch": 0.7633467530194293,
      "grad_norm": 0.8554896954035796,
      "learning_rate": 1.3982071393802953e-06,
      "loss": 0.1115,
      "step": 26166
    },
    {
      "epoch": 0.7633759262500729,
      "grad_norm": 0.9777081127658431,
      "learning_rate": 1.3978794742534135e-06,
      "loss": 0.1248,
      "step": 26167
    },
    {
      "epoch": 0.7634050994807164,
      "grad_norm": 0.7910750670891128,
      "learning_rate": 1.3975518412854038e-06,
      "loss": 0.1054,
      "step": 26168
    },
    {
      "epoch": 0.7634342727113601,
      "grad_norm": 0.8372695287966306,
      "learning_rate": 1.3972242404791896e-06,
      "loss": 0.1132,
      "step": 26169
    },
    {
      "epoch": 0.7634634459420037,
      "grad_norm": 0.7779962076886326,
      "learning_rate": 1.3968966718376976e-06,
      "loss": 0.1249,
      "step": 26170
    },
    {
      "epoch": 0.7634926191726472,
      "grad_norm": 0.9212527853625375,
      "learning_rate": 1.3965691353638532e-06,
      "loss": 0.1071,
      "step": 26171
    },
    {
      "epoch": 0.7635217924032908,
      "grad_norm": 0.9292382048809429,
      "learning_rate": 1.3962416310605798e-06,
      "loss": 0.0999,
      "step": 26172
    },
    {
      "epoch": 0.7635509656339343,
      "grad_norm": 1.134281645203754,
      "learning_rate": 1.395914158930799e-06,
      "loss": 0.1005,
      "step": 26173
    },
    {
      "epoch": 0.7635801388645779,
      "grad_norm": 0.9668929216293283,
      "learning_rate": 1.395586718977438e-06,
      "loss": 0.0955,
      "step": 26174
    },
    {
      "epoch": 0.7636093120952214,
      "grad_norm": 0.806384713606756,
      "learning_rate": 1.3952593112034163e-06,
      "loss": 0.1068,
      "step": 26175
    },
    {
      "epoch": 0.763638485325865,
      "grad_norm": 0.729396128674825,
      "learning_rate": 1.3949319356116608e-06,
      "loss": 0.1182,
      "step": 26176
    },
    {
      "epoch": 0.7636676585565085,
      "grad_norm": 1.0656409502481048,
      "learning_rate": 1.3946045922050911e-06,
      "loss": 0.114,
      "step": 26177
    },
    {
      "epoch": 0.7636968317871521,
      "grad_norm": 0.9233551092026401,
      "learning_rate": 1.3942772809866317e-06,
      "loss": 0.1134,
      "step": 26178
    },
    {
      "epoch": 0.7637260050177956,
      "grad_norm": 0.9580018016534266,
      "learning_rate": 1.3939500019592046e-06,
      "loss": 0.1182,
      "step": 26179
    },
    {
      "epoch": 0.7637551782484392,
      "grad_norm": 0.7582018960150472,
      "learning_rate": 1.3936227551257293e-06,
      "loss": 0.1503,
      "step": 26180
    },
    {
      "epoch": 0.7637843514790827,
      "grad_norm": 0.7594002273528503,
      "learning_rate": 1.3932955404891295e-06,
      "loss": 0.0959,
      "step": 26181
    },
    {
      "epoch": 0.7638135247097264,
      "grad_norm": 0.8320128400983632,
      "learning_rate": 1.3929683580523274e-06,
      "loss": 0.1023,
      "step": 26182
    },
    {
      "epoch": 0.76384269794037,
      "grad_norm": 0.8733870138636963,
      "learning_rate": 1.3926412078182411e-06,
      "loss": 0.1258,
      "step": 26183
    },
    {
      "epoch": 0.7638718711710135,
      "grad_norm": 0.9261463317054388,
      "learning_rate": 1.392314089789794e-06,
      "loss": 0.1367,
      "step": 26184
    },
    {
      "epoch": 0.7639010444016571,
      "grad_norm": 0.9961368062548425,
      "learning_rate": 1.3919870039699062e-06,
      "loss": 0.1163,
      "step": 26185
    },
    {
      "epoch": 0.7639302176323006,
      "grad_norm": 0.6961372193675052,
      "learning_rate": 1.3916599503614958e-06,
      "loss": 0.1006,
      "step": 26186
    },
    {
      "epoch": 0.7639593908629442,
      "grad_norm": 0.7459581187138465,
      "learning_rate": 1.391332928967483e-06,
      "loss": 0.114,
      "step": 26187
    },
    {
      "epoch": 0.7639885640935877,
      "grad_norm": 0.9477156298218982,
      "learning_rate": 1.391005939790791e-06,
      "loss": 0.1203,
      "step": 26188
    },
    {
      "epoch": 0.7640177373242313,
      "grad_norm": 1.0762321539839521,
      "learning_rate": 1.3906789828343358e-06,
      "loss": 0.1258,
      "step": 26189
    },
    {
      "epoch": 0.7640469105548748,
      "grad_norm": 0.9400921453796561,
      "learning_rate": 1.3903520581010354e-06,
      "loss": 0.128,
      "step": 26190
    },
    {
      "epoch": 0.7640760837855184,
      "grad_norm": 0.9850343275209514,
      "learning_rate": 1.3900251655938118e-06,
      "loss": 0.1159,
      "step": 26191
    },
    {
      "epoch": 0.7641052570161619,
      "grad_norm": 0.776003117127375,
      "learning_rate": 1.3896983053155821e-06,
      "loss": 0.108,
      "step": 26192
    },
    {
      "epoch": 0.7641344302468055,
      "grad_norm": 0.8159114840533124,
      "learning_rate": 1.3893714772692607e-06,
      "loss": 0.0962,
      "step": 26193
    },
    {
      "epoch": 0.764163603477449,
      "grad_norm": 0.9462367818043006,
      "learning_rate": 1.389044681457772e-06,
      "loss": 0.1176,
      "step": 26194
    },
    {
      "epoch": 0.7641927767080926,
      "grad_norm": 1.0006720493583758,
      "learning_rate": 1.3887179178840305e-06,
      "loss": 0.1124,
      "step": 26195
    },
    {
      "epoch": 0.7642219499387363,
      "grad_norm": 0.8008535688160848,
      "learning_rate": 1.3883911865509514e-06,
      "loss": 0.1061,
      "step": 26196
    },
    {
      "epoch": 0.7642511231693798,
      "grad_norm": 0.7653365849573978,
      "learning_rate": 1.3880644874614552e-06,
      "loss": 0.1058,
      "step": 26197
    },
    {
      "epoch": 0.7642802964000234,
      "grad_norm": 1.0773741125237393,
      "learning_rate": 1.3877378206184571e-06,
      "loss": 0.1298,
      "step": 26198
    },
    {
      "epoch": 0.7643094696306669,
      "grad_norm": 0.9364940124081743,
      "learning_rate": 1.3874111860248722e-06,
      "loss": 0.1141,
      "step": 26199
    },
    {
      "epoch": 0.7643386428613105,
      "grad_norm": 0.9301490879668555,
      "learning_rate": 1.3870845836836177e-06,
      "loss": 0.1126,
      "step": 26200
    },
    {
      "epoch": 0.764367816091954,
      "grad_norm": 0.8093211906790393,
      "learning_rate": 1.386758013597611e-06,
      "loss": 0.1168,
      "step": 26201
    },
    {
      "epoch": 0.7643969893225976,
      "grad_norm": 0.8698017267897885,
      "learning_rate": 1.386431475769766e-06,
      "loss": 0.1174,
      "step": 26202
    },
    {
      "epoch": 0.7644261625532411,
      "grad_norm": 0.953222352438878,
      "learning_rate": 1.3861049702029971e-06,
      "loss": 0.1231,
      "step": 26203
    },
    {
      "epoch": 0.7644553357838847,
      "grad_norm": 0.8810582395735321,
      "learning_rate": 1.3857784969002214e-06,
      "loss": 0.1207,
      "step": 26204
    },
    {
      "epoch": 0.7644845090145282,
      "grad_norm": 0.7950840377710335,
      "learning_rate": 1.3854520558643513e-06,
      "loss": 0.1412,
      "step": 26205
    },
    {
      "epoch": 0.7645136822451718,
      "grad_norm": 0.8763916156368131,
      "learning_rate": 1.3851256470983037e-06,
      "loss": 0.1035,
      "step": 26206
    },
    {
      "epoch": 0.7645428554758154,
      "grad_norm": 0.8761734058534855,
      "learning_rate": 1.38479927060499e-06,
      "loss": 0.127,
      "step": 26207
    },
    {
      "epoch": 0.7645720287064589,
      "grad_norm": 1.0127740547800734,
      "learning_rate": 1.3844729263873269e-06,
      "loss": 0.1309,
      "step": 26208
    },
    {
      "epoch": 0.7646012019371025,
      "grad_norm": 0.8732384759525622,
      "learning_rate": 1.3841466144482262e-06,
      "loss": 0.1312,
      "step": 26209
    },
    {
      "epoch": 0.7646303751677461,
      "grad_norm": 0.9122354532164116,
      "learning_rate": 1.3838203347905999e-06,
      "loss": 0.1104,
      "step": 26210
    },
    {
      "epoch": 0.7646595483983897,
      "grad_norm": 0.8134407679285944,
      "learning_rate": 1.3834940874173624e-06,
      "loss": 0.1152,
      "step": 26211
    },
    {
      "epoch": 0.7646887216290332,
      "grad_norm": 0.8169273397185864,
      "learning_rate": 1.383167872331428e-06,
      "loss": 0.1396,
      "step": 26212
    },
    {
      "epoch": 0.7647178948596768,
      "grad_norm": 0.9313472989800363,
      "learning_rate": 1.382841689535706e-06,
      "loss": 0.1084,
      "step": 26213
    },
    {
      "epoch": 0.7647470680903203,
      "grad_norm": 0.8093829327740456,
      "learning_rate": 1.3825155390331114e-06,
      "loss": 0.1313,
      "step": 26214
    },
    {
      "epoch": 0.7647762413209639,
      "grad_norm": 0.9013230684690757,
      "learning_rate": 1.382189420826554e-06,
      "loss": 0.1357,
      "step": 26215
    },
    {
      "epoch": 0.7648054145516074,
      "grad_norm": 0.7668506858228894,
      "learning_rate": 1.3818633349189448e-06,
      "loss": 0.1242,
      "step": 26216
    },
    {
      "epoch": 0.764834587782251,
      "grad_norm": 0.926569989866445,
      "learning_rate": 1.381537281313196e-06,
      "loss": 0.1132,
      "step": 26217
    },
    {
      "epoch": 0.7648637610128945,
      "grad_norm": 0.6817679841498191,
      "learning_rate": 1.3812112600122201e-06,
      "loss": 0.0901,
      "step": 26218
    },
    {
      "epoch": 0.7648929342435381,
      "grad_norm": 0.8774705720123143,
      "learning_rate": 1.3808852710189263e-06,
      "loss": 0.1268,
      "step": 26219
    },
    {
      "epoch": 0.7649221074741817,
      "grad_norm": 0.853094144607071,
      "learning_rate": 1.3805593143362227e-06,
      "loss": 0.1262,
      "step": 26220
    },
    {
      "epoch": 0.7649512807048252,
      "grad_norm": 0.8010893325699411,
      "learning_rate": 1.3802333899670239e-06,
      "loss": 0.1089,
      "step": 26221
    },
    {
      "epoch": 0.7649804539354688,
      "grad_norm": 0.8227006921687338,
      "learning_rate": 1.3799074979142369e-06,
      "loss": 0.1336,
      "step": 26222
    },
    {
      "epoch": 0.7650096271661124,
      "grad_norm": 0.7544040557332929,
      "learning_rate": 1.379581638180768e-06,
      "loss": 0.1174,
      "step": 26223
    },
    {
      "epoch": 0.765038800396756,
      "grad_norm": 0.8637354983940975,
      "learning_rate": 1.3792558107695335e-06,
      "loss": 0.1453,
      "step": 26224
    },
    {
      "epoch": 0.7650679736273995,
      "grad_norm": 1.0777165785817178,
      "learning_rate": 1.3789300156834389e-06,
      "loss": 0.1152,
      "step": 26225
    },
    {
      "epoch": 0.7650971468580431,
      "grad_norm": 0.7879738396809659,
      "learning_rate": 1.3786042529253913e-06,
      "loss": 0.1084,
      "step": 26226
    },
    {
      "epoch": 0.7651263200886866,
      "grad_norm": 0.7016310321239007,
      "learning_rate": 1.3782785224983015e-06,
      "loss": 0.0875,
      "step": 26227
    },
    {
      "epoch": 0.7651554933193302,
      "grad_norm": 1.2203319913017596,
      "learning_rate": 1.3779528244050765e-06,
      "loss": 0.1087,
      "step": 26228
    },
    {
      "epoch": 0.7651846665499737,
      "grad_norm": 0.7662474942040508,
      "learning_rate": 1.3776271586486229e-06,
      "loss": 0.1388,
      "step": 26229
    },
    {
      "epoch": 0.7652138397806173,
      "grad_norm": 0.8063930269842505,
      "learning_rate": 1.3773015252318489e-06,
      "loss": 0.1176,
      "step": 26230
    },
    {
      "epoch": 0.7652430130112609,
      "grad_norm": 0.8660898453060176,
      "learning_rate": 1.3769759241576642e-06,
      "loss": 0.1126,
      "step": 26231
    },
    {
      "epoch": 0.7652721862419044,
      "grad_norm": 1.010878535140388,
      "learning_rate": 1.376650355428973e-06,
      "loss": 0.0982,
      "step": 26232
    },
    {
      "epoch": 0.765301359472548,
      "grad_norm": 0.7806800752071383,
      "learning_rate": 1.376324819048681e-06,
      "loss": 0.1208,
      "step": 26233
    },
    {
      "epoch": 0.7653305327031915,
      "grad_norm": 0.938268846177341,
      "learning_rate": 1.3759993150196975e-06,
      "loss": 0.1226,
      "step": 26234
    },
    {
      "epoch": 0.7653597059338351,
      "grad_norm": 0.9861122910139926,
      "learning_rate": 1.3756738433449257e-06,
      "loss": 0.125,
      "step": 26235
    },
    {
      "epoch": 0.7653888791644786,
      "grad_norm": 0.9386610683992159,
      "learning_rate": 1.375348404027274e-06,
      "loss": 0.1236,
      "step": 26236
    },
    {
      "epoch": 0.7654180523951223,
      "grad_norm": 0.8633672654574206,
      "learning_rate": 1.375022997069645e-06,
      "loss": 0.1255,
      "step": 26237
    },
    {
      "epoch": 0.7654472256257658,
      "grad_norm": 0.8935751608128624,
      "learning_rate": 1.374697622474947e-06,
      "loss": 0.1252,
      "step": 26238
    },
    {
      "epoch": 0.7654763988564094,
      "grad_norm": 0.8291465751734923,
      "learning_rate": 1.374372280246083e-06,
      "loss": 0.1198,
      "step": 26239
    },
    {
      "epoch": 0.7655055720870529,
      "grad_norm": 0.6881511252850518,
      "learning_rate": 1.374046970385956e-06,
      "loss": 0.0963,
      "step": 26240
    },
    {
      "epoch": 0.7655347453176965,
      "grad_norm": 0.7701647522818884,
      "learning_rate": 1.3737216928974723e-06,
      "loss": 0.1095,
      "step": 26241
    },
    {
      "epoch": 0.76556391854834,
      "grad_norm": 1.071376330529272,
      "learning_rate": 1.373396447783537e-06,
      "loss": 0.1439,
      "step": 26242
    },
    {
      "epoch": 0.7655930917789836,
      "grad_norm": 1.0701209096809126,
      "learning_rate": 1.3730712350470516e-06,
      "loss": 0.1083,
      "step": 26243
    },
    {
      "epoch": 0.7656222650096272,
      "grad_norm": 0.8551621899125321,
      "learning_rate": 1.372746054690921e-06,
      "loss": 0.1212,
      "step": 26244
    },
    {
      "epoch": 0.7656514382402707,
      "grad_norm": 1.0081329271816202,
      "learning_rate": 1.3724209067180483e-06,
      "loss": 0.1297,
      "step": 26245
    },
    {
      "epoch": 0.7656806114709143,
      "grad_norm": 0.9135838539518528,
      "learning_rate": 1.3720957911313342e-06,
      "loss": 0.1246,
      "step": 26246
    },
    {
      "epoch": 0.7657097847015578,
      "grad_norm": 0.7551891413814291,
      "learning_rate": 1.3717707079336828e-06,
      "loss": 0.1087,
      "step": 26247
    },
    {
      "epoch": 0.7657389579322014,
      "grad_norm": 0.8999379916351353,
      "learning_rate": 1.3714456571279984e-06,
      "loss": 0.1187,
      "step": 26248
    },
    {
      "epoch": 0.7657681311628449,
      "grad_norm": 1.0576561005476992,
      "learning_rate": 1.3711206387171798e-06,
      "loss": 0.1294,
      "step": 26249
    },
    {
      "epoch": 0.7657973043934886,
      "grad_norm": 0.9553902479960511,
      "learning_rate": 1.3707956527041294e-06,
      "loss": 0.1214,
      "step": 26250
    },
    {
      "epoch": 0.7658264776241321,
      "grad_norm": 0.9196799012445692,
      "learning_rate": 1.37047069909175e-06,
      "loss": 0.1445,
      "step": 26251
    },
    {
      "epoch": 0.7658556508547757,
      "grad_norm": 0.7874001995227332,
      "learning_rate": 1.3701457778829418e-06,
      "loss": 0.1058,
      "step": 26252
    },
    {
      "epoch": 0.7658848240854192,
      "grad_norm": 1.0459834083947193,
      "learning_rate": 1.369820889080603e-06,
      "loss": 0.1093,
      "step": 26253
    },
    {
      "epoch": 0.7659139973160628,
      "grad_norm": 0.938649616690978,
      "learning_rate": 1.3694960326876393e-06,
      "loss": 0.1384,
      "step": 26254
    },
    {
      "epoch": 0.7659431705467064,
      "grad_norm": 0.6865196226329512,
      "learning_rate": 1.3691712087069486e-06,
      "loss": 0.1058,
      "step": 26255
    },
    {
      "epoch": 0.7659723437773499,
      "grad_norm": 0.9685957610013003,
      "learning_rate": 1.368846417141429e-06,
      "loss": 0.1487,
      "step": 26256
    },
    {
      "epoch": 0.7660015170079935,
      "grad_norm": 0.7855783301905297,
      "learning_rate": 1.368521657993983e-06,
      "loss": 0.1156,
      "step": 26257
    },
    {
      "epoch": 0.766030690238637,
      "grad_norm": 0.9477634181498675,
      "learning_rate": 1.3681969312675092e-06,
      "loss": 0.1107,
      "step": 26258
    },
    {
      "epoch": 0.7660598634692806,
      "grad_norm": 1.1397961070466422,
      "learning_rate": 1.3678722369649045e-06,
      "loss": 0.1233,
      "step": 26259
    },
    {
      "epoch": 0.7660890366999241,
      "grad_norm": 0.8802910992411307,
      "learning_rate": 1.3675475750890693e-06,
      "loss": 0.1173,
      "step": 26260
    },
    {
      "epoch": 0.7661182099305677,
      "grad_norm": 0.6852959092629128,
      "learning_rate": 1.3672229456429036e-06,
      "loss": 0.1033,
      "step": 26261
    },
    {
      "epoch": 0.7661473831612112,
      "grad_norm": 0.8839510340431758,
      "learning_rate": 1.3668983486293047e-06,
      "loss": 0.0959,
      "step": 26262
    },
    {
      "epoch": 0.7661765563918548,
      "grad_norm": 0.8572553122440686,
      "learning_rate": 1.3665737840511684e-06,
      "loss": 0.1088,
      "step": 26263
    },
    {
      "epoch": 0.7662057296224984,
      "grad_norm": 0.7137228297578125,
      "learning_rate": 1.3662492519113951e-06,
      "loss": 0.1143,
      "step": 26264
    },
    {
      "epoch": 0.766234902853142,
      "grad_norm": 0.8696405135233204,
      "learning_rate": 1.3659247522128798e-06,
      "loss": 0.1464,
      "step": 26265
    },
    {
      "epoch": 0.7662640760837856,
      "grad_norm": 0.7868935310991707,
      "learning_rate": 1.365600284958522e-06,
      "loss": 0.1097,
      "step": 26266
    },
    {
      "epoch": 0.7662932493144291,
      "grad_norm": 0.8974860316812074,
      "learning_rate": 1.3652758501512165e-06,
      "loss": 0.1058,
      "step": 26267
    },
    {
      "epoch": 0.7663224225450727,
      "grad_norm": 0.799519184422163,
      "learning_rate": 1.3649514477938613e-06,
      "loss": 0.1056,
      "step": 26268
    },
    {
      "epoch": 0.7663515957757162,
      "grad_norm": 0.8261566520424768,
      "learning_rate": 1.3646270778893523e-06,
      "loss": 0.1151,
      "step": 26269
    },
    {
      "epoch": 0.7663807690063598,
      "grad_norm": 0.8451196113916615,
      "learning_rate": 1.364302740440583e-06,
      "loss": 0.1019,
      "step": 26270
    },
    {
      "epoch": 0.7664099422370033,
      "grad_norm": 0.727806267640842,
      "learning_rate": 1.3639784354504509e-06,
      "loss": 0.0953,
      "step": 26271
    },
    {
      "epoch": 0.7664391154676469,
      "grad_norm": 0.8105129847942211,
      "learning_rate": 1.3636541629218536e-06,
      "loss": 0.136,
      "step": 26272
    },
    {
      "epoch": 0.7664682886982904,
      "grad_norm": 0.7534847321433793,
      "learning_rate": 1.363329922857682e-06,
      "loss": 0.0933,
      "step": 26273
    },
    {
      "epoch": 0.766497461928934,
      "grad_norm": 0.78367161884003,
      "learning_rate": 1.3630057152608334e-06,
      "loss": 0.1095,
      "step": 26274
    },
    {
      "epoch": 0.7665266351595775,
      "grad_norm": 0.6653689714496968,
      "learning_rate": 1.3626815401342025e-06,
      "loss": 0.0926,
      "step": 26275
    },
    {
      "epoch": 0.7665558083902211,
      "grad_norm": 0.7206363186672688,
      "learning_rate": 1.3623573974806808e-06,
      "loss": 0.1182,
      "step": 26276
    },
    {
      "epoch": 0.7665849816208647,
      "grad_norm": 0.8402632051718905,
      "learning_rate": 1.3620332873031639e-06,
      "loss": 0.144,
      "step": 26277
    },
    {
      "epoch": 0.7666141548515083,
      "grad_norm": 0.9351239698532204,
      "learning_rate": 1.3617092096045466e-06,
      "loss": 0.1109,
      "step": 26278
    },
    {
      "epoch": 0.7666433280821519,
      "grad_norm": 0.7641055166835903,
      "learning_rate": 1.3613851643877206e-06,
      "loss": 0.12,
      "step": 26279
    },
    {
      "epoch": 0.7666725013127954,
      "grad_norm": 0.8797418194600454,
      "learning_rate": 1.361061151655579e-06,
      "loss": 0.1249,
      "step": 26280
    },
    {
      "epoch": 0.766701674543439,
      "grad_norm": 0.7831566990130667,
      "learning_rate": 1.3607371714110151e-06,
      "loss": 0.1423,
      "step": 26281
    },
    {
      "epoch": 0.7667308477740825,
      "grad_norm": 0.9812283302496945,
      "learning_rate": 1.3604132236569212e-06,
      "loss": 0.1243,
      "step": 26282
    },
    {
      "epoch": 0.7667600210047261,
      "grad_norm": 0.7172249047175969,
      "learning_rate": 1.3600893083961864e-06,
      "loss": 0.1083,
      "step": 26283
    },
    {
      "epoch": 0.7667891942353696,
      "grad_norm": 0.8203349237324321,
      "learning_rate": 1.3597654256317084e-06,
      "loss": 0.0939,
      "step": 26284
    },
    {
      "epoch": 0.7668183674660132,
      "grad_norm": 0.9417641017967823,
      "learning_rate": 1.3594415753663754e-06,
      "loss": 0.1094,
      "step": 26285
    },
    {
      "epoch": 0.7668475406966567,
      "grad_norm": 0.8256592426687523,
      "learning_rate": 1.359117757603078e-06,
      "loss": 0.1242,
      "step": 26286
    },
    {
      "epoch": 0.7668767139273003,
      "grad_norm": 0.9015058823509582,
      "learning_rate": 1.3587939723447091e-06,
      "loss": 0.1245,
      "step": 26287
    },
    {
      "epoch": 0.7669058871579438,
      "grad_norm": 1.2808031531899104,
      "learning_rate": 1.3584702195941585e-06,
      "loss": 0.12,
      "step": 26288
    },
    {
      "epoch": 0.7669350603885874,
      "grad_norm": 0.8742758191790518,
      "learning_rate": 1.3581464993543147e-06,
      "loss": 0.102,
      "step": 26289
    },
    {
      "epoch": 0.7669642336192309,
      "grad_norm": 0.7941010130215583,
      "learning_rate": 1.35782281162807e-06,
      "loss": 0.1385,
      "step": 26290
    },
    {
      "epoch": 0.7669934068498746,
      "grad_norm": 0.750154611086974,
      "learning_rate": 1.3574991564183155e-06,
      "loss": 0.1038,
      "step": 26291
    },
    {
      "epoch": 0.7670225800805182,
      "grad_norm": 0.998807815362782,
      "learning_rate": 1.3571755337279386e-06,
      "loss": 0.1118,
      "step": 26292
    },
    {
      "epoch": 0.7670517533111617,
      "grad_norm": 1.2365964096570772,
      "learning_rate": 1.356851943559827e-06,
      "loss": 0.1038,
      "step": 26293
    },
    {
      "epoch": 0.7670809265418053,
      "grad_norm": 1.3332685752500593,
      "learning_rate": 1.3565283859168738e-06,
      "loss": 0.1268,
      "step": 26294
    },
    {
      "epoch": 0.7671100997724488,
      "grad_norm": 0.9024981148315767,
      "learning_rate": 1.3562048608019635e-06,
      "loss": 0.1321,
      "step": 26295
    },
    {
      "epoch": 0.7671392730030924,
      "grad_norm": 0.9515302823841885,
      "learning_rate": 1.3558813682179884e-06,
      "loss": 0.1136,
      "step": 26296
    },
    {
      "epoch": 0.7671684462337359,
      "grad_norm": 0.6359741660583533,
      "learning_rate": 1.3555579081678321e-06,
      "loss": 0.1029,
      "step": 26297
    },
    {
      "epoch": 0.7671976194643795,
      "grad_norm": 0.6716431148909698,
      "learning_rate": 1.355234480654387e-06,
      "loss": 0.0962,
      "step": 26298
    },
    {
      "epoch": 0.767226792695023,
      "grad_norm": 1.0057342710694277,
      "learning_rate": 1.354911085680538e-06,
      "loss": 0.1119,
      "step": 26299
    },
    {
      "epoch": 0.7672559659256666,
      "grad_norm": 1.0221882901163448,
      "learning_rate": 1.3545877232491716e-06,
      "loss": 0.1082,
      "step": 26300
    },
    {
      "epoch": 0.7672851391563101,
      "grad_norm": 0.9537886818232255,
      "learning_rate": 1.3542643933631755e-06,
      "loss": 0.1294,
      "step": 26301
    },
    {
      "epoch": 0.7673143123869537,
      "grad_norm": 1.3430581809932332,
      "learning_rate": 1.3539410960254384e-06,
      "loss": 0.1269,
      "step": 26302
    },
    {
      "epoch": 0.7673434856175972,
      "grad_norm": 0.8514420956156064,
      "learning_rate": 1.3536178312388432e-06,
      "loss": 0.1085,
      "step": 26303
    },
    {
      "epoch": 0.7673726588482409,
      "grad_norm": 0.7326071125051908,
      "learning_rate": 1.3532945990062784e-06,
      "loss": 0.1242,
      "step": 26304
    },
    {
      "epoch": 0.7674018320788845,
      "grad_norm": 0.8487698632811732,
      "learning_rate": 1.35297139933063e-06,
      "loss": 0.1427,
      "step": 26305
    },
    {
      "epoch": 0.767431005309528,
      "grad_norm": 1.1671383181527097,
      "learning_rate": 1.3526482322147798e-06,
      "loss": 0.1057,
      "step": 26306
    },
    {
      "epoch": 0.7674601785401716,
      "grad_norm": 0.7276062023581822,
      "learning_rate": 1.352325097661616e-06,
      "loss": 0.1143,
      "step": 26307
    },
    {
      "epoch": 0.7674893517708151,
      "grad_norm": 0.8089488802652228,
      "learning_rate": 1.3520019956740244e-06,
      "loss": 0.1058,
      "step": 26308
    },
    {
      "epoch": 0.7675185250014587,
      "grad_norm": 0.9404382782265481,
      "learning_rate": 1.351678926254888e-06,
      "loss": 0.1184,
      "step": 26309
    },
    {
      "epoch": 0.7675476982321022,
      "grad_norm": 0.7773568824293435,
      "learning_rate": 1.35135588940709e-06,
      "loss": 0.0923,
      "step": 26310
    },
    {
      "epoch": 0.7675768714627458,
      "grad_norm": 1.1283489551534065,
      "learning_rate": 1.3510328851335164e-06,
      "loss": 0.1063,
      "step": 26311
    },
    {
      "epoch": 0.7676060446933893,
      "grad_norm": 0.9811542149546232,
      "learning_rate": 1.3507099134370494e-06,
      "loss": 0.1107,
      "step": 26312
    },
    {
      "epoch": 0.7676352179240329,
      "grad_norm": 1.0223309748746012,
      "learning_rate": 1.3503869743205727e-06,
      "loss": 0.1099,
      "step": 26313
    },
    {
      "epoch": 0.7676643911546764,
      "grad_norm": 0.7347629024668387,
      "learning_rate": 1.3500640677869713e-06,
      "loss": 0.0999,
      "step": 26314
    },
    {
      "epoch": 0.76769356438532,
      "grad_norm": 0.7341897061573558,
      "learning_rate": 1.3497411938391276e-06,
      "loss": 0.1092,
      "step": 26315
    },
    {
      "epoch": 0.7677227376159635,
      "grad_norm": 0.851833901783838,
      "learning_rate": 1.3494183524799204e-06,
      "loss": 0.1246,
      "step": 26316
    },
    {
      "epoch": 0.7677519108466071,
      "grad_norm": 0.8753818742823339,
      "learning_rate": 1.3490955437122367e-06,
      "loss": 0.1059,
      "step": 26317
    },
    {
      "epoch": 0.7677810840772508,
      "grad_norm": 0.8341477962960666,
      "learning_rate": 1.348772767538955e-06,
      "loss": 0.1044,
      "step": 26318
    },
    {
      "epoch": 0.7678102573078943,
      "grad_norm": 0.7149525553203409,
      "learning_rate": 1.34845002396296e-06,
      "loss": 0.111,
      "step": 26319
    },
    {
      "epoch": 0.7678394305385379,
      "grad_norm": 0.9113556991700084,
      "learning_rate": 1.3481273129871297e-06,
      "loss": 0.1126,
      "step": 26320
    },
    {
      "epoch": 0.7678686037691814,
      "grad_norm": 0.7604835147354264,
      "learning_rate": 1.3478046346143487e-06,
      "loss": 0.1024,
      "step": 26321
    },
    {
      "epoch": 0.767897776999825,
      "grad_norm": 0.7818953082305324,
      "learning_rate": 1.3474819888474955e-06,
      "loss": 0.1329,
      "step": 26322
    },
    {
      "epoch": 0.7679269502304685,
      "grad_norm": 0.7055300546011628,
      "learning_rate": 1.3471593756894502e-06,
      "loss": 0.1349,
      "step": 26323
    },
    {
      "epoch": 0.7679561234611121,
      "grad_norm": 0.801859806573647,
      "learning_rate": 1.3468367951430939e-06,
      "loss": 0.1202,
      "step": 26324
    },
    {
      "epoch": 0.7679852966917556,
      "grad_norm": 0.8591917215223992,
      "learning_rate": 1.3465142472113085e-06,
      "loss": 0.1149,
      "step": 26325
    },
    {
      "epoch": 0.7680144699223992,
      "grad_norm": 0.7905620443673668,
      "learning_rate": 1.3461917318969714e-06,
      "loss": 0.1284,
      "step": 26326
    },
    {
      "epoch": 0.7680436431530427,
      "grad_norm": 0.9012770736109739,
      "learning_rate": 1.3458692492029608e-06,
      "loss": 0.1161,
      "step": 26327
    },
    {
      "epoch": 0.7680728163836863,
      "grad_norm": 0.7719845505449965,
      "learning_rate": 1.3455467991321586e-06,
      "loss": 0.093,
      "step": 26328
    },
    {
      "epoch": 0.7681019896143298,
      "grad_norm": 0.8562658122996444,
      "learning_rate": 1.3452243816874423e-06,
      "loss": 0.1207,
      "step": 26329
    },
    {
      "epoch": 0.7681311628449734,
      "grad_norm": 0.6791845106246308,
      "learning_rate": 1.344901996871687e-06,
      "loss": 0.1085,
      "step": 26330
    },
    {
      "epoch": 0.7681603360756171,
      "grad_norm": 0.8296558353589102,
      "learning_rate": 1.3445796446877773e-06,
      "loss": 0.1092,
      "step": 26331
    },
    {
      "epoch": 0.7681895093062606,
      "grad_norm": 1.1079128073038012,
      "learning_rate": 1.3442573251385882e-06,
      "loss": 0.1275,
      "step": 26332
    },
    {
      "epoch": 0.7682186825369042,
      "grad_norm": 0.7430678761466323,
      "learning_rate": 1.343935038226995e-06,
      "loss": 0.1136,
      "step": 26333
    },
    {
      "epoch": 0.7682478557675477,
      "grad_norm": 0.8437397346011964,
      "learning_rate": 1.3436127839558788e-06,
      "loss": 0.1432,
      "step": 26334
    },
    {
      "epoch": 0.7682770289981913,
      "grad_norm": 0.8250038742054424,
      "learning_rate": 1.3432905623281151e-06,
      "loss": 0.121,
      "step": 26335
    },
    {
      "epoch": 0.7683062022288348,
      "grad_norm": 0.8556139117128854,
      "learning_rate": 1.3429683733465782e-06,
      "loss": 0.1114,
      "step": 26336
    },
    {
      "epoch": 0.7683353754594784,
      "grad_norm": 0.8087185715853028,
      "learning_rate": 1.3426462170141475e-06,
      "loss": 0.1042,
      "step": 26337
    },
    {
      "epoch": 0.7683645486901219,
      "grad_norm": 0.7425615248636965,
      "learning_rate": 1.3423240933336989e-06,
      "loss": 0.1096,
      "step": 26338
    },
    {
      "epoch": 0.7683937219207655,
      "grad_norm": 0.9583128033071,
      "learning_rate": 1.3420020023081081e-06,
      "loss": 0.1001,
      "step": 26339
    },
    {
      "epoch": 0.768422895151409,
      "grad_norm": 0.8593452010260961,
      "learning_rate": 1.3416799439402483e-06,
      "loss": 0.1213,
      "step": 26340
    },
    {
      "epoch": 0.7684520683820526,
      "grad_norm": 0.8879522014577076,
      "learning_rate": 1.3413579182329989e-06,
      "loss": 0.1257,
      "step": 26341
    },
    {
      "epoch": 0.7684812416126962,
      "grad_norm": 0.7333457531120627,
      "learning_rate": 1.3410359251892307e-06,
      "loss": 0.1172,
      "step": 26342
    },
    {
      "epoch": 0.7685104148433397,
      "grad_norm": 1.0015957078579298,
      "learning_rate": 1.34071396481182e-06,
      "loss": 0.1241,
      "step": 26343
    },
    {
      "epoch": 0.7685395880739833,
      "grad_norm": 0.8076648583421359,
      "learning_rate": 1.3403920371036433e-06,
      "loss": 0.1337,
      "step": 26344
    },
    {
      "epoch": 0.7685687613046269,
      "grad_norm": 0.9656849619903707,
      "learning_rate": 1.3400701420675727e-06,
      "loss": 0.1345,
      "step": 26345
    },
    {
      "epoch": 0.7685979345352705,
      "grad_norm": 0.8535953469313441,
      "learning_rate": 1.339748279706481e-06,
      "loss": 0.1108,
      "step": 26346
    },
    {
      "epoch": 0.768627107765914,
      "grad_norm": 0.7917522328083967,
      "learning_rate": 1.339426450023244e-06,
      "loss": 0.1257,
      "step": 26347
    },
    {
      "epoch": 0.7686562809965576,
      "grad_norm": 0.6787829623433255,
      "learning_rate": 1.3391046530207325e-06,
      "loss": 0.1197,
      "step": 26348
    },
    {
      "epoch": 0.7686854542272011,
      "grad_norm": 0.8878994011466506,
      "learning_rate": 1.3387828887018222e-06,
      "loss": 0.0988,
      "step": 26349
    },
    {
      "epoch": 0.7687146274578447,
      "grad_norm": 0.9161149240701941,
      "learning_rate": 1.3384611570693828e-06,
      "loss": 0.1173,
      "step": 26350
    },
    {
      "epoch": 0.7687438006884882,
      "grad_norm": 0.7338299930776192,
      "learning_rate": 1.3381394581262896e-06,
      "loss": 0.1169,
      "step": 26351
    },
    {
      "epoch": 0.7687729739191318,
      "grad_norm": 0.8177897018433256,
      "learning_rate": 1.3378177918754132e-06,
      "loss": 0.1167,
      "step": 26352
    },
    {
      "epoch": 0.7688021471497754,
      "grad_norm": 0.7839406401691176,
      "learning_rate": 1.3374961583196238e-06,
      "loss": 0.0966,
      "step": 26353
    },
    {
      "epoch": 0.7688313203804189,
      "grad_norm": 0.790638859628819,
      "learning_rate": 1.3371745574617945e-06,
      "loss": 0.1188,
      "step": 26354
    },
    {
      "epoch": 0.7688604936110625,
      "grad_norm": 0.9705534971863546,
      "learning_rate": 1.3368529893047977e-06,
      "loss": 0.1167,
      "step": 26355
    },
    {
      "epoch": 0.768889666841706,
      "grad_norm": 1.0390544757812006,
      "learning_rate": 1.3365314538515028e-06,
      "loss": 0.0999,
      "step": 26356
    },
    {
      "epoch": 0.7689188400723496,
      "grad_norm": 0.945480891191583,
      "learning_rate": 1.3362099511047793e-06,
      "loss": 0.1177,
      "step": 26357
    },
    {
      "epoch": 0.7689480133029932,
      "grad_norm": 0.8838098435908222,
      "learning_rate": 1.3358884810675005e-06,
      "loss": 0.1454,
      "step": 26358
    },
    {
      "epoch": 0.7689771865336368,
      "grad_norm": 0.851736956955131,
      "learning_rate": 1.3355670437425344e-06,
      "loss": 0.1254,
      "step": 26359
    },
    {
      "epoch": 0.7690063597642803,
      "grad_norm": 0.7620084892561015,
      "learning_rate": 1.3352456391327479e-06,
      "loss": 0.0926,
      "step": 26360
    },
    {
      "epoch": 0.7690355329949239,
      "grad_norm": 0.7596378487858548,
      "learning_rate": 1.3349242672410162e-06,
      "loss": 0.1037,
      "step": 26361
    },
    {
      "epoch": 0.7690647062255674,
      "grad_norm": 0.7407636176932145,
      "learning_rate": 1.334602928070206e-06,
      "loss": 0.1191,
      "step": 26362
    },
    {
      "epoch": 0.769093879456211,
      "grad_norm": 0.8387156139437207,
      "learning_rate": 1.3342816216231846e-06,
      "loss": 0.1341,
      "step": 26363
    },
    {
      "epoch": 0.7691230526868545,
      "grad_norm": 0.9424023168724408,
      "learning_rate": 1.3339603479028229e-06,
      "loss": 0.0995,
      "step": 26364
    },
    {
      "epoch": 0.7691522259174981,
      "grad_norm": 0.8413175728940638,
      "learning_rate": 1.333639106911988e-06,
      "loss": 0.1123,
      "step": 26365
    },
    {
      "epoch": 0.7691813991481417,
      "grad_norm": 0.7428676524295936,
      "learning_rate": 1.3333178986535466e-06,
      "loss": 0.114,
      "step": 26366
    },
    {
      "epoch": 0.7692105723787852,
      "grad_norm": 0.8633340480866784,
      "learning_rate": 1.3329967231303682e-06,
      "loss": 0.101,
      "step": 26367
    },
    {
      "epoch": 0.7692397456094288,
      "grad_norm": 1.2447757965667121,
      "learning_rate": 1.3326755803453206e-06,
      "loss": 0.1074,
      "step": 26368
    },
    {
      "epoch": 0.7692689188400723,
      "grad_norm": 1.0479824407084282,
      "learning_rate": 1.3323544703012697e-06,
      "loss": 0.1117,
      "step": 26369
    },
    {
      "epoch": 0.7692980920707159,
      "grad_norm": 0.7147793449500534,
      "learning_rate": 1.3320333930010815e-06,
      "loss": 0.1121,
      "step": 26370
    },
    {
      "epoch": 0.7693272653013594,
      "grad_norm": 0.7578252238900758,
      "learning_rate": 1.3317123484476251e-06,
      "loss": 0.1103,
      "step": 26371
    },
    {
      "epoch": 0.7693564385320031,
      "grad_norm": 0.83767739554361,
      "learning_rate": 1.3313913366437637e-06,
      "loss": 0.1074,
      "step": 26372
    },
    {
      "epoch": 0.7693856117626466,
      "grad_norm": 0.9595939537888586,
      "learning_rate": 1.331070357592364e-06,
      "loss": 0.1188,
      "step": 26373
    },
    {
      "epoch": 0.7694147849932902,
      "grad_norm": 0.9503184644901421,
      "learning_rate": 1.3307494112962943e-06,
      "loss": 0.1145,
      "step": 26374
    },
    {
      "epoch": 0.7694439582239337,
      "grad_norm": 0.6936057562434776,
      "learning_rate": 1.3304284977584182e-06,
      "loss": 0.0997,
      "step": 26375
    },
    {
      "epoch": 0.7694731314545773,
      "grad_norm": 0.7660194227454243,
      "learning_rate": 1.3301076169815986e-06,
      "loss": 0.1052,
      "step": 26376
    },
    {
      "epoch": 0.7695023046852209,
      "grad_norm": 0.860131712849057,
      "learning_rate": 1.3297867689687038e-06,
      "loss": 0.12,
      "step": 26377
    },
    {
      "epoch": 0.7695314779158644,
      "grad_norm": 0.8692900744563608,
      "learning_rate": 1.3294659537225951e-06,
      "loss": 0.1241,
      "step": 26378
    },
    {
      "epoch": 0.769560651146508,
      "grad_norm": 0.8160703074895804,
      "learning_rate": 1.3291451712461395e-06,
      "loss": 0.0993,
      "step": 26379
    },
    {
      "epoch": 0.7695898243771515,
      "grad_norm": 0.9095841995953802,
      "learning_rate": 1.3288244215421981e-06,
      "loss": 0.1149,
      "step": 26380
    },
    {
      "epoch": 0.7696189976077951,
      "grad_norm": 0.8708794576204514,
      "learning_rate": 1.3285037046136372e-06,
      "loss": 0.1204,
      "step": 26381
    },
    {
      "epoch": 0.7696481708384386,
      "grad_norm": 0.9675974965172188,
      "learning_rate": 1.3281830204633188e-06,
      "loss": 0.1166,
      "step": 26382
    },
    {
      "epoch": 0.7696773440690822,
      "grad_norm": 0.8515714097596896,
      "learning_rate": 1.3278623690941045e-06,
      "loss": 0.1284,
      "step": 26383
    },
    {
      "epoch": 0.7697065172997257,
      "grad_norm": 0.8043822820085537,
      "learning_rate": 1.3275417505088585e-06,
      "loss": 0.1157,
      "step": 26384
    },
    {
      "epoch": 0.7697356905303694,
      "grad_norm": 0.955162189308029,
      "learning_rate": 1.3272211647104443e-06,
      "loss": 0.1225,
      "step": 26385
    },
    {
      "epoch": 0.7697648637610129,
      "grad_norm": 1.0431847591400947,
      "learning_rate": 1.3269006117017231e-06,
      "loss": 0.1048,
      "step": 26386
    },
    {
      "epoch": 0.7697940369916565,
      "grad_norm": 0.7443241082161194,
      "learning_rate": 1.3265800914855542e-06,
      "loss": 0.1118,
      "step": 26387
    },
    {
      "epoch": 0.7698232102223,
      "grad_norm": 0.8425779974112467,
      "learning_rate": 1.3262596040648034e-06,
      "loss": 0.1057,
      "step": 26388
    },
    {
      "epoch": 0.7698523834529436,
      "grad_norm": 0.768087129628987,
      "learning_rate": 1.3259391494423296e-06,
      "loss": 0.1054,
      "step": 26389
    },
    {
      "epoch": 0.7698815566835872,
      "grad_norm": 1.061329548066553,
      "learning_rate": 1.3256187276209913e-06,
      "loss": 0.1188,
      "step": 26390
    },
    {
      "epoch": 0.7699107299142307,
      "grad_norm": 0.900892773663434,
      "learning_rate": 1.325298338603655e-06,
      "loss": 0.1119,
      "step": 26391
    },
    {
      "epoch": 0.7699399031448743,
      "grad_norm": 1.0821439260157737,
      "learning_rate": 1.3249779823931774e-06,
      "loss": 0.1044,
      "step": 26392
    },
    {
      "epoch": 0.7699690763755178,
      "grad_norm": 0.8665013694211476,
      "learning_rate": 1.3246576589924176e-06,
      "loss": 0.0815,
      "step": 26393
    },
    {
      "epoch": 0.7699982496061614,
      "grad_norm": 1.037289665752422,
      "learning_rate": 1.3243373684042388e-06,
      "loss": 0.1176,
      "step": 26394
    },
    {
      "epoch": 0.7700274228368049,
      "grad_norm": 1.037716672470052,
      "learning_rate": 1.324017110631498e-06,
      "loss": 0.137,
      "step": 26395
    },
    {
      "epoch": 0.7700565960674485,
      "grad_norm": 1.211706994811601,
      "learning_rate": 1.3236968856770537e-06,
      "loss": 0.1345,
      "step": 26396
    },
    {
      "epoch": 0.770085769298092,
      "grad_norm": 1.0750149620536222,
      "learning_rate": 1.3233766935437665e-06,
      "loss": 0.1334,
      "step": 26397
    },
    {
      "epoch": 0.7701149425287356,
      "grad_norm": 0.8909687763686794,
      "learning_rate": 1.3230565342344953e-06,
      "loss": 0.1093,
      "step": 26398
    },
    {
      "epoch": 0.7701441157593792,
      "grad_norm": 0.6824404088662096,
      "learning_rate": 1.3227364077520976e-06,
      "loss": 0.1208,
      "step": 26399
    },
    {
      "epoch": 0.7701732889900228,
      "grad_norm": 0.8293847946531514,
      "learning_rate": 1.3224163140994302e-06,
      "loss": 0.1224,
      "step": 26400
    },
    {
      "epoch": 0.7702024622206664,
      "grad_norm": 0.871079034104856,
      "learning_rate": 1.3220962532793535e-06,
      "loss": 0.1119,
      "step": 26401
    },
    {
      "epoch": 0.7702316354513099,
      "grad_norm": 0.9644347823087496,
      "learning_rate": 1.321776225294722e-06,
      "loss": 0.1438,
      "step": 26402
    },
    {
      "epoch": 0.7702608086819535,
      "grad_norm": 1.1372883534878597,
      "learning_rate": 1.321456230148394e-06,
      "loss": 0.1059,
      "step": 26403
    },
    {
      "epoch": 0.770289981912597,
      "grad_norm": 0.7689123155392992,
      "learning_rate": 1.3211362678432282e-06,
      "loss": 0.123,
      "step": 26404
    },
    {
      "epoch": 0.7703191551432406,
      "grad_norm": 0.7275816622209508,
      "learning_rate": 1.32081633838208e-06,
      "loss": 0.1149,
      "step": 26405
    },
    {
      "epoch": 0.7703483283738841,
      "grad_norm": 0.7988063990793386,
      "learning_rate": 1.3204964417678034e-06,
      "loss": 0.1349,
      "step": 26406
    },
    {
      "epoch": 0.7703775016045277,
      "grad_norm": 1.000820402644795,
      "learning_rate": 1.3201765780032577e-06,
      "loss": 0.1164,
      "step": 26407
    },
    {
      "epoch": 0.7704066748351712,
      "grad_norm": 0.8542802866515816,
      "learning_rate": 1.3198567470912955e-06,
      "loss": 0.1155,
      "step": 26408
    },
    {
      "epoch": 0.7704358480658148,
      "grad_norm": 0.8719979593231254,
      "learning_rate": 1.3195369490347753e-06,
      "loss": 0.1033,
      "step": 26409
    },
    {
      "epoch": 0.7704650212964583,
      "grad_norm": 1.1192189668562742,
      "learning_rate": 1.3192171838365492e-06,
      "loss": 0.1246,
      "step": 26410
    },
    {
      "epoch": 0.7704941945271019,
      "grad_norm": 0.683453341444437,
      "learning_rate": 1.3188974514994752e-06,
      "loss": 0.0945,
      "step": 26411
    },
    {
      "epoch": 0.7705233677577455,
      "grad_norm": 1.1538560322257447,
      "learning_rate": 1.3185777520264053e-06,
      "loss": 0.1087,
      "step": 26412
    },
    {
      "epoch": 0.7705525409883891,
      "grad_norm": 1.123113533059127,
      "learning_rate": 1.3182580854201938e-06,
      "loss": 0.124,
      "step": 26413
    },
    {
      "epoch": 0.7705817142190327,
      "grad_norm": 0.9604330296704243,
      "learning_rate": 1.3179384516836947e-06,
      "loss": 0.1192,
      "step": 26414
    },
    {
      "epoch": 0.7706108874496762,
      "grad_norm": 0.9035893269277159,
      "learning_rate": 1.3176188508197634e-06,
      "loss": 0.133,
      "step": 26415
    },
    {
      "epoch": 0.7706400606803198,
      "grad_norm": 0.85900840253525,
      "learning_rate": 1.3172992828312519e-06,
      "loss": 0.1317,
      "step": 26416
    },
    {
      "epoch": 0.7706692339109633,
      "grad_norm": 1.1163044663463344,
      "learning_rate": 1.3169797477210122e-06,
      "loss": 0.1144,
      "step": 26417
    },
    {
      "epoch": 0.7706984071416069,
      "grad_norm": 1.1446577803877949,
      "learning_rate": 1.3166602454918997e-06,
      "loss": 0.0885,
      "step": 26418
    },
    {
      "epoch": 0.7707275803722504,
      "grad_norm": 0.7461391709802458,
      "learning_rate": 1.316340776146765e-06,
      "loss": 0.1162,
      "step": 26419
    },
    {
      "epoch": 0.770756753602894,
      "grad_norm": 0.8121190406704041,
      "learning_rate": 1.3160213396884576e-06,
      "loss": 0.0949,
      "step": 26420
    },
    {
      "epoch": 0.7707859268335375,
      "grad_norm": 1.1227051978681637,
      "learning_rate": 1.3157019361198348e-06,
      "loss": 0.1323,
      "step": 26421
    },
    {
      "epoch": 0.7708151000641811,
      "grad_norm": 1.3014020476072332,
      "learning_rate": 1.3153825654437458e-06,
      "loss": 0.0977,
      "step": 26422
    },
    {
      "epoch": 0.7708442732948246,
      "grad_norm": 1.3137095697635548,
      "learning_rate": 1.3150632276630405e-06,
      "loss": 0.1235,
      "step": 26423
    },
    {
      "epoch": 0.7708734465254682,
      "grad_norm": 0.9783099395143354,
      "learning_rate": 1.3147439227805726e-06,
      "loss": 0.1578,
      "step": 26424
    },
    {
      "epoch": 0.7709026197561117,
      "grad_norm": 1.6056538931094753,
      "learning_rate": 1.314424650799191e-06,
      "loss": 0.1119,
      "step": 26425
    },
    {
      "epoch": 0.7709317929867554,
      "grad_norm": 0.8491353982017323,
      "learning_rate": 1.3141054117217444e-06,
      "loss": 0.1271,
      "step": 26426
    },
    {
      "epoch": 0.770960966217399,
      "grad_norm": 1.2452005449943448,
      "learning_rate": 1.3137862055510852e-06,
      "loss": 0.1122,
      "step": 26427
    },
    {
      "epoch": 0.7709901394480425,
      "grad_norm": 0.9635875427143986,
      "learning_rate": 1.3134670322900644e-06,
      "loss": 0.0943,
      "step": 26428
    },
    {
      "epoch": 0.7710193126786861,
      "grad_norm": 0.752123689176318,
      "learning_rate": 1.3131478919415298e-06,
      "loss": 0.11,
      "step": 26429
    },
    {
      "epoch": 0.7710484859093296,
      "grad_norm": 0.7198618254965795,
      "learning_rate": 1.3128287845083288e-06,
      "loss": 0.1247,
      "step": 26430
    },
    {
      "epoch": 0.7710776591399732,
      "grad_norm": 1.3233392143387757,
      "learning_rate": 1.3125097099933144e-06,
      "loss": 0.1205,
      "step": 26431
    },
    {
      "epoch": 0.7711068323706167,
      "grad_norm": 1.0919256862175135,
      "learning_rate": 1.3121906683993307e-06,
      "loss": 0.1065,
      "step": 26432
    },
    {
      "epoch": 0.7711360056012603,
      "grad_norm": 0.9580261211845957,
      "learning_rate": 1.3118716597292292e-06,
      "loss": 0.1164,
      "step": 26433
    },
    {
      "epoch": 0.7711651788319038,
      "grad_norm": 0.6467131070332959,
      "learning_rate": 1.3115526839858583e-06,
      "loss": 0.1006,
      "step": 26434
    },
    {
      "epoch": 0.7711943520625474,
      "grad_norm": 0.8629428027626489,
      "learning_rate": 1.3112337411720643e-06,
      "loss": 0.1164,
      "step": 26435
    },
    {
      "epoch": 0.7712235252931909,
      "grad_norm": 0.86144947721426,
      "learning_rate": 1.3109148312906934e-06,
      "loss": 0.1205,
      "step": 26436
    },
    {
      "epoch": 0.7712526985238345,
      "grad_norm": 0.7128145435843275,
      "learning_rate": 1.3105959543445962e-06,
      "loss": 0.0983,
      "step": 26437
    },
    {
      "epoch": 0.771281871754478,
      "grad_norm": 0.9961558734954546,
      "learning_rate": 1.3102771103366157e-06,
      "loss": 0.1091,
      "step": 26438
    },
    {
      "epoch": 0.7713110449851217,
      "grad_norm": 0.9780844718050035,
      "learning_rate": 1.3099582992696019e-06,
      "loss": 0.1195,
      "step": 26439
    },
    {
      "epoch": 0.7713402182157653,
      "grad_norm": 1.0287588646014765,
      "learning_rate": 1.309639521146398e-06,
      "loss": 0.1313,
      "step": 26440
    },
    {
      "epoch": 0.7713693914464088,
      "grad_norm": 1.251658470659023,
      "learning_rate": 1.309320775969853e-06,
      "loss": 0.1255,
      "step": 26441
    },
    {
      "epoch": 0.7713985646770524,
      "grad_norm": 1.196933869166425,
      "learning_rate": 1.3090020637428109e-06,
      "loss": 0.1265,
      "step": 26442
    },
    {
      "epoch": 0.7714277379076959,
      "grad_norm": 0.9939862539712442,
      "learning_rate": 1.3086833844681163e-06,
      "loss": 0.1154,
      "step": 26443
    },
    {
      "epoch": 0.7714569111383395,
      "grad_norm": 0.8493062979362679,
      "learning_rate": 1.3083647381486147e-06,
      "loss": 0.1168,
      "step": 26444
    },
    {
      "epoch": 0.771486084368983,
      "grad_norm": 0.9132476162737213,
      "learning_rate": 1.3080461247871528e-06,
      "loss": 0.1091,
      "step": 26445
    },
    {
      "epoch": 0.7715152575996266,
      "grad_norm": 0.7964659543172595,
      "learning_rate": 1.3077275443865744e-06,
      "loss": 0.0903,
      "step": 26446
    },
    {
      "epoch": 0.7715444308302701,
      "grad_norm": 0.8313801419019907,
      "learning_rate": 1.307408996949721e-06,
      "loss": 0.1269,
      "step": 26447
    },
    {
      "epoch": 0.7715736040609137,
      "grad_norm": 0.8557884059584263,
      "learning_rate": 1.3070904824794405e-06,
      "loss": 0.1092,
      "step": 26448
    },
    {
      "epoch": 0.7716027772915572,
      "grad_norm": 0.9307981977101334,
      "learning_rate": 1.3067720009785744e-06,
      "loss": 0.12,
      "step": 26449
    },
    {
      "epoch": 0.7716319505222008,
      "grad_norm": 0.9161618851245314,
      "learning_rate": 1.3064535524499638e-06,
      "loss": 0.1123,
      "step": 26450
    },
    {
      "epoch": 0.7716611237528443,
      "grad_norm": 0.9973343431939814,
      "learning_rate": 1.3061351368964565e-06,
      "loss": 0.1178,
      "step": 26451
    },
    {
      "epoch": 0.7716902969834879,
      "grad_norm": 0.8219635170300139,
      "learning_rate": 1.3058167543208932e-06,
      "loss": 0.1071,
      "step": 26452
    },
    {
      "epoch": 0.7717194702141316,
      "grad_norm": 0.8308945313301898,
      "learning_rate": 1.3054984047261143e-06,
      "loss": 0.1105,
      "step": 26453
    },
    {
      "epoch": 0.7717486434447751,
      "grad_norm": 0.9276828756661168,
      "learning_rate": 1.305180088114965e-06,
      "loss": 0.1267,
      "step": 26454
    },
    {
      "epoch": 0.7717778166754187,
      "grad_norm": 0.6314204423551208,
      "learning_rate": 1.3048618044902867e-06,
      "loss": 0.1004,
      "step": 26455
    },
    {
      "epoch": 0.7718069899060622,
      "grad_norm": 0.7850854170641326,
      "learning_rate": 1.3045435538549178e-06,
      "loss": 0.1083,
      "step": 26456
    },
    {
      "epoch": 0.7718361631367058,
      "grad_norm": 0.8000894342283231,
      "learning_rate": 1.3042253362117025e-06,
      "loss": 0.103,
      "step": 26457
    },
    {
      "epoch": 0.7718653363673493,
      "grad_norm": 0.9008739004837375,
      "learning_rate": 1.3039071515634822e-06,
      "loss": 0.1183,
      "step": 26458
    },
    {
      "epoch": 0.7718945095979929,
      "grad_norm": 0.8673481606862793,
      "learning_rate": 1.3035889999130963e-06,
      "loss": 0.136,
      "step": 26459
    },
    {
      "epoch": 0.7719236828286364,
      "grad_norm": 0.8850524606452219,
      "learning_rate": 1.3032708812633843e-06,
      "loss": 0.1126,
      "step": 26460
    },
    {
      "epoch": 0.77195285605928,
      "grad_norm": 0.8824738752227391,
      "learning_rate": 1.302952795617189e-06,
      "loss": 0.1173,
      "step": 26461
    },
    {
      "epoch": 0.7719820292899235,
      "grad_norm": 0.9801705753487708,
      "learning_rate": 1.3026347429773467e-06,
      "loss": 0.1301,
      "step": 26462
    },
    {
      "epoch": 0.7720112025205671,
      "grad_norm": 0.869696321205162,
      "learning_rate": 1.3023167233466988e-06,
      "loss": 0.1166,
      "step": 26463
    },
    {
      "epoch": 0.7720403757512107,
      "grad_norm": 0.699863753769055,
      "learning_rate": 1.3019987367280863e-06,
      "loss": 0.1208,
      "step": 26464
    },
    {
      "epoch": 0.7720695489818542,
      "grad_norm": 0.7509956601812794,
      "learning_rate": 1.3016807831243462e-06,
      "loss": 0.1334,
      "step": 26465
    },
    {
      "epoch": 0.7720987222124978,
      "grad_norm": 0.7857488046474499,
      "learning_rate": 1.3013628625383156e-06,
      "loss": 0.1117,
      "step": 26466
    },
    {
      "epoch": 0.7721278954431414,
      "grad_norm": 0.7597992997166758,
      "learning_rate": 1.301044974972836e-06,
      "loss": 0.1092,
      "step": 26467
    },
    {
      "epoch": 0.772157068673785,
      "grad_norm": 0.902079263486036,
      "learning_rate": 1.3007271204307425e-06,
      "loss": 0.1109,
      "step": 26468
    },
    {
      "epoch": 0.7721862419044285,
      "grad_norm": 0.9154434600938871,
      "learning_rate": 1.3004092989148753e-06,
      "loss": 0.1356,
      "step": 26469
    },
    {
      "epoch": 0.7722154151350721,
      "grad_norm": 0.7602294858779245,
      "learning_rate": 1.3000915104280699e-06,
      "loss": 0.1209,
      "step": 26470
    },
    {
      "epoch": 0.7722445883657156,
      "grad_norm": 0.8650595391250381,
      "learning_rate": 1.2997737549731647e-06,
      "loss": 0.1037,
      "step": 26471
    },
    {
      "epoch": 0.7722737615963592,
      "grad_norm": 0.728679407589401,
      "learning_rate": 1.299456032552997e-06,
      "loss": 0.116,
      "step": 26472
    },
    {
      "epoch": 0.7723029348270027,
      "grad_norm": 0.911503442395789,
      "learning_rate": 1.2991383431704008e-06,
      "loss": 0.1636,
      "step": 26473
    },
    {
      "epoch": 0.7723321080576463,
      "grad_norm": 1.119843180670213,
      "learning_rate": 1.2988206868282138e-06,
      "loss": 0.1356,
      "step": 26474
    },
    {
      "epoch": 0.7723612812882898,
      "grad_norm": 0.8635609734114374,
      "learning_rate": 1.2985030635292733e-06,
      "loss": 0.0951,
      "step": 26475
    },
    {
      "epoch": 0.7723904545189334,
      "grad_norm": 0.7894405202081105,
      "learning_rate": 1.2981854732764142e-06,
      "loss": 0.1326,
      "step": 26476
    },
    {
      "epoch": 0.772419627749577,
      "grad_norm": 0.7744503147262438,
      "learning_rate": 1.2978679160724706e-06,
      "loss": 0.1196,
      "step": 26477
    },
    {
      "epoch": 0.7724488009802205,
      "grad_norm": 0.9085344278051577,
      "learning_rate": 1.2975503919202793e-06,
      "loss": 0.1343,
      "step": 26478
    },
    {
      "epoch": 0.7724779742108641,
      "grad_norm": 0.8709450678143941,
      "learning_rate": 1.2972329008226741e-06,
      "loss": 0.118,
      "step": 26479
    },
    {
      "epoch": 0.7725071474415077,
      "grad_norm": 0.6416055095465956,
      "learning_rate": 1.296915442782487e-06,
      "loss": 0.0993,
      "step": 26480
    },
    {
      "epoch": 0.7725363206721513,
      "grad_norm": 0.9495578419459769,
      "learning_rate": 1.2965980178025577e-06,
      "loss": 0.1105,
      "step": 26481
    },
    {
      "epoch": 0.7725654939027948,
      "grad_norm": 0.8520634504504494,
      "learning_rate": 1.2962806258857175e-06,
      "loss": 0.1049,
      "step": 26482
    },
    {
      "epoch": 0.7725946671334384,
      "grad_norm": 0.9039195815906078,
      "learning_rate": 1.2959632670347976e-06,
      "loss": 0.117,
      "step": 26483
    },
    {
      "epoch": 0.7726238403640819,
      "grad_norm": 0.7995310471836102,
      "learning_rate": 1.2956459412526357e-06,
      "loss": 0.1187,
      "step": 26484
    },
    {
      "epoch": 0.7726530135947255,
      "grad_norm": 1.0539171841398818,
      "learning_rate": 1.2953286485420618e-06,
      "loss": 0.1098,
      "step": 26485
    },
    {
      "epoch": 0.772682186825369,
      "grad_norm": 0.9727985169305272,
      "learning_rate": 1.2950113889059084e-06,
      "loss": 0.1255,
      "step": 26486
    },
    {
      "epoch": 0.7727113600560126,
      "grad_norm": 1.1550559656561254,
      "learning_rate": 1.294694162347009e-06,
      "loss": 0.1136,
      "step": 26487
    },
    {
      "epoch": 0.7727405332866562,
      "grad_norm": 0.726800504404567,
      "learning_rate": 1.2943769688681968e-06,
      "loss": 0.1141,
      "step": 26488
    },
    {
      "epoch": 0.7727697065172997,
      "grad_norm": 1.8236590615622714,
      "learning_rate": 1.294059808472302e-06,
      "loss": 0.0945,
      "step": 26489
    },
    {
      "epoch": 0.7727988797479433,
      "grad_norm": 0.8323302104959504,
      "learning_rate": 1.2937426811621557e-06,
      "loss": 0.1052,
      "step": 26490
    },
    {
      "epoch": 0.7728280529785868,
      "grad_norm": 0.7459109385827977,
      "learning_rate": 1.293425586940591e-06,
      "loss": 0.1081,
      "step": 26491
    },
    {
      "epoch": 0.7728572262092304,
      "grad_norm": 1.2150684866793608,
      "learning_rate": 1.2931085258104365e-06,
      "loss": 0.1089,
      "step": 26492
    },
    {
      "epoch": 0.7728863994398739,
      "grad_norm": 0.9975084486954806,
      "learning_rate": 1.292791497774526e-06,
      "loss": 0.1237,
      "step": 26493
    },
    {
      "epoch": 0.7729155726705176,
      "grad_norm": 0.6490156257469298,
      "learning_rate": 1.292474502835686e-06,
      "loss": 0.1296,
      "step": 26494
    },
    {
      "epoch": 0.7729447459011611,
      "grad_norm": 0.7228196405436439,
      "learning_rate": 1.2921575409967507e-06,
      "loss": 0.1077,
      "step": 26495
    },
    {
      "epoch": 0.7729739191318047,
      "grad_norm": 1.0764161726479002,
      "learning_rate": 1.2918406122605459e-06,
      "loss": 0.1186,
      "step": 26496
    },
    {
      "epoch": 0.7730030923624482,
      "grad_norm": 0.7546420173359845,
      "learning_rate": 1.2915237166299038e-06,
      "loss": 0.1026,
      "step": 26497
    },
    {
      "epoch": 0.7730322655930918,
      "grad_norm": 1.01179067039939,
      "learning_rate": 1.2912068541076523e-06,
      "loss": 0.1229,
      "step": 26498
    },
    {
      "epoch": 0.7730614388237353,
      "grad_norm": 0.9371775488421225,
      "learning_rate": 1.2908900246966215e-06,
      "loss": 0.1226,
      "step": 26499
    },
    {
      "epoch": 0.7730906120543789,
      "grad_norm": 1.0982933175170113,
      "learning_rate": 1.2905732283996374e-06,
      "loss": 0.1218,
      "step": 26500
    },
    {
      "epoch": 0.7731197852850225,
      "grad_norm": 1.797577771811171,
      "learning_rate": 1.290256465219532e-06,
      "loss": 0.1318,
      "step": 26501
    },
    {
      "epoch": 0.773148958515666,
      "grad_norm": 0.9185502509125388,
      "learning_rate": 1.2899397351591308e-06,
      "loss": 0.1202,
      "step": 26502
    },
    {
      "epoch": 0.7731781317463096,
      "grad_norm": 0.8406384464510138,
      "learning_rate": 1.289623038221261e-06,
      "loss": 0.1,
      "step": 26503
    },
    {
      "epoch": 0.7732073049769531,
      "grad_norm": 0.8154581556170826,
      "learning_rate": 1.289306374408751e-06,
      "loss": 0.1173,
      "step": 26504
    },
    {
      "epoch": 0.7732364782075967,
      "grad_norm": 0.9123238426515281,
      "learning_rate": 1.2889897437244292e-06,
      "loss": 0.1083,
      "step": 26505
    },
    {
      "epoch": 0.7732656514382402,
      "grad_norm": 1.1856662017489783,
      "learning_rate": 1.288673146171121e-06,
      "loss": 0.1092,
      "step": 26506
    },
    {
      "epoch": 0.7732948246688839,
      "grad_norm": 0.8013534639063158,
      "learning_rate": 1.2883565817516513e-06,
      "loss": 0.1159,
      "step": 26507
    },
    {
      "epoch": 0.7733239978995274,
      "grad_norm": 0.8404164148131812,
      "learning_rate": 1.2880400504688501e-06,
      "loss": 0.1189,
      "step": 26508
    },
    {
      "epoch": 0.773353171130171,
      "grad_norm": 0.9352412053671898,
      "learning_rate": 1.2877235523255388e-06,
      "loss": 0.1373,
      "step": 26509
    },
    {
      "epoch": 0.7733823443608145,
      "grad_norm": 0.8562182761344047,
      "learning_rate": 1.2874070873245465e-06,
      "loss": 0.1287,
      "step": 26510
    },
    {
      "epoch": 0.7734115175914581,
      "grad_norm": 0.7641345416105122,
      "learning_rate": 1.2870906554686979e-06,
      "loss": 0.1168,
      "step": 26511
    },
    {
      "epoch": 0.7734406908221017,
      "grad_norm": 0.9916976973259733,
      "learning_rate": 1.2867742567608182e-06,
      "loss": 0.1192,
      "step": 26512
    },
    {
      "epoch": 0.7734698640527452,
      "grad_norm": 0.8311264741871122,
      "learning_rate": 1.2864578912037302e-06,
      "loss": 0.1066,
      "step": 26513
    },
    {
      "epoch": 0.7734990372833888,
      "grad_norm": 0.7764714206791056,
      "learning_rate": 1.2861415588002607e-06,
      "loss": 0.0888,
      "step": 26514
    },
    {
      "epoch": 0.7735282105140323,
      "grad_norm": 0.8708474570257974,
      "learning_rate": 1.2858252595532316e-06,
      "loss": 0.0964,
      "step": 26515
    },
    {
      "epoch": 0.7735573837446759,
      "grad_norm": 0.9782078589934318,
      "learning_rate": 1.285508993465469e-06,
      "loss": 0.1209,
      "step": 26516
    },
    {
      "epoch": 0.7735865569753194,
      "grad_norm": 0.9509027822156123,
      "learning_rate": 1.2851927605397946e-06,
      "loss": 0.108,
      "step": 26517
    },
    {
      "epoch": 0.773615730205963,
      "grad_norm": 0.7746880580086296,
      "learning_rate": 1.2848765607790332e-06,
      "loss": 0.103,
      "step": 26518
    },
    {
      "epoch": 0.7736449034366065,
      "grad_norm": 0.7650423822361858,
      "learning_rate": 1.2845603941860074e-06,
      "loss": 0.1023,
      "step": 26519
    },
    {
      "epoch": 0.7736740766672501,
      "grad_norm": 1.4347883213488632,
      "learning_rate": 1.2842442607635381e-06,
      "loss": 0.1082,
      "step": 26520
    },
    {
      "epoch": 0.7737032498978937,
      "grad_norm": 1.1284873371552322,
      "learning_rate": 1.2839281605144488e-06,
      "loss": 0.1036,
      "step": 26521
    },
    {
      "epoch": 0.7737324231285373,
      "grad_norm": 0.9096892401280708,
      "learning_rate": 1.283612093441563e-06,
      "loss": 0.1099,
      "step": 26522
    },
    {
      "epoch": 0.7737615963591808,
      "grad_norm": 0.7006154927094754,
      "learning_rate": 1.2832960595477017e-06,
      "loss": 0.1174,
      "step": 26523
    },
    {
      "epoch": 0.7737907695898244,
      "grad_norm": 1.0828107341052888,
      "learning_rate": 1.2829800588356839e-06,
      "loss": 0.1021,
      "step": 26524
    },
    {
      "epoch": 0.773819942820468,
      "grad_norm": 1.0696356690412794,
      "learning_rate": 1.282664091308335e-06,
      "loss": 0.1172,
      "step": 26525
    },
    {
      "epoch": 0.7738491160511115,
      "grad_norm": 0.8497245081942782,
      "learning_rate": 1.282348156968472e-06,
      "loss": 0.1297,
      "step": 26526
    },
    {
      "epoch": 0.7738782892817551,
      "grad_norm": 0.9001105388819419,
      "learning_rate": 1.282032255818917e-06,
      "loss": 0.1285,
      "step": 26527
    },
    {
      "epoch": 0.7739074625123986,
      "grad_norm": 0.8068502221676728,
      "learning_rate": 1.2817163878624917e-06,
      "loss": 0.1217,
      "step": 26528
    },
    {
      "epoch": 0.7739366357430422,
      "grad_norm": 0.8138790681916827,
      "learning_rate": 1.281400553102015e-06,
      "loss": 0.1291,
      "step": 26529
    },
    {
      "epoch": 0.7739658089736857,
      "grad_norm": 0.8778194586781097,
      "learning_rate": 1.2810847515403058e-06,
      "loss": 0.141,
      "step": 26530
    },
    {
      "epoch": 0.7739949822043293,
      "grad_norm": 0.9503925376410518,
      "learning_rate": 1.2807689831801846e-06,
      "loss": 0.1069,
      "step": 26531
    },
    {
      "epoch": 0.7740241554349728,
      "grad_norm": 1.0624130337991038,
      "learning_rate": 1.2804532480244709e-06,
      "loss": 0.1157,
      "step": 26532
    },
    {
      "epoch": 0.7740533286656164,
      "grad_norm": 0.9790620493478033,
      "learning_rate": 1.2801375460759802e-06,
      "loss": 0.1371,
      "step": 26533
    },
    {
      "epoch": 0.77408250189626,
      "grad_norm": 1.1323242419711559,
      "learning_rate": 1.2798218773375342e-06,
      "loss": 0.1086,
      "step": 26534
    },
    {
      "epoch": 0.7741116751269036,
      "grad_norm": 1.0682323746768634,
      "learning_rate": 1.2795062418119519e-06,
      "loss": 0.1239,
      "step": 26535
    },
    {
      "epoch": 0.7741408483575472,
      "grad_norm": 0.8797137663193164,
      "learning_rate": 1.2791906395020493e-06,
      "loss": 0.1241,
      "step": 26536
    },
    {
      "epoch": 0.7741700215881907,
      "grad_norm": 0.9167332230238365,
      "learning_rate": 1.2788750704106434e-06,
      "loss": 0.1112,
      "step": 26537
    },
    {
      "epoch": 0.7741991948188343,
      "grad_norm": 0.7695633151047667,
      "learning_rate": 1.2785595345405539e-06,
      "loss": 0.1075,
      "step": 26538
    },
    {
      "epoch": 0.7742283680494778,
      "grad_norm": 0.8312754128050671,
      "learning_rate": 1.278244031894595e-06,
      "loss": 0.1069,
      "step": 26539
    },
    {
      "epoch": 0.7742575412801214,
      "grad_norm": 0.8945044287668984,
      "learning_rate": 1.277928562475585e-06,
      "loss": 0.1333,
      "step": 26540
    },
    {
      "epoch": 0.7742867145107649,
      "grad_norm": 0.8845537811854449,
      "learning_rate": 1.2776131262863412e-06,
      "loss": 0.1226,
      "step": 26541
    },
    {
      "epoch": 0.7743158877414085,
      "grad_norm": 1.046034095084399,
      "learning_rate": 1.2772977233296796e-06,
      "loss": 0.1193,
      "step": 26542
    },
    {
      "epoch": 0.774345060972052,
      "grad_norm": 0.9048239629282795,
      "learning_rate": 1.276982353608413e-06,
      "loss": 0.119,
      "step": 26543
    },
    {
      "epoch": 0.7743742342026956,
      "grad_norm": 0.8592410590381747,
      "learning_rate": 1.2766670171253614e-06,
      "loss": 0.1241,
      "step": 26544
    },
    {
      "epoch": 0.7744034074333391,
      "grad_norm": 0.9521390371402167,
      "learning_rate": 1.276351713883336e-06,
      "loss": 0.1062,
      "step": 26545
    },
    {
      "epoch": 0.7744325806639827,
      "grad_norm": 0.6922278166740373,
      "learning_rate": 1.2760364438851553e-06,
      "loss": 0.1071,
      "step": 26546
    },
    {
      "epoch": 0.7744617538946262,
      "grad_norm": 0.7868868881989651,
      "learning_rate": 1.2757212071336301e-06,
      "loss": 0.1184,
      "step": 26547
    },
    {
      "epoch": 0.7744909271252699,
      "grad_norm": 0.8375359753984807,
      "learning_rate": 1.275406003631579e-06,
      "loss": 0.1082,
      "step": 26548
    },
    {
      "epoch": 0.7745201003559135,
      "grad_norm": 0.8448427683544858,
      "learning_rate": 1.275090833381814e-06,
      "loss": 0.1235,
      "step": 26549
    },
    {
      "epoch": 0.774549273586557,
      "grad_norm": 0.8780531252526221,
      "learning_rate": 1.2747756963871472e-06,
      "loss": 0.1287,
      "step": 26550
    },
    {
      "epoch": 0.7745784468172006,
      "grad_norm": 0.7180162243325253,
      "learning_rate": 1.2744605926503934e-06,
      "loss": 0.1189,
      "step": 26551
    },
    {
      "epoch": 0.7746076200478441,
      "grad_norm": 0.7395139920355911,
      "learning_rate": 1.274145522174368e-06,
      "loss": 0.1208,
      "step": 26552
    },
    {
      "epoch": 0.7746367932784877,
      "grad_norm": 0.8643409270290434,
      "learning_rate": 1.2738304849618815e-06,
      "loss": 0.1495,
      "step": 26553
    },
    {
      "epoch": 0.7746659665091312,
      "grad_norm": 0.9146671208526945,
      "learning_rate": 1.2735154810157458e-06,
      "loss": 0.1055,
      "step": 26554
    },
    {
      "epoch": 0.7746951397397748,
      "grad_norm": 0.7962582769405433,
      "learning_rate": 1.2732005103387756e-06,
      "loss": 0.13,
      "step": 26555
    },
    {
      "epoch": 0.7747243129704183,
      "grad_norm": 1.033888721443246,
      "learning_rate": 1.2728855729337802e-06,
      "loss": 0.1424,
      "step": 26556
    },
    {
      "epoch": 0.7747534862010619,
      "grad_norm": 0.7861583172360689,
      "learning_rate": 1.2725706688035728e-06,
      "loss": 0.0905,
      "step": 26557
    },
    {
      "epoch": 0.7747826594317054,
      "grad_norm": 0.8478843797782293,
      "learning_rate": 1.2722557979509664e-06,
      "loss": 0.1219,
      "step": 26558
    },
    {
      "epoch": 0.774811832662349,
      "grad_norm": 0.7467407285651229,
      "learning_rate": 1.2719409603787696e-06,
      "loss": 0.1055,
      "step": 26559
    },
    {
      "epoch": 0.7748410058929925,
      "grad_norm": 0.9988871413293041,
      "learning_rate": 1.271626156089793e-06,
      "loss": 0.1237,
      "step": 26560
    },
    {
      "epoch": 0.7748701791236362,
      "grad_norm": 0.7728425459962185,
      "learning_rate": 1.2713113850868492e-06,
      "loss": 0.1093,
      "step": 26561
    },
    {
      "epoch": 0.7748993523542798,
      "grad_norm": 0.7353651798996653,
      "learning_rate": 1.2709966473727474e-06,
      "loss": 0.1348,
      "step": 26562
    },
    {
      "epoch": 0.7749285255849233,
      "grad_norm": 0.6751947559540624,
      "learning_rate": 1.270681942950296e-06,
      "loss": 0.112,
      "step": 26563
    },
    {
      "epoch": 0.7749576988155669,
      "grad_norm": 0.9657854387208041,
      "learning_rate": 1.2703672718223058e-06,
      "loss": 0.1178,
      "step": 26564
    },
    {
      "epoch": 0.7749868720462104,
      "grad_norm": 0.7443473108788434,
      "learning_rate": 1.2700526339915875e-06,
      "loss": 0.1035,
      "step": 26565
    },
    {
      "epoch": 0.775016045276854,
      "grad_norm": 0.8117538167786532,
      "learning_rate": 1.2697380294609495e-06,
      "loss": 0.1287,
      "step": 26566
    },
    {
      "epoch": 0.7750452185074975,
      "grad_norm": 0.9204762012155704,
      "learning_rate": 1.2694234582331982e-06,
      "loss": 0.1173,
      "step": 26567
    },
    {
      "epoch": 0.7750743917381411,
      "grad_norm": 0.810251947875881,
      "learning_rate": 1.2691089203111444e-06,
      "loss": 0.108,
      "step": 26568
    },
    {
      "epoch": 0.7751035649687846,
      "grad_norm": 0.7534612732531387,
      "learning_rate": 1.2687944156975952e-06,
      "loss": 0.1341,
      "step": 26569
    },
    {
      "epoch": 0.7751327381994282,
      "grad_norm": 0.9639774981600991,
      "learning_rate": 1.2684799443953582e-06,
      "loss": 0.1113,
      "step": 26570
    },
    {
      "epoch": 0.7751619114300717,
      "grad_norm": 0.7398740880920021,
      "learning_rate": 1.2681655064072429e-06,
      "loss": 0.1056,
      "step": 26571
    },
    {
      "epoch": 0.7751910846607153,
      "grad_norm": 0.637081908395961,
      "learning_rate": 1.267851101736055e-06,
      "loss": 0.1137,
      "step": 26572
    },
    {
      "epoch": 0.7752202578913588,
      "grad_norm": 0.9941602533204257,
      "learning_rate": 1.2675367303846004e-06,
      "loss": 0.1368,
      "step": 26573
    },
    {
      "epoch": 0.7752494311220024,
      "grad_norm": 1.0444638934507875,
      "learning_rate": 1.267222392355688e-06,
      "loss": 0.0949,
      "step": 26574
    },
    {
      "epoch": 0.7752786043526461,
      "grad_norm": 0.7904267526058851,
      "learning_rate": 1.2669080876521217e-06,
      "loss": 0.133,
      "step": 26575
    },
    {
      "epoch": 0.7753077775832896,
      "grad_norm": 0.8829949795051159,
      "learning_rate": 1.2665938162767105e-06,
      "loss": 0.1337,
      "step": 26576
    },
    {
      "epoch": 0.7753369508139332,
      "grad_norm": 0.9142570146776361,
      "learning_rate": 1.2662795782322567e-06,
      "loss": 0.1183,
      "step": 26577
    },
    {
      "epoch": 0.7753661240445767,
      "grad_norm": 0.7460793058960168,
      "learning_rate": 1.2659653735215687e-06,
      "loss": 0.0951,
      "step": 26578
    },
    {
      "epoch": 0.7753952972752203,
      "grad_norm": 0.8705738509957747,
      "learning_rate": 1.2656512021474509e-06,
      "loss": 0.1152,
      "step": 26579
    },
    {
      "epoch": 0.7754244705058638,
      "grad_norm": 0.8404481239550062,
      "learning_rate": 1.2653370641127066e-06,
      "loss": 0.0934,
      "step": 26580
    },
    {
      "epoch": 0.7754536437365074,
      "grad_norm": 0.8810257364826629,
      "learning_rate": 1.2650229594201408e-06,
      "loss": 0.1379,
      "step": 26581
    },
    {
      "epoch": 0.7754828169671509,
      "grad_norm": 0.6820729342789238,
      "learning_rate": 1.26470888807256e-06,
      "loss": 0.1033,
      "step": 26582
    },
    {
      "epoch": 0.7755119901977945,
      "grad_norm": 0.7775126625130048,
      "learning_rate": 1.2643948500727666e-06,
      "loss": 0.1401,
      "step": 26583
    },
    {
      "epoch": 0.775541163428438,
      "grad_norm": 0.8362570852191665,
      "learning_rate": 1.264080845423563e-06,
      "loss": 0.1061,
      "step": 26584
    },
    {
      "epoch": 0.7755703366590816,
      "grad_norm": 0.8341827908323795,
      "learning_rate": 1.2637668741277548e-06,
      "loss": 0.1267,
      "step": 26585
    },
    {
      "epoch": 0.7755995098897251,
      "grad_norm": 0.8924927693673023,
      "learning_rate": 1.2634529361881442e-06,
      "loss": 0.1141,
      "step": 26586
    },
    {
      "epoch": 0.7756286831203687,
      "grad_norm": 0.8432344529903103,
      "learning_rate": 1.2631390316075315e-06,
      "loss": 0.1138,
      "step": 26587
    },
    {
      "epoch": 0.7756578563510124,
      "grad_norm": 0.8962107175638608,
      "learning_rate": 1.2628251603887238e-06,
      "loss": 0.1188,
      "step": 26588
    },
    {
      "epoch": 0.7756870295816559,
      "grad_norm": 0.8872274234974421,
      "learning_rate": 1.262511322534521e-06,
      "loss": 0.0966,
      "step": 26589
    },
    {
      "epoch": 0.7757162028122995,
      "grad_norm": 0.7627003562541219,
      "learning_rate": 1.262197518047723e-06,
      "loss": 0.1208,
      "step": 26590
    },
    {
      "epoch": 0.775745376042943,
      "grad_norm": 0.7761094661369075,
      "learning_rate": 1.2618837469311351e-06,
      "loss": 0.1341,
      "step": 26591
    },
    {
      "epoch": 0.7757745492735866,
      "grad_norm": 0.8623604643579398,
      "learning_rate": 1.261570009187557e-06,
      "loss": 0.1218,
      "step": 26592
    },
    {
      "epoch": 0.7758037225042301,
      "grad_norm": 0.8593192080900649,
      "learning_rate": 1.261256304819788e-06,
      "loss": 0.098,
      "step": 26593
    },
    {
      "epoch": 0.7758328957348737,
      "grad_norm": 0.6704118061247689,
      "learning_rate": 1.2609426338306296e-06,
      "loss": 0.1133,
      "step": 26594
    },
    {
      "epoch": 0.7758620689655172,
      "grad_norm": 0.8922664771139991,
      "learning_rate": 1.2606289962228846e-06,
      "loss": 0.1456,
      "step": 26595
    },
    {
      "epoch": 0.7758912421961608,
      "grad_norm": 0.6804087787177441,
      "learning_rate": 1.2603153919993516e-06,
      "loss": 0.1013,
      "step": 26596
    },
    {
      "epoch": 0.7759204154268043,
      "grad_norm": 0.7390251390423266,
      "learning_rate": 1.2600018211628278e-06,
      "loss": 0.1104,
      "step": 26597
    },
    {
      "epoch": 0.7759495886574479,
      "grad_norm": 1.2288790597249433,
      "learning_rate": 1.2596882837161174e-06,
      "loss": 0.1145,
      "step": 26598
    },
    {
      "epoch": 0.7759787618880915,
      "grad_norm": 1.1180371656432018,
      "learning_rate": 1.2593747796620148e-06,
      "loss": 0.122,
      "step": 26599
    },
    {
      "epoch": 0.776007935118735,
      "grad_norm": 0.7891802008532125,
      "learning_rate": 1.2590613090033215e-06,
      "loss": 0.1047,
      "step": 26600
    },
    {
      "epoch": 0.7760371083493786,
      "grad_norm": 0.8002358332725346,
      "learning_rate": 1.2587478717428375e-06,
      "loss": 0.1241,
      "step": 26601
    },
    {
      "epoch": 0.7760662815800222,
      "grad_norm": 0.9576015250321538,
      "learning_rate": 1.2584344678833587e-06,
      "loss": 0.1268,
      "step": 26602
    },
    {
      "epoch": 0.7760954548106658,
      "grad_norm": 0.8548690248052143,
      "learning_rate": 1.258121097427683e-06,
      "loss": 0.105,
      "step": 26603
    },
    {
      "epoch": 0.7761246280413093,
      "grad_norm": 0.8483129380130555,
      "learning_rate": 1.2578077603786104e-06,
      "loss": 0.1066,
      "step": 26604
    },
    {
      "epoch": 0.7761538012719529,
      "grad_norm": 0.8726933690240404,
      "learning_rate": 1.2574944567389346e-06,
      "loss": 0.1042,
      "step": 26605
    },
    {
      "epoch": 0.7761829745025964,
      "grad_norm": 0.8512247353198941,
      "learning_rate": 1.2571811865114569e-06,
      "loss": 0.1302,
      "step": 26606
    },
    {
      "epoch": 0.77621214773324,
      "grad_norm": 0.8286348752057278,
      "learning_rate": 1.2568679496989706e-06,
      "loss": 0.1083,
      "step": 26607
    },
    {
      "epoch": 0.7762413209638835,
      "grad_norm": 0.901537042493502,
      "learning_rate": 1.2565547463042753e-06,
      "loss": 0.1131,
      "step": 26608
    },
    {
      "epoch": 0.7762704941945271,
      "grad_norm": 0.8679979136696329,
      "learning_rate": 1.2562415763301656e-06,
      "loss": 0.1285,
      "step": 26609
    },
    {
      "epoch": 0.7762996674251706,
      "grad_norm": 0.7955828353613565,
      "learning_rate": 1.2559284397794353e-06,
      "loss": 0.091,
      "step": 26610
    },
    {
      "epoch": 0.7763288406558142,
      "grad_norm": 0.6771791765096367,
      "learning_rate": 1.2556153366548823e-06,
      "loss": 0.1016,
      "step": 26611
    },
    {
      "epoch": 0.7763580138864578,
      "grad_norm": 0.8774287468611829,
      "learning_rate": 1.2553022669593034e-06,
      "loss": 0.1142,
      "step": 26612
    },
    {
      "epoch": 0.7763871871171013,
      "grad_norm": 0.969730479636771,
      "learning_rate": 1.254989230695492e-06,
      "loss": 0.1263,
      "step": 26613
    },
    {
      "epoch": 0.7764163603477449,
      "grad_norm": 0.7443136930296058,
      "learning_rate": 1.2546762278662412e-06,
      "loss": 0.1198,
      "step": 26614
    },
    {
      "epoch": 0.7764455335783885,
      "grad_norm": 0.7589716149465291,
      "learning_rate": 1.2543632584743488e-06,
      "loss": 0.1228,
      "step": 26615
    },
    {
      "epoch": 0.7764747068090321,
      "grad_norm": 0.7364776568685283,
      "learning_rate": 1.2540503225226064e-06,
      "loss": 0.1348,
      "step": 26616
    },
    {
      "epoch": 0.7765038800396756,
      "grad_norm": 0.8164915778874572,
      "learning_rate": 1.2537374200138058e-06,
      "loss": 0.1159,
      "step": 26617
    },
    {
      "epoch": 0.7765330532703192,
      "grad_norm": 0.7452086785688905,
      "learning_rate": 1.2534245509507465e-06,
      "loss": 0.1148,
      "step": 26618
    },
    {
      "epoch": 0.7765622265009627,
      "grad_norm": 0.8122767630242331,
      "learning_rate": 1.2531117153362176e-06,
      "loss": 0.1223,
      "step": 26619
    },
    {
      "epoch": 0.7765913997316063,
      "grad_norm": 0.9413088095715116,
      "learning_rate": 1.2527989131730123e-06,
      "loss": 0.1036,
      "step": 26620
    },
    {
      "epoch": 0.7766205729622498,
      "grad_norm": 0.8623245561426485,
      "learning_rate": 1.2524861444639246e-06,
      "loss": 0.1077,
      "step": 26621
    },
    {
      "epoch": 0.7766497461928934,
      "grad_norm": 0.825163178735011,
      "learning_rate": 1.2521734092117466e-06,
      "loss": 0.1218,
      "step": 26622
    },
    {
      "epoch": 0.776678919423537,
      "grad_norm": 0.8185792339260545,
      "learning_rate": 1.2518607074192679e-06,
      "loss": 0.1176,
      "step": 26623
    },
    {
      "epoch": 0.7767080926541805,
      "grad_norm": 0.7985641157458386,
      "learning_rate": 1.251548039089282e-06,
      "loss": 0.1182,
      "step": 26624
    },
    {
      "epoch": 0.7767372658848241,
      "grad_norm": 1.024651304113618,
      "learning_rate": 1.2512354042245818e-06,
      "loss": 0.1076,
      "step": 26625
    },
    {
      "epoch": 0.7767664391154676,
      "grad_norm": 0.8758307280649539,
      "learning_rate": 1.2509228028279568e-06,
      "loss": 0.1297,
      "step": 26626
    },
    {
      "epoch": 0.7767956123461112,
      "grad_norm": 0.9491621859658749,
      "learning_rate": 1.250610234902197e-06,
      "loss": 0.1407,
      "step": 26627
    },
    {
      "epoch": 0.7768247855767547,
      "grad_norm": 1.2129012666247923,
      "learning_rate": 1.2502977004500956e-06,
      "loss": 0.105,
      "step": 26628
    },
    {
      "epoch": 0.7768539588073984,
      "grad_norm": 0.8308476317112877,
      "learning_rate": 1.2499851994744393e-06,
      "loss": 0.1151,
      "step": 26629
    },
    {
      "epoch": 0.7768831320380419,
      "grad_norm": 0.99605626968484,
      "learning_rate": 1.24967273197802e-06,
      "loss": 0.1186,
      "step": 26630
    },
    {
      "epoch": 0.7769123052686855,
      "grad_norm": 0.9990949850093903,
      "learning_rate": 1.2493602979636289e-06,
      "loss": 0.113,
      "step": 26631
    },
    {
      "epoch": 0.776941478499329,
      "grad_norm": 0.943995982225375,
      "learning_rate": 1.2490478974340536e-06,
      "loss": 0.1174,
      "step": 26632
    },
    {
      "epoch": 0.7769706517299726,
      "grad_norm": 0.8212802373601578,
      "learning_rate": 1.2487355303920817e-06,
      "loss": 0.1186,
      "step": 26633
    },
    {
      "epoch": 0.7769998249606161,
      "grad_norm": 0.9452085012954069,
      "learning_rate": 1.2484231968405053e-06,
      "loss": 0.1618,
      "step": 26634
    },
    {
      "epoch": 0.7770289981912597,
      "grad_norm": 1.0732195217586151,
      "learning_rate": 1.2481108967821092e-06,
      "loss": 0.1225,
      "step": 26635
    },
    {
      "epoch": 0.7770581714219033,
      "grad_norm": 1.1001290613704633,
      "learning_rate": 1.2477986302196848e-06,
      "loss": 0.0999,
      "step": 26636
    },
    {
      "epoch": 0.7770873446525468,
      "grad_norm": 0.8535470565199292,
      "learning_rate": 1.2474863971560176e-06,
      "loss": 0.1289,
      "step": 26637
    },
    {
      "epoch": 0.7771165178831904,
      "grad_norm": 1.0052434803438746,
      "learning_rate": 1.2471741975938971e-06,
      "loss": 0.1245,
      "step": 26638
    },
    {
      "epoch": 0.7771456911138339,
      "grad_norm": 0.9707925426337226,
      "learning_rate": 1.2468620315361097e-06,
      "loss": 0.1126,
      "step": 26639
    },
    {
      "epoch": 0.7771748643444775,
      "grad_norm": 1.0776174456302299,
      "learning_rate": 1.2465498989854403e-06,
      "loss": 0.0935,
      "step": 26640
    },
    {
      "epoch": 0.777204037575121,
      "grad_norm": 0.849047428327794,
      "learning_rate": 1.2462377999446772e-06,
      "loss": 0.1149,
      "step": 26641
    },
    {
      "epoch": 0.7772332108057647,
      "grad_norm": 0.880854927069741,
      "learning_rate": 1.2459257344166093e-06,
      "loss": 0.1347,
      "step": 26642
    },
    {
      "epoch": 0.7772623840364082,
      "grad_norm": 0.7519132175018172,
      "learning_rate": 1.2456137024040194e-06,
      "loss": 0.0963,
      "step": 26643
    },
    {
      "epoch": 0.7772915572670518,
      "grad_norm": 1.1795045369126658,
      "learning_rate": 1.2453017039096932e-06,
      "loss": 0.1205,
      "step": 26644
    },
    {
      "epoch": 0.7773207304976953,
      "grad_norm": 1.0666707961461077,
      "learning_rate": 1.244989738936418e-06,
      "loss": 0.1335,
      "step": 26645
    },
    {
      "epoch": 0.7773499037283389,
      "grad_norm": 0.7465684775842973,
      "learning_rate": 1.2446778074869787e-06,
      "loss": 0.1022,
      "step": 26646
    },
    {
      "epoch": 0.7773790769589825,
      "grad_norm": 0.9245849369293081,
      "learning_rate": 1.244365909564156e-06,
      "loss": 0.1219,
      "step": 26647
    },
    {
      "epoch": 0.777408250189626,
      "grad_norm": 0.8194364920016455,
      "learning_rate": 1.2440540451707412e-06,
      "loss": 0.1095,
      "step": 26648
    },
    {
      "epoch": 0.7774374234202696,
      "grad_norm": 1.4730538792742582,
      "learning_rate": 1.2437422143095146e-06,
      "loss": 0.1159,
      "step": 26649
    },
    {
      "epoch": 0.7774665966509131,
      "grad_norm": 0.9654301351932375,
      "learning_rate": 1.24343041698326e-06,
      "loss": 0.1004,
      "step": 26650
    },
    {
      "epoch": 0.7774957698815567,
      "grad_norm": 1.0609500260812186,
      "learning_rate": 1.2431186531947632e-06,
      "loss": 0.1005,
      "step": 26651
    },
    {
      "epoch": 0.7775249431122002,
      "grad_norm": 0.9494782063941555,
      "learning_rate": 1.2428069229468065e-06,
      "loss": 0.1039,
      "step": 26652
    },
    {
      "epoch": 0.7775541163428438,
      "grad_norm": 0.6299200428661222,
      "learning_rate": 1.2424952262421708e-06,
      "loss": 0.0927,
      "step": 26653
    },
    {
      "epoch": 0.7775832895734873,
      "grad_norm": 0.9646335130164041,
      "learning_rate": 1.242183563083641e-06,
      "loss": 0.1024,
      "step": 26654
    },
    {
      "epoch": 0.7776124628041309,
      "grad_norm": 0.8931292920192913,
      "learning_rate": 1.2418719334740003e-06,
      "loss": 0.1122,
      "step": 26655
    },
    {
      "epoch": 0.7776416360347745,
      "grad_norm": 1.0655031520186276,
      "learning_rate": 1.24156033741603e-06,
      "loss": 0.1009,
      "step": 26656
    },
    {
      "epoch": 0.7776708092654181,
      "grad_norm": 1.021857634597553,
      "learning_rate": 1.2412487749125107e-06,
      "loss": 0.1024,
      "step": 26657
    },
    {
      "epoch": 0.7776999824960616,
      "grad_norm": 0.8575078998222521,
      "learning_rate": 1.240937245966226e-06,
      "loss": 0.124,
      "step": 26658
    },
    {
      "epoch": 0.7777291557267052,
      "grad_norm": 0.9227808571683745,
      "learning_rate": 1.2406257505799553e-06,
      "loss": 0.1243,
      "step": 26659
    },
    {
      "epoch": 0.7777583289573488,
      "grad_norm": 1.116672167410054,
      "learning_rate": 1.24031428875648e-06,
      "loss": 0.1085,
      "step": 26660
    },
    {
      "epoch": 0.7777875021879923,
      "grad_norm": 0.9178566782027545,
      "learning_rate": 1.240002860498583e-06,
      "loss": 0.1331,
      "step": 26661
    },
    {
      "epoch": 0.7778166754186359,
      "grad_norm": 0.8968965306034115,
      "learning_rate": 1.2396914658090425e-06,
      "loss": 0.103,
      "step": 26662
    },
    {
      "epoch": 0.7778458486492794,
      "grad_norm": 0.9028466221955137,
      "learning_rate": 1.2393801046906378e-06,
      "loss": 0.106,
      "step": 26663
    },
    {
      "epoch": 0.777875021879923,
      "grad_norm": 0.8946521624877845,
      "learning_rate": 1.2390687771461514e-06,
      "loss": 0.102,
      "step": 26664
    },
    {
      "epoch": 0.7779041951105665,
      "grad_norm": 0.8069674068638788,
      "learning_rate": 1.2387574831783594e-06,
      "loss": 0.1271,
      "step": 26665
    },
    {
      "epoch": 0.7779333683412101,
      "grad_norm": 0.8904724566033538,
      "learning_rate": 1.2384462227900446e-06,
      "loss": 0.1321,
      "step": 26666
    },
    {
      "epoch": 0.7779625415718536,
      "grad_norm": 0.8145129592069313,
      "learning_rate": 1.2381349959839817e-06,
      "loss": 0.1135,
      "step": 26667
    },
    {
      "epoch": 0.7779917148024972,
      "grad_norm": 0.9192763556875785,
      "learning_rate": 1.2378238027629535e-06,
      "loss": 0.1009,
      "step": 26668
    },
    {
      "epoch": 0.7780208880331408,
      "grad_norm": 1.0417557445471464,
      "learning_rate": 1.2375126431297363e-06,
      "loss": 0.1238,
      "step": 26669
    },
    {
      "epoch": 0.7780500612637844,
      "grad_norm": 0.7742885384288763,
      "learning_rate": 1.2372015170871066e-06,
      "loss": 0.112,
      "step": 26670
    },
    {
      "epoch": 0.778079234494428,
      "grad_norm": 0.9630538149212657,
      "learning_rate": 1.2368904246378433e-06,
      "loss": 0.1024,
      "step": 26671
    },
    {
      "epoch": 0.7781084077250715,
      "grad_norm": 1.1272673744241541,
      "learning_rate": 1.2365793657847258e-06,
      "loss": 0.1315,
      "step": 26672
    },
    {
      "epoch": 0.7781375809557151,
      "grad_norm": 0.9593502375094775,
      "learning_rate": 1.2362683405305288e-06,
      "loss": 0.1067,
      "step": 26673
    },
    {
      "epoch": 0.7781667541863586,
      "grad_norm": 1.250199771377887,
      "learning_rate": 1.2359573488780286e-06,
      "loss": 0.1238,
      "step": 26674
    },
    {
      "epoch": 0.7781959274170022,
      "grad_norm": 1.1218438201352507,
      "learning_rate": 1.2356463908300038e-06,
      "loss": 0.119,
      "step": 26675
    },
    {
      "epoch": 0.7782251006476457,
      "grad_norm": 1.0016220520139367,
      "learning_rate": 1.2353354663892292e-06,
      "loss": 0.1054,
      "step": 26676
    },
    {
      "epoch": 0.7782542738782893,
      "grad_norm": 1.1907167595340427,
      "learning_rate": 1.2350245755584784e-06,
      "loss": 0.1153,
      "step": 26677
    },
    {
      "epoch": 0.7782834471089328,
      "grad_norm": 0.7287298020032715,
      "learning_rate": 1.2347137183405322e-06,
      "loss": 0.0937,
      "step": 26678
    },
    {
      "epoch": 0.7783126203395764,
      "grad_norm": 0.745480931902787,
      "learning_rate": 1.234402894738163e-06,
      "loss": 0.1019,
      "step": 26679
    },
    {
      "epoch": 0.7783417935702199,
      "grad_norm": 1.0994987771951368,
      "learning_rate": 1.2340921047541443e-06,
      "loss": 0.1319,
      "step": 26680
    },
    {
      "epoch": 0.7783709668008635,
      "grad_norm": 1.0285183921062595,
      "learning_rate": 1.2337813483912537e-06,
      "loss": 0.1156,
      "step": 26681
    },
    {
      "epoch": 0.778400140031507,
      "grad_norm": 0.8084195105996703,
      "learning_rate": 1.2334706256522645e-06,
      "loss": 0.1196,
      "step": 26682
    },
    {
      "epoch": 0.7784293132621507,
      "grad_norm": 0.9120359785443655,
      "learning_rate": 1.2331599365399488e-06,
      "loss": 0.1032,
      "step": 26683
    },
    {
      "epoch": 0.7784584864927943,
      "grad_norm": 0.6817428343949745,
      "learning_rate": 1.232849281057082e-06,
      "loss": 0.1221,
      "step": 26684
    },
    {
      "epoch": 0.7784876597234378,
      "grad_norm": 1.0225743706698278,
      "learning_rate": 1.2325386592064387e-06,
      "loss": 0.1441,
      "step": 26685
    },
    {
      "epoch": 0.7785168329540814,
      "grad_norm": 1.0009657983376468,
      "learning_rate": 1.2322280709907914e-06,
      "loss": 0.1362,
      "step": 26686
    },
    {
      "epoch": 0.7785460061847249,
      "grad_norm": 0.7195015225001173,
      "learning_rate": 1.2319175164129104e-06,
      "loss": 0.112,
      "step": 26687
    },
    {
      "epoch": 0.7785751794153685,
      "grad_norm": 1.0028875804361255,
      "learning_rate": 1.2316069954755722e-06,
      "loss": 0.1098,
      "step": 26688
    },
    {
      "epoch": 0.778604352646012,
      "grad_norm": 1.000659052366997,
      "learning_rate": 1.2312965081815454e-06,
      "loss": 0.1161,
      "step": 26689
    },
    {
      "epoch": 0.7786335258766556,
      "grad_norm": 0.7514182524851203,
      "learning_rate": 1.2309860545336038e-06,
      "loss": 0.1108,
      "step": 26690
    },
    {
      "epoch": 0.7786626991072991,
      "grad_norm": 0.9129436053352685,
      "learning_rate": 1.2306756345345206e-06,
      "loss": 0.1346,
      "step": 26691
    },
    {
      "epoch": 0.7786918723379427,
      "grad_norm": 0.9784802357904911,
      "learning_rate": 1.2303652481870654e-06,
      "loss": 0.0991,
      "step": 26692
    },
    {
      "epoch": 0.7787210455685862,
      "grad_norm": 0.8306120165879555,
      "learning_rate": 1.2300548954940079e-06,
      "loss": 0.1103,
      "step": 26693
    },
    {
      "epoch": 0.7787502187992298,
      "grad_norm": 0.8554702064026037,
      "learning_rate": 1.2297445764581218e-06,
      "loss": 0.1405,
      "step": 26694
    },
    {
      "epoch": 0.7787793920298733,
      "grad_norm": 0.8924164994352132,
      "learning_rate": 1.2294342910821743e-06,
      "loss": 0.1205,
      "step": 26695
    },
    {
      "epoch": 0.778808565260517,
      "grad_norm": 1.0941959692835694,
      "learning_rate": 1.2291240393689397e-06,
      "loss": 0.0954,
      "step": 26696
    },
    {
      "epoch": 0.7788377384911606,
      "grad_norm": 0.8278228119703622,
      "learning_rate": 1.228813821321183e-06,
      "loss": 0.1283,
      "step": 26697
    },
    {
      "epoch": 0.7788669117218041,
      "grad_norm": 0.7989796401063589,
      "learning_rate": 1.2285036369416785e-06,
      "loss": 0.1158,
      "step": 26698
    },
    {
      "epoch": 0.7788960849524477,
      "grad_norm": 0.8799667853157728,
      "learning_rate": 1.2281934862331929e-06,
      "loss": 0.1016,
      "step": 26699
    },
    {
      "epoch": 0.7789252581830912,
      "grad_norm": 0.8323693952223855,
      "learning_rate": 1.2278833691984938e-06,
      "loss": 0.1212,
      "step": 26700
    },
    {
      "epoch": 0.7789544314137348,
      "grad_norm": 0.8224931820100664,
      "learning_rate": 1.2275732858403516e-06,
      "loss": 0.1268,
      "step": 26701
    },
    {
      "epoch": 0.7789836046443783,
      "grad_norm": 1.0095870012942598,
      "learning_rate": 1.227263236161536e-06,
      "loss": 0.118,
      "step": 26702
    },
    {
      "epoch": 0.7790127778750219,
      "grad_norm": 0.9547925257292422,
      "learning_rate": 1.2269532201648138e-06,
      "loss": 0.1267,
      "step": 26703
    },
    {
      "epoch": 0.7790419511056654,
      "grad_norm": 0.8814984158576729,
      "learning_rate": 1.2266432378529515e-06,
      "loss": 0.1079,
      "step": 26704
    },
    {
      "epoch": 0.779071124336309,
      "grad_norm": 0.6721525574171244,
      "learning_rate": 1.2263332892287183e-06,
      "loss": 0.1031,
      "step": 26705
    },
    {
      "epoch": 0.7791002975669525,
      "grad_norm": 0.8500720477022115,
      "learning_rate": 1.2260233742948796e-06,
      "loss": 0.1099,
      "step": 26706
    },
    {
      "epoch": 0.7791294707975961,
      "grad_norm": 0.8083825183320806,
      "learning_rate": 1.225713493054203e-06,
      "loss": 0.0952,
      "step": 26707
    },
    {
      "epoch": 0.7791586440282396,
      "grad_norm": 0.8482078588101409,
      "learning_rate": 1.225403645509457e-06,
      "loss": 0.1405,
      "step": 26708
    },
    {
      "epoch": 0.7791878172588832,
      "grad_norm": 0.8291330958677605,
      "learning_rate": 1.2250938316634058e-06,
      "loss": 0.1131,
      "step": 26709
    },
    {
      "epoch": 0.7792169904895269,
      "grad_norm": 0.749058899827566,
      "learning_rate": 1.2247840515188148e-06,
      "loss": 0.1108,
      "step": 26710
    },
    {
      "epoch": 0.7792461637201704,
      "grad_norm": 0.6851845760589484,
      "learning_rate": 1.224474305078452e-06,
      "loss": 0.1075,
      "step": 26711
    },
    {
      "epoch": 0.779275336950814,
      "grad_norm": 0.8155896657226269,
      "learning_rate": 1.2241645923450795e-06,
      "loss": 0.117,
      "step": 26712
    },
    {
      "epoch": 0.7793045101814575,
      "grad_norm": 0.740677544101561,
      "learning_rate": 1.2238549133214656e-06,
      "loss": 0.1254,
      "step": 26713
    },
    {
      "epoch": 0.7793336834121011,
      "grad_norm": 0.8438046602980243,
      "learning_rate": 1.2235452680103727e-06,
      "loss": 0.1072,
      "step": 26714
    },
    {
      "epoch": 0.7793628566427446,
      "grad_norm": 0.8325713259705939,
      "learning_rate": 1.2232356564145669e-06,
      "loss": 0.1098,
      "step": 26715
    },
    {
      "epoch": 0.7793920298733882,
      "grad_norm": 0.9077832510116061,
      "learning_rate": 1.222926078536812e-06,
      "loss": 0.0973,
      "step": 26716
    },
    {
      "epoch": 0.7794212031040317,
      "grad_norm": 0.9130288259711684,
      "learning_rate": 1.2226165343798695e-06,
      "loss": 0.1291,
      "step": 26717
    },
    {
      "epoch": 0.7794503763346753,
      "grad_norm": 0.9629574235455058,
      "learning_rate": 1.2223070239465056e-06,
      "loss": 0.116,
      "step": 26718
    },
    {
      "epoch": 0.7794795495653188,
      "grad_norm": 0.9530366632288055,
      "learning_rate": 1.2219975472394835e-06,
      "loss": 0.1058,
      "step": 26719
    },
    {
      "epoch": 0.7795087227959624,
      "grad_norm": 0.8913347652453288,
      "learning_rate": 1.2216881042615648e-06,
      "loss": 0.1365,
      "step": 26720
    },
    {
      "epoch": 0.779537896026606,
      "grad_norm": 0.8646455633136173,
      "learning_rate": 1.2213786950155132e-06,
      "loss": 0.0949,
      "step": 26721
    },
    {
      "epoch": 0.7795670692572495,
      "grad_norm": 0.8838438486528315,
      "learning_rate": 1.2210693195040912e-06,
      "loss": 0.1227,
      "step": 26722
    },
    {
      "epoch": 0.779596242487893,
      "grad_norm": 0.794708762125285,
      "learning_rate": 1.2207599777300588e-06,
      "loss": 0.127,
      "step": 26723
    },
    {
      "epoch": 0.7796254157185367,
      "grad_norm": 0.8809471127235253,
      "learning_rate": 1.2204506696961788e-06,
      "loss": 0.1344,
      "step": 26724
    },
    {
      "epoch": 0.7796545889491803,
      "grad_norm": 0.7929277943310479,
      "learning_rate": 1.220141395405215e-06,
      "loss": 0.1117,
      "step": 26725
    },
    {
      "epoch": 0.7796837621798238,
      "grad_norm": 0.7012512896546673,
      "learning_rate": 1.2198321548599258e-06,
      "loss": 0.0999,
      "step": 26726
    },
    {
      "epoch": 0.7797129354104674,
      "grad_norm": 0.7876964499431774,
      "learning_rate": 1.2195229480630715e-06,
      "loss": 0.1143,
      "step": 26727
    },
    {
      "epoch": 0.7797421086411109,
      "grad_norm": 0.9069026507711498,
      "learning_rate": 1.2192137750174154e-06,
      "loss": 0.1247,
      "step": 26728
    },
    {
      "epoch": 0.7797712818717545,
      "grad_norm": 0.7822523918268156,
      "learning_rate": 1.218904635725716e-06,
      "loss": 0.1205,
      "step": 26729
    },
    {
      "epoch": 0.779800455102398,
      "grad_norm": 0.9584496067290577,
      "learning_rate": 1.218595530190732e-06,
      "loss": 0.1125,
      "step": 26730
    },
    {
      "epoch": 0.7798296283330416,
      "grad_norm": 1.5316248666339345,
      "learning_rate": 1.218286458415225e-06,
      "loss": 0.1253,
      "step": 26731
    },
    {
      "epoch": 0.7798588015636851,
      "grad_norm": 0.8843190043115959,
      "learning_rate": 1.2179774204019545e-06,
      "loss": 0.1111,
      "step": 26732
    },
    {
      "epoch": 0.7798879747943287,
      "grad_norm": 0.7014560902717982,
      "learning_rate": 1.2176684161536789e-06,
      "loss": 0.1074,
      "step": 26733
    },
    {
      "epoch": 0.7799171480249723,
      "grad_norm": 0.8405700625956761,
      "learning_rate": 1.2173594456731552e-06,
      "loss": 0.1365,
      "step": 26734
    },
    {
      "epoch": 0.7799463212556158,
      "grad_norm": 0.7313345740138453,
      "learning_rate": 1.217050508963145e-06,
      "loss": 0.1196,
      "step": 26735
    },
    {
      "epoch": 0.7799754944862594,
      "grad_norm": 0.9984755855923472,
      "learning_rate": 1.2167416060264032e-06,
      "loss": 0.1107,
      "step": 26736
    },
    {
      "epoch": 0.780004667716903,
      "grad_norm": 0.8165395911930621,
      "learning_rate": 1.2164327368656891e-06,
      "loss": 0.1034,
      "step": 26737
    },
    {
      "epoch": 0.7800338409475466,
      "grad_norm": 0.8972382791560111,
      "learning_rate": 1.2161239014837622e-06,
      "loss": 0.1332,
      "step": 26738
    },
    {
      "epoch": 0.7800630141781901,
      "grad_norm": 0.8449132336743154,
      "learning_rate": 1.215815099883378e-06,
      "loss": 0.1184,
      "step": 26739
    },
    {
      "epoch": 0.7800921874088337,
      "grad_norm": 0.901871165124545,
      "learning_rate": 1.215506332067291e-06,
      "loss": 0.0959,
      "step": 26740
    },
    {
      "epoch": 0.7801213606394772,
      "grad_norm": 0.8205331596332643,
      "learning_rate": 1.215197598038262e-06,
      "loss": 0.1495,
      "step": 26741
    },
    {
      "epoch": 0.7801505338701208,
      "grad_norm": 0.8982133362430896,
      "learning_rate": 1.2148888977990435e-06,
      "loss": 0.1186,
      "step": 26742
    },
    {
      "epoch": 0.7801797071007643,
      "grad_norm": 0.8739905942822849,
      "learning_rate": 1.2145802313523953e-06,
      "loss": 0.1084,
      "step": 26743
    },
    {
      "epoch": 0.7802088803314079,
      "grad_norm": 0.9793410854378775,
      "learning_rate": 1.2142715987010695e-06,
      "loss": 0.1272,
      "step": 26744
    },
    {
      "epoch": 0.7802380535620514,
      "grad_norm": 0.7237864976502513,
      "learning_rate": 1.2139629998478242e-06,
      "loss": 0.1137,
      "step": 26745
    },
    {
      "epoch": 0.780267226792695,
      "grad_norm": 0.8582898059022251,
      "learning_rate": 1.2136544347954137e-06,
      "loss": 0.1064,
      "step": 26746
    },
    {
      "epoch": 0.7802964000233386,
      "grad_norm": 0.8673267023193789,
      "learning_rate": 1.213345903546591e-06,
      "loss": 0.1088,
      "step": 26747
    },
    {
      "epoch": 0.7803255732539821,
      "grad_norm": 0.8770160523231496,
      "learning_rate": 1.2130374061041129e-06,
      "loss": 0.1124,
      "step": 26748
    },
    {
      "epoch": 0.7803547464846257,
      "grad_norm": 0.9360533571696744,
      "learning_rate": 1.2127289424707333e-06,
      "loss": 0.1272,
      "step": 26749
    },
    {
      "epoch": 0.7803839197152692,
      "grad_norm": 0.9511495848014366,
      "learning_rate": 1.2124205126492045e-06,
      "loss": 0.1284,
      "step": 26750
    },
    {
      "epoch": 0.7804130929459129,
      "grad_norm": 0.6983904983574253,
      "learning_rate": 1.2121121166422828e-06,
      "loss": 0.1248,
      "step": 26751
    },
    {
      "epoch": 0.7804422661765564,
      "grad_norm": 0.80836694000979,
      "learning_rate": 1.2118037544527195e-06,
      "loss": 0.1332,
      "step": 26752
    },
    {
      "epoch": 0.7804714394072,
      "grad_norm": 0.7763670996189822,
      "learning_rate": 1.2114954260832668e-06,
      "loss": 0.1042,
      "step": 26753
    },
    {
      "epoch": 0.7805006126378435,
      "grad_norm": 0.7963575504478867,
      "learning_rate": 1.2111871315366785e-06,
      "loss": 0.1133,
      "step": 26754
    },
    {
      "epoch": 0.7805297858684871,
      "grad_norm": 0.9381122350973579,
      "learning_rate": 1.2108788708157087e-06,
      "loss": 0.1133,
      "step": 26755
    },
    {
      "epoch": 0.7805589590991306,
      "grad_norm": 1.0777249917953655,
      "learning_rate": 1.2105706439231073e-06,
      "loss": 0.1191,
      "step": 26756
    },
    {
      "epoch": 0.7805881323297742,
      "grad_norm": 0.9266597424002107,
      "learning_rate": 1.2102624508616257e-06,
      "loss": 0.1087,
      "step": 26757
    },
    {
      "epoch": 0.7806173055604178,
      "grad_norm": 0.9842706032528625,
      "learning_rate": 1.2099542916340172e-06,
      "loss": 0.136,
      "step": 26758
    },
    {
      "epoch": 0.7806464787910613,
      "grad_norm": 0.9169158453911002,
      "learning_rate": 1.209646166243032e-06,
      "loss": 0.133,
      "step": 26759
    },
    {
      "epoch": 0.7806756520217049,
      "grad_norm": 0.7454080072920237,
      "learning_rate": 1.2093380746914201e-06,
      "loss": 0.0961,
      "step": 26760
    },
    {
      "epoch": 0.7807048252523484,
      "grad_norm": 0.9954352719939636,
      "learning_rate": 1.2090300169819325e-06,
      "loss": 0.1224,
      "step": 26761
    },
    {
      "epoch": 0.780733998482992,
      "grad_norm": 0.6148071655149786,
      "learning_rate": 1.2087219931173217e-06,
      "loss": 0.1043,
      "step": 26762
    },
    {
      "epoch": 0.7807631717136355,
      "grad_norm": 1.0154074792198873,
      "learning_rate": 1.2084140031003355e-06,
      "loss": 0.1218,
      "step": 26763
    },
    {
      "epoch": 0.7807923449442792,
      "grad_norm": 0.9427767581583829,
      "learning_rate": 1.208106046933723e-06,
      "loss": 0.1107,
      "step": 26764
    },
    {
      "epoch": 0.7808215181749227,
      "grad_norm": 0.9415031648731707,
      "learning_rate": 1.2077981246202353e-06,
      "loss": 0.1452,
      "step": 26765
    },
    {
      "epoch": 0.7808506914055663,
      "grad_norm": 0.8756557239714715,
      "learning_rate": 1.2074902361626196e-06,
      "loss": 0.1227,
      "step": 26766
    },
    {
      "epoch": 0.7808798646362098,
      "grad_norm": 0.6957932399657003,
      "learning_rate": 1.2071823815636257e-06,
      "loss": 0.0937,
      "step": 26767
    },
    {
      "epoch": 0.7809090378668534,
      "grad_norm": 0.883933179139988,
      "learning_rate": 1.2068745608260035e-06,
      "loss": 0.1041,
      "step": 26768
    },
    {
      "epoch": 0.780938211097497,
      "grad_norm": 0.7340686565749424,
      "learning_rate": 1.2065667739525e-06,
      "loss": 0.1299,
      "step": 26769
    },
    {
      "epoch": 0.7809673843281405,
      "grad_norm": 1.0386307950050804,
      "learning_rate": 1.2062590209458614e-06,
      "loss": 0.109,
      "step": 26770
    },
    {
      "epoch": 0.780996557558784,
      "grad_norm": 0.8605718743394106,
      "learning_rate": 1.2059513018088375e-06,
      "loss": 0.1195,
      "step": 26771
    },
    {
      "epoch": 0.7810257307894276,
      "grad_norm": 0.7351565609045481,
      "learning_rate": 1.2056436165441738e-06,
      "loss": 0.1132,
      "step": 26772
    },
    {
      "epoch": 0.7810549040200712,
      "grad_norm": 0.804844919474205,
      "learning_rate": 1.2053359651546193e-06,
      "loss": 0.144,
      "step": 26773
    },
    {
      "epoch": 0.7810840772507147,
      "grad_norm": 0.975276817856636,
      "learning_rate": 1.2050283476429176e-06,
      "loss": 0.1119,
      "step": 26774
    },
    {
      "epoch": 0.7811132504813583,
      "grad_norm": 0.6950365481214305,
      "learning_rate": 1.2047207640118187e-06,
      "loss": 0.0903,
      "step": 26775
    },
    {
      "epoch": 0.7811424237120018,
      "grad_norm": 0.7311888139797804,
      "learning_rate": 1.204413214264067e-06,
      "loss": 0.1122,
      "step": 26776
    },
    {
      "epoch": 0.7811715969426454,
      "grad_norm": 0.7739204836649747,
      "learning_rate": 1.2041056984024063e-06,
      "loss": 0.1098,
      "step": 26777
    },
    {
      "epoch": 0.781200770173289,
      "grad_norm": 0.7795367277900777,
      "learning_rate": 1.2037982164295837e-06,
      "loss": 0.1089,
      "step": 26778
    },
    {
      "epoch": 0.7812299434039326,
      "grad_norm": 0.7290185208829019,
      "learning_rate": 1.203490768348346e-06,
      "loss": 0.1092,
      "step": 26779
    },
    {
      "epoch": 0.7812591166345761,
      "grad_norm": 0.8874276508149599,
      "learning_rate": 1.203183354161435e-06,
      "loss": 0.1412,
      "step": 26780
    },
    {
      "epoch": 0.7812882898652197,
      "grad_norm": 0.7654875723898305,
      "learning_rate": 1.2028759738715983e-06,
      "loss": 0.1143,
      "step": 26781
    },
    {
      "epoch": 0.7813174630958633,
      "grad_norm": 0.967778599241166,
      "learning_rate": 1.2025686274815784e-06,
      "loss": 0.1149,
      "step": 26782
    },
    {
      "epoch": 0.7813466363265068,
      "grad_norm": 0.6716975594959714,
      "learning_rate": 1.2022613149941176e-06,
      "loss": 0.1046,
      "step": 26783
    },
    {
      "epoch": 0.7813758095571504,
      "grad_norm": 0.7881468291688234,
      "learning_rate": 1.2019540364119608e-06,
      "loss": 0.1139,
      "step": 26784
    },
    {
      "epoch": 0.7814049827877939,
      "grad_norm": 0.7793891505249078,
      "learning_rate": 1.2016467917378539e-06,
      "loss": 0.1383,
      "step": 26785
    },
    {
      "epoch": 0.7814341560184375,
      "grad_norm": 0.9393049255640398,
      "learning_rate": 1.201339580974537e-06,
      "loss": 0.1384,
      "step": 26786
    },
    {
      "epoch": 0.781463329249081,
      "grad_norm": 0.773114171474124,
      "learning_rate": 1.201032404124753e-06,
      "loss": 0.1214,
      "step": 26787
    },
    {
      "epoch": 0.7814925024797246,
      "grad_norm": 0.9296450342613287,
      "learning_rate": 1.2007252611912457e-06,
      "loss": 0.1116,
      "step": 26788
    },
    {
      "epoch": 0.7815216757103681,
      "grad_norm": 0.8073457606369954,
      "learning_rate": 1.200418152176756e-06,
      "loss": 0.1066,
      "step": 26789
    },
    {
      "epoch": 0.7815508489410117,
      "grad_norm": 1.1966057101530976,
      "learning_rate": 1.2001110770840253e-06,
      "loss": 0.1189,
      "step": 26790
    },
    {
      "epoch": 0.7815800221716553,
      "grad_norm": 0.9165946645517647,
      "learning_rate": 1.1998040359157954e-06,
      "loss": 0.1273,
      "step": 26791
    },
    {
      "epoch": 0.7816091954022989,
      "grad_norm": 1.0271750745608197,
      "learning_rate": 1.1994970286748093e-06,
      "loss": 0.1258,
      "step": 26792
    },
    {
      "epoch": 0.7816383686329424,
      "grad_norm": 0.6935406299393101,
      "learning_rate": 1.1991900553638065e-06,
      "loss": 0.1071,
      "step": 26793
    },
    {
      "epoch": 0.781667541863586,
      "grad_norm": 0.8348271471994329,
      "learning_rate": 1.1988831159855257e-06,
      "loss": 0.1212,
      "step": 26794
    },
    {
      "epoch": 0.7816967150942296,
      "grad_norm": 0.7935682707299909,
      "learning_rate": 1.1985762105427107e-06,
      "loss": 0.1183,
      "step": 26795
    },
    {
      "epoch": 0.7817258883248731,
      "grad_norm": 0.8346079487274879,
      "learning_rate": 1.198269339038099e-06,
      "loss": 0.1038,
      "step": 26796
    },
    {
      "epoch": 0.7817550615555167,
      "grad_norm": 0.8994901482859692,
      "learning_rate": 1.1979625014744306e-06,
      "loss": 0.115,
      "step": 26797
    },
    {
      "epoch": 0.7817842347861602,
      "grad_norm": 0.7649070281646475,
      "learning_rate": 1.1976556978544467e-06,
      "loss": 0.0962,
      "step": 26798
    },
    {
      "epoch": 0.7818134080168038,
      "grad_norm": 0.9454515925374907,
      "learning_rate": 1.1973489281808854e-06,
      "loss": 0.1365,
      "step": 26799
    },
    {
      "epoch": 0.7818425812474473,
      "grad_norm": 0.8893040178251703,
      "learning_rate": 1.1970421924564835e-06,
      "loss": 0.1119,
      "step": 26800
    },
    {
      "epoch": 0.7818717544780909,
      "grad_norm": 0.6299941910648025,
      "learning_rate": 1.1967354906839824e-06,
      "loss": 0.1032,
      "step": 26801
    },
    {
      "epoch": 0.7819009277087344,
      "grad_norm": 0.7194752175170329,
      "learning_rate": 1.1964288228661175e-06,
      "loss": 0.1005,
      "step": 26802
    },
    {
      "epoch": 0.781930100939378,
      "grad_norm": 0.8269650255837503,
      "learning_rate": 1.1961221890056296e-06,
      "loss": 0.115,
      "step": 26803
    },
    {
      "epoch": 0.7819592741700215,
      "grad_norm": 0.6900089824003621,
      "learning_rate": 1.1958155891052531e-06,
      "loss": 0.0987,
      "step": 26804
    },
    {
      "epoch": 0.7819884474006652,
      "grad_norm": 0.7664680136418915,
      "learning_rate": 1.1955090231677285e-06,
      "loss": 0.1228,
      "step": 26805
    },
    {
      "epoch": 0.7820176206313088,
      "grad_norm": 0.8842127872208614,
      "learning_rate": 1.195202491195791e-06,
      "loss": 0.1058,
      "step": 26806
    },
    {
      "epoch": 0.7820467938619523,
      "grad_norm": 0.9171726720943688,
      "learning_rate": 1.194895993192176e-06,
      "loss": 0.1179,
      "step": 26807
    },
    {
      "epoch": 0.7820759670925959,
      "grad_norm": 0.7028869612526993,
      "learning_rate": 1.1945895291596217e-06,
      "loss": 0.1299,
      "step": 26808
    },
    {
      "epoch": 0.7821051403232394,
      "grad_norm": 0.7584500685507514,
      "learning_rate": 1.1942830991008646e-06,
      "loss": 0.1273,
      "step": 26809
    },
    {
      "epoch": 0.782134313553883,
      "grad_norm": 0.870532507451866,
      "learning_rate": 1.1939767030186377e-06,
      "loss": 0.1283,
      "step": 26810
    },
    {
      "epoch": 0.7821634867845265,
      "grad_norm": 0.8621506605643977,
      "learning_rate": 1.1936703409156802e-06,
      "loss": 0.1084,
      "step": 26811
    },
    {
      "epoch": 0.7821926600151701,
      "grad_norm": 0.6880970855532711,
      "learning_rate": 1.1933640127947255e-06,
      "loss": 0.1067,
      "step": 26812
    },
    {
      "epoch": 0.7822218332458136,
      "grad_norm": 0.9526159172576324,
      "learning_rate": 1.1930577186585063e-06,
      "loss": 0.1329,
      "step": 26813
    },
    {
      "epoch": 0.7822510064764572,
      "grad_norm": 0.7638848398201711,
      "learning_rate": 1.1927514585097594e-06,
      "loss": 0.0982,
      "step": 26814
    },
    {
      "epoch": 0.7822801797071007,
      "grad_norm": 0.8123166818427471,
      "learning_rate": 1.1924452323512192e-06,
      "loss": 0.1011,
      "step": 26815
    },
    {
      "epoch": 0.7823093529377443,
      "grad_norm": 0.8692443813015546,
      "learning_rate": 1.1921390401856198e-06,
      "loss": 0.1246,
      "step": 26816
    },
    {
      "epoch": 0.7823385261683878,
      "grad_norm": 0.8254263945250957,
      "learning_rate": 1.1918328820156928e-06,
      "loss": 0.104,
      "step": 26817
    },
    {
      "epoch": 0.7823676993990315,
      "grad_norm": 1.043279921974565,
      "learning_rate": 1.1915267578441737e-06,
      "loss": 0.0986,
      "step": 26818
    },
    {
      "epoch": 0.782396872629675,
      "grad_norm": 0.8854793295232857,
      "learning_rate": 1.1912206676737942e-06,
      "loss": 0.1259,
      "step": 26819
    },
    {
      "epoch": 0.7824260458603186,
      "grad_norm": 0.7688145757132527,
      "learning_rate": 1.1909146115072866e-06,
      "loss": 0.1021,
      "step": 26820
    },
    {
      "epoch": 0.7824552190909622,
      "grad_norm": 0.6315318923356129,
      "learning_rate": 1.1906085893473835e-06,
      "loss": 0.0956,
      "step": 26821
    },
    {
      "epoch": 0.7824843923216057,
      "grad_norm": 0.855182269868678,
      "learning_rate": 1.1903026011968194e-06,
      "loss": 0.1018,
      "step": 26822
    },
    {
      "epoch": 0.7825135655522493,
      "grad_norm": 1.2437032393368883,
      "learning_rate": 1.189996647058324e-06,
      "loss": 0.1071,
      "step": 26823
    },
    {
      "epoch": 0.7825427387828928,
      "grad_norm": 0.7593396867233735,
      "learning_rate": 1.1896907269346274e-06,
      "loss": 0.1156,
      "step": 26824
    },
    {
      "epoch": 0.7825719120135364,
      "grad_norm": 0.7818691260607719,
      "learning_rate": 1.1893848408284641e-06,
      "loss": 0.0971,
      "step": 26825
    },
    {
      "epoch": 0.7826010852441799,
      "grad_norm": 0.8160192005648955,
      "learning_rate": 1.1890789887425618e-06,
      "loss": 0.1057,
      "step": 26826
    },
    {
      "epoch": 0.7826302584748235,
      "grad_norm": 0.7062585494868514,
      "learning_rate": 1.188773170679653e-06,
      "loss": 0.0952,
      "step": 26827
    },
    {
      "epoch": 0.782659431705467,
      "grad_norm": 1.0048072435142412,
      "learning_rate": 1.1884673866424683e-06,
      "loss": 0.1078,
      "step": 26828
    },
    {
      "epoch": 0.7826886049361106,
      "grad_norm": 0.6853913456310932,
      "learning_rate": 1.1881616366337372e-06,
      "loss": 0.0987,
      "step": 26829
    },
    {
      "epoch": 0.7827177781667541,
      "grad_norm": 0.7420160718974594,
      "learning_rate": 1.1878559206561874e-06,
      "loss": 0.1291,
      "step": 26830
    },
    {
      "epoch": 0.7827469513973977,
      "grad_norm": 1.1803984725626182,
      "learning_rate": 1.1875502387125514e-06,
      "loss": 0.1307,
      "step": 26831
    },
    {
      "epoch": 0.7827761246280414,
      "grad_norm": 0.8828823275993483,
      "learning_rate": 1.1872445908055557e-06,
      "loss": 0.141,
      "step": 26832
    },
    {
      "epoch": 0.7828052978586849,
      "grad_norm": 1.0668592512178308,
      "learning_rate": 1.1869389769379314e-06,
      "loss": 0.1124,
      "step": 26833
    },
    {
      "epoch": 0.7828344710893285,
      "grad_norm": 0.8475742344287523,
      "learning_rate": 1.186633397112404e-06,
      "loss": 0.1389,
      "step": 26834
    },
    {
      "epoch": 0.782863644319972,
      "grad_norm": 1.0318833825760718,
      "learning_rate": 1.1863278513317046e-06,
      "loss": 0.1361,
      "step": 26835
    },
    {
      "epoch": 0.7828928175506156,
      "grad_norm": 0.8669605871058667,
      "learning_rate": 1.1860223395985598e-06,
      "loss": 0.1176,
      "step": 26836
    },
    {
      "epoch": 0.7829219907812591,
      "grad_norm": 0.700379017369269,
      "learning_rate": 1.1857168619156962e-06,
      "loss": 0.1121,
      "step": 26837
    },
    {
      "epoch": 0.7829511640119027,
      "grad_norm": 0.8243197277304135,
      "learning_rate": 1.1854114182858413e-06,
      "loss": 0.1385,
      "step": 26838
    },
    {
      "epoch": 0.7829803372425462,
      "grad_norm": 0.8155538368360573,
      "learning_rate": 1.1851060087117244e-06,
      "loss": 0.1273,
      "step": 26839
    },
    {
      "epoch": 0.7830095104731898,
      "grad_norm": 0.7557102775662806,
      "learning_rate": 1.1848006331960688e-06,
      "loss": 0.1044,
      "step": 26840
    },
    {
      "epoch": 0.7830386837038333,
      "grad_norm": 0.8165470558445905,
      "learning_rate": 1.1844952917416043e-06,
      "loss": 0.1227,
      "step": 26841
    },
    {
      "epoch": 0.7830678569344769,
      "grad_norm": 0.9378788158559712,
      "learning_rate": 1.184189984351054e-06,
      "loss": 0.1128,
      "step": 26842
    },
    {
      "epoch": 0.7830970301651204,
      "grad_norm": 0.7393726333565241,
      "learning_rate": 1.183884711027144e-06,
      "loss": 0.1074,
      "step": 26843
    },
    {
      "epoch": 0.783126203395764,
      "grad_norm": 0.7803308518649982,
      "learning_rate": 1.1835794717726e-06,
      "loss": 0.1138,
      "step": 26844
    },
    {
      "epoch": 0.7831553766264077,
      "grad_norm": 1.114408197837132,
      "learning_rate": 1.1832742665901486e-06,
      "loss": 0.1222,
      "step": 26845
    },
    {
      "epoch": 0.7831845498570512,
      "grad_norm": 0.9755866168268343,
      "learning_rate": 1.182969095482514e-06,
      "loss": 0.0992,
      "step": 26846
    },
    {
      "epoch": 0.7832137230876948,
      "grad_norm": 0.7674839105195207,
      "learning_rate": 1.1826639584524185e-06,
      "loss": 0.1193,
      "step": 26847
    },
    {
      "epoch": 0.7832428963183383,
      "grad_norm": 0.7976890375796063,
      "learning_rate": 1.1823588555025894e-06,
      "loss": 0.1328,
      "step": 26848
    },
    {
      "epoch": 0.7832720695489819,
      "grad_norm": 1.0477441113233703,
      "learning_rate": 1.182053786635749e-06,
      "loss": 0.1152,
      "step": 26849
    },
    {
      "epoch": 0.7833012427796254,
      "grad_norm": 0.9130039249914723,
      "learning_rate": 1.1817487518546194e-06,
      "loss": 0.1294,
      "step": 26850
    },
    {
      "epoch": 0.783330416010269,
      "grad_norm": 0.8111684208701135,
      "learning_rate": 1.1814437511619254e-06,
      "loss": 0.1347,
      "step": 26851
    },
    {
      "epoch": 0.7833595892409125,
      "grad_norm": 1.128438059941651,
      "learning_rate": 1.1811387845603916e-06,
      "loss": 0.122,
      "step": 26852
    },
    {
      "epoch": 0.7833887624715561,
      "grad_norm": 0.722683754825944,
      "learning_rate": 1.180833852052739e-06,
      "loss": 0.1088,
      "step": 26853
    },
    {
      "epoch": 0.7834179357021996,
      "grad_norm": 1.05479675217445,
      "learning_rate": 1.1805289536416887e-06,
      "loss": 0.1169,
      "step": 26854
    },
    {
      "epoch": 0.7834471089328432,
      "grad_norm": 0.7803310474664418,
      "learning_rate": 1.180224089329966e-06,
      "loss": 0.1138,
      "step": 26855
    },
    {
      "epoch": 0.7834762821634867,
      "grad_norm": 0.8860024298146697,
      "learning_rate": 1.1799192591202884e-06,
      "loss": 0.0925,
      "step": 26856
    },
    {
      "epoch": 0.7835054553941303,
      "grad_norm": 0.8998043125351297,
      "learning_rate": 1.1796144630153806e-06,
      "loss": 0.1147,
      "step": 26857
    },
    {
      "epoch": 0.7835346286247739,
      "grad_norm": 0.8579436010477661,
      "learning_rate": 1.1793097010179639e-06,
      "loss": 0.1173,
      "step": 26858
    },
    {
      "epoch": 0.7835638018554175,
      "grad_norm": 1.1066410641333093,
      "learning_rate": 1.179004973130758e-06,
      "loss": 0.1233,
      "step": 26859
    },
    {
      "epoch": 0.7835929750860611,
      "grad_norm": 0.8990092908336493,
      "learning_rate": 1.1787002793564822e-06,
      "loss": 0.0991,
      "step": 26860
    },
    {
      "epoch": 0.7836221483167046,
      "grad_norm": 0.9630480007906665,
      "learning_rate": 1.1783956196978595e-06,
      "loss": 0.105,
      "step": 26861
    },
    {
      "epoch": 0.7836513215473482,
      "grad_norm": 0.8582363903078812,
      "learning_rate": 1.1780909941576074e-06,
      "loss": 0.1256,
      "step": 26862
    },
    {
      "epoch": 0.7836804947779917,
      "grad_norm": 0.9205639610287715,
      "learning_rate": 1.1777864027384478e-06,
      "loss": 0.1187,
      "step": 26863
    },
    {
      "epoch": 0.7837096680086353,
      "grad_norm": 1.0210478042715165,
      "learning_rate": 1.177481845443097e-06,
      "loss": 0.1108,
      "step": 26864
    },
    {
      "epoch": 0.7837388412392788,
      "grad_norm": 0.9103747963753535,
      "learning_rate": 1.1771773222742778e-06,
      "loss": 0.1126,
      "step": 26865
    },
    {
      "epoch": 0.7837680144699224,
      "grad_norm": 1.0552119030485607,
      "learning_rate": 1.1768728332347062e-06,
      "loss": 0.0976,
      "step": 26866
    },
    {
      "epoch": 0.783797187700566,
      "grad_norm": 0.9337237343380899,
      "learning_rate": 1.1765683783271004e-06,
      "loss": 0.1007,
      "step": 26867
    },
    {
      "epoch": 0.7838263609312095,
      "grad_norm": 0.9383534581297182,
      "learning_rate": 1.176263957554179e-06,
      "loss": 0.0996,
      "step": 26868
    },
    {
      "epoch": 0.783855534161853,
      "grad_norm": 0.7949921727905045,
      "learning_rate": 1.1759595709186616e-06,
      "loss": 0.1263,
      "step": 26869
    },
    {
      "epoch": 0.7838847073924966,
      "grad_norm": 1.0558144681634218,
      "learning_rate": 1.1756552184232634e-06,
      "loss": 0.1313,
      "step": 26870
    },
    {
      "epoch": 0.7839138806231402,
      "grad_norm": 0.8564336025917788,
      "learning_rate": 1.175350900070703e-06,
      "loss": 0.1046,
      "step": 26871
    },
    {
      "epoch": 0.7839430538537838,
      "grad_norm": 1.1128294106431664,
      "learning_rate": 1.1750466158636975e-06,
      "loss": 0.1235,
      "step": 26872
    },
    {
      "epoch": 0.7839722270844274,
      "grad_norm": 0.7481349097680933,
      "learning_rate": 1.1747423658049612e-06,
      "loss": 0.1033,
      "step": 26873
    },
    {
      "epoch": 0.7840014003150709,
      "grad_norm": 1.0080161958019997,
      "learning_rate": 1.1744381498972117e-06,
      "loss": 0.1238,
      "step": 26874
    },
    {
      "epoch": 0.7840305735457145,
      "grad_norm": 0.9772100033840949,
      "learning_rate": 1.1741339681431669e-06,
      "loss": 0.1103,
      "step": 26875
    },
    {
      "epoch": 0.784059746776358,
      "grad_norm": 0.8285845957630742,
      "learning_rate": 1.17382982054554e-06,
      "loss": 0.1268,
      "step": 26876
    },
    {
      "epoch": 0.7840889200070016,
      "grad_norm": 1.0864471912142566,
      "learning_rate": 1.1735257071070466e-06,
      "loss": 0.1228,
      "step": 26877
    },
    {
      "epoch": 0.7841180932376451,
      "grad_norm": 0.8564488073734594,
      "learning_rate": 1.1732216278304032e-06,
      "loss": 0.0952,
      "step": 26878
    },
    {
      "epoch": 0.7841472664682887,
      "grad_norm": 0.8030651747634504,
      "learning_rate": 1.1729175827183232e-06,
      "loss": 0.105,
      "step": 26879
    },
    {
      "epoch": 0.7841764396989322,
      "grad_norm": 0.7667886353252452,
      "learning_rate": 1.1726135717735204e-06,
      "loss": 0.1163,
      "step": 26880
    },
    {
      "epoch": 0.7842056129295758,
      "grad_norm": 1.0480954538064446,
      "learning_rate": 1.1723095949987101e-06,
      "loss": 0.1322,
      "step": 26881
    },
    {
      "epoch": 0.7842347861602194,
      "grad_norm": 0.7230827016380355,
      "learning_rate": 1.1720056523966072e-06,
      "loss": 0.1023,
      "step": 26882
    },
    {
      "epoch": 0.7842639593908629,
      "grad_norm": 0.7217534438164802,
      "learning_rate": 1.171701743969924e-06,
      "loss": 0.1179,
      "step": 26883
    },
    {
      "epoch": 0.7842931326215065,
      "grad_norm": 0.8114078742560802,
      "learning_rate": 1.1713978697213723e-06,
      "loss": 0.1053,
      "step": 26884
    },
    {
      "epoch": 0.78432230585215,
      "grad_norm": 0.7021766512605949,
      "learning_rate": 1.1710940296536682e-06,
      "loss": 0.1154,
      "step": 26885
    },
    {
      "epoch": 0.7843514790827937,
      "grad_norm": 0.8600017977371976,
      "learning_rate": 1.1707902237695206e-06,
      "loss": 0.1204,
      "step": 26886
    },
    {
      "epoch": 0.7843806523134372,
      "grad_norm": 0.8926396098940488,
      "learning_rate": 1.1704864520716442e-06,
      "loss": 0.1081,
      "step": 26887
    },
    {
      "epoch": 0.7844098255440808,
      "grad_norm": 0.8685521401022998,
      "learning_rate": 1.170182714562752e-06,
      "loss": 0.1187,
      "step": 26888
    },
    {
      "epoch": 0.7844389987747243,
      "grad_norm": 0.8758005932407248,
      "learning_rate": 1.1698790112455538e-06,
      "loss": 0.1027,
      "step": 26889
    },
    {
      "epoch": 0.7844681720053679,
      "grad_norm": 0.8952556006070274,
      "learning_rate": 1.1695753421227606e-06,
      "loss": 0.1413,
      "step": 26890
    },
    {
      "epoch": 0.7844973452360114,
      "grad_norm": 0.8134807017722688,
      "learning_rate": 1.1692717071970844e-06,
      "loss": 0.1254,
      "step": 26891
    },
    {
      "epoch": 0.784526518466655,
      "grad_norm": 1.0174722168432515,
      "learning_rate": 1.1689681064712367e-06,
      "loss": 0.115,
      "step": 26892
    },
    {
      "epoch": 0.7845556916972986,
      "grad_norm": 0.9097203778879775,
      "learning_rate": 1.1686645399479278e-06,
      "loss": 0.1077,
      "step": 26893
    },
    {
      "epoch": 0.7845848649279421,
      "grad_norm": 0.7252747379861907,
      "learning_rate": 1.1683610076298658e-06,
      "loss": 0.0995,
      "step": 26894
    },
    {
      "epoch": 0.7846140381585857,
      "grad_norm": 2.0357071447954014,
      "learning_rate": 1.1680575095197634e-06,
      "loss": 0.1343,
      "step": 26895
    },
    {
      "epoch": 0.7846432113892292,
      "grad_norm": 1.3182450614391934,
      "learning_rate": 1.1677540456203285e-06,
      "loss": 0.1078,
      "step": 26896
    },
    {
      "epoch": 0.7846723846198728,
      "grad_norm": 0.864687794459096,
      "learning_rate": 1.167450615934268e-06,
      "loss": 0.1047,
      "step": 26897
    },
    {
      "epoch": 0.7847015578505163,
      "grad_norm": 0.847906256638758,
      "learning_rate": 1.1671472204642964e-06,
      "loss": 0.1189,
      "step": 26898
    },
    {
      "epoch": 0.78473073108116,
      "grad_norm": 0.8590265179007457,
      "learning_rate": 1.1668438592131194e-06,
      "loss": 0.1006,
      "step": 26899
    },
    {
      "epoch": 0.7847599043118035,
      "grad_norm": 1.0116670667599434,
      "learning_rate": 1.1665405321834439e-06,
      "loss": 0.1186,
      "step": 26900
    },
    {
      "epoch": 0.7847890775424471,
      "grad_norm": 0.7010234245668824,
      "learning_rate": 1.1662372393779809e-06,
      "loss": 0.1174,
      "step": 26901
    },
    {
      "epoch": 0.7848182507730906,
      "grad_norm": 0.9368354596407508,
      "learning_rate": 1.1659339807994364e-06,
      "loss": 0.1467,
      "step": 26902
    },
    {
      "epoch": 0.7848474240037342,
      "grad_norm": 0.771874980587234,
      "learning_rate": 1.165630756450517e-06,
      "loss": 0.1168,
      "step": 26903
    },
    {
      "epoch": 0.7848765972343777,
      "grad_norm": 0.7951679227400112,
      "learning_rate": 1.1653275663339308e-06,
      "loss": 0.1099,
      "step": 26904
    },
    {
      "epoch": 0.7849057704650213,
      "grad_norm": 0.8609451871037228,
      "learning_rate": 1.1650244104523862e-06,
      "loss": 0.1168,
      "step": 26905
    },
    {
      "epoch": 0.7849349436956649,
      "grad_norm": 0.9215836537814548,
      "learning_rate": 1.164721288808588e-06,
      "loss": 0.0979,
      "step": 26906
    },
    {
      "epoch": 0.7849641169263084,
      "grad_norm": 0.8879045100520424,
      "learning_rate": 1.1644182014052408e-06,
      "loss": 0.1019,
      "step": 26907
    },
    {
      "epoch": 0.784993290156952,
      "grad_norm": 0.9620173950761984,
      "learning_rate": 1.1641151482450541e-06,
      "loss": 0.1483,
      "step": 26908
    },
    {
      "epoch": 0.7850224633875955,
      "grad_norm": 0.8099233952877505,
      "learning_rate": 1.1638121293307302e-06,
      "loss": 0.0995,
      "step": 26909
    },
    {
      "epoch": 0.7850516366182391,
      "grad_norm": 0.9758814672664569,
      "learning_rate": 1.163509144664977e-06,
      "loss": 0.0979,
      "step": 26910
    },
    {
      "epoch": 0.7850808098488826,
      "grad_norm": 0.9139112724301753,
      "learning_rate": 1.1632061942504975e-06,
      "loss": 0.1155,
      "step": 26911
    },
    {
      "epoch": 0.7851099830795262,
      "grad_norm": 1.267470682177874,
      "learning_rate": 1.1629032780899978e-06,
      "loss": 0.1024,
      "step": 26912
    },
    {
      "epoch": 0.7851391563101698,
      "grad_norm": 0.9679952516979631,
      "learning_rate": 1.1626003961861821e-06,
      "loss": 0.0993,
      "step": 26913
    },
    {
      "epoch": 0.7851683295408134,
      "grad_norm": 0.7575683204419322,
      "learning_rate": 1.1622975485417526e-06,
      "loss": 0.1023,
      "step": 26914
    },
    {
      "epoch": 0.785197502771457,
      "grad_norm": 0.7378218391439108,
      "learning_rate": 1.1619947351594147e-06,
      "loss": 0.1123,
      "step": 26915
    },
    {
      "epoch": 0.7852266760021005,
      "grad_norm": 0.7338850072246711,
      "learning_rate": 1.1616919560418727e-06,
      "loss": 0.105,
      "step": 26916
    },
    {
      "epoch": 0.785255849232744,
      "grad_norm": 0.8676492666613993,
      "learning_rate": 1.1613892111918273e-06,
      "loss": 0.111,
      "step": 26917
    },
    {
      "epoch": 0.7852850224633876,
      "grad_norm": 1.1011054265315563,
      "learning_rate": 1.1610865006119838e-06,
      "loss": 0.1029,
      "step": 26918
    },
    {
      "epoch": 0.7853141956940312,
      "grad_norm": 0.7005316929548462,
      "learning_rate": 1.160783824305044e-06,
      "loss": 0.1152,
      "step": 26919
    },
    {
      "epoch": 0.7853433689246747,
      "grad_norm": 1.0391599316915296,
      "learning_rate": 1.1604811822737084e-06,
      "loss": 0.1308,
      "step": 26920
    },
    {
      "epoch": 0.7853725421553183,
      "grad_norm": 0.8165121760645124,
      "learning_rate": 1.1601785745206795e-06,
      "loss": 0.1019,
      "step": 26921
    },
    {
      "epoch": 0.7854017153859618,
      "grad_norm": 0.7787105521225467,
      "learning_rate": 1.1598760010486614e-06,
      "loss": 0.0917,
      "step": 26922
    },
    {
      "epoch": 0.7854308886166054,
      "grad_norm": 0.691787327300869,
      "learning_rate": 1.1595734618603543e-06,
      "loss": 0.1258,
      "step": 26923
    },
    {
      "epoch": 0.7854600618472489,
      "grad_norm": 0.982851786473919,
      "learning_rate": 1.1592709569584565e-06,
      "loss": 0.1177,
      "step": 26924
    },
    {
      "epoch": 0.7854892350778925,
      "grad_norm": 0.8147593573127733,
      "learning_rate": 1.1589684863456723e-06,
      "loss": 0.1159,
      "step": 26925
    },
    {
      "epoch": 0.7855184083085361,
      "grad_norm": 0.8159315026056807,
      "learning_rate": 1.1586660500247004e-06,
      "loss": 0.1244,
      "step": 26926
    },
    {
      "epoch": 0.7855475815391797,
      "grad_norm": 0.8041398174115109,
      "learning_rate": 1.1583636479982384e-06,
      "loss": 0.1345,
      "step": 26927
    },
    {
      "epoch": 0.7855767547698232,
      "grad_norm": 0.7807428745584111,
      "learning_rate": 1.1580612802689911e-06,
      "loss": 0.0908,
      "step": 26928
    },
    {
      "epoch": 0.7856059280004668,
      "grad_norm": 0.7520185543281153,
      "learning_rate": 1.157758946839656e-06,
      "loss": 0.1256,
      "step": 26929
    },
    {
      "epoch": 0.7856351012311104,
      "grad_norm": 0.7153975776300564,
      "learning_rate": 1.1574566477129302e-06,
      "loss": 0.1114,
      "step": 26930
    },
    {
      "epoch": 0.7856642744617539,
      "grad_norm": 0.8062988737351608,
      "learning_rate": 1.1571543828915155e-06,
      "loss": 0.1145,
      "step": 26931
    },
    {
      "epoch": 0.7856934476923975,
      "grad_norm": 1.0590870312759066,
      "learning_rate": 1.1568521523781095e-06,
      "loss": 0.0981,
      "step": 26932
    },
    {
      "epoch": 0.785722620923041,
      "grad_norm": 0.8514193187990563,
      "learning_rate": 1.1565499561754085e-06,
      "loss": 0.127,
      "step": 26933
    },
    {
      "epoch": 0.7857517941536846,
      "grad_norm": 0.895190943725986,
      "learning_rate": 1.1562477942861116e-06,
      "loss": 0.1207,
      "step": 26934
    },
    {
      "epoch": 0.7857809673843281,
      "grad_norm": 0.9450988864887407,
      "learning_rate": 1.1559456667129183e-06,
      "loss": 0.1045,
      "step": 26935
    },
    {
      "epoch": 0.7858101406149717,
      "grad_norm": 1.0362955785684302,
      "learning_rate": 1.1556435734585248e-06,
      "loss": 0.1157,
      "step": 26936
    },
    {
      "epoch": 0.7858393138456152,
      "grad_norm": 0.7935279419670792,
      "learning_rate": 1.1553415145256259e-06,
      "loss": 0.1101,
      "step": 26937
    },
    {
      "epoch": 0.7858684870762588,
      "grad_norm": 0.9216892335011704,
      "learning_rate": 1.155039489916922e-06,
      "loss": 0.1319,
      "step": 26938
    },
    {
      "epoch": 0.7858976603069023,
      "grad_norm": 0.8454616031196389,
      "learning_rate": 1.1547374996351063e-06,
      "loss": 0.0903,
      "step": 26939
    },
    {
      "epoch": 0.785926833537546,
      "grad_norm": 0.6737115247528297,
      "learning_rate": 1.1544355436828769e-06,
      "loss": 0.1245,
      "step": 26940
    },
    {
      "epoch": 0.7859560067681896,
      "grad_norm": 0.7819384496496615,
      "learning_rate": 1.1541336220629285e-06,
      "loss": 0.1044,
      "step": 26941
    },
    {
      "epoch": 0.7859851799988331,
      "grad_norm": 1.322076755898772,
      "learning_rate": 1.1538317347779583e-06,
      "loss": 0.103,
      "step": 26942
    },
    {
      "epoch": 0.7860143532294767,
      "grad_norm": 1.382774753900953,
      "learning_rate": 1.1535298818306595e-06,
      "loss": 0.1466,
      "step": 26943
    },
    {
      "epoch": 0.7860435264601202,
      "grad_norm": 0.7743865283279714,
      "learning_rate": 1.1532280632237269e-06,
      "loss": 0.1186,
      "step": 26944
    },
    {
      "epoch": 0.7860726996907638,
      "grad_norm": 0.9518823635543687,
      "learning_rate": 1.1529262789598554e-06,
      "loss": 0.1533,
      "step": 26945
    },
    {
      "epoch": 0.7861018729214073,
      "grad_norm": 0.7107481831528828,
      "learning_rate": 1.1526245290417415e-06,
      "loss": 0.0983,
      "step": 26946
    },
    {
      "epoch": 0.7861310461520509,
      "grad_norm": 1.0304808572182447,
      "learning_rate": 1.152322813472076e-06,
      "loss": 0.1048,
      "step": 26947
    },
    {
      "epoch": 0.7861602193826944,
      "grad_norm": 0.9304661820686831,
      "learning_rate": 1.1520211322535552e-06,
      "loss": 0.1142,
      "step": 26948
    },
    {
      "epoch": 0.786189392613338,
      "grad_norm": 0.9070036935799715,
      "learning_rate": 1.1517194853888713e-06,
      "loss": 0.1145,
      "step": 26949
    },
    {
      "epoch": 0.7862185658439815,
      "grad_norm": 0.8650367570286813,
      "learning_rate": 1.1514178728807151e-06,
      "loss": 0.1046,
      "step": 26950
    },
    {
      "epoch": 0.7862477390746251,
      "grad_norm": 0.7018217070502166,
      "learning_rate": 1.1511162947317822e-06,
      "loss": 0.1131,
      "step": 26951
    },
    {
      "epoch": 0.7862769123052686,
      "grad_norm": 0.7367283180653517,
      "learning_rate": 1.1508147509447653e-06,
      "loss": 0.1079,
      "step": 26952
    },
    {
      "epoch": 0.7863060855359122,
      "grad_norm": 0.8656438834038928,
      "learning_rate": 1.1505132415223552e-06,
      "loss": 0.1071,
      "step": 26953
    },
    {
      "epoch": 0.7863352587665559,
      "grad_norm": 0.8423230424025819,
      "learning_rate": 1.150211766467243e-06,
      "loss": 0.1095,
      "step": 26954
    },
    {
      "epoch": 0.7863644319971994,
      "grad_norm": 0.9235866939573659,
      "learning_rate": 1.1499103257821226e-06,
      "loss": 0.1265,
      "step": 26955
    },
    {
      "epoch": 0.786393605227843,
      "grad_norm": 0.7883921567853773,
      "learning_rate": 1.149608919469683e-06,
      "loss": 0.1215,
      "step": 26956
    },
    {
      "epoch": 0.7864227784584865,
      "grad_norm": 0.7621126622420391,
      "learning_rate": 1.1493075475326138e-06,
      "loss": 0.102,
      "step": 26957
    },
    {
      "epoch": 0.7864519516891301,
      "grad_norm": 0.7508340717348726,
      "learning_rate": 1.1490062099736098e-06,
      "loss": 0.1274,
      "step": 26958
    },
    {
      "epoch": 0.7864811249197736,
      "grad_norm": 1.2107192085602738,
      "learning_rate": 1.1487049067953592e-06,
      "loss": 0.1102,
      "step": 26959
    },
    {
      "epoch": 0.7865102981504172,
      "grad_norm": 0.882740292413829,
      "learning_rate": 1.1484036380005503e-06,
      "loss": 0.1143,
      "step": 26960
    },
    {
      "epoch": 0.7865394713810607,
      "grad_norm": 0.8018073132782089,
      "learning_rate": 1.1481024035918763e-06,
      "loss": 0.1205,
      "step": 26961
    },
    {
      "epoch": 0.7865686446117043,
      "grad_norm": 0.7865662270094895,
      "learning_rate": 1.1478012035720237e-06,
      "loss": 0.1422,
      "step": 26962
    },
    {
      "epoch": 0.7865978178423478,
      "grad_norm": 0.8155312475792762,
      "learning_rate": 1.1475000379436818e-06,
      "loss": 0.1238,
      "step": 26963
    },
    {
      "epoch": 0.7866269910729914,
      "grad_norm": 0.9796092648509211,
      "learning_rate": 1.147198906709539e-06,
      "loss": 0.0987,
      "step": 26964
    },
    {
      "epoch": 0.7866561643036349,
      "grad_norm": 1.0586389811067674,
      "learning_rate": 1.1468978098722866e-06,
      "loss": 0.1305,
      "step": 26965
    },
    {
      "epoch": 0.7866853375342785,
      "grad_norm": 0.7409045100530098,
      "learning_rate": 1.1465967474346106e-06,
      "loss": 0.0995,
      "step": 26966
    },
    {
      "epoch": 0.7867145107649222,
      "grad_norm": 0.7780086121140247,
      "learning_rate": 1.1462957193991975e-06,
      "loss": 0.0857,
      "step": 26967
    },
    {
      "epoch": 0.7867436839955657,
      "grad_norm": 0.837628663682243,
      "learning_rate": 1.1459947257687376e-06,
      "loss": 0.1051,
      "step": 26968
    },
    {
      "epoch": 0.7867728572262093,
      "grad_norm": 0.7236360158338214,
      "learning_rate": 1.1456937665459156e-06,
      "loss": 0.121,
      "step": 26969
    },
    {
      "epoch": 0.7868020304568528,
      "grad_norm": 1.070127536202674,
      "learning_rate": 1.1453928417334209e-06,
      "loss": 0.117,
      "step": 26970
    },
    {
      "epoch": 0.7868312036874964,
      "grad_norm": 0.7734379500031032,
      "learning_rate": 1.145091951333937e-06,
      "loss": 0.1167,
      "step": 26971
    },
    {
      "epoch": 0.7868603769181399,
      "grad_norm": 0.7271054501317741,
      "learning_rate": 1.144791095350154e-06,
      "loss": 0.1132,
      "step": 26972
    },
    {
      "epoch": 0.7868895501487835,
      "grad_norm": 0.7722909978875346,
      "learning_rate": 1.1444902737847553e-06,
      "loss": 0.1153,
      "step": 26973
    },
    {
      "epoch": 0.786918723379427,
      "grad_norm": 1.0529311870098763,
      "learning_rate": 1.1441894866404257e-06,
      "loss": 0.1247,
      "step": 26974
    },
    {
      "epoch": 0.7869478966100706,
      "grad_norm": 1.1310731684381354,
      "learning_rate": 1.1438887339198518e-06,
      "loss": 0.1246,
      "step": 26975
    },
    {
      "epoch": 0.7869770698407141,
      "grad_norm": 0.7147847864563611,
      "learning_rate": 1.1435880156257206e-06,
      "loss": 0.1233,
      "step": 26976
    },
    {
      "epoch": 0.7870062430713577,
      "grad_norm": 0.8413088562452513,
      "learning_rate": 1.143287331760713e-06,
      "loss": 0.1041,
      "step": 26977
    },
    {
      "epoch": 0.7870354163020012,
      "grad_norm": 0.9282174368199931,
      "learning_rate": 1.142986682327517e-06,
      "loss": 0.1276,
      "step": 26978
    },
    {
      "epoch": 0.7870645895326448,
      "grad_norm": 0.8712969838419217,
      "learning_rate": 1.1426860673288153e-06,
      "loss": 0.1028,
      "step": 26979
    },
    {
      "epoch": 0.7870937627632884,
      "grad_norm": 1.1435555554080257,
      "learning_rate": 1.14238548676729e-06,
      "loss": 0.1414,
      "step": 26980
    },
    {
      "epoch": 0.787122935993932,
      "grad_norm": 0.7130729393273794,
      "learning_rate": 1.1420849406456263e-06,
      "loss": 0.1013,
      "step": 26981
    },
    {
      "epoch": 0.7871521092245756,
      "grad_norm": 1.1305725242569322,
      "learning_rate": 1.1417844289665091e-06,
      "loss": 0.1216,
      "step": 26982
    },
    {
      "epoch": 0.7871812824552191,
      "grad_norm": 0.7932486868097797,
      "learning_rate": 1.1414839517326192e-06,
      "loss": 0.1122,
      "step": 26983
    },
    {
      "epoch": 0.7872104556858627,
      "grad_norm": 0.8054079701445868,
      "learning_rate": 1.1411835089466377e-06,
      "loss": 0.1119,
      "step": 26984
    },
    {
      "epoch": 0.7872396289165062,
      "grad_norm": 0.9864994701651784,
      "learning_rate": 1.1408831006112504e-06,
      "loss": 0.1151,
      "step": 26985
    },
    {
      "epoch": 0.7872688021471498,
      "grad_norm": 0.8039319204841343,
      "learning_rate": 1.1405827267291376e-06,
      "loss": 0.1227,
      "step": 26986
    },
    {
      "epoch": 0.7872979753777933,
      "grad_norm": 1.0690657532685257,
      "learning_rate": 1.1402823873029778e-06,
      "loss": 0.1473,
      "step": 26987
    },
    {
      "epoch": 0.7873271486084369,
      "grad_norm": 2.349516417868194,
      "learning_rate": 1.1399820823354584e-06,
      "loss": 0.118,
      "step": 26988
    },
    {
      "epoch": 0.7873563218390804,
      "grad_norm": 0.9688472133294002,
      "learning_rate": 1.139681811829258e-06,
      "loss": 0.1093,
      "step": 26989
    },
    {
      "epoch": 0.787385495069724,
      "grad_norm": 0.8275472845456364,
      "learning_rate": 1.1393815757870546e-06,
      "loss": 0.1307,
      "step": 26990
    },
    {
      "epoch": 0.7874146683003675,
      "grad_norm": 0.9310817924852001,
      "learning_rate": 1.1390813742115332e-06,
      "loss": 0.113,
      "step": 26991
    },
    {
      "epoch": 0.7874438415310111,
      "grad_norm": 0.9557097175977453,
      "learning_rate": 1.1387812071053706e-06,
      "loss": 0.1116,
      "step": 26992
    },
    {
      "epoch": 0.7874730147616547,
      "grad_norm": 0.8246996114442896,
      "learning_rate": 1.1384810744712471e-06,
      "loss": 0.105,
      "step": 26993
    },
    {
      "epoch": 0.7875021879922983,
      "grad_norm": 0.8743406944167271,
      "learning_rate": 1.1381809763118424e-06,
      "loss": 0.1361,
      "step": 26994
    },
    {
      "epoch": 0.7875313612229419,
      "grad_norm": 1.1043558299341243,
      "learning_rate": 1.1378809126298373e-06,
      "loss": 0.1231,
      "step": 26995
    },
    {
      "epoch": 0.7875605344535854,
      "grad_norm": 1.0758464004068735,
      "learning_rate": 1.1375808834279095e-06,
      "loss": 0.1121,
      "step": 26996
    },
    {
      "epoch": 0.787589707684229,
      "grad_norm": 0.931716048355612,
      "learning_rate": 1.137280888708736e-06,
      "loss": 0.1036,
      "step": 26997
    },
    {
      "epoch": 0.7876188809148725,
      "grad_norm": 0.7779312814316556,
      "learning_rate": 1.1369809284749982e-06,
      "loss": 0.0965,
      "step": 26998
    },
    {
      "epoch": 0.7876480541455161,
      "grad_norm": 1.0376133259115237,
      "learning_rate": 1.1366810027293711e-06,
      "loss": 0.1458,
      "step": 26999
    },
    {
      "epoch": 0.7876772273761596,
      "grad_norm": 1.1522640271177464,
      "learning_rate": 1.1363811114745354e-06,
      "loss": 0.1313,
      "step": 27000
    },
    {
      "epoch": 0.7877064006068032,
      "grad_norm": 0.8182592835601398,
      "learning_rate": 1.1360812547131655e-06,
      "loss": 0.108,
      "step": 27001
    },
    {
      "epoch": 0.7877355738374467,
      "grad_norm": 1.152048894714544,
      "learning_rate": 1.135781432447941e-06,
      "loss": 0.1443,
      "step": 27002
    },
    {
      "epoch": 0.7877647470680903,
      "grad_norm": 0.884579195459697,
      "learning_rate": 1.135481644681537e-06,
      "loss": 0.1191,
      "step": 27003
    },
    {
      "epoch": 0.7877939202987339,
      "grad_norm": 0.7527828077355831,
      "learning_rate": 1.135181891416629e-06,
      "loss": 0.1037,
      "step": 27004
    },
    {
      "epoch": 0.7878230935293774,
      "grad_norm": 0.8200120988297046,
      "learning_rate": 1.1348821726558951e-06,
      "loss": 0.1116,
      "step": 27005
    },
    {
      "epoch": 0.787852266760021,
      "grad_norm": 0.8972028511074547,
      "learning_rate": 1.1345824884020113e-06,
      "loss": 0.1083,
      "step": 27006
    },
    {
      "epoch": 0.7878814399906645,
      "grad_norm": 0.855428936458742,
      "learning_rate": 1.134282838657651e-06,
      "loss": 0.1122,
      "step": 27007
    },
    {
      "epoch": 0.7879106132213082,
      "grad_norm": 0.7133366751370145,
      "learning_rate": 1.133983223425492e-06,
      "loss": 0.1092,
      "step": 27008
    },
    {
      "epoch": 0.7879397864519517,
      "grad_norm": 0.8798477211755299,
      "learning_rate": 1.1336836427082083e-06,
      "loss": 0.1307,
      "step": 27009
    },
    {
      "epoch": 0.7879689596825953,
      "grad_norm": 0.8665858198878355,
      "learning_rate": 1.1333840965084725e-06,
      "loss": 0.1097,
      "step": 27010
    },
    {
      "epoch": 0.7879981329132388,
      "grad_norm": 0.7480220082472291,
      "learning_rate": 1.1330845848289606e-06,
      "loss": 0.0864,
      "step": 27011
    },
    {
      "epoch": 0.7880273061438824,
      "grad_norm": 0.7079429673161648,
      "learning_rate": 1.1327851076723473e-06,
      "loss": 0.1112,
      "step": 27012
    },
    {
      "epoch": 0.7880564793745259,
      "grad_norm": 1.383172043736597,
      "learning_rate": 1.1324856650413057e-06,
      "loss": 0.1049,
      "step": 27013
    },
    {
      "epoch": 0.7880856526051695,
      "grad_norm": 0.8373019806538111,
      "learning_rate": 1.132186256938508e-06,
      "loss": 0.1247,
      "step": 27014
    },
    {
      "epoch": 0.788114825835813,
      "grad_norm": 0.771620531410981,
      "learning_rate": 1.1318868833666286e-06,
      "loss": 0.1167,
      "step": 27015
    },
    {
      "epoch": 0.7881439990664566,
      "grad_norm": 0.9316198499289414,
      "learning_rate": 1.1315875443283396e-06,
      "loss": 0.1064,
      "step": 27016
    },
    {
      "epoch": 0.7881731722971002,
      "grad_norm": 0.8558932039206868,
      "learning_rate": 1.1312882398263114e-06,
      "loss": 0.1145,
      "step": 27017
    },
    {
      "epoch": 0.7882023455277437,
      "grad_norm": 0.8262050949069694,
      "learning_rate": 1.130988969863221e-06,
      "loss": 0.0945,
      "step": 27018
    },
    {
      "epoch": 0.7882315187583873,
      "grad_norm": 1.0459089058686855,
      "learning_rate": 1.1306897344417373e-06,
      "loss": 0.1219,
      "step": 27019
    },
    {
      "epoch": 0.7882606919890308,
      "grad_norm": 0.9251468084913778,
      "learning_rate": 1.1303905335645304e-06,
      "loss": 0.098,
      "step": 27020
    },
    {
      "epoch": 0.7882898652196745,
      "grad_norm": 0.7679005529198607,
      "learning_rate": 1.1300913672342744e-06,
      "loss": 0.1029,
      "step": 27021
    },
    {
      "epoch": 0.788319038450318,
      "grad_norm": 0.86234896637721,
      "learning_rate": 1.1297922354536396e-06,
      "loss": 0.1487,
      "step": 27022
    },
    {
      "epoch": 0.7883482116809616,
      "grad_norm": 0.6475395648456037,
      "learning_rate": 1.1294931382252932e-06,
      "loss": 0.1192,
      "step": 27023
    },
    {
      "epoch": 0.7883773849116051,
      "grad_norm": 0.962154914870367,
      "learning_rate": 1.1291940755519092e-06,
      "loss": 0.1038,
      "step": 27024
    },
    {
      "epoch": 0.7884065581422487,
      "grad_norm": 0.8348365296359087,
      "learning_rate": 1.128895047436157e-06,
      "loss": 0.1077,
      "step": 27025
    },
    {
      "epoch": 0.7884357313728922,
      "grad_norm": 0.7840105408076355,
      "learning_rate": 1.1285960538807066e-06,
      "loss": 0.1187,
      "step": 27026
    },
    {
      "epoch": 0.7884649046035358,
      "grad_norm": 0.8347802697343071,
      "learning_rate": 1.1282970948882243e-06,
      "loss": 0.1142,
      "step": 27027
    },
    {
      "epoch": 0.7884940778341794,
      "grad_norm": 0.8759492568057259,
      "learning_rate": 1.1279981704613828e-06,
      "loss": 0.1143,
      "step": 27028
    },
    {
      "epoch": 0.7885232510648229,
      "grad_norm": 0.8110954759711658,
      "learning_rate": 1.1276992806028485e-06,
      "loss": 0.1071,
      "step": 27029
    },
    {
      "epoch": 0.7885524242954665,
      "grad_norm": 0.7398886339637835,
      "learning_rate": 1.1274004253152914e-06,
      "loss": 0.1124,
      "step": 27030
    },
    {
      "epoch": 0.78858159752611,
      "grad_norm": 0.7910840109521887,
      "learning_rate": 1.1271016046013778e-06,
      "loss": 0.116,
      "step": 27031
    },
    {
      "epoch": 0.7886107707567536,
      "grad_norm": 1.0890123327402674,
      "learning_rate": 1.126802818463778e-06,
      "loss": 0.1322,
      "step": 27032
    },
    {
      "epoch": 0.7886399439873971,
      "grad_norm": 0.6688060214778,
      "learning_rate": 1.1265040669051581e-06,
      "loss": 0.1241,
      "step": 27033
    },
    {
      "epoch": 0.7886691172180407,
      "grad_norm": 0.7722276746876664,
      "learning_rate": 1.1262053499281833e-06,
      "loss": 0.1193,
      "step": 27034
    },
    {
      "epoch": 0.7886982904486843,
      "grad_norm": 0.7518688019620501,
      "learning_rate": 1.1259066675355224e-06,
      "loss": 0.1218,
      "step": 27035
    },
    {
      "epoch": 0.7887274636793279,
      "grad_norm": 0.7614123499987674,
      "learning_rate": 1.1256080197298437e-06,
      "loss": 0.1408,
      "step": 27036
    },
    {
      "epoch": 0.7887566369099714,
      "grad_norm": 0.7663154896064904,
      "learning_rate": 1.1253094065138105e-06,
      "loss": 0.1338,
      "step": 27037
    },
    {
      "epoch": 0.788785810140615,
      "grad_norm": 0.8704325046926882,
      "learning_rate": 1.1250108278900906e-06,
      "loss": 0.132,
      "step": 27038
    },
    {
      "epoch": 0.7888149833712585,
      "grad_norm": 0.7366138270196971,
      "learning_rate": 1.12471228386135e-06,
      "loss": 0.1017,
      "step": 27039
    },
    {
      "epoch": 0.7888441566019021,
      "grad_norm": 0.7670380610852506,
      "learning_rate": 1.1244137744302508e-06,
      "loss": 0.1171,
      "step": 27040
    },
    {
      "epoch": 0.7888733298325457,
      "grad_norm": 0.845280063715469,
      "learning_rate": 1.1241152995994603e-06,
      "loss": 0.0941,
      "step": 27041
    },
    {
      "epoch": 0.7889025030631892,
      "grad_norm": 0.7234602302032337,
      "learning_rate": 1.1238168593716448e-06,
      "loss": 0.1026,
      "step": 27042
    },
    {
      "epoch": 0.7889316762938328,
      "grad_norm": 0.755873136293606,
      "learning_rate": 1.123518453749467e-06,
      "loss": 0.1067,
      "step": 27043
    },
    {
      "epoch": 0.7889608495244763,
      "grad_norm": 0.7218157111755862,
      "learning_rate": 1.12322008273559e-06,
      "loss": 0.1141,
      "step": 27044
    },
    {
      "epoch": 0.7889900227551199,
      "grad_norm": 0.9889466128783103,
      "learning_rate": 1.1229217463326798e-06,
      "loss": 0.1219,
      "step": 27045
    },
    {
      "epoch": 0.7890191959857634,
      "grad_norm": 0.7903896519892492,
      "learning_rate": 1.1226234445433987e-06,
      "loss": 0.1123,
      "step": 27046
    },
    {
      "epoch": 0.789048369216407,
      "grad_norm": 0.9271489373101308,
      "learning_rate": 1.1223251773704069e-06,
      "loss": 0.1025,
      "step": 27047
    },
    {
      "epoch": 0.7890775424470506,
      "grad_norm": 0.7319543777838504,
      "learning_rate": 1.1220269448163735e-06,
      "loss": 0.1173,
      "step": 27048
    },
    {
      "epoch": 0.7891067156776942,
      "grad_norm": 0.7077700265266096,
      "learning_rate": 1.121728746883957e-06,
      "loss": 0.1124,
      "step": 27049
    },
    {
      "epoch": 0.7891358889083377,
      "grad_norm": 1.1161121398535105,
      "learning_rate": 1.1214305835758194e-06,
      "loss": 0.1071,
      "step": 27050
    },
    {
      "epoch": 0.7891650621389813,
      "grad_norm": 0.7895281541789846,
      "learning_rate": 1.1211324548946255e-06,
      "loss": 0.1144,
      "step": 27051
    },
    {
      "epoch": 0.7891942353696249,
      "grad_norm": 0.7774085888223303,
      "learning_rate": 1.1208343608430344e-06,
      "loss": 0.1022,
      "step": 27052
    },
    {
      "epoch": 0.7892234086002684,
      "grad_norm": 0.8718471687330965,
      "learning_rate": 1.1205363014237075e-06,
      "loss": 0.1278,
      "step": 27053
    },
    {
      "epoch": 0.789252581830912,
      "grad_norm": 0.8811875020140885,
      "learning_rate": 1.1202382766393056e-06,
      "loss": 0.1089,
      "step": 27054
    },
    {
      "epoch": 0.7892817550615555,
      "grad_norm": 0.8360053810229248,
      "learning_rate": 1.119940286492492e-06,
      "loss": 0.1158,
      "step": 27055
    },
    {
      "epoch": 0.7893109282921991,
      "grad_norm": 0.9734365039401242,
      "learning_rate": 1.119642330985925e-06,
      "loss": 0.1254,
      "step": 27056
    },
    {
      "epoch": 0.7893401015228426,
      "grad_norm": 0.868937374812772,
      "learning_rate": 1.1193444101222639e-06,
      "loss": 0.1349,
      "step": 27057
    },
    {
      "epoch": 0.7893692747534862,
      "grad_norm": 0.9714343484952161,
      "learning_rate": 1.119046523904171e-06,
      "loss": 0.1136,
      "step": 27058
    },
    {
      "epoch": 0.7893984479841297,
      "grad_norm": 1.0384370064192667,
      "learning_rate": 1.1187486723343027e-06,
      "loss": 0.1113,
      "step": 27059
    },
    {
      "epoch": 0.7894276212147733,
      "grad_norm": 0.8904081157750814,
      "learning_rate": 1.1184508554153207e-06,
      "loss": 0.1324,
      "step": 27060
    },
    {
      "epoch": 0.7894567944454168,
      "grad_norm": 0.6980593416909135,
      "learning_rate": 1.118153073149882e-06,
      "loss": 0.1195,
      "step": 27061
    },
    {
      "epoch": 0.7894859676760605,
      "grad_norm": 0.8183181824070184,
      "learning_rate": 1.1178553255406471e-06,
      "loss": 0.1266,
      "step": 27062
    },
    {
      "epoch": 0.789515140906704,
      "grad_norm": 0.7674492847616166,
      "learning_rate": 1.1175576125902732e-06,
      "loss": 0.1235,
      "step": 27063
    },
    {
      "epoch": 0.7895443141373476,
      "grad_norm": 1.0686941219648478,
      "learning_rate": 1.1172599343014167e-06,
      "loss": 0.1234,
      "step": 27064
    },
    {
      "epoch": 0.7895734873679912,
      "grad_norm": 0.9257269730998541,
      "learning_rate": 1.1169622906767368e-06,
      "loss": 0.0975,
      "step": 27065
    },
    {
      "epoch": 0.7896026605986347,
      "grad_norm": 0.7989338259893596,
      "learning_rate": 1.116664681718892e-06,
      "loss": 0.1322,
      "step": 27066
    },
    {
      "epoch": 0.7896318338292783,
      "grad_norm": 0.9501551133211303,
      "learning_rate": 1.1163671074305365e-06,
      "loss": 0.0992,
      "step": 27067
    },
    {
      "epoch": 0.7896610070599218,
      "grad_norm": 0.8220297913394656,
      "learning_rate": 1.1160695678143297e-06,
      "loss": 0.0976,
      "step": 27068
    },
    {
      "epoch": 0.7896901802905654,
      "grad_norm": 0.7420781942638193,
      "learning_rate": 1.1157720628729264e-06,
      "loss": 0.0971,
      "step": 27069
    },
    {
      "epoch": 0.7897193535212089,
      "grad_norm": 0.7748818413562756,
      "learning_rate": 1.1154745926089816e-06,
      "loss": 0.1144,
      "step": 27070
    },
    {
      "epoch": 0.7897485267518525,
      "grad_norm": 0.7086597366847097,
      "learning_rate": 1.1151771570251524e-06,
      "loss": 0.0945,
      "step": 27071
    },
    {
      "epoch": 0.789777699982496,
      "grad_norm": 0.9554744825537503,
      "learning_rate": 1.1148797561240954e-06,
      "loss": 0.1112,
      "step": 27072
    },
    {
      "epoch": 0.7898068732131396,
      "grad_norm": 0.8266113022934952,
      "learning_rate": 1.1145823899084645e-06,
      "loss": 0.1034,
      "step": 27073
    },
    {
      "epoch": 0.7898360464437831,
      "grad_norm": 0.8992658916797747,
      "learning_rate": 1.1142850583809133e-06,
      "loss": 0.1184,
      "step": 27074
    },
    {
      "epoch": 0.7898652196744268,
      "grad_norm": 1.061094711579638,
      "learning_rate": 1.1139877615440993e-06,
      "loss": 0.1081,
      "step": 27075
    },
    {
      "epoch": 0.7898943929050704,
      "grad_norm": 0.7539182591125799,
      "learning_rate": 1.1136904994006743e-06,
      "loss": 0.1061,
      "step": 27076
    },
    {
      "epoch": 0.7899235661357139,
      "grad_norm": 0.7331230192797517,
      "learning_rate": 1.1133932719532903e-06,
      "loss": 0.1036,
      "step": 27077
    },
    {
      "epoch": 0.7899527393663575,
      "grad_norm": 0.8883405371673604,
      "learning_rate": 1.1130960792046057e-06,
      "loss": 0.1173,
      "step": 27078
    },
    {
      "epoch": 0.789981912597001,
      "grad_norm": 0.7454993019681683,
      "learning_rate": 1.1127989211572715e-06,
      "loss": 0.1142,
      "step": 27079
    },
    {
      "epoch": 0.7900110858276446,
      "grad_norm": 1.1582832738734559,
      "learning_rate": 1.1125017978139396e-06,
      "loss": 0.1088,
      "step": 27080
    },
    {
      "epoch": 0.7900402590582881,
      "grad_norm": 0.8510141194896007,
      "learning_rate": 1.1122047091772647e-06,
      "loss": 0.1037,
      "step": 27081
    },
    {
      "epoch": 0.7900694322889317,
      "grad_norm": 0.787896618425818,
      "learning_rate": 1.111907655249898e-06,
      "loss": 0.0945,
      "step": 27082
    },
    {
      "epoch": 0.7900986055195752,
      "grad_norm": 0.7847387202622397,
      "learning_rate": 1.1116106360344909e-06,
      "loss": 0.1298,
      "step": 27083
    },
    {
      "epoch": 0.7901277787502188,
      "grad_norm": 0.8143617847944276,
      "learning_rate": 1.1113136515336953e-06,
      "loss": 0.1479,
      "step": 27084
    },
    {
      "epoch": 0.7901569519808623,
      "grad_norm": 0.7919774903233028,
      "learning_rate": 1.1110167017501643e-06,
      "loss": 0.0974,
      "step": 27085
    },
    {
      "epoch": 0.7901861252115059,
      "grad_norm": 0.8546097168471126,
      "learning_rate": 1.1107197866865482e-06,
      "loss": 0.1096,
      "step": 27086
    },
    {
      "epoch": 0.7902152984421494,
      "grad_norm": 0.8094623375714232,
      "learning_rate": 1.1104229063454957e-06,
      "loss": 0.1419,
      "step": 27087
    },
    {
      "epoch": 0.790244471672793,
      "grad_norm": 1.1065230427955302,
      "learning_rate": 1.1101260607296588e-06,
      "loss": 0.1396,
      "step": 27088
    },
    {
      "epoch": 0.7902736449034367,
      "grad_norm": 0.7493205945004073,
      "learning_rate": 1.1098292498416895e-06,
      "loss": 0.0995,
      "step": 27089
    },
    {
      "epoch": 0.7903028181340802,
      "grad_norm": 0.832246248518706,
      "learning_rate": 1.109532473684236e-06,
      "loss": 0.1119,
      "step": 27090
    },
    {
      "epoch": 0.7903319913647238,
      "grad_norm": 0.8641004671671787,
      "learning_rate": 1.1092357322599467e-06,
      "loss": 0.1259,
      "step": 27091
    },
    {
      "epoch": 0.7903611645953673,
      "grad_norm": 0.8880996219591738,
      "learning_rate": 1.1089390255714733e-06,
      "loss": 0.1203,
      "step": 27092
    },
    {
      "epoch": 0.7903903378260109,
      "grad_norm": 0.9002697296503198,
      "learning_rate": 1.108642353621463e-06,
      "loss": 0.1159,
      "step": 27093
    },
    {
      "epoch": 0.7904195110566544,
      "grad_norm": 0.7692330428831918,
      "learning_rate": 1.108345716412562e-06,
      "loss": 0.1095,
      "step": 27094
    },
    {
      "epoch": 0.790448684287298,
      "grad_norm": 0.8019301129088359,
      "learning_rate": 1.1080491139474248e-06,
      "loss": 0.1178,
      "step": 27095
    },
    {
      "epoch": 0.7904778575179415,
      "grad_norm": 0.72149071079873,
      "learning_rate": 1.107752546228696e-06,
      "loss": 0.0918,
      "step": 27096
    },
    {
      "epoch": 0.7905070307485851,
      "grad_norm": 0.9390267499518805,
      "learning_rate": 1.1074560132590218e-06,
      "loss": 0.119,
      "step": 27097
    },
    {
      "epoch": 0.7905362039792286,
      "grad_norm": 0.9875472469030105,
      "learning_rate": 1.1071595150410518e-06,
      "loss": 0.1068,
      "step": 27098
    },
    {
      "epoch": 0.7905653772098722,
      "grad_norm": 0.9522414094922597,
      "learning_rate": 1.1068630515774332e-06,
      "loss": 0.132,
      "step": 27099
    },
    {
      "epoch": 0.7905945504405157,
      "grad_norm": 0.822154335648392,
      "learning_rate": 1.10656662287081e-06,
      "loss": 0.1058,
      "step": 27100
    },
    {
      "epoch": 0.7906237236711593,
      "grad_norm": 0.7328819702652183,
      "learning_rate": 1.1062702289238308e-06,
      "loss": 0.1111,
      "step": 27101
    },
    {
      "epoch": 0.790652896901803,
      "grad_norm": 0.92680488768724,
      "learning_rate": 1.105973869739143e-06,
      "loss": 0.1417,
      "step": 27102
    },
    {
      "epoch": 0.7906820701324465,
      "grad_norm": 0.7479262159317424,
      "learning_rate": 1.1056775453193907e-06,
      "loss": 0.0881,
      "step": 27103
    },
    {
      "epoch": 0.7907112433630901,
      "grad_norm": 1.290706270914707,
      "learning_rate": 1.1053812556672183e-06,
      "loss": 0.1094,
      "step": 27104
    },
    {
      "epoch": 0.7907404165937336,
      "grad_norm": 0.8930020032148578,
      "learning_rate": 1.1050850007852737e-06,
      "loss": 0.1224,
      "step": 27105
    },
    {
      "epoch": 0.7907695898243772,
      "grad_norm": 1.2154004616883283,
      "learning_rate": 1.1047887806761993e-06,
      "loss": 0.1273,
      "step": 27106
    },
    {
      "epoch": 0.7907987630550207,
      "grad_norm": 0.9299056012719995,
      "learning_rate": 1.1044925953426406e-06,
      "loss": 0.112,
      "step": 27107
    },
    {
      "epoch": 0.7908279362856643,
      "grad_norm": 0.8292919192985915,
      "learning_rate": 1.1041964447872434e-06,
      "loss": 0.1214,
      "step": 27108
    },
    {
      "epoch": 0.7908571095163078,
      "grad_norm": 0.937772791906975,
      "learning_rate": 1.10390032901265e-06,
      "loss": 0.1178,
      "step": 27109
    },
    {
      "epoch": 0.7908862827469514,
      "grad_norm": 0.9054766946410139,
      "learning_rate": 1.1036042480215037e-06,
      "loss": 0.1165,
      "step": 27110
    },
    {
      "epoch": 0.7909154559775949,
      "grad_norm": 0.8453587046069618,
      "learning_rate": 1.1033082018164492e-06,
      "loss": 0.152,
      "step": 27111
    },
    {
      "epoch": 0.7909446292082385,
      "grad_norm": 0.7023114841040197,
      "learning_rate": 1.1030121904001278e-06,
      "loss": 0.0991,
      "step": 27112
    },
    {
      "epoch": 0.790973802438882,
      "grad_norm": 0.9327751718824633,
      "learning_rate": 1.1027162137751852e-06,
      "loss": 0.1098,
      "step": 27113
    },
    {
      "epoch": 0.7910029756695256,
      "grad_norm": 0.7422887851770696,
      "learning_rate": 1.1024202719442596e-06,
      "loss": 0.0862,
      "step": 27114
    },
    {
      "epoch": 0.7910321489001692,
      "grad_norm": 0.7049014329909208,
      "learning_rate": 1.1021243649099972e-06,
      "loss": 0.1268,
      "step": 27115
    },
    {
      "epoch": 0.7910613221308128,
      "grad_norm": 0.8604393699555738,
      "learning_rate": 1.1018284926750378e-06,
      "loss": 0.1174,
      "step": 27116
    },
    {
      "epoch": 0.7910904953614564,
      "grad_norm": 0.9837231514686511,
      "learning_rate": 1.1015326552420218e-06,
      "loss": 0.1186,
      "step": 27117
    },
    {
      "epoch": 0.7911196685920999,
      "grad_norm": 0.7758830117983704,
      "learning_rate": 1.101236852613592e-06,
      "loss": 0.1047,
      "step": 27118
    },
    {
      "epoch": 0.7911488418227435,
      "grad_norm": 0.8812448555955511,
      "learning_rate": 1.1009410847923897e-06,
      "loss": 0.1124,
      "step": 27119
    },
    {
      "epoch": 0.791178015053387,
      "grad_norm": 0.8442310245177006,
      "learning_rate": 1.100645351781055e-06,
      "loss": 0.112,
      "step": 27120
    },
    {
      "epoch": 0.7912071882840306,
      "grad_norm": 0.7845106073583007,
      "learning_rate": 1.1003496535822262e-06,
      "loss": 0.0901,
      "step": 27121
    },
    {
      "epoch": 0.7912363615146741,
      "grad_norm": 0.7196121650906268,
      "learning_rate": 1.1000539901985458e-06,
      "loss": 0.1264,
      "step": 27122
    },
    {
      "epoch": 0.7912655347453177,
      "grad_norm": 0.7425485908378817,
      "learning_rate": 1.099758361632653e-06,
      "loss": 0.1078,
      "step": 27123
    },
    {
      "epoch": 0.7912947079759612,
      "grad_norm": 0.7759085616538031,
      "learning_rate": 1.0994627678871833e-06,
      "loss": 0.0996,
      "step": 27124
    },
    {
      "epoch": 0.7913238812066048,
      "grad_norm": 0.7586204681365918,
      "learning_rate": 1.0991672089647814e-06,
      "loss": 0.1002,
      "step": 27125
    },
    {
      "epoch": 0.7913530544372483,
      "grad_norm": 1.8345522764639859,
      "learning_rate": 1.0988716848680842e-06,
      "loss": 0.1019,
      "step": 27126
    },
    {
      "epoch": 0.7913822276678919,
      "grad_norm": 0.8190523113213574,
      "learning_rate": 1.0985761955997276e-06,
      "loss": 0.0987,
      "step": 27127
    },
    {
      "epoch": 0.7914114008985355,
      "grad_norm": 0.8617946805400212,
      "learning_rate": 1.0982807411623526e-06,
      "loss": 0.1144,
      "step": 27128
    },
    {
      "epoch": 0.7914405741291791,
      "grad_norm": 1.2286630752846879,
      "learning_rate": 1.0979853215585957e-06,
      "loss": 0.1042,
      "step": 27129
    },
    {
      "epoch": 0.7914697473598227,
      "grad_norm": 0.9680502916427776,
      "learning_rate": 1.0976899367910932e-06,
      "loss": 0.1325,
      "step": 27130
    },
    {
      "epoch": 0.7914989205904662,
      "grad_norm": 0.955072518187602,
      "learning_rate": 1.097394586862483e-06,
      "loss": 0.1125,
      "step": 27131
    },
    {
      "epoch": 0.7915280938211098,
      "grad_norm": 0.7425040135781064,
      "learning_rate": 1.0970992717754043e-06,
      "loss": 0.1068,
      "step": 27132
    },
    {
      "epoch": 0.7915572670517533,
      "grad_norm": 0.9104003460204163,
      "learning_rate": 1.0968039915324913e-06,
      "loss": 0.115,
      "step": 27133
    },
    {
      "epoch": 0.7915864402823969,
      "grad_norm": 0.8801343256956771,
      "learning_rate": 1.0965087461363788e-06,
      "loss": 0.1315,
      "step": 27134
    },
    {
      "epoch": 0.7916156135130404,
      "grad_norm": 0.9456310284348848,
      "learning_rate": 1.0962135355897063e-06,
      "loss": 0.1108,
      "step": 27135
    },
    {
      "epoch": 0.791644786743684,
      "grad_norm": 0.6906914642583293,
      "learning_rate": 1.0959183598951056e-06,
      "loss": 0.1005,
      "step": 27136
    },
    {
      "epoch": 0.7916739599743275,
      "grad_norm": 0.9960876587345895,
      "learning_rate": 1.095623219055214e-06,
      "loss": 0.1009,
      "step": 27137
    },
    {
      "epoch": 0.7917031332049711,
      "grad_norm": 0.908515226563882,
      "learning_rate": 1.095328113072668e-06,
      "loss": 0.1129,
      "step": 27138
    },
    {
      "epoch": 0.7917323064356147,
      "grad_norm": 0.771092521850201,
      "learning_rate": 1.0950330419501003e-06,
      "loss": 0.1038,
      "step": 27139
    },
    {
      "epoch": 0.7917614796662582,
      "grad_norm": 0.7236073233684125,
      "learning_rate": 1.0947380056901436e-06,
      "loss": 0.105,
      "step": 27140
    },
    {
      "epoch": 0.7917906528969018,
      "grad_norm": 0.8497883082548906,
      "learning_rate": 1.0944430042954358e-06,
      "loss": 0.1182,
      "step": 27141
    },
    {
      "epoch": 0.7918198261275453,
      "grad_norm": 0.7610705951590233,
      "learning_rate": 1.0941480377686065e-06,
      "loss": 0.0972,
      "step": 27142
    },
    {
      "epoch": 0.791848999358189,
      "grad_norm": 0.72110363440369,
      "learning_rate": 1.0938531061122926e-06,
      "loss": 0.1314,
      "step": 27143
    },
    {
      "epoch": 0.7918781725888325,
      "grad_norm": 0.807041227237705,
      "learning_rate": 1.0935582093291247e-06,
      "loss": 0.1338,
      "step": 27144
    },
    {
      "epoch": 0.7919073458194761,
      "grad_norm": 1.0550105281370603,
      "learning_rate": 1.0932633474217374e-06,
      "loss": 0.1163,
      "step": 27145
    },
    {
      "epoch": 0.7919365190501196,
      "grad_norm": 0.8850146050823224,
      "learning_rate": 1.0929685203927625e-06,
      "loss": 0.1513,
      "step": 27146
    },
    {
      "epoch": 0.7919656922807632,
      "grad_norm": 0.6960212464952853,
      "learning_rate": 1.0926737282448308e-06,
      "loss": 0.0995,
      "step": 27147
    },
    {
      "epoch": 0.7919948655114067,
      "grad_norm": 0.7826302176495409,
      "learning_rate": 1.0923789709805754e-06,
      "loss": 0.1196,
      "step": 27148
    },
    {
      "epoch": 0.7920240387420503,
      "grad_norm": 0.8804146177427541,
      "learning_rate": 1.092084248602629e-06,
      "loss": 0.0996,
      "step": 27149
    },
    {
      "epoch": 0.7920532119726938,
      "grad_norm": 0.7310782519951465,
      "learning_rate": 1.0917895611136214e-06,
      "loss": 0.1037,
      "step": 27150
    },
    {
      "epoch": 0.7920823852033374,
      "grad_norm": 0.9999875152982066,
      "learning_rate": 1.0914949085161819e-06,
      "loss": 0.1093,
      "step": 27151
    },
    {
      "epoch": 0.792111558433981,
      "grad_norm": 0.9923710383217718,
      "learning_rate": 1.091200290812945e-06,
      "loss": 0.1355,
      "step": 27152
    },
    {
      "epoch": 0.7921407316646245,
      "grad_norm": 0.9491778541297881,
      "learning_rate": 1.0909057080065382e-06,
      "loss": 0.1365,
      "step": 27153
    },
    {
      "epoch": 0.7921699048952681,
      "grad_norm": 0.6807099827568043,
      "learning_rate": 1.0906111600995895e-06,
      "loss": 0.0983,
      "step": 27154
    },
    {
      "epoch": 0.7921990781259116,
      "grad_norm": 0.8567725912593552,
      "learning_rate": 1.090316647094734e-06,
      "loss": 0.1187,
      "step": 27155
    },
    {
      "epoch": 0.7922282513565553,
      "grad_norm": 1.108805960470962,
      "learning_rate": 1.0900221689945978e-06,
      "loss": 0.1172,
      "step": 27156
    },
    {
      "epoch": 0.7922574245871988,
      "grad_norm": 0.8158151823512337,
      "learning_rate": 1.089727725801809e-06,
      "loss": 0.1112,
      "step": 27157
    },
    {
      "epoch": 0.7922865978178424,
      "grad_norm": 1.0486515651120698,
      "learning_rate": 1.0894333175189993e-06,
      "loss": 0.1294,
      "step": 27158
    },
    {
      "epoch": 0.7923157710484859,
      "grad_norm": 0.824966210310151,
      "learning_rate": 1.0891389441487954e-06,
      "loss": 0.1058,
      "step": 27159
    },
    {
      "epoch": 0.7923449442791295,
      "grad_norm": 0.9263257222914655,
      "learning_rate": 1.088844605693824e-06,
      "loss": 0.1264,
      "step": 27160
    },
    {
      "epoch": 0.792374117509773,
      "grad_norm": 1.0945050618072207,
      "learning_rate": 1.088550302156714e-06,
      "loss": 0.0885,
      "step": 27161
    },
    {
      "epoch": 0.7924032907404166,
      "grad_norm": 1.3830758417720495,
      "learning_rate": 1.0882560335400943e-06,
      "loss": 0.1211,
      "step": 27162
    },
    {
      "epoch": 0.7924324639710602,
      "grad_norm": 1.211255424690409,
      "learning_rate": 1.0879617998465912e-06,
      "loss": 0.1079,
      "step": 27163
    },
    {
      "epoch": 0.7924616372017037,
      "grad_norm": 0.8134082408658129,
      "learning_rate": 1.0876676010788307e-06,
      "loss": 0.0924,
      "step": 27164
    },
    {
      "epoch": 0.7924908104323473,
      "grad_norm": 0.889315860492122,
      "learning_rate": 1.0873734372394402e-06,
      "loss": 0.1309,
      "step": 27165
    },
    {
      "epoch": 0.7925199836629908,
      "grad_norm": 1.2094765568061059,
      "learning_rate": 1.0870793083310449e-06,
      "loss": 0.1157,
      "step": 27166
    },
    {
      "epoch": 0.7925491568936344,
      "grad_norm": 1.3901104998579217,
      "learning_rate": 1.0867852143562712e-06,
      "loss": 0.1479,
      "step": 27167
    },
    {
      "epoch": 0.7925783301242779,
      "grad_norm": 0.9522795647766157,
      "learning_rate": 1.0864911553177459e-06,
      "loss": 0.1106,
      "step": 27168
    },
    {
      "epoch": 0.7926075033549215,
      "grad_norm": 0.9193753418996613,
      "learning_rate": 1.0861971312180942e-06,
      "loss": 0.1218,
      "step": 27169
    },
    {
      "epoch": 0.7926366765855651,
      "grad_norm": 1.0362991357964215,
      "learning_rate": 1.085903142059938e-06,
      "loss": 0.1312,
      "step": 27170
    },
    {
      "epoch": 0.7926658498162087,
      "grad_norm": 1.1401714533665626,
      "learning_rate": 1.0856091878459064e-06,
      "loss": 0.1189,
      "step": 27171
    },
    {
      "epoch": 0.7926950230468522,
      "grad_norm": 0.917302415006295,
      "learning_rate": 1.0853152685786196e-06,
      "loss": 0.0886,
      "step": 27172
    },
    {
      "epoch": 0.7927241962774958,
      "grad_norm": 1.2631958799446088,
      "learning_rate": 1.085021384260705e-06,
      "loss": 0.1123,
      "step": 27173
    },
    {
      "epoch": 0.7927533695081393,
      "grad_norm": 0.7993442809384943,
      "learning_rate": 1.0847275348947833e-06,
      "loss": 0.1209,
      "step": 27174
    },
    {
      "epoch": 0.7927825427387829,
      "grad_norm": 1.187448320820712,
      "learning_rate": 1.0844337204834814e-06,
      "loss": 0.1272,
      "step": 27175
    },
    {
      "epoch": 0.7928117159694265,
      "grad_norm": 0.8773412349222718,
      "learning_rate": 1.08413994102942e-06,
      "loss": 0.1402,
      "step": 27176
    },
    {
      "epoch": 0.79284088920007,
      "grad_norm": 0.683838764971712,
      "learning_rate": 1.0838461965352215e-06,
      "loss": 0.1006,
      "step": 27177
    },
    {
      "epoch": 0.7928700624307136,
      "grad_norm": 0.8919481410247305,
      "learning_rate": 1.083552487003509e-06,
      "loss": 0.0994,
      "step": 27178
    },
    {
      "epoch": 0.7928992356613571,
      "grad_norm": 0.7318332943450829,
      "learning_rate": 1.083258812436907e-06,
      "loss": 0.0918,
      "step": 27179
    },
    {
      "epoch": 0.7929284088920007,
      "grad_norm": 0.8147479444203601,
      "learning_rate": 1.0829651728380346e-06,
      "loss": 0.1257,
      "step": 27180
    },
    {
      "epoch": 0.7929575821226442,
      "grad_norm": 1.0957109828739,
      "learning_rate": 1.082671568209513e-06,
      "loss": 0.1198,
      "step": 27181
    },
    {
      "epoch": 0.7929867553532878,
      "grad_norm": 0.9268456476495067,
      "learning_rate": 1.0823779985539657e-06,
      "loss": 0.0927,
      "step": 27182
    },
    {
      "epoch": 0.7930159285839314,
      "grad_norm": 0.937669862032185,
      "learning_rate": 1.0820844638740125e-06,
      "loss": 0.1095,
      "step": 27183
    },
    {
      "epoch": 0.793045101814575,
      "grad_norm": 0.8994880085158811,
      "learning_rate": 1.0817909641722713e-06,
      "loss": 0.1105,
      "step": 27184
    },
    {
      "epoch": 0.7930742750452185,
      "grad_norm": 0.8368374731245064,
      "learning_rate": 1.0814974994513672e-06,
      "loss": 0.1043,
      "step": 27185
    },
    {
      "epoch": 0.7931034482758621,
      "grad_norm": 0.7875018884126366,
      "learning_rate": 1.0812040697139187e-06,
      "loss": 0.1055,
      "step": 27186
    },
    {
      "epoch": 0.7931326215065057,
      "grad_norm": 0.9334903899768735,
      "learning_rate": 1.0809106749625431e-06,
      "loss": 0.1227,
      "step": 27187
    },
    {
      "epoch": 0.7931617947371492,
      "grad_norm": 1.1329781338344356,
      "learning_rate": 1.0806173151998628e-06,
      "loss": 0.1147,
      "step": 27188
    },
    {
      "epoch": 0.7931909679677928,
      "grad_norm": 1.4054346205322619,
      "learning_rate": 1.0803239904284952e-06,
      "loss": 0.1172,
      "step": 27189
    },
    {
      "epoch": 0.7932201411984363,
      "grad_norm": 0.8135928422315268,
      "learning_rate": 1.0800307006510585e-06,
      "loss": 0.103,
      "step": 27190
    },
    {
      "epoch": 0.7932493144290799,
      "grad_norm": 1.0014669905524591,
      "learning_rate": 1.0797374458701716e-06,
      "loss": 0.119,
      "step": 27191
    },
    {
      "epoch": 0.7932784876597234,
      "grad_norm": 0.9133195992648436,
      "learning_rate": 1.079444226088454e-06,
      "loss": 0.1083,
      "step": 27192
    },
    {
      "epoch": 0.793307660890367,
      "grad_norm": 0.6612529568331375,
      "learning_rate": 1.0791510413085232e-06,
      "loss": 0.1009,
      "step": 27193
    },
    {
      "epoch": 0.7933368341210105,
      "grad_norm": 1.050006629251399,
      "learning_rate": 1.078857891532994e-06,
      "loss": 0.1153,
      "step": 27194
    },
    {
      "epoch": 0.7933660073516541,
      "grad_norm": 0.9127293559854903,
      "learning_rate": 1.0785647767644869e-06,
      "loss": 0.1411,
      "step": 27195
    },
    {
      "epoch": 0.7933951805822976,
      "grad_norm": 0.8450020135109233,
      "learning_rate": 1.078271697005616e-06,
      "loss": 0.119,
      "step": 27196
    },
    {
      "epoch": 0.7934243538129413,
      "grad_norm": 0.8240192595216537,
      "learning_rate": 1.0779786522589998e-06,
      "loss": 0.124,
      "step": 27197
    },
    {
      "epoch": 0.7934535270435848,
      "grad_norm": 0.8152466647131754,
      "learning_rate": 1.0776856425272548e-06,
      "loss": 0.1238,
      "step": 27198
    },
    {
      "epoch": 0.7934827002742284,
      "grad_norm": 0.8905718914173306,
      "learning_rate": 1.0773926678129958e-06,
      "loss": 0.1203,
      "step": 27199
    },
    {
      "epoch": 0.793511873504872,
      "grad_norm": 0.9610597286522311,
      "learning_rate": 1.0770997281188378e-06,
      "loss": 0.1145,
      "step": 27200
    },
    {
      "epoch": 0.7935410467355155,
      "grad_norm": 0.75896572777544,
      "learning_rate": 1.0768068234473978e-06,
      "loss": 0.1246,
      "step": 27201
    },
    {
      "epoch": 0.7935702199661591,
      "grad_norm": 0.8331296571922095,
      "learning_rate": 1.0765139538012892e-06,
      "loss": 0.1304,
      "step": 27202
    },
    {
      "epoch": 0.7935993931968026,
      "grad_norm": 0.8385479477782422,
      "learning_rate": 1.0762211191831283e-06,
      "loss": 0.0939,
      "step": 27203
    },
    {
      "epoch": 0.7936285664274462,
      "grad_norm": 0.8796501707340214,
      "learning_rate": 1.0759283195955273e-06,
      "loss": 0.1069,
      "step": 27204
    },
    {
      "epoch": 0.7936577396580897,
      "grad_norm": 0.9855807190635272,
      "learning_rate": 1.075635555041103e-06,
      "loss": 0.1161,
      "step": 27205
    },
    {
      "epoch": 0.7936869128887333,
      "grad_norm": 1.0909022563980866,
      "learning_rate": 1.0753428255224674e-06,
      "loss": 0.1176,
      "step": 27206
    },
    {
      "epoch": 0.7937160861193768,
      "grad_norm": 0.9271966413936198,
      "learning_rate": 1.0750501310422328e-06,
      "loss": 0.12,
      "step": 27207
    },
    {
      "epoch": 0.7937452593500204,
      "grad_norm": 0.7606476951413613,
      "learning_rate": 1.074757471603014e-06,
      "loss": 0.0915,
      "step": 27208
    },
    {
      "epoch": 0.7937744325806639,
      "grad_norm": 1.0786965180563166,
      "learning_rate": 1.074464847207425e-06,
      "loss": 0.1127,
      "step": 27209
    },
    {
      "epoch": 0.7938036058113075,
      "grad_norm": 0.9642516925484652,
      "learning_rate": 1.074172257858076e-06,
      "loss": 0.0867,
      "step": 27210
    },
    {
      "epoch": 0.7938327790419512,
      "grad_norm": 0.8178564365082546,
      "learning_rate": 1.0738797035575787e-06,
      "loss": 0.116,
      "step": 27211
    },
    {
      "epoch": 0.7938619522725947,
      "grad_norm": 0.7119798409234761,
      "learning_rate": 1.0735871843085483e-06,
      "loss": 0.1112,
      "step": 27212
    },
    {
      "epoch": 0.7938911255032383,
      "grad_norm": 1.1182402379916003,
      "learning_rate": 1.0732947001135935e-06,
      "loss": 0.1263,
      "step": 27213
    },
    {
      "epoch": 0.7939202987338818,
      "grad_norm": 0.7720191956329254,
      "learning_rate": 1.0730022509753235e-06,
      "loss": 0.105,
      "step": 27214
    },
    {
      "epoch": 0.7939494719645254,
      "grad_norm": 0.606352370124071,
      "learning_rate": 1.072709836896355e-06,
      "loss": 0.1111,
      "step": 27215
    },
    {
      "epoch": 0.7939786451951689,
      "grad_norm": 0.957001421108336,
      "learning_rate": 1.0724174578792952e-06,
      "loss": 0.107,
      "step": 27216
    },
    {
      "epoch": 0.7940078184258125,
      "grad_norm": 0.9266659059031501,
      "learning_rate": 1.0721251139267536e-06,
      "loss": 0.1168,
      "step": 27217
    },
    {
      "epoch": 0.794036991656456,
      "grad_norm": 0.9046915043052184,
      "learning_rate": 1.071832805041343e-06,
      "loss": 0.1329,
      "step": 27218
    },
    {
      "epoch": 0.7940661648870996,
      "grad_norm": 0.8037414910215839,
      "learning_rate": 1.071540531225671e-06,
      "loss": 0.1393,
      "step": 27219
    },
    {
      "epoch": 0.7940953381177431,
      "grad_norm": 1.1688948899651286,
      "learning_rate": 1.071248292482346e-06,
      "loss": 0.1211,
      "step": 27220
    },
    {
      "epoch": 0.7941245113483867,
      "grad_norm": 0.8053109879370193,
      "learning_rate": 1.0709560888139787e-06,
      "loss": 0.1219,
      "step": 27221
    },
    {
      "epoch": 0.7941536845790302,
      "grad_norm": 0.7711154565723309,
      "learning_rate": 1.0706639202231783e-06,
      "loss": 0.1088,
      "step": 27222
    },
    {
      "epoch": 0.7941828578096738,
      "grad_norm": 1.011853271244742,
      "learning_rate": 1.0703717867125524e-06,
      "loss": 0.1249,
      "step": 27223
    },
    {
      "epoch": 0.7942120310403175,
      "grad_norm": 0.8364694502713238,
      "learning_rate": 1.070079688284708e-06,
      "loss": 0.1049,
      "step": 27224
    },
    {
      "epoch": 0.794241204270961,
      "grad_norm": 0.8879913859202871,
      "learning_rate": 1.0697876249422557e-06,
      "loss": 0.1262,
      "step": 27225
    },
    {
      "epoch": 0.7942703775016046,
      "grad_norm": 0.79892463453512,
      "learning_rate": 1.0694955966877996e-06,
      "loss": 0.1118,
      "step": 27226
    },
    {
      "epoch": 0.7942995507322481,
      "grad_norm": 0.7897948536845185,
      "learning_rate": 1.06920360352395e-06,
      "loss": 0.1119,
      "step": 27227
    },
    {
      "epoch": 0.7943287239628917,
      "grad_norm": 0.7850501406887443,
      "learning_rate": 1.0689116454533105e-06,
      "loss": 0.1295,
      "step": 27228
    },
    {
      "epoch": 0.7943578971935352,
      "grad_norm": 1.0072128526934645,
      "learning_rate": 1.068619722478491e-06,
      "loss": 0.1069,
      "step": 27229
    },
    {
      "epoch": 0.7943870704241788,
      "grad_norm": 0.8950120076679392,
      "learning_rate": 1.0683278346020953e-06,
      "loss": 0.1318,
      "step": 27230
    },
    {
      "epoch": 0.7944162436548223,
      "grad_norm": 1.0075836046887874,
      "learning_rate": 1.068035981826731e-06,
      "loss": 0.1288,
      "step": 27231
    },
    {
      "epoch": 0.7944454168854659,
      "grad_norm": 0.8989678170551099,
      "learning_rate": 1.0677441641550012e-06,
      "loss": 0.1053,
      "step": 27232
    },
    {
      "epoch": 0.7944745901161094,
      "grad_norm": 1.0010786692342146,
      "learning_rate": 1.0674523815895143e-06,
      "loss": 0.1479,
      "step": 27233
    },
    {
      "epoch": 0.794503763346753,
      "grad_norm": 0.7515913770216751,
      "learning_rate": 1.0671606341328728e-06,
      "loss": 0.1303,
      "step": 27234
    },
    {
      "epoch": 0.7945329365773965,
      "grad_norm": 0.8256631985755358,
      "learning_rate": 1.0668689217876832e-06,
      "loss": 0.1161,
      "step": 27235
    },
    {
      "epoch": 0.7945621098080401,
      "grad_norm": 0.9734480532915907,
      "learning_rate": 1.0665772445565493e-06,
      "loss": 0.1253,
      "step": 27236
    },
    {
      "epoch": 0.7945912830386836,
      "grad_norm": 0.6859718576154452,
      "learning_rate": 1.0662856024420732e-06,
      "loss": 0.0938,
      "step": 27237
    },
    {
      "epoch": 0.7946204562693273,
      "grad_norm": 1.0410261535803178,
      "learning_rate": 1.06599399544686e-06,
      "loss": 0.1151,
      "step": 27238
    },
    {
      "epoch": 0.7946496294999709,
      "grad_norm": 0.9047278946682342,
      "learning_rate": 1.0657024235735152e-06,
      "loss": 0.11,
      "step": 27239
    },
    {
      "epoch": 0.7946788027306144,
      "grad_norm": 0.9623374158719716,
      "learning_rate": 1.0654108868246398e-06,
      "loss": 0.1375,
      "step": 27240
    },
    {
      "epoch": 0.794707975961258,
      "grad_norm": 0.8649855101651447,
      "learning_rate": 1.0651193852028353e-06,
      "loss": 0.1204,
      "step": 27241
    },
    {
      "epoch": 0.7947371491919015,
      "grad_norm": 0.8117566411642689,
      "learning_rate": 1.0648279187107068e-06,
      "loss": 0.1297,
      "step": 27242
    },
    {
      "epoch": 0.7947663224225451,
      "grad_norm": 0.8079122548166594,
      "learning_rate": 1.064536487350855e-06,
      "loss": 0.1118,
      "step": 27243
    },
    {
      "epoch": 0.7947954956531886,
      "grad_norm": 0.8523914860289792,
      "learning_rate": 1.06424509112588e-06,
      "loss": 0.1035,
      "step": 27244
    },
    {
      "epoch": 0.7948246688838322,
      "grad_norm": 0.9556340736393387,
      "learning_rate": 1.063953730038388e-06,
      "loss": 0.1113,
      "step": 27245
    },
    {
      "epoch": 0.7948538421144757,
      "grad_norm": 0.9754331264843032,
      "learning_rate": 1.0636624040909765e-06,
      "loss": 0.1024,
      "step": 27246
    },
    {
      "epoch": 0.7948830153451193,
      "grad_norm": 0.8163054623079017,
      "learning_rate": 1.0633711132862467e-06,
      "loss": 0.1128,
      "step": 27247
    },
    {
      "epoch": 0.7949121885757628,
      "grad_norm": 0.842210170911893,
      "learning_rate": 1.0630798576268013e-06,
      "loss": 0.1115,
      "step": 27248
    },
    {
      "epoch": 0.7949413618064064,
      "grad_norm": 0.7836559465763133,
      "learning_rate": 1.062788637115239e-06,
      "loss": 0.12,
      "step": 27249
    },
    {
      "epoch": 0.79497053503705,
      "grad_norm": 0.7956972852306032,
      "learning_rate": 1.0624974517541587e-06,
      "loss": 0.0986,
      "step": 27250
    },
    {
      "epoch": 0.7949997082676936,
      "grad_norm": 0.8141726113794314,
      "learning_rate": 1.0622063015461603e-06,
      "loss": 0.1198,
      "step": 27251
    },
    {
      "epoch": 0.7950288814983372,
      "grad_norm": 0.7980079504447605,
      "learning_rate": 1.0619151864938464e-06,
      "loss": 0.1072,
      "step": 27252
    },
    {
      "epoch": 0.7950580547289807,
      "grad_norm": 0.8830622236492937,
      "learning_rate": 1.0616241065998134e-06,
      "loss": 0.1124,
      "step": 27253
    },
    {
      "epoch": 0.7950872279596243,
      "grad_norm": 0.7724068062364485,
      "learning_rate": 1.0613330618666584e-06,
      "loss": 0.1292,
      "step": 27254
    },
    {
      "epoch": 0.7951164011902678,
      "grad_norm": 0.9088773064446449,
      "learning_rate": 1.0610420522969833e-06,
      "loss": 0.1105,
      "step": 27255
    },
    {
      "epoch": 0.7951455744209114,
      "grad_norm": 0.7981120777989499,
      "learning_rate": 1.0607510778933828e-06,
      "loss": 0.1221,
      "step": 27256
    },
    {
      "epoch": 0.7951747476515549,
      "grad_norm": 0.9219924405039507,
      "learning_rate": 1.0604601386584579e-06,
      "loss": 0.1015,
      "step": 27257
    },
    {
      "epoch": 0.7952039208821985,
      "grad_norm": 0.9552342074351698,
      "learning_rate": 1.0601692345948033e-06,
      "loss": 0.1263,
      "step": 27258
    },
    {
      "epoch": 0.795233094112842,
      "grad_norm": 0.9333306679607168,
      "learning_rate": 1.0598783657050183e-06,
      "loss": 0.1196,
      "step": 27259
    },
    {
      "epoch": 0.7952622673434856,
      "grad_norm": 0.7489530231122494,
      "learning_rate": 1.0595875319916977e-06,
      "loss": 0.1193,
      "step": 27260
    },
    {
      "epoch": 0.7952914405741291,
      "grad_norm": 0.867807944720601,
      "learning_rate": 1.0592967334574394e-06,
      "loss": 0.1002,
      "step": 27261
    },
    {
      "epoch": 0.7953206138047727,
      "grad_norm": 1.0469200478889713,
      "learning_rate": 1.059005970104839e-06,
      "loss": 0.1106,
      "step": 27262
    },
    {
      "epoch": 0.7953497870354163,
      "grad_norm": 0.9471704814576529,
      "learning_rate": 1.0587152419364926e-06,
      "loss": 0.122,
      "step": 27263
    },
    {
      "epoch": 0.7953789602660598,
      "grad_norm": 0.9068258596768978,
      "learning_rate": 1.0584245489549956e-06,
      "loss": 0.1129,
      "step": 27264
    },
    {
      "epoch": 0.7954081334967035,
      "grad_norm": 0.8361631280770446,
      "learning_rate": 1.0581338911629436e-06,
      "loss": 0.1039,
      "step": 27265
    },
    {
      "epoch": 0.795437306727347,
      "grad_norm": 1.1017321696575815,
      "learning_rate": 1.057843268562932e-06,
      "loss": 0.1398,
      "step": 27266
    },
    {
      "epoch": 0.7954664799579906,
      "grad_norm": 1.2778504875373855,
      "learning_rate": 1.0575526811575526e-06,
      "loss": 0.1375,
      "step": 27267
    },
    {
      "epoch": 0.7954956531886341,
      "grad_norm": 0.7949750708281008,
      "learning_rate": 1.0572621289494022e-06,
      "loss": 0.1123,
      "step": 27268
    },
    {
      "epoch": 0.7955248264192777,
      "grad_norm": 0.7750850559447942,
      "learning_rate": 1.0569716119410755e-06,
      "loss": 0.1003,
      "step": 27269
    },
    {
      "epoch": 0.7955539996499212,
      "grad_norm": 0.9011085546946347,
      "learning_rate": 1.0566811301351648e-06,
      "loss": 0.1483,
      "step": 27270
    },
    {
      "epoch": 0.7955831728805648,
      "grad_norm": 0.7779645887598234,
      "learning_rate": 1.0563906835342624e-06,
      "loss": 0.0983,
      "step": 27271
    },
    {
      "epoch": 0.7956123461112083,
      "grad_norm": 0.7652944868889955,
      "learning_rate": 1.0561002721409641e-06,
      "loss": 0.1049,
      "step": 27272
    },
    {
      "epoch": 0.7956415193418519,
      "grad_norm": 0.8896689235920401,
      "learning_rate": 1.0558098959578612e-06,
      "loss": 0.1393,
      "step": 27273
    },
    {
      "epoch": 0.7956706925724955,
      "grad_norm": 2.154808598446225,
      "learning_rate": 1.0555195549875425e-06,
      "loss": 0.1288,
      "step": 27274
    },
    {
      "epoch": 0.795699865803139,
      "grad_norm": 0.8105039252933518,
      "learning_rate": 1.055229249232607e-06,
      "loss": 0.1057,
      "step": 27275
    },
    {
      "epoch": 0.7957290390337826,
      "grad_norm": 0.7601961340534943,
      "learning_rate": 1.0549389786956427e-06,
      "loss": 0.1068,
      "step": 27276
    },
    {
      "epoch": 0.7957582122644261,
      "grad_norm": 0.9367799725960024,
      "learning_rate": 1.05464874337924e-06,
      "loss": 0.1056,
      "step": 27277
    },
    {
      "epoch": 0.7957873854950698,
      "grad_norm": 1.3257994814465035,
      "learning_rate": 1.0543585432859938e-06,
      "loss": 0.1142,
      "step": 27278
    },
    {
      "epoch": 0.7958165587257133,
      "grad_norm": 0.8538977594302306,
      "learning_rate": 1.0540683784184902e-06,
      "loss": 0.1139,
      "step": 27279
    },
    {
      "epoch": 0.7958457319563569,
      "grad_norm": 0.8283071662944053,
      "learning_rate": 1.0537782487793242e-06,
      "loss": 0.1112,
      "step": 27280
    },
    {
      "epoch": 0.7958749051870004,
      "grad_norm": 0.8035194992437611,
      "learning_rate": 1.0534881543710823e-06,
      "loss": 0.1116,
      "step": 27281
    },
    {
      "epoch": 0.795904078417644,
      "grad_norm": 0.6971214743367149,
      "learning_rate": 1.0531980951963572e-06,
      "loss": 0.0872,
      "step": 27282
    },
    {
      "epoch": 0.7959332516482875,
      "grad_norm": 0.9076570749974003,
      "learning_rate": 1.0529080712577378e-06,
      "loss": 0.1292,
      "step": 27283
    },
    {
      "epoch": 0.7959624248789311,
      "grad_norm": 0.8532211474920889,
      "learning_rate": 1.0526180825578108e-06,
      "loss": 0.1183,
      "step": 27284
    },
    {
      "epoch": 0.7959915981095746,
      "grad_norm": 0.7560721645327512,
      "learning_rate": 1.0523281290991678e-06,
      "loss": 0.1094,
      "step": 27285
    },
    {
      "epoch": 0.7960207713402182,
      "grad_norm": 0.6863790227198021,
      "learning_rate": 1.0520382108843979e-06,
      "loss": 0.1021,
      "step": 27286
    },
    {
      "epoch": 0.7960499445708618,
      "grad_norm": 0.943530506934025,
      "learning_rate": 1.0517483279160889e-06,
      "loss": 0.1089,
      "step": 27287
    },
    {
      "epoch": 0.7960791178015053,
      "grad_norm": 0.8455719346304803,
      "learning_rate": 1.051458480196827e-06,
      "loss": 0.1103,
      "step": 27288
    },
    {
      "epoch": 0.7961082910321489,
      "grad_norm": 1.0411619034805029,
      "learning_rate": 1.0511686677292021e-06,
      "loss": 0.1068,
      "step": 27289
    },
    {
      "epoch": 0.7961374642627924,
      "grad_norm": 0.7012838742892026,
      "learning_rate": 1.050878890515799e-06,
      "loss": 0.1289,
      "step": 27290
    },
    {
      "epoch": 0.796166637493436,
      "grad_norm": 0.7541020856424244,
      "learning_rate": 1.0505891485592073e-06,
      "loss": 0.1221,
      "step": 27291
    },
    {
      "epoch": 0.7961958107240796,
      "grad_norm": 0.8180727127319456,
      "learning_rate": 1.050299441862014e-06,
      "loss": 0.1273,
      "step": 27292
    },
    {
      "epoch": 0.7962249839547232,
      "grad_norm": 0.8950766712575334,
      "learning_rate": 1.0500097704268042e-06,
      "loss": 0.1265,
      "step": 27293
    },
    {
      "epoch": 0.7962541571853667,
      "grad_norm": 0.7790903717148736,
      "learning_rate": 1.0497201342561625e-06,
      "loss": 0.1167,
      "step": 27294
    },
    {
      "epoch": 0.7962833304160103,
      "grad_norm": 0.7474984820936914,
      "learning_rate": 1.0494305333526782e-06,
      "loss": 0.1062,
      "step": 27295
    },
    {
      "epoch": 0.7963125036466538,
      "grad_norm": 0.9903092008224443,
      "learning_rate": 1.0491409677189352e-06,
      "loss": 0.1033,
      "step": 27296
    },
    {
      "epoch": 0.7963416768772974,
      "grad_norm": 0.779866185498765,
      "learning_rate": 1.048851437357517e-06,
      "loss": 0.1112,
      "step": 27297
    },
    {
      "epoch": 0.796370850107941,
      "grad_norm": 0.8087830263086303,
      "learning_rate": 1.0485619422710097e-06,
      "loss": 0.095,
      "step": 27298
    },
    {
      "epoch": 0.7964000233385845,
      "grad_norm": 1.0298698460761202,
      "learning_rate": 1.048272482462e-06,
      "loss": 0.1068,
      "step": 27299
    },
    {
      "epoch": 0.7964291965692281,
      "grad_norm": 0.8249956681325328,
      "learning_rate": 1.0479830579330697e-06,
      "loss": 0.1211,
      "step": 27300
    },
    {
      "epoch": 0.7964583697998716,
      "grad_norm": 0.7654990462051833,
      "learning_rate": 1.0476936686868023e-06,
      "loss": 0.1122,
      "step": 27301
    },
    {
      "epoch": 0.7964875430305152,
      "grad_norm": 1.2977896988259128,
      "learning_rate": 1.0474043147257835e-06,
      "loss": 0.1389,
      "step": 27302
    },
    {
      "epoch": 0.7965167162611587,
      "grad_norm": 1.008924547308374,
      "learning_rate": 1.0471149960525938e-06,
      "loss": 0.1136,
      "step": 27303
    },
    {
      "epoch": 0.7965458894918023,
      "grad_norm": 0.8084272623989259,
      "learning_rate": 1.0468257126698177e-06,
      "loss": 0.1065,
      "step": 27304
    },
    {
      "epoch": 0.7965750627224459,
      "grad_norm": 0.7677326657619127,
      "learning_rate": 1.0465364645800397e-06,
      "loss": 0.1408,
      "step": 27305
    },
    {
      "epoch": 0.7966042359530895,
      "grad_norm": 0.7261190198599423,
      "learning_rate": 1.0462472517858401e-06,
      "loss": 0.1184,
      "step": 27306
    },
    {
      "epoch": 0.796633409183733,
      "grad_norm": 0.7956539267186988,
      "learning_rate": 1.0459580742898e-06,
      "loss": 0.0889,
      "step": 27307
    },
    {
      "epoch": 0.7966625824143766,
      "grad_norm": 0.8544799910759517,
      "learning_rate": 1.045668932094504e-06,
      "loss": 0.0973,
      "step": 27308
    },
    {
      "epoch": 0.7966917556450201,
      "grad_norm": 0.9441641107906975,
      "learning_rate": 1.04537982520253e-06,
      "loss": 0.1101,
      "step": 27309
    },
    {
      "epoch": 0.7967209288756637,
      "grad_norm": 0.748369684774757,
      "learning_rate": 1.0450907536164623e-06,
      "loss": 0.0964,
      "step": 27310
    },
    {
      "epoch": 0.7967501021063073,
      "grad_norm": 0.7201166621857837,
      "learning_rate": 1.0448017173388792e-06,
      "loss": 0.1177,
      "step": 27311
    },
    {
      "epoch": 0.7967792753369508,
      "grad_norm": 0.9313919002849654,
      "learning_rate": 1.0445127163723634e-06,
      "loss": 0.1088,
      "step": 27312
    },
    {
      "epoch": 0.7968084485675944,
      "grad_norm": 0.7366937516037917,
      "learning_rate": 1.0442237507194936e-06,
      "loss": 0.1107,
      "step": 27313
    },
    {
      "epoch": 0.7968376217982379,
      "grad_norm": 0.9756247991440086,
      "learning_rate": 1.0439348203828487e-06,
      "loss": 0.1079,
      "step": 27314
    },
    {
      "epoch": 0.7968667950288815,
      "grad_norm": 0.7766049100662035,
      "learning_rate": 1.0436459253650088e-06,
      "loss": 0.1128,
      "step": 27315
    },
    {
      "epoch": 0.796895968259525,
      "grad_norm": 0.9086673460545542,
      "learning_rate": 1.0433570656685555e-06,
      "loss": 0.128,
      "step": 27316
    },
    {
      "epoch": 0.7969251414901686,
      "grad_norm": 0.8607224473451294,
      "learning_rate": 1.0430682412960659e-06,
      "loss": 0.1162,
      "step": 27317
    },
    {
      "epoch": 0.7969543147208121,
      "grad_norm": 0.9319137601603329,
      "learning_rate": 1.0427794522501168e-06,
      "loss": 0.1116,
      "step": 27318
    },
    {
      "epoch": 0.7969834879514558,
      "grad_norm": 0.7976591204978623,
      "learning_rate": 1.0424906985332895e-06,
      "loss": 0.1297,
      "step": 27319
    },
    {
      "epoch": 0.7970126611820993,
      "grad_norm": 0.870949844542542,
      "learning_rate": 1.0422019801481604e-06,
      "loss": 0.1028,
      "step": 27320
    },
    {
      "epoch": 0.7970418344127429,
      "grad_norm": 0.8619316749310867,
      "learning_rate": 1.0419132970973046e-06,
      "loss": 0.1051,
      "step": 27321
    },
    {
      "epoch": 0.7970710076433865,
      "grad_norm": 0.7862771472718773,
      "learning_rate": 1.041624649383305e-06,
      "loss": 0.1381,
      "step": 27322
    },
    {
      "epoch": 0.79710018087403,
      "grad_norm": 0.857233247605059,
      "learning_rate": 1.041336037008735e-06,
      "loss": 0.1311,
      "step": 27323
    },
    {
      "epoch": 0.7971293541046736,
      "grad_norm": 0.86679087743703,
      "learning_rate": 1.0410474599761711e-06,
      "loss": 0.1036,
      "step": 27324
    },
    {
      "epoch": 0.7971585273353171,
      "grad_norm": 0.9608810048027391,
      "learning_rate": 1.0407589182881916e-06,
      "loss": 0.0985,
      "step": 27325
    },
    {
      "epoch": 0.7971877005659607,
      "grad_norm": 0.8463746457760372,
      "learning_rate": 1.0404704119473707e-06,
      "loss": 0.1235,
      "step": 27326
    },
    {
      "epoch": 0.7972168737966042,
      "grad_norm": 0.8035519886738591,
      "learning_rate": 1.040181940956284e-06,
      "loss": 0.1092,
      "step": 27327
    },
    {
      "epoch": 0.7972460470272478,
      "grad_norm": 0.9602836819719859,
      "learning_rate": 1.039893505317508e-06,
      "loss": 0.1281,
      "step": 27328
    },
    {
      "epoch": 0.7972752202578913,
      "grad_norm": 0.9643572818748601,
      "learning_rate": 1.039605105033618e-06,
      "loss": 0.1184,
      "step": 27329
    },
    {
      "epoch": 0.7973043934885349,
      "grad_norm": 0.7501368876732976,
      "learning_rate": 1.0393167401071885e-06,
      "loss": 0.135,
      "step": 27330
    },
    {
      "epoch": 0.7973335667191784,
      "grad_norm": 1.222176281836421,
      "learning_rate": 1.0390284105407927e-06,
      "loss": 0.1354,
      "step": 27331
    },
    {
      "epoch": 0.7973627399498221,
      "grad_norm": 1.1490824271254725,
      "learning_rate": 1.0387401163370064e-06,
      "loss": 0.1145,
      "step": 27332
    },
    {
      "epoch": 0.7973919131804657,
      "grad_norm": 0.8264927213388011,
      "learning_rate": 1.0384518574984014e-06,
      "loss": 0.1142,
      "step": 27333
    },
    {
      "epoch": 0.7974210864111092,
      "grad_norm": 1.0103645390764446,
      "learning_rate": 1.038163634027553e-06,
      "loss": 0.1297,
      "step": 27334
    },
    {
      "epoch": 0.7974502596417528,
      "grad_norm": 0.7876174767197305,
      "learning_rate": 1.0378754459270352e-06,
      "loss": 0.1158,
      "step": 27335
    },
    {
      "epoch": 0.7974794328723963,
      "grad_norm": 0.6555318205499635,
      "learning_rate": 1.037587293199419e-06,
      "loss": 0.1167,
      "step": 27336
    },
    {
      "epoch": 0.7975086061030399,
      "grad_norm": 0.9265734139056865,
      "learning_rate": 1.0372991758472768e-06,
      "loss": 0.1314,
      "step": 27337
    },
    {
      "epoch": 0.7975377793336834,
      "grad_norm": 1.1487913808388042,
      "learning_rate": 1.037011093873183e-06,
      "loss": 0.1538,
      "step": 27338
    },
    {
      "epoch": 0.797566952564327,
      "grad_norm": 0.9946993047628627,
      "learning_rate": 1.0367230472797064e-06,
      "loss": 0.1128,
      "step": 27339
    },
    {
      "epoch": 0.7975961257949705,
      "grad_norm": 0.7669284339381132,
      "learning_rate": 1.036435036069422e-06,
      "loss": 0.113,
      "step": 27340
    },
    {
      "epoch": 0.7976252990256141,
      "grad_norm": 0.7329772568054622,
      "learning_rate": 1.0361470602448975e-06,
      "loss": 0.11,
      "step": 27341
    },
    {
      "epoch": 0.7976544722562576,
      "grad_norm": 0.7349377252456173,
      "learning_rate": 1.0358591198087076e-06,
      "loss": 0.1071,
      "step": 27342
    },
    {
      "epoch": 0.7976836454869012,
      "grad_norm": 0.9312266589613643,
      "learning_rate": 1.0355712147634211e-06,
      "loss": 0.1076,
      "step": 27343
    },
    {
      "epoch": 0.7977128187175447,
      "grad_norm": 0.8086047133577277,
      "learning_rate": 1.0352833451116069e-06,
      "loss": 0.0947,
      "step": 27344
    },
    {
      "epoch": 0.7977419919481883,
      "grad_norm": 0.8984617533998107,
      "learning_rate": 1.0349955108558369e-06,
      "loss": 0.1043,
      "step": 27345
    },
    {
      "epoch": 0.797771165178832,
      "grad_norm": 0.6451594497243076,
      "learning_rate": 1.0347077119986814e-06,
      "loss": 0.088,
      "step": 27346
    },
    {
      "epoch": 0.7978003384094755,
      "grad_norm": 0.8004683371416975,
      "learning_rate": 1.0344199485427086e-06,
      "loss": 0.1217,
      "step": 27347
    },
    {
      "epoch": 0.7978295116401191,
      "grad_norm": 1.0035865205869823,
      "learning_rate": 1.0341322204904875e-06,
      "loss": 0.1125,
      "step": 27348
    },
    {
      "epoch": 0.7978586848707626,
      "grad_norm": 0.9000770291014063,
      "learning_rate": 1.0338445278445874e-06,
      "loss": 0.1037,
      "step": 27349
    },
    {
      "epoch": 0.7978878581014062,
      "grad_norm": 0.7279193308205009,
      "learning_rate": 1.0335568706075771e-06,
      "loss": 0.1179,
      "step": 27350
    },
    {
      "epoch": 0.7979170313320497,
      "grad_norm": 0.666165510272029,
      "learning_rate": 1.0332692487820216e-06,
      "loss": 0.1037,
      "step": 27351
    },
    {
      "epoch": 0.7979462045626933,
      "grad_norm": 0.8768612695247942,
      "learning_rate": 1.0329816623704942e-06,
      "loss": 0.1185,
      "step": 27352
    },
    {
      "epoch": 0.7979753777933368,
      "grad_norm": 0.8473419457334794,
      "learning_rate": 1.032694111375559e-06,
      "loss": 0.0969,
      "step": 27353
    },
    {
      "epoch": 0.7980045510239804,
      "grad_norm": 0.80039041600822,
      "learning_rate": 1.0324065957997824e-06,
      "loss": 0.106,
      "step": 27354
    },
    {
      "epoch": 0.7980337242546239,
      "grad_norm": 0.7061535278711579,
      "learning_rate": 1.0321191156457343e-06,
      "loss": 0.1086,
      "step": 27355
    },
    {
      "epoch": 0.7980628974852675,
      "grad_norm": 0.7978768143194749,
      "learning_rate": 1.0318316709159792e-06,
      "loss": 0.1223,
      "step": 27356
    },
    {
      "epoch": 0.798092070715911,
      "grad_norm": 0.7193545422037824,
      "learning_rate": 1.0315442616130828e-06,
      "loss": 0.1014,
      "step": 27357
    },
    {
      "epoch": 0.7981212439465546,
      "grad_norm": 0.709688780305429,
      "learning_rate": 1.0312568877396111e-06,
      "loss": 0.1123,
      "step": 27358
    },
    {
      "epoch": 0.7981504171771983,
      "grad_norm": 0.8245917762991147,
      "learning_rate": 1.0309695492981324e-06,
      "loss": 0.1015,
      "step": 27359
    },
    {
      "epoch": 0.7981795904078418,
      "grad_norm": 1.154410388594645,
      "learning_rate": 1.0306822462912103e-06,
      "loss": 0.1255,
      "step": 27360
    },
    {
      "epoch": 0.7982087636384854,
      "grad_norm": 0.9838972049995711,
      "learning_rate": 1.030394978721408e-06,
      "loss": 0.109,
      "step": 27361
    },
    {
      "epoch": 0.7982379368691289,
      "grad_norm": 0.7685313319232628,
      "learning_rate": 1.0301077465912928e-06,
      "loss": 0.114,
      "step": 27362
    },
    {
      "epoch": 0.7982671100997725,
      "grad_norm": 0.795557683594711,
      "learning_rate": 1.0298205499034265e-06,
      "loss": 0.1279,
      "step": 27363
    },
    {
      "epoch": 0.798296283330416,
      "grad_norm": 0.9100744446084528,
      "learning_rate": 1.0295333886603749e-06,
      "loss": 0.0909,
      "step": 27364
    },
    {
      "epoch": 0.7983254565610596,
      "grad_norm": 0.9533167245822671,
      "learning_rate": 1.0292462628647026e-06,
      "loss": 0.1327,
      "step": 27365
    },
    {
      "epoch": 0.7983546297917031,
      "grad_norm": 0.9543349418398875,
      "learning_rate": 1.0289591725189717e-06,
      "loss": 0.1114,
      "step": 27366
    },
    {
      "epoch": 0.7983838030223467,
      "grad_norm": 0.8412541393310877,
      "learning_rate": 1.028672117625744e-06,
      "loss": 0.0793,
      "step": 27367
    },
    {
      "epoch": 0.7984129762529902,
      "grad_norm": 0.7861122645941854,
      "learning_rate": 1.0283850981875853e-06,
      "loss": 0.1165,
      "step": 27368
    },
    {
      "epoch": 0.7984421494836338,
      "grad_norm": 0.9010736568877544,
      "learning_rate": 1.0280981142070545e-06,
      "loss": 0.1175,
      "step": 27369
    },
    {
      "epoch": 0.7984713227142773,
      "grad_norm": 1.152385746574157,
      "learning_rate": 1.0278111656867174e-06,
      "loss": 0.137,
      "step": 27370
    },
    {
      "epoch": 0.7985004959449209,
      "grad_norm": 0.9707370319950321,
      "learning_rate": 1.0275242526291324e-06,
      "loss": 0.1173,
      "step": 27371
    },
    {
      "epoch": 0.7985296691755644,
      "grad_norm": 0.7163698412723432,
      "learning_rate": 1.0272373750368635e-06,
      "loss": 0.1067,
      "step": 27372
    },
    {
      "epoch": 0.7985588424062081,
      "grad_norm": 0.8788242145510866,
      "learning_rate": 1.0269505329124713e-06,
      "loss": 0.106,
      "step": 27373
    },
    {
      "epoch": 0.7985880156368517,
      "grad_norm": 1.0949977314962551,
      "learning_rate": 1.026663726258515e-06,
      "loss": 0.1055,
      "step": 27374
    },
    {
      "epoch": 0.7986171888674952,
      "grad_norm": 0.8828902644346203,
      "learning_rate": 1.0263769550775564e-06,
      "loss": 0.1183,
      "step": 27375
    },
    {
      "epoch": 0.7986463620981388,
      "grad_norm": 0.7971093754824756,
      "learning_rate": 1.0260902193721573e-06,
      "loss": 0.1222,
      "step": 27376
    },
    {
      "epoch": 0.7986755353287823,
      "grad_norm": 0.8554109268803041,
      "learning_rate": 1.0258035191448756e-06,
      "loss": 0.1297,
      "step": 27377
    },
    {
      "epoch": 0.7987047085594259,
      "grad_norm": 1.4321768780104973,
      "learning_rate": 1.0255168543982708e-06,
      "loss": 0.0929,
      "step": 27378
    },
    {
      "epoch": 0.7987338817900694,
      "grad_norm": 0.781439972212485,
      "learning_rate": 1.0252302251349033e-06,
      "loss": 0.1044,
      "step": 27379
    },
    {
      "epoch": 0.798763055020713,
      "grad_norm": 0.9651303026479694,
      "learning_rate": 1.024943631357332e-06,
      "loss": 0.125,
      "step": 27380
    },
    {
      "epoch": 0.7987922282513565,
      "grad_norm": 0.9186434046135167,
      "learning_rate": 1.0246570730681122e-06,
      "loss": 0.1326,
      "step": 27381
    },
    {
      "epoch": 0.7988214014820001,
      "grad_norm": 1.0266898586051725,
      "learning_rate": 1.0243705502698075e-06,
      "loss": 0.0973,
      "step": 27382
    },
    {
      "epoch": 0.7988505747126436,
      "grad_norm": 1.1227978782006496,
      "learning_rate": 1.0240840629649735e-06,
      "loss": 0.1196,
      "step": 27383
    },
    {
      "epoch": 0.7988797479432872,
      "grad_norm": 0.9718486690452055,
      "learning_rate": 1.0237976111561666e-06,
      "loss": 0.1077,
      "step": 27384
    },
    {
      "epoch": 0.7989089211739308,
      "grad_norm": 0.9010038203770405,
      "learning_rate": 1.023511194845947e-06,
      "loss": 0.1176,
      "step": 27385
    },
    {
      "epoch": 0.7989380944045744,
      "grad_norm": 0.9916957610020355,
      "learning_rate": 1.02322481403687e-06,
      "loss": 0.1277,
      "step": 27386
    },
    {
      "epoch": 0.798967267635218,
      "grad_norm": 0.851873687310045,
      "learning_rate": 1.0229384687314915e-06,
      "loss": 0.0998,
      "step": 27387
    },
    {
      "epoch": 0.7989964408658615,
      "grad_norm": 1.017243669354646,
      "learning_rate": 1.0226521589323684e-06,
      "loss": 0.1032,
      "step": 27388
    },
    {
      "epoch": 0.7990256140965051,
      "grad_norm": 1.6622331720543835,
      "learning_rate": 1.0223658846420593e-06,
      "loss": 0.0963,
      "step": 27389
    },
    {
      "epoch": 0.7990547873271486,
      "grad_norm": 0.9540677558757608,
      "learning_rate": 1.0220796458631171e-06,
      "loss": 0.1008,
      "step": 27390
    },
    {
      "epoch": 0.7990839605577922,
      "grad_norm": 0.974557481035565,
      "learning_rate": 1.021793442598098e-06,
      "loss": 0.1097,
      "step": 27391
    },
    {
      "epoch": 0.7991131337884357,
      "grad_norm": 1.0136333069184431,
      "learning_rate": 1.021507274849558e-06,
      "loss": 0.1062,
      "step": 27392
    },
    {
      "epoch": 0.7991423070190793,
      "grad_norm": 0.9111600102249696,
      "learning_rate": 1.0212211426200502e-06,
      "loss": 0.1054,
      "step": 27393
    },
    {
      "epoch": 0.7991714802497228,
      "grad_norm": 0.8667781071069143,
      "learning_rate": 1.0209350459121304e-06,
      "loss": 0.1374,
      "step": 27394
    },
    {
      "epoch": 0.7992006534803664,
      "grad_norm": 1.1658946403964856,
      "learning_rate": 1.0206489847283535e-06,
      "loss": 0.1156,
      "step": 27395
    },
    {
      "epoch": 0.79922982671101,
      "grad_norm": 1.0612162801070393,
      "learning_rate": 1.0203629590712727e-06,
      "loss": 0.1213,
      "step": 27396
    },
    {
      "epoch": 0.7992589999416535,
      "grad_norm": 0.6733781856203072,
      "learning_rate": 1.0200769689434404e-06,
      "loss": 0.1323,
      "step": 27397
    },
    {
      "epoch": 0.7992881731722971,
      "grad_norm": 0.8627238954451573,
      "learning_rate": 1.0197910143474116e-06,
      "loss": 0.1178,
      "step": 27398
    },
    {
      "epoch": 0.7993173464029406,
      "grad_norm": 1.0186587177239874,
      "learning_rate": 1.0195050952857378e-06,
      "loss": 0.1258,
      "step": 27399
    },
    {
      "epoch": 0.7993465196335843,
      "grad_norm": 1.1352688856390698,
      "learning_rate": 1.0192192117609727e-06,
      "loss": 0.0969,
      "step": 27400
    },
    {
      "epoch": 0.7993756928642278,
      "grad_norm": 0.7959818087463973,
      "learning_rate": 1.0189333637756676e-06,
      "loss": 0.1187,
      "step": 27401
    },
    {
      "epoch": 0.7994048660948714,
      "grad_norm": 0.9637908873423694,
      "learning_rate": 1.0186475513323762e-06,
      "loss": 0.1263,
      "step": 27402
    },
    {
      "epoch": 0.7994340393255149,
      "grad_norm": 0.8707825603159962,
      "learning_rate": 1.0183617744336494e-06,
      "loss": 0.097,
      "step": 27403
    },
    {
      "epoch": 0.7994632125561585,
      "grad_norm": 1.131584600088067,
      "learning_rate": 1.018076033082036e-06,
      "loss": 0.1302,
      "step": 27404
    },
    {
      "epoch": 0.799492385786802,
      "grad_norm": 0.9505786522226987,
      "learning_rate": 1.0177903272800898e-06,
      "loss": 0.1137,
      "step": 27405
    },
    {
      "epoch": 0.7995215590174456,
      "grad_norm": 0.8803258128358723,
      "learning_rate": 1.0175046570303626e-06,
      "loss": 0.1277,
      "step": 27406
    },
    {
      "epoch": 0.7995507322480891,
      "grad_norm": 1.1133269484320216,
      "learning_rate": 1.0172190223354023e-06,
      "loss": 0.1314,
      "step": 27407
    },
    {
      "epoch": 0.7995799054787327,
      "grad_norm": 0.7900603980233648,
      "learning_rate": 1.016933423197759e-06,
      "loss": 0.1098,
      "step": 27408
    },
    {
      "epoch": 0.7996090787093763,
      "grad_norm": 0.7658848977935718,
      "learning_rate": 1.0166478596199847e-06,
      "loss": 0.1147,
      "step": 27409
    },
    {
      "epoch": 0.7996382519400198,
      "grad_norm": 1.0040842211708778,
      "learning_rate": 1.0163623316046267e-06,
      "loss": 0.1266,
      "step": 27410
    },
    {
      "epoch": 0.7996674251706634,
      "grad_norm": 0.9590869503747794,
      "learning_rate": 1.016076839154233e-06,
      "loss": 0.1253,
      "step": 27411
    },
    {
      "epoch": 0.7996965984013069,
      "grad_norm": 0.825789617131499,
      "learning_rate": 1.0157913822713567e-06,
      "loss": 0.1141,
      "step": 27412
    },
    {
      "epoch": 0.7997257716319506,
      "grad_norm": 1.18040239101638,
      "learning_rate": 1.0155059609585432e-06,
      "loss": 0.1149,
      "step": 27413
    },
    {
      "epoch": 0.7997549448625941,
      "grad_norm": 0.8379277379174047,
      "learning_rate": 1.0152205752183408e-06,
      "loss": 0.1045,
      "step": 27414
    },
    {
      "epoch": 0.7997841180932377,
      "grad_norm": 0.7105118907286953,
      "learning_rate": 1.0149352250532985e-06,
      "loss": 0.1139,
      "step": 27415
    },
    {
      "epoch": 0.7998132913238812,
      "grad_norm": 0.9452899581305659,
      "learning_rate": 1.0146499104659634e-06,
      "loss": 0.1055,
      "step": 27416
    },
    {
      "epoch": 0.7998424645545248,
      "grad_norm": 1.0002905838789558,
      "learning_rate": 1.0143646314588817e-06,
      "loss": 0.1005,
      "step": 27417
    },
    {
      "epoch": 0.7998716377851683,
      "grad_norm": 0.8606236401368312,
      "learning_rate": 1.0140793880346006e-06,
      "loss": 0.1152,
      "step": 27418
    },
    {
      "epoch": 0.7999008110158119,
      "grad_norm": 0.8002761524186969,
      "learning_rate": 1.0137941801956686e-06,
      "loss": 0.1376,
      "step": 27419
    },
    {
      "epoch": 0.7999299842464554,
      "grad_norm": 0.8804755466804401,
      "learning_rate": 1.0135090079446307e-06,
      "loss": 0.1107,
      "step": 27420
    },
    {
      "epoch": 0.799959157477099,
      "grad_norm": 0.7130652507536094,
      "learning_rate": 1.0132238712840315e-06,
      "loss": 0.1182,
      "step": 27421
    },
    {
      "epoch": 0.7999883307077426,
      "grad_norm": 0.9118448767102106,
      "learning_rate": 1.012938770216419e-06,
      "loss": 0.1176,
      "step": 27422
    },
    {
      "epoch": 0.8000175039383861,
      "grad_norm": 0.8609004163722702,
      "learning_rate": 1.0126537047443364e-06,
      "loss": 0.1111,
      "step": 27423
    },
    {
      "epoch": 0.8000466771690297,
      "grad_norm": 0.9193491155298933,
      "learning_rate": 1.0123686748703292e-06,
      "loss": 0.1212,
      "step": 27424
    },
    {
      "epoch": 0.8000758503996732,
      "grad_norm": 0.8982582014204211,
      "learning_rate": 1.0120836805969442e-06,
      "loss": 0.1125,
      "step": 27425
    },
    {
      "epoch": 0.8001050236303168,
      "grad_norm": 0.7771863371344789,
      "learning_rate": 1.0117987219267238e-06,
      "loss": 0.1081,
      "step": 27426
    },
    {
      "epoch": 0.8001341968609604,
      "grad_norm": 0.8806857382044603,
      "learning_rate": 1.011513798862211e-06,
      "loss": 0.1231,
      "step": 27427
    },
    {
      "epoch": 0.800163370091604,
      "grad_norm": 0.8949088742009761,
      "learning_rate": 1.0112289114059525e-06,
      "loss": 0.1198,
      "step": 27428
    },
    {
      "epoch": 0.8001925433222475,
      "grad_norm": 0.8702535990284568,
      "learning_rate": 1.0109440595604887e-06,
      "loss": 0.1271,
      "step": 27429
    },
    {
      "epoch": 0.8002217165528911,
      "grad_norm": 0.9267839597026477,
      "learning_rate": 1.0106592433283652e-06,
      "loss": 0.117,
      "step": 27430
    },
    {
      "epoch": 0.8002508897835346,
      "grad_norm": 0.8222063174651958,
      "learning_rate": 1.0103744627121226e-06,
      "loss": 0.103,
      "step": 27431
    },
    {
      "epoch": 0.8002800630141782,
      "grad_norm": 0.7926290344386797,
      "learning_rate": 1.0100897177143054e-06,
      "loss": 0.1235,
      "step": 27432
    },
    {
      "epoch": 0.8003092362448218,
      "grad_norm": 0.8564751019855571,
      "learning_rate": 1.009805008337455e-06,
      "loss": 0.1073,
      "step": 27433
    },
    {
      "epoch": 0.8003384094754653,
      "grad_norm": 0.7440124553336979,
      "learning_rate": 1.0095203345841115e-06,
      "loss": 0.123,
      "step": 27434
    },
    {
      "epoch": 0.8003675827061089,
      "grad_norm": 0.9112097950179906,
      "learning_rate": 1.009235696456818e-06,
      "loss": 0.1039,
      "step": 27435
    },
    {
      "epoch": 0.8003967559367524,
      "grad_norm": 0.7822635495414189,
      "learning_rate": 1.0089510939581166e-06,
      "loss": 0.1068,
      "step": 27436
    },
    {
      "epoch": 0.800425929167396,
      "grad_norm": 0.9361557872530007,
      "learning_rate": 1.0086665270905472e-06,
      "loss": 0.1031,
      "step": 27437
    },
    {
      "epoch": 0.8004551023980395,
      "grad_norm": 0.9948467467756411,
      "learning_rate": 1.0083819958566489e-06,
      "loss": 0.1164,
      "step": 27438
    },
    {
      "epoch": 0.8004842756286831,
      "grad_norm": 0.72331521075041,
      "learning_rate": 1.0080975002589644e-06,
      "loss": 0.1096,
      "step": 27439
    },
    {
      "epoch": 0.8005134488593267,
      "grad_norm": 0.7935637014052944,
      "learning_rate": 1.007813040300033e-06,
      "loss": 0.1139,
      "step": 27440
    },
    {
      "epoch": 0.8005426220899703,
      "grad_norm": 0.9534471465186868,
      "learning_rate": 1.0075286159823905e-06,
      "loss": 0.1158,
      "step": 27441
    },
    {
      "epoch": 0.8005717953206138,
      "grad_norm": 0.9560321220386006,
      "learning_rate": 1.0072442273085825e-06,
      "loss": 0.1033,
      "step": 27442
    },
    {
      "epoch": 0.8006009685512574,
      "grad_norm": 0.8886134616677258,
      "learning_rate": 1.0069598742811448e-06,
      "loss": 0.1226,
      "step": 27443
    },
    {
      "epoch": 0.800630141781901,
      "grad_norm": 0.6454328075690808,
      "learning_rate": 1.006675556902615e-06,
      "loss": 0.1228,
      "step": 27444
    },
    {
      "epoch": 0.8006593150125445,
      "grad_norm": 0.9234637758355148,
      "learning_rate": 1.0063912751755334e-06,
      "loss": 0.1496,
      "step": 27445
    },
    {
      "epoch": 0.8006884882431881,
      "grad_norm": 0.9702854374728535,
      "learning_rate": 1.0061070291024372e-06,
      "loss": 0.1101,
      "step": 27446
    },
    {
      "epoch": 0.8007176614738316,
      "grad_norm": 0.7329279377667942,
      "learning_rate": 1.0058228186858633e-06,
      "loss": 0.1057,
      "step": 27447
    },
    {
      "epoch": 0.8007468347044752,
      "grad_norm": 0.9125468441366694,
      "learning_rate": 1.0055386439283494e-06,
      "loss": 0.1214,
      "step": 27448
    },
    {
      "epoch": 0.8007760079351187,
      "grad_norm": 0.7911482095262947,
      "learning_rate": 1.0052545048324342e-06,
      "loss": 0.1218,
      "step": 27449
    },
    {
      "epoch": 0.8008051811657623,
      "grad_norm": 0.8908001573765704,
      "learning_rate": 1.0049704014006527e-06,
      "loss": 0.1012,
      "step": 27450
    },
    {
      "epoch": 0.8008343543964058,
      "grad_norm": 0.9453363997313438,
      "learning_rate": 1.004686333635541e-06,
      "loss": 0.1317,
      "step": 27451
    },
    {
      "epoch": 0.8008635276270494,
      "grad_norm": 0.870801751568944,
      "learning_rate": 1.0044023015396375e-06,
      "loss": 0.1072,
      "step": 27452
    },
    {
      "epoch": 0.8008927008576929,
      "grad_norm": 0.9291563537905266,
      "learning_rate": 1.0041183051154746e-06,
      "loss": 0.1021,
      "step": 27453
    },
    {
      "epoch": 0.8009218740883366,
      "grad_norm": 0.8064952750070626,
      "learning_rate": 1.00383434436559e-06,
      "loss": 0.116,
      "step": 27454
    },
    {
      "epoch": 0.8009510473189801,
      "grad_norm": 0.9306562986957118,
      "learning_rate": 1.0035504192925195e-06,
      "loss": 0.1124,
      "step": 27455
    },
    {
      "epoch": 0.8009802205496237,
      "grad_norm": 0.9311307517380818,
      "learning_rate": 1.003266529898797e-06,
      "loss": 0.1156,
      "step": 27456
    },
    {
      "epoch": 0.8010093937802673,
      "grad_norm": 0.7746613497416763,
      "learning_rate": 1.0029826761869554e-06,
      "loss": 0.1258,
      "step": 27457
    },
    {
      "epoch": 0.8010385670109108,
      "grad_norm": 0.9255562720236967,
      "learning_rate": 1.0026988581595315e-06,
      "loss": 0.1265,
      "step": 27458
    },
    {
      "epoch": 0.8010677402415544,
      "grad_norm": 0.8488042810775273,
      "learning_rate": 1.0024150758190566e-06,
      "loss": 0.1018,
      "step": 27459
    },
    {
      "epoch": 0.8010969134721979,
      "grad_norm": 0.7533661385301036,
      "learning_rate": 1.0021313291680674e-06,
      "loss": 0.1282,
      "step": 27460
    },
    {
      "epoch": 0.8011260867028415,
      "grad_norm": 0.7168257875771503,
      "learning_rate": 1.0018476182090935e-06,
      "loss": 0.1114,
      "step": 27461
    },
    {
      "epoch": 0.801155259933485,
      "grad_norm": 0.7129530312175197,
      "learning_rate": 1.001563942944671e-06,
      "loss": 0.1119,
      "step": 27462
    },
    {
      "epoch": 0.8011844331641286,
      "grad_norm": 0.6467688600173058,
      "learning_rate": 1.001280303377331e-06,
      "loss": 0.1059,
      "step": 27463
    },
    {
      "epoch": 0.8012136063947721,
      "grad_norm": 0.8250352063102936,
      "learning_rate": 1.000996699509605e-06,
      "loss": 0.1349,
      "step": 27464
    },
    {
      "epoch": 0.8012427796254157,
      "grad_norm": 0.743134392226119,
      "learning_rate": 1.0007131313440255e-06,
      "loss": 0.1005,
      "step": 27465
    },
    {
      "epoch": 0.8012719528560592,
      "grad_norm": 0.7632444025424922,
      "learning_rate": 1.0004295988831259e-06,
      "loss": 0.132,
      "step": 27466
    },
    {
      "epoch": 0.8013011260867028,
      "grad_norm": 0.8975724299906467,
      "learning_rate": 1.0001461021294363e-06,
      "loss": 0.1106,
      "step": 27467
    },
    {
      "epoch": 0.8013302993173465,
      "grad_norm": 0.8529121784209267,
      "learning_rate": 9.998626410854856e-07,
      "loss": 0.1083,
      "step": 27468
    },
    {
      "epoch": 0.80135947254799,
      "grad_norm": 0.9149983717059812,
      "learning_rate": 9.99579215753808e-07,
      "loss": 0.1283,
      "step": 27469
    },
    {
      "epoch": 0.8013886457786336,
      "grad_norm": 0.8506981819837592,
      "learning_rate": 9.992958261369324e-07,
      "loss": 0.1109,
      "step": 27470
    },
    {
      "epoch": 0.8014178190092771,
      "grad_norm": 0.8245174876311564,
      "learning_rate": 9.990124722373857e-07,
      "loss": 0.1125,
      "step": 27471
    },
    {
      "epoch": 0.8014469922399207,
      "grad_norm": 0.7232528396795872,
      "learning_rate": 9.987291540577026e-07,
      "loss": 0.1006,
      "step": 27472
    },
    {
      "epoch": 0.8014761654705642,
      "grad_norm": 0.9982663724992538,
      "learning_rate": 9.984458716004114e-07,
      "loss": 0.1357,
      "step": 27473
    },
    {
      "epoch": 0.8015053387012078,
      "grad_norm": 0.7650235797531183,
      "learning_rate": 9.98162624868038e-07,
      "loss": 0.1184,
      "step": 27474
    },
    {
      "epoch": 0.8015345119318513,
      "grad_norm": 0.7297808765548561,
      "learning_rate": 9.978794138631153e-07,
      "loss": 0.124,
      "step": 27475
    },
    {
      "epoch": 0.8015636851624949,
      "grad_norm": 0.8622994255567406,
      "learning_rate": 9.975962385881688e-07,
      "loss": 0.1283,
      "step": 27476
    },
    {
      "epoch": 0.8015928583931384,
      "grad_norm": 0.8824422420247346,
      "learning_rate": 9.973130990457285e-07,
      "loss": 0.123,
      "step": 27477
    },
    {
      "epoch": 0.801622031623782,
      "grad_norm": 0.8055058362559767,
      "learning_rate": 9.9702999523832e-07,
      "loss": 0.1197,
      "step": 27478
    },
    {
      "epoch": 0.8016512048544255,
      "grad_norm": 0.7574393173391978,
      "learning_rate": 9.967469271684732e-07,
      "loss": 0.1043,
      "step": 27479
    },
    {
      "epoch": 0.8016803780850691,
      "grad_norm": 0.9211481717686488,
      "learning_rate": 9.964638948387145e-07,
      "loss": 0.1188,
      "step": 27480
    },
    {
      "epoch": 0.8017095513157128,
      "grad_norm": 1.547594870813011,
      "learning_rate": 9.961808982515693e-07,
      "loss": 0.1512,
      "step": 27481
    },
    {
      "epoch": 0.8017387245463563,
      "grad_norm": 0.9228661760817329,
      "learning_rate": 9.95897937409565e-07,
      "loss": 0.1156,
      "step": 27482
    },
    {
      "epoch": 0.8017678977769999,
      "grad_norm": 0.7546212666172488,
      "learning_rate": 9.956150123152291e-07,
      "loss": 0.1137,
      "step": 27483
    },
    {
      "epoch": 0.8017970710076434,
      "grad_norm": 1.1521681964485182,
      "learning_rate": 9.953321229710854e-07,
      "loss": 0.1259,
      "step": 27484
    },
    {
      "epoch": 0.801826244238287,
      "grad_norm": 0.9702727280511132,
      "learning_rate": 9.95049269379662e-07,
      "loss": 0.1,
      "step": 27485
    },
    {
      "epoch": 0.8018554174689305,
      "grad_norm": 0.8129101381875943,
      "learning_rate": 9.947664515434823e-07,
      "loss": 0.1016,
      "step": 27486
    },
    {
      "epoch": 0.8018845906995741,
      "grad_norm": 0.7833678546779559,
      "learning_rate": 9.944836694650706e-07,
      "loss": 0.1346,
      "step": 27487
    },
    {
      "epoch": 0.8019137639302176,
      "grad_norm": 0.9835583680742942,
      "learning_rate": 9.942009231469524e-07,
      "loss": 0.1318,
      "step": 27488
    },
    {
      "epoch": 0.8019429371608612,
      "grad_norm": 0.8395465308407088,
      "learning_rate": 9.939182125916535e-07,
      "loss": 0.1029,
      "step": 27489
    },
    {
      "epoch": 0.8019721103915047,
      "grad_norm": 1.2117427699435273,
      "learning_rate": 9.936355378016965e-07,
      "loss": 0.13,
      "step": 27490
    },
    {
      "epoch": 0.8020012836221483,
      "grad_norm": 0.9255749330060353,
      "learning_rate": 9.933528987796037e-07,
      "loss": 0.1078,
      "step": 27491
    },
    {
      "epoch": 0.8020304568527918,
      "grad_norm": 0.8952781065033935,
      "learning_rate": 9.93070295527901e-07,
      "loss": 0.1263,
      "step": 27492
    },
    {
      "epoch": 0.8020596300834354,
      "grad_norm": 0.8174950489610706,
      "learning_rate": 9.9278772804911e-07,
      "loss": 0.126,
      "step": 27493
    },
    {
      "epoch": 0.802088803314079,
      "grad_norm": 0.8633872550982359,
      "learning_rate": 9.92505196345752e-07,
      "loss": 0.1304,
      "step": 27494
    },
    {
      "epoch": 0.8021179765447226,
      "grad_norm": 0.895939107570586,
      "learning_rate": 9.922227004203517e-07,
      "loss": 0.1055,
      "step": 27495
    },
    {
      "epoch": 0.8021471497753662,
      "grad_norm": 0.8810439537896332,
      "learning_rate": 9.919402402754314e-07,
      "loss": 0.1054,
      "step": 27496
    },
    {
      "epoch": 0.8021763230060097,
      "grad_norm": 0.7126229364810918,
      "learning_rate": 9.916578159135114e-07,
      "loss": 0.0878,
      "step": 27497
    },
    {
      "epoch": 0.8022054962366533,
      "grad_norm": 0.7414594488993557,
      "learning_rate": 9.913754273371128e-07,
      "loss": 0.1233,
      "step": 27498
    },
    {
      "epoch": 0.8022346694672968,
      "grad_norm": 0.9218964391377544,
      "learning_rate": 9.910930745487586e-07,
      "loss": 0.1364,
      "step": 27499
    },
    {
      "epoch": 0.8022638426979404,
      "grad_norm": 0.8567387959737994,
      "learning_rate": 9.908107575509673e-07,
      "loss": 0.1068,
      "step": 27500
    },
    {
      "epoch": 0.8022930159285839,
      "grad_norm": 0.7935055703829693,
      "learning_rate": 9.905284763462603e-07,
      "loss": 0.1169,
      "step": 27501
    },
    {
      "epoch": 0.8023221891592275,
      "grad_norm": 0.8315308574392503,
      "learning_rate": 9.90246230937159e-07,
      "loss": 0.1273,
      "step": 27502
    },
    {
      "epoch": 0.802351362389871,
      "grad_norm": 0.7955133952656952,
      "learning_rate": 9.899640213261823e-07,
      "loss": 0.1268,
      "step": 27503
    },
    {
      "epoch": 0.8023805356205146,
      "grad_norm": 0.9488374605414202,
      "learning_rate": 9.89681847515848e-07,
      "loss": 0.1217,
      "step": 27504
    },
    {
      "epoch": 0.8024097088511581,
      "grad_norm": 0.7815366441533715,
      "learning_rate": 9.893997095086788e-07,
      "loss": 0.1246,
      "step": 27505
    },
    {
      "epoch": 0.8024388820818017,
      "grad_norm": 0.9385181131703689,
      "learning_rate": 9.891176073071896e-07,
      "loss": 0.1101,
      "step": 27506
    },
    {
      "epoch": 0.8024680553124452,
      "grad_norm": 0.8404212265666252,
      "learning_rate": 9.888355409139027e-07,
      "loss": 0.1133,
      "step": 27507
    },
    {
      "epoch": 0.8024972285430889,
      "grad_norm": 0.7111770796997006,
      "learning_rate": 9.88553510331333e-07,
      "loss": 0.1087,
      "step": 27508
    },
    {
      "epoch": 0.8025264017737325,
      "grad_norm": 0.85651853202019,
      "learning_rate": 9.882715155620015e-07,
      "loss": 0.1061,
      "step": 27509
    },
    {
      "epoch": 0.802555575004376,
      "grad_norm": 0.9364010872011337,
      "learning_rate": 9.879895566084241e-07,
      "loss": 0.142,
      "step": 27510
    },
    {
      "epoch": 0.8025847482350196,
      "grad_norm": 0.8477630588934852,
      "learning_rate": 9.877076334731167e-07,
      "loss": 0.0942,
      "step": 27511
    },
    {
      "epoch": 0.8026139214656631,
      "grad_norm": 0.9740602613692005,
      "learning_rate": 9.874257461585979e-07,
      "loss": 0.135,
      "step": 27512
    },
    {
      "epoch": 0.8026430946963067,
      "grad_norm": 1.0255415594007768,
      "learning_rate": 9.871438946673855e-07,
      "loss": 0.1097,
      "step": 27513
    },
    {
      "epoch": 0.8026722679269502,
      "grad_norm": 0.867708343555253,
      "learning_rate": 9.868620790019929e-07,
      "loss": 0.101,
      "step": 27514
    },
    {
      "epoch": 0.8027014411575938,
      "grad_norm": 1.0916727839946327,
      "learning_rate": 9.865802991649393e-07,
      "loss": 0.1145,
      "step": 27515
    },
    {
      "epoch": 0.8027306143882373,
      "grad_norm": 0.7076756699990595,
      "learning_rate": 9.862985551587384e-07,
      "loss": 0.1057,
      "step": 27516
    },
    {
      "epoch": 0.8027597876188809,
      "grad_norm": 0.8077215592786581,
      "learning_rate": 9.86016846985905e-07,
      "loss": 0.0968,
      "step": 27517
    },
    {
      "epoch": 0.8027889608495244,
      "grad_norm": 0.7284569848091642,
      "learning_rate": 9.857351746489546e-07,
      "loss": 0.1089,
      "step": 27518
    },
    {
      "epoch": 0.802818134080168,
      "grad_norm": 0.9925048104491448,
      "learning_rate": 9.854535381504038e-07,
      "loss": 0.1124,
      "step": 27519
    },
    {
      "epoch": 0.8028473073108116,
      "grad_norm": 1.082531906295039,
      "learning_rate": 9.851719374927655e-07,
      "loss": 0.1149,
      "step": 27520
    },
    {
      "epoch": 0.8028764805414551,
      "grad_norm": 0.9989830487617188,
      "learning_rate": 9.848903726785518e-07,
      "loss": 0.1243,
      "step": 27521
    },
    {
      "epoch": 0.8029056537720988,
      "grad_norm": 0.6992970445341425,
      "learning_rate": 9.846088437102802e-07,
      "loss": 0.1294,
      "step": 27522
    },
    {
      "epoch": 0.8029348270027423,
      "grad_norm": 0.7919031635809013,
      "learning_rate": 9.843273505904622e-07,
      "loss": 0.1084,
      "step": 27523
    },
    {
      "epoch": 0.8029640002333859,
      "grad_norm": 0.9061890772451463,
      "learning_rate": 9.840458933216097e-07,
      "loss": 0.1195,
      "step": 27524
    },
    {
      "epoch": 0.8029931734640294,
      "grad_norm": 1.0522030653377388,
      "learning_rate": 9.837644719062367e-07,
      "loss": 0.083,
      "step": 27525
    },
    {
      "epoch": 0.803022346694673,
      "grad_norm": 0.6484002636183097,
      "learning_rate": 9.834830863468575e-07,
      "loss": 0.1188,
      "step": 27526
    },
    {
      "epoch": 0.8030515199253165,
      "grad_norm": 0.7819960455186077,
      "learning_rate": 9.832017366459817e-07,
      "loss": 0.1042,
      "step": 27527
    },
    {
      "epoch": 0.8030806931559601,
      "grad_norm": 0.9196624254240777,
      "learning_rate": 9.829204228061212e-07,
      "loss": 0.1427,
      "step": 27528
    },
    {
      "epoch": 0.8031098663866036,
      "grad_norm": 0.7512637588587939,
      "learning_rate": 9.826391448297895e-07,
      "loss": 0.1036,
      "step": 27529
    },
    {
      "epoch": 0.8031390396172472,
      "grad_norm": 0.761399698788126,
      "learning_rate": 9.82357902719495e-07,
      "loss": 0.1025,
      "step": 27530
    },
    {
      "epoch": 0.8031682128478908,
      "grad_norm": 0.7884522314321935,
      "learning_rate": 9.820766964777501e-07,
      "loss": 0.1362,
      "step": 27531
    },
    {
      "epoch": 0.8031973860785343,
      "grad_norm": 0.8762764995908606,
      "learning_rate": 9.817955261070666e-07,
      "loss": 0.114,
      "step": 27532
    },
    {
      "epoch": 0.8032265593091779,
      "grad_norm": 0.7402311609140194,
      "learning_rate": 9.815143916099533e-07,
      "loss": 0.0966,
      "step": 27533
    },
    {
      "epoch": 0.8032557325398214,
      "grad_norm": 0.9790257807777503,
      "learning_rate": 9.81233292988919e-07,
      "loss": 0.1055,
      "step": 27534
    },
    {
      "epoch": 0.8032849057704651,
      "grad_norm": 0.9065201993239207,
      "learning_rate": 9.809522302464757e-07,
      "loss": 0.1247,
      "step": 27535
    },
    {
      "epoch": 0.8033140790011086,
      "grad_norm": 0.9448381142552766,
      "learning_rate": 9.806712033851307e-07,
      "loss": 0.1106,
      "step": 27536
    },
    {
      "epoch": 0.8033432522317522,
      "grad_norm": 0.9193845004981372,
      "learning_rate": 9.803902124073945e-07,
      "loss": 0.102,
      "step": 27537
    },
    {
      "epoch": 0.8033724254623957,
      "grad_norm": 0.8344915088105022,
      "learning_rate": 9.801092573157734e-07,
      "loss": 0.1209,
      "step": 27538
    },
    {
      "epoch": 0.8034015986930393,
      "grad_norm": 0.7923926602430644,
      "learning_rate": 9.798283381127793e-07,
      "loss": 0.1067,
      "step": 27539
    },
    {
      "epoch": 0.8034307719236828,
      "grad_norm": 0.831507834185248,
      "learning_rate": 9.795474548009176e-07,
      "loss": 0.1148,
      "step": 27540
    },
    {
      "epoch": 0.8034599451543264,
      "grad_norm": 0.7731722617722381,
      "learning_rate": 9.792666073826952e-07,
      "loss": 0.1399,
      "step": 27541
    },
    {
      "epoch": 0.80348911838497,
      "grad_norm": 1.0448949677239212,
      "learning_rate": 9.789857958606207e-07,
      "loss": 0.086,
      "step": 27542
    },
    {
      "epoch": 0.8035182916156135,
      "grad_norm": 0.7869590606394334,
      "learning_rate": 9.787050202372023e-07,
      "loss": 0.0958,
      "step": 27543
    },
    {
      "epoch": 0.803547464846257,
      "grad_norm": 1.0754165322432865,
      "learning_rate": 9.784242805149442e-07,
      "loss": 0.1138,
      "step": 27544
    },
    {
      "epoch": 0.8035766380769006,
      "grad_norm": 0.8133889982205859,
      "learning_rate": 9.78143576696356e-07,
      "loss": 0.1325,
      "step": 27545
    },
    {
      "epoch": 0.8036058113075442,
      "grad_norm": 0.8810615591701374,
      "learning_rate": 9.778629087839414e-07,
      "loss": 0.0991,
      "step": 27546
    },
    {
      "epoch": 0.8036349845381877,
      "grad_norm": 1.1282760815960862,
      "learning_rate": 9.77582276780205e-07,
      "loss": 0.1087,
      "step": 27547
    },
    {
      "epoch": 0.8036641577688313,
      "grad_norm": 1.0124644803125806,
      "learning_rate": 9.77301680687654e-07,
      "loss": 0.1304,
      "step": 27548
    },
    {
      "epoch": 0.8036933309994749,
      "grad_norm": 0.8586315951501855,
      "learning_rate": 9.770211205087948e-07,
      "loss": 0.1149,
      "step": 27549
    },
    {
      "epoch": 0.8037225042301185,
      "grad_norm": 1.2215218079973935,
      "learning_rate": 9.767405962461306e-07,
      "loss": 0.1087,
      "step": 27550
    },
    {
      "epoch": 0.803751677460762,
      "grad_norm": 0.757236596094083,
      "learning_rate": 9.764601079021645e-07,
      "loss": 0.1246,
      "step": 27551
    },
    {
      "epoch": 0.8037808506914056,
      "grad_norm": 0.7812827316456944,
      "learning_rate": 9.761796554794034e-07,
      "loss": 0.1124,
      "step": 27552
    },
    {
      "epoch": 0.8038100239220491,
      "grad_norm": 0.9113787930889454,
      "learning_rate": 9.7589923898035e-07,
      "loss": 0.1265,
      "step": 27553
    },
    {
      "epoch": 0.8038391971526927,
      "grad_norm": 0.9763738642644213,
      "learning_rate": 9.75618858407506e-07,
      "loss": 0.1233,
      "step": 27554
    },
    {
      "epoch": 0.8038683703833363,
      "grad_norm": 0.7342411020902224,
      "learning_rate": 9.753385137633764e-07,
      "loss": 0.1041,
      "step": 27555
    },
    {
      "epoch": 0.8038975436139798,
      "grad_norm": 0.8643422526208991,
      "learning_rate": 9.750582050504648e-07,
      "loss": 0.1276,
      "step": 27556
    },
    {
      "epoch": 0.8039267168446234,
      "grad_norm": 0.8031755648302986,
      "learning_rate": 9.74777932271273e-07,
      "loss": 0.1224,
      "step": 27557
    },
    {
      "epoch": 0.8039558900752669,
      "grad_norm": 0.941386872050956,
      "learning_rate": 9.744976954283013e-07,
      "loss": 0.132,
      "step": 27558
    },
    {
      "epoch": 0.8039850633059105,
      "grad_norm": 0.6966302439469197,
      "learning_rate": 9.742174945240545e-07,
      "loss": 0.1092,
      "step": 27559
    },
    {
      "epoch": 0.804014236536554,
      "grad_norm": 0.7677355592201016,
      "learning_rate": 9.739373295610322e-07,
      "loss": 0.0967,
      "step": 27560
    },
    {
      "epoch": 0.8040434097671976,
      "grad_norm": 0.8254770337618673,
      "learning_rate": 9.736572005417354e-07,
      "loss": 0.1289,
      "step": 27561
    },
    {
      "epoch": 0.8040725829978412,
      "grad_norm": 0.7646539644982906,
      "learning_rate": 9.733771074686681e-07,
      "loss": 0.1366,
      "step": 27562
    },
    {
      "epoch": 0.8041017562284848,
      "grad_norm": 0.7624287634823101,
      "learning_rate": 9.730970503443281e-07,
      "loss": 0.1034,
      "step": 27563
    },
    {
      "epoch": 0.8041309294591283,
      "grad_norm": 0.9306076603852309,
      "learning_rate": 9.728170291712153e-07,
      "loss": 0.1081,
      "step": 27564
    },
    {
      "epoch": 0.8041601026897719,
      "grad_norm": 0.7185652973623851,
      "learning_rate": 9.725370439518323e-07,
      "loss": 0.1123,
      "step": 27565
    },
    {
      "epoch": 0.8041892759204154,
      "grad_norm": 1.0339619225291377,
      "learning_rate": 9.722570946886755e-07,
      "loss": 0.1281,
      "step": 27566
    },
    {
      "epoch": 0.804218449151059,
      "grad_norm": 0.8357950887205879,
      "learning_rate": 9.71977181384247e-07,
      "loss": 0.126,
      "step": 27567
    },
    {
      "epoch": 0.8042476223817026,
      "grad_norm": 0.8086783951902399,
      "learning_rate": 9.716973040410437e-07,
      "loss": 0.1074,
      "step": 27568
    },
    {
      "epoch": 0.8042767956123461,
      "grad_norm": 0.7949498671032257,
      "learning_rate": 9.714174626615664e-07,
      "loss": 0.1335,
      "step": 27569
    },
    {
      "epoch": 0.8043059688429897,
      "grad_norm": 1.0117006930422152,
      "learning_rate": 9.711376572483122e-07,
      "loss": 0.1377,
      "step": 27570
    },
    {
      "epoch": 0.8043351420736332,
      "grad_norm": 0.9456639991047978,
      "learning_rate": 9.708578878037778e-07,
      "loss": 0.1108,
      "step": 27571
    },
    {
      "epoch": 0.8043643153042768,
      "grad_norm": 0.915343136462663,
      "learning_rate": 9.705781543304627e-07,
      "loss": 0.0948,
      "step": 27572
    },
    {
      "epoch": 0.8043934885349203,
      "grad_norm": 0.8714827951311132,
      "learning_rate": 9.702984568308654e-07,
      "loss": 0.1075,
      "step": 27573
    },
    {
      "epoch": 0.8044226617655639,
      "grad_norm": 0.884711263380116,
      "learning_rate": 9.700187953074797e-07,
      "loss": 0.1061,
      "step": 27574
    },
    {
      "epoch": 0.8044518349962074,
      "grad_norm": 0.890139260451249,
      "learning_rate": 9.697391697628056e-07,
      "loss": 0.1145,
      "step": 27575
    },
    {
      "epoch": 0.8044810082268511,
      "grad_norm": 0.7615195481589713,
      "learning_rate": 9.694595801993383e-07,
      "loss": 0.0963,
      "step": 27576
    },
    {
      "epoch": 0.8045101814574946,
      "grad_norm": 1.0854808140848085,
      "learning_rate": 9.691800266195721e-07,
      "loss": 0.1417,
      "step": 27577
    },
    {
      "epoch": 0.8045393546881382,
      "grad_norm": 0.874197590124047,
      "learning_rate": 9.689005090260045e-07,
      "loss": 0.0975,
      "step": 27578
    },
    {
      "epoch": 0.8045685279187818,
      "grad_norm": 0.7717465030444081,
      "learning_rate": 9.686210274211321e-07,
      "loss": 0.1153,
      "step": 27579
    },
    {
      "epoch": 0.8045977011494253,
      "grad_norm": 0.9969176410958358,
      "learning_rate": 9.683415818074493e-07,
      "loss": 0.0937,
      "step": 27580
    },
    {
      "epoch": 0.8046268743800689,
      "grad_norm": 0.8402278413277697,
      "learning_rate": 9.680621721874483e-07,
      "loss": 0.1073,
      "step": 27581
    },
    {
      "epoch": 0.8046560476107124,
      "grad_norm": 0.8249377411077903,
      "learning_rate": 9.67782798563628e-07,
      "loss": 0.1173,
      "step": 27582
    },
    {
      "epoch": 0.804685220841356,
      "grad_norm": 0.803192629308676,
      "learning_rate": 9.675034609384792e-07,
      "loss": 0.1256,
      "step": 27583
    },
    {
      "epoch": 0.8047143940719995,
      "grad_norm": 1.1678074185282614,
      "learning_rate": 9.672241593144965e-07,
      "loss": 0.1011,
      "step": 27584
    },
    {
      "epoch": 0.8047435673026431,
      "grad_norm": 1.1864856481601738,
      "learning_rate": 9.669448936941733e-07,
      "loss": 0.1217,
      "step": 27585
    },
    {
      "epoch": 0.8047727405332866,
      "grad_norm": 0.7118350636026991,
      "learning_rate": 9.666656640800048e-07,
      "loss": 0.1224,
      "step": 27586
    },
    {
      "epoch": 0.8048019137639302,
      "grad_norm": 0.7955510891893275,
      "learning_rate": 9.663864704744825e-07,
      "loss": 0.1159,
      "step": 27587
    },
    {
      "epoch": 0.8048310869945737,
      "grad_norm": 0.8232893457522569,
      "learning_rate": 9.661073128800973e-07,
      "loss": 0.107,
      "step": 27588
    },
    {
      "epoch": 0.8048602602252174,
      "grad_norm": 0.8513235400021794,
      "learning_rate": 9.658281912993444e-07,
      "loss": 0.115,
      "step": 27589
    },
    {
      "epoch": 0.804889433455861,
      "grad_norm": 1.1189109386472753,
      "learning_rate": 9.655491057347133e-07,
      "loss": 0.1057,
      "step": 27590
    },
    {
      "epoch": 0.8049186066865045,
      "grad_norm": 0.8443591755701726,
      "learning_rate": 9.652700561886964e-07,
      "loss": 0.1183,
      "step": 27591
    },
    {
      "epoch": 0.804947779917148,
      "grad_norm": 0.9920503892854112,
      "learning_rate": 9.64991042663787e-07,
      "loss": 0.1136,
      "step": 27592
    },
    {
      "epoch": 0.8049769531477916,
      "grad_norm": 0.9103245817667461,
      "learning_rate": 9.647120651624737e-07,
      "loss": 0.1125,
      "step": 27593
    },
    {
      "epoch": 0.8050061263784352,
      "grad_norm": 0.7498952518795737,
      "learning_rate": 9.644331236872472e-07,
      "loss": 0.0974,
      "step": 27594
    },
    {
      "epoch": 0.8050352996090787,
      "grad_norm": 1.1221231264385536,
      "learning_rate": 9.641542182405995e-07,
      "loss": 0.1145,
      "step": 27595
    },
    {
      "epoch": 0.8050644728397223,
      "grad_norm": 1.4629426387282476,
      "learning_rate": 9.63875348825018e-07,
      "loss": 0.1165,
      "step": 27596
    },
    {
      "epoch": 0.8050936460703658,
      "grad_norm": 0.7649073341332615,
      "learning_rate": 9.63596515442995e-07,
      "loss": 0.0948,
      "step": 27597
    },
    {
      "epoch": 0.8051228193010094,
      "grad_norm": 0.9968918576357672,
      "learning_rate": 9.633177180970177e-07,
      "loss": 0.0973,
      "step": 27598
    },
    {
      "epoch": 0.8051519925316529,
      "grad_norm": 1.0691184429048453,
      "learning_rate": 9.630389567895776e-07,
      "loss": 0.1086,
      "step": 27599
    },
    {
      "epoch": 0.8051811657622965,
      "grad_norm": 1.019450539143369,
      "learning_rate": 9.627602315231616e-07,
      "loss": 0.1242,
      "step": 27600
    },
    {
      "epoch": 0.80521033899294,
      "grad_norm": 0.9327498563407185,
      "learning_rate": 9.624815423002576e-07,
      "loss": 0.1249,
      "step": 27601
    },
    {
      "epoch": 0.8052395122235836,
      "grad_norm": 0.7362454293940043,
      "learning_rate": 9.622028891233543e-07,
      "loss": 0.0984,
      "step": 27602
    },
    {
      "epoch": 0.8052686854542273,
      "grad_norm": 0.7822579531266258,
      "learning_rate": 9.619242719949411e-07,
      "loss": 0.1144,
      "step": 27603
    },
    {
      "epoch": 0.8052978586848708,
      "grad_norm": 0.9239614326140464,
      "learning_rate": 9.616456909175027e-07,
      "loss": 0.0971,
      "step": 27604
    },
    {
      "epoch": 0.8053270319155144,
      "grad_norm": 0.8223689071686345,
      "learning_rate": 9.613671458935287e-07,
      "loss": 0.1039,
      "step": 27605
    },
    {
      "epoch": 0.8053562051461579,
      "grad_norm": 0.9177224570765862,
      "learning_rate": 9.610886369255051e-07,
      "loss": 0.1166,
      "step": 27606
    },
    {
      "epoch": 0.8053853783768015,
      "grad_norm": 0.8410811683324743,
      "learning_rate": 9.608101640159162e-07,
      "loss": 0.1146,
      "step": 27607
    },
    {
      "epoch": 0.805414551607445,
      "grad_norm": 1.1448666995095045,
      "learning_rate": 9.605317271672504e-07,
      "loss": 0.1121,
      "step": 27608
    },
    {
      "epoch": 0.8054437248380886,
      "grad_norm": 0.9329725147861546,
      "learning_rate": 9.60253326381994e-07,
      "loss": 0.1228,
      "step": 27609
    },
    {
      "epoch": 0.8054728980687321,
      "grad_norm": 0.7056684039714629,
      "learning_rate": 9.59974961662632e-07,
      "loss": 0.092,
      "step": 27610
    },
    {
      "epoch": 0.8055020712993757,
      "grad_norm": 0.9541838169658238,
      "learning_rate": 9.596966330116474e-07,
      "loss": 0.1125,
      "step": 27611
    },
    {
      "epoch": 0.8055312445300192,
      "grad_norm": 1.1040348580314125,
      "learning_rate": 9.594183404315283e-07,
      "loss": 0.1427,
      "step": 27612
    },
    {
      "epoch": 0.8055604177606628,
      "grad_norm": 0.8768262650652688,
      "learning_rate": 9.591400839247572e-07,
      "loss": 0.1166,
      "step": 27613
    },
    {
      "epoch": 0.8055895909913063,
      "grad_norm": 0.7615113329104115,
      "learning_rate": 9.58861863493818e-07,
      "loss": 0.0999,
      "step": 27614
    },
    {
      "epoch": 0.8056187642219499,
      "grad_norm": 0.912292119000475,
      "learning_rate": 9.58583679141195e-07,
      "loss": 0.1082,
      "step": 27615
    },
    {
      "epoch": 0.8056479374525936,
      "grad_norm": 0.8534327845555917,
      "learning_rate": 9.58305530869374e-07,
      "loss": 0.1074,
      "step": 27616
    },
    {
      "epoch": 0.8056771106832371,
      "grad_norm": 1.302506181373477,
      "learning_rate": 9.580274186808359e-07,
      "loss": 0.1009,
      "step": 27617
    },
    {
      "epoch": 0.8057062839138807,
      "grad_norm": 1.1892135792392173,
      "learning_rate": 9.577493425780631e-07,
      "loss": 0.1216,
      "step": 27618
    },
    {
      "epoch": 0.8057354571445242,
      "grad_norm": 1.0685644096264661,
      "learning_rate": 9.574713025635401e-07,
      "loss": 0.1243,
      "step": 27619
    },
    {
      "epoch": 0.8057646303751678,
      "grad_norm": 1.0613542035055563,
      "learning_rate": 9.571932986397474e-07,
      "loss": 0.1064,
      "step": 27620
    },
    {
      "epoch": 0.8057938036058113,
      "grad_norm": 1.0999879934902057,
      "learning_rate": 9.569153308091678e-07,
      "loss": 0.1002,
      "step": 27621
    },
    {
      "epoch": 0.8058229768364549,
      "grad_norm": 0.9100385971405007,
      "learning_rate": 9.566373990742845e-07,
      "loss": 0.1184,
      "step": 27622
    },
    {
      "epoch": 0.8058521500670984,
      "grad_norm": 0.8974380860024977,
      "learning_rate": 9.563595034375766e-07,
      "loss": 0.1017,
      "step": 27623
    },
    {
      "epoch": 0.805881323297742,
      "grad_norm": 0.8825603888271505,
      "learning_rate": 9.560816439015247e-07,
      "loss": 0.1246,
      "step": 27624
    },
    {
      "epoch": 0.8059104965283855,
      "grad_norm": 0.9686615295957507,
      "learning_rate": 9.55803820468612e-07,
      "loss": 0.1226,
      "step": 27625
    },
    {
      "epoch": 0.8059396697590291,
      "grad_norm": 0.7032030121516174,
      "learning_rate": 9.555260331413157e-07,
      "loss": 0.108,
      "step": 27626
    },
    {
      "epoch": 0.8059688429896726,
      "grad_norm": 0.9914186132599075,
      "learning_rate": 9.552482819221193e-07,
      "loss": 0.1107,
      "step": 27627
    },
    {
      "epoch": 0.8059980162203162,
      "grad_norm": 0.8088929042226313,
      "learning_rate": 9.54970566813499e-07,
      "loss": 0.1095,
      "step": 27628
    },
    {
      "epoch": 0.8060271894509597,
      "grad_norm": 1.0028305047125612,
      "learning_rate": 9.546928878179374e-07,
      "loss": 0.1124,
      "step": 27629
    },
    {
      "epoch": 0.8060563626816034,
      "grad_norm": 1.2467790967603478,
      "learning_rate": 9.54415244937912e-07,
      "loss": 0.1062,
      "step": 27630
    },
    {
      "epoch": 0.806085535912247,
      "grad_norm": 1.0096328635678125,
      "learning_rate": 9.541376381759004e-07,
      "loss": 0.1099,
      "step": 27631
    },
    {
      "epoch": 0.8061147091428905,
      "grad_norm": 0.7248636709813338,
      "learning_rate": 9.538600675343818e-07,
      "loss": 0.1066,
      "step": 27632
    },
    {
      "epoch": 0.8061438823735341,
      "grad_norm": 0.8359399294345164,
      "learning_rate": 9.53582533015836e-07,
      "loss": 0.1056,
      "step": 27633
    },
    {
      "epoch": 0.8061730556041776,
      "grad_norm": 0.9525697005330482,
      "learning_rate": 9.53305034622738e-07,
      "loss": 0.1472,
      "step": 27634
    },
    {
      "epoch": 0.8062022288348212,
      "grad_norm": 1.1082326229718025,
      "learning_rate": 9.530275723575677e-07,
      "loss": 0.113,
      "step": 27635
    },
    {
      "epoch": 0.8062314020654647,
      "grad_norm": 0.8652451949775221,
      "learning_rate": 9.527501462228018e-07,
      "loss": 0.1222,
      "step": 27636
    },
    {
      "epoch": 0.8062605752961083,
      "grad_norm": 1.0307970435761293,
      "learning_rate": 9.524727562209146e-07,
      "loss": 0.1402,
      "step": 27637
    },
    {
      "epoch": 0.8062897485267518,
      "grad_norm": 0.8585871109648557,
      "learning_rate": 9.521954023543844e-07,
      "loss": 0.123,
      "step": 27638
    },
    {
      "epoch": 0.8063189217573954,
      "grad_norm": 0.9834767927940743,
      "learning_rate": 9.519180846256893e-07,
      "loss": 0.1173,
      "step": 27639
    },
    {
      "epoch": 0.8063480949880389,
      "grad_norm": 0.8467804807672528,
      "learning_rate": 9.516408030373025e-07,
      "loss": 0.1374,
      "step": 27640
    },
    {
      "epoch": 0.8063772682186825,
      "grad_norm": 1.092355609652649,
      "learning_rate": 9.51363557591699e-07,
      "loss": 0.1256,
      "step": 27641
    },
    {
      "epoch": 0.806406441449326,
      "grad_norm": 1.1469343584533855,
      "learning_rate": 9.510863482913568e-07,
      "loss": 0.1139,
      "step": 27642
    },
    {
      "epoch": 0.8064356146799697,
      "grad_norm": 0.8730894118637729,
      "learning_rate": 9.508091751387489e-07,
      "loss": 0.1178,
      "step": 27643
    },
    {
      "epoch": 0.8064647879106133,
      "grad_norm": 0.7942972733331833,
      "learning_rate": 9.505320381363486e-07,
      "loss": 0.1097,
      "step": 27644
    },
    {
      "epoch": 0.8064939611412568,
      "grad_norm": 0.8877381070858066,
      "learning_rate": 9.502549372866321e-07,
      "loss": 0.1318,
      "step": 27645
    },
    {
      "epoch": 0.8065231343719004,
      "grad_norm": 0.8931044665754303,
      "learning_rate": 9.499778725920739e-07,
      "loss": 0.1133,
      "step": 27646
    },
    {
      "epoch": 0.8065523076025439,
      "grad_norm": 1.2627948123104578,
      "learning_rate": 9.497008440551464e-07,
      "loss": 0.1309,
      "step": 27647
    },
    {
      "epoch": 0.8065814808331875,
      "grad_norm": 0.9609561848997056,
      "learning_rate": 9.494238516783211e-07,
      "loss": 0.1153,
      "step": 27648
    },
    {
      "epoch": 0.806610654063831,
      "grad_norm": 1.00056393687836,
      "learning_rate": 9.491468954640742e-07,
      "loss": 0.1268,
      "step": 27649
    },
    {
      "epoch": 0.8066398272944746,
      "grad_norm": 0.9582638790325807,
      "learning_rate": 9.488699754148762e-07,
      "loss": 0.0995,
      "step": 27650
    },
    {
      "epoch": 0.8066690005251181,
      "grad_norm": 0.8134779507158203,
      "learning_rate": 9.485930915331992e-07,
      "loss": 0.1202,
      "step": 27651
    },
    {
      "epoch": 0.8066981737557617,
      "grad_norm": 0.7138897674898071,
      "learning_rate": 9.483162438215177e-07,
      "loss": 0.1104,
      "step": 27652
    },
    {
      "epoch": 0.8067273469864052,
      "grad_norm": 0.8479061846663208,
      "learning_rate": 9.480394322823011e-07,
      "loss": 0.122,
      "step": 27653
    },
    {
      "epoch": 0.8067565202170488,
      "grad_norm": 0.8387993100806406,
      "learning_rate": 9.477626569180198e-07,
      "loss": 0.114,
      "step": 27654
    },
    {
      "epoch": 0.8067856934476924,
      "grad_norm": 0.7209054024864514,
      "learning_rate": 9.474859177311479e-07,
      "loss": 0.1241,
      "step": 27655
    },
    {
      "epoch": 0.8068148666783359,
      "grad_norm": 1.036788125164798,
      "learning_rate": 9.472092147241529e-07,
      "loss": 0.1453,
      "step": 27656
    },
    {
      "epoch": 0.8068440399089796,
      "grad_norm": 0.7315252955723138,
      "learning_rate": 9.469325478995078e-07,
      "loss": 0.0744,
      "step": 27657
    },
    {
      "epoch": 0.8068732131396231,
      "grad_norm": 0.842430395675882,
      "learning_rate": 9.466559172596801e-07,
      "loss": 0.1302,
      "step": 27658
    },
    {
      "epoch": 0.8069023863702667,
      "grad_norm": 0.8221287547134344,
      "learning_rate": 9.463793228071422e-07,
      "loss": 0.1202,
      "step": 27659
    },
    {
      "epoch": 0.8069315596009102,
      "grad_norm": 1.0681433003230443,
      "learning_rate": 9.461027645443616e-07,
      "loss": 0.1337,
      "step": 27660
    },
    {
      "epoch": 0.8069607328315538,
      "grad_norm": 0.7929305994184553,
      "learning_rate": 9.458262424738069e-07,
      "loss": 0.1005,
      "step": 27661
    },
    {
      "epoch": 0.8069899060621973,
      "grad_norm": 0.7386760497497545,
      "learning_rate": 9.455497565979477e-07,
      "loss": 0.1427,
      "step": 27662
    },
    {
      "epoch": 0.8070190792928409,
      "grad_norm": 0.9587053256319008,
      "learning_rate": 9.452733069192532e-07,
      "loss": 0.117,
      "step": 27663
    },
    {
      "epoch": 0.8070482525234844,
      "grad_norm": 0.7468410251227554,
      "learning_rate": 9.449968934401899e-07,
      "loss": 0.1066,
      "step": 27664
    },
    {
      "epoch": 0.807077425754128,
      "grad_norm": 0.7986438414699737,
      "learning_rate": 9.447205161632272e-07,
      "loss": 0.1117,
      "step": 27665
    },
    {
      "epoch": 0.8071065989847716,
      "grad_norm": 0.9221184390188412,
      "learning_rate": 9.444441750908323e-07,
      "loss": 0.1423,
      "step": 27666
    },
    {
      "epoch": 0.8071357722154151,
      "grad_norm": 0.8046740951130008,
      "learning_rate": 9.441678702254697e-07,
      "loss": 0.1479,
      "step": 27667
    },
    {
      "epoch": 0.8071649454460587,
      "grad_norm": 0.8078955023180293,
      "learning_rate": 9.438916015696087e-07,
      "loss": 0.1273,
      "step": 27668
    },
    {
      "epoch": 0.8071941186767022,
      "grad_norm": 0.751614568114784,
      "learning_rate": 9.43615369125716e-07,
      "loss": 0.1113,
      "step": 27669
    },
    {
      "epoch": 0.8072232919073459,
      "grad_norm": 0.7839193155711048,
      "learning_rate": 9.433391728962571e-07,
      "loss": 0.1099,
      "step": 27670
    },
    {
      "epoch": 0.8072524651379894,
      "grad_norm": 0.6825041258920004,
      "learning_rate": 9.430630128836966e-07,
      "loss": 0.079,
      "step": 27671
    },
    {
      "epoch": 0.807281638368633,
      "grad_norm": 0.8099100181328582,
      "learning_rate": 9.427868890905023e-07,
      "loss": 0.1127,
      "step": 27672
    },
    {
      "epoch": 0.8073108115992765,
      "grad_norm": 0.790503562504446,
      "learning_rate": 9.425108015191364e-07,
      "loss": 0.1151,
      "step": 27673
    },
    {
      "epoch": 0.8073399848299201,
      "grad_norm": 1.3363533684220408,
      "learning_rate": 9.422347501720675e-07,
      "loss": 0.1368,
      "step": 27674
    },
    {
      "epoch": 0.8073691580605636,
      "grad_norm": 0.8836754545764983,
      "learning_rate": 9.419587350517562e-07,
      "loss": 0.1117,
      "step": 27675
    },
    {
      "epoch": 0.8073983312912072,
      "grad_norm": 0.8294599315489385,
      "learning_rate": 9.4168275616067e-07,
      "loss": 0.1263,
      "step": 27676
    },
    {
      "epoch": 0.8074275045218507,
      "grad_norm": 0.7710846682558097,
      "learning_rate": 9.414068135012716e-07,
      "loss": 0.1004,
      "step": 27677
    },
    {
      "epoch": 0.8074566777524943,
      "grad_norm": 0.9781689552740249,
      "learning_rate": 9.411309070760228e-07,
      "loss": 0.1198,
      "step": 27678
    },
    {
      "epoch": 0.8074858509831379,
      "grad_norm": 0.7372479514081962,
      "learning_rate": 9.408550368873882e-07,
      "loss": 0.111,
      "step": 27679
    },
    {
      "epoch": 0.8075150242137814,
      "grad_norm": 0.8232777820631658,
      "learning_rate": 9.405792029378324e-07,
      "loss": 0.1022,
      "step": 27680
    },
    {
      "epoch": 0.807544197444425,
      "grad_norm": 1.2378355493279536,
      "learning_rate": 9.403034052298148e-07,
      "loss": 0.1331,
      "step": 27681
    },
    {
      "epoch": 0.8075733706750685,
      "grad_norm": 0.8633461785558697,
      "learning_rate": 9.400276437658007e-07,
      "loss": 0.1218,
      "step": 27682
    },
    {
      "epoch": 0.8076025439057121,
      "grad_norm": 0.6479737271777818,
      "learning_rate": 9.397519185482506e-07,
      "loss": 0.0994,
      "step": 27683
    },
    {
      "epoch": 0.8076317171363557,
      "grad_norm": 1.0638117353299963,
      "learning_rate": 9.39476229579625e-07,
      "loss": 0.1247,
      "step": 27684
    },
    {
      "epoch": 0.8076608903669993,
      "grad_norm": 0.9631327341602408,
      "learning_rate": 9.392005768623863e-07,
      "loss": 0.1153,
      "step": 27685
    },
    {
      "epoch": 0.8076900635976428,
      "grad_norm": 0.91266006072369,
      "learning_rate": 9.389249603989964e-07,
      "loss": 0.1173,
      "step": 27686
    },
    {
      "epoch": 0.8077192368282864,
      "grad_norm": 1.076757805086941,
      "learning_rate": 9.38649380191915e-07,
      "loss": 0.1317,
      "step": 27687
    },
    {
      "epoch": 0.8077484100589299,
      "grad_norm": 0.8860938818454016,
      "learning_rate": 9.383738362436017e-07,
      "loss": 0.1151,
      "step": 27688
    },
    {
      "epoch": 0.8077775832895735,
      "grad_norm": 0.8796505794890483,
      "learning_rate": 9.380983285565182e-07,
      "loss": 0.0913,
      "step": 27689
    },
    {
      "epoch": 0.807806756520217,
      "grad_norm": 0.6516729246030777,
      "learning_rate": 9.378228571331227e-07,
      "loss": 0.1,
      "step": 27690
    },
    {
      "epoch": 0.8078359297508606,
      "grad_norm": 0.9564384973263884,
      "learning_rate": 9.375474219758729e-07,
      "loss": 0.1136,
      "step": 27691
    },
    {
      "epoch": 0.8078651029815042,
      "grad_norm": 0.9857789493763088,
      "learning_rate": 9.372720230872323e-07,
      "loss": 0.0961,
      "step": 27692
    },
    {
      "epoch": 0.8078942762121477,
      "grad_norm": 0.8264663643223091,
      "learning_rate": 9.369966604696573e-07,
      "loss": 0.0895,
      "step": 27693
    },
    {
      "epoch": 0.8079234494427913,
      "grad_norm": 0.9302521695413319,
      "learning_rate": 9.367213341256054e-07,
      "loss": 0.1204,
      "step": 27694
    },
    {
      "epoch": 0.8079526226734348,
      "grad_norm": 1.0324593926005705,
      "learning_rate": 9.364460440575363e-07,
      "loss": 0.1384,
      "step": 27695
    },
    {
      "epoch": 0.8079817959040784,
      "grad_norm": 0.7545117449966773,
      "learning_rate": 9.361707902679068e-07,
      "loss": 0.1011,
      "step": 27696
    },
    {
      "epoch": 0.8080109691347219,
      "grad_norm": 0.7125484529146792,
      "learning_rate": 9.358955727591729e-07,
      "loss": 0.1268,
      "step": 27697
    },
    {
      "epoch": 0.8080401423653656,
      "grad_norm": 0.8507698900342994,
      "learning_rate": 9.356203915337935e-07,
      "loss": 0.1134,
      "step": 27698
    },
    {
      "epoch": 0.8080693155960091,
      "grad_norm": 0.9029781710710214,
      "learning_rate": 9.353452465942264e-07,
      "loss": 0.1207,
      "step": 27699
    },
    {
      "epoch": 0.8080984888266527,
      "grad_norm": 0.889847297408012,
      "learning_rate": 9.350701379429261e-07,
      "loss": 0.1093,
      "step": 27700
    },
    {
      "epoch": 0.8081276620572962,
      "grad_norm": 0.7877630553778059,
      "learning_rate": 9.347950655823484e-07,
      "loss": 0.1,
      "step": 27701
    },
    {
      "epoch": 0.8081568352879398,
      "grad_norm": 0.7420402317438628,
      "learning_rate": 9.34520029514951e-07,
      "loss": 0.0957,
      "step": 27702
    },
    {
      "epoch": 0.8081860085185834,
      "grad_norm": 0.8436022398261921,
      "learning_rate": 9.342450297431871e-07,
      "loss": 0.1277,
      "step": 27703
    },
    {
      "epoch": 0.8082151817492269,
      "grad_norm": 0.7753417486698301,
      "learning_rate": 9.339700662695145e-07,
      "loss": 0.1069,
      "step": 27704
    },
    {
      "epoch": 0.8082443549798705,
      "grad_norm": 0.8434091077299489,
      "learning_rate": 9.336951390963849e-07,
      "loss": 0.1237,
      "step": 27705
    },
    {
      "epoch": 0.808273528210514,
      "grad_norm": 1.1354659045094726,
      "learning_rate": 9.334202482262555e-07,
      "loss": 0.1381,
      "step": 27706
    },
    {
      "epoch": 0.8083027014411576,
      "grad_norm": 0.8704295928961485,
      "learning_rate": 9.331453936615798e-07,
      "loss": 0.1012,
      "step": 27707
    },
    {
      "epoch": 0.8083318746718011,
      "grad_norm": 0.9215930248056898,
      "learning_rate": 9.328705754048095e-07,
      "loss": 0.1118,
      "step": 27708
    },
    {
      "epoch": 0.8083610479024447,
      "grad_norm": 0.8713600958684332,
      "learning_rate": 9.325957934584001e-07,
      "loss": 0.1059,
      "step": 27709
    },
    {
      "epoch": 0.8083902211330882,
      "grad_norm": 0.7628119466868182,
      "learning_rate": 9.323210478248057e-07,
      "loss": 0.1111,
      "step": 27710
    },
    {
      "epoch": 0.8084193943637319,
      "grad_norm": 0.9370638454702768,
      "learning_rate": 9.320463385064766e-07,
      "loss": 0.1165,
      "step": 27711
    },
    {
      "epoch": 0.8084485675943754,
      "grad_norm": 0.8615433986827776,
      "learning_rate": 9.317716655058678e-07,
      "loss": 0.1085,
      "step": 27712
    },
    {
      "epoch": 0.808477740825019,
      "grad_norm": 0.8872474163883061,
      "learning_rate": 9.314970288254304e-07,
      "loss": 0.1003,
      "step": 27713
    },
    {
      "epoch": 0.8085069140556626,
      "grad_norm": 0.9321866280096461,
      "learning_rate": 9.312224284676158e-07,
      "loss": 0.1269,
      "step": 27714
    },
    {
      "epoch": 0.8085360872863061,
      "grad_norm": 0.8037436535570405,
      "learning_rate": 9.309478644348751e-07,
      "loss": 0.122,
      "step": 27715
    },
    {
      "epoch": 0.8085652605169497,
      "grad_norm": 0.9128272271350647,
      "learning_rate": 9.306733367296622e-07,
      "loss": 0.1243,
      "step": 27716
    },
    {
      "epoch": 0.8085944337475932,
      "grad_norm": 1.0434697059821711,
      "learning_rate": 9.303988453544266e-07,
      "loss": 0.102,
      "step": 27717
    },
    {
      "epoch": 0.8086236069782368,
      "grad_norm": 0.7839297890649839,
      "learning_rate": 9.301243903116169e-07,
      "loss": 0.1151,
      "step": 27718
    },
    {
      "epoch": 0.8086527802088803,
      "grad_norm": 0.7976192774185791,
      "learning_rate": 9.298499716036863e-07,
      "loss": 0.1098,
      "step": 27719
    },
    {
      "epoch": 0.8086819534395239,
      "grad_norm": 0.9749869644334156,
      "learning_rate": 9.295755892330838e-07,
      "loss": 0.1226,
      "step": 27720
    },
    {
      "epoch": 0.8087111266701674,
      "grad_norm": 0.9294002766208463,
      "learning_rate": 9.293012432022563e-07,
      "loss": 0.1096,
      "step": 27721
    },
    {
      "epoch": 0.808740299900811,
      "grad_norm": 1.0845043734624233,
      "learning_rate": 9.290269335136576e-07,
      "loss": 0.1059,
      "step": 27722
    },
    {
      "epoch": 0.8087694731314545,
      "grad_norm": 0.8523074332648163,
      "learning_rate": 9.287526601697349e-07,
      "loss": 0.1204,
      "step": 27723
    },
    {
      "epoch": 0.8087986463620981,
      "grad_norm": 0.886266788779363,
      "learning_rate": 9.284784231729355e-07,
      "loss": 0.1131,
      "step": 27724
    },
    {
      "epoch": 0.8088278195927417,
      "grad_norm": 1.6201560529506194,
      "learning_rate": 9.282042225257099e-07,
      "loss": 0.0941,
      "step": 27725
    },
    {
      "epoch": 0.8088569928233853,
      "grad_norm": 0.8354550398540652,
      "learning_rate": 9.279300582305051e-07,
      "loss": 0.1198,
      "step": 27726
    },
    {
      "epoch": 0.8088861660540289,
      "grad_norm": 0.992604360467188,
      "learning_rate": 9.276559302897669e-07,
      "loss": 0.121,
      "step": 27727
    },
    {
      "epoch": 0.8089153392846724,
      "grad_norm": 1.0072788773839294,
      "learning_rate": 9.273818387059452e-07,
      "loss": 0.1139,
      "step": 27728
    },
    {
      "epoch": 0.808944512515316,
      "grad_norm": 0.9233711831501684,
      "learning_rate": 9.271077834814868e-07,
      "loss": 0.1056,
      "step": 27729
    },
    {
      "epoch": 0.8089736857459595,
      "grad_norm": 0.904104940376141,
      "learning_rate": 9.26833764618838e-07,
      "loss": 0.1421,
      "step": 27730
    },
    {
      "epoch": 0.8090028589766031,
      "grad_norm": 1.1756509487612572,
      "learning_rate": 9.265597821204441e-07,
      "loss": 0.1115,
      "step": 27731
    },
    {
      "epoch": 0.8090320322072466,
      "grad_norm": 0.9482912710344759,
      "learning_rate": 9.262858359887528e-07,
      "loss": 0.1092,
      "step": 27732
    },
    {
      "epoch": 0.8090612054378902,
      "grad_norm": 0.8394346876055967,
      "learning_rate": 9.260119262262085e-07,
      "loss": 0.1242,
      "step": 27733
    },
    {
      "epoch": 0.8090903786685337,
      "grad_norm": 0.9702058106309435,
      "learning_rate": 9.257380528352578e-07,
      "loss": 0.1043,
      "step": 27734
    },
    {
      "epoch": 0.8091195518991773,
      "grad_norm": 0.8907473523033501,
      "learning_rate": 9.254642158183441e-07,
      "loss": 0.0907,
      "step": 27735
    },
    {
      "epoch": 0.8091487251298208,
      "grad_norm": 0.9795930823804683,
      "learning_rate": 9.251904151779145e-07,
      "loss": 0.1045,
      "step": 27736
    },
    {
      "epoch": 0.8091778983604644,
      "grad_norm": 1.1694958440117467,
      "learning_rate": 9.24916650916412e-07,
      "loss": 0.1211,
      "step": 27737
    },
    {
      "epoch": 0.809207071591108,
      "grad_norm": 0.9396109202494459,
      "learning_rate": 9.246429230362797e-07,
      "loss": 0.1233,
      "step": 27738
    },
    {
      "epoch": 0.8092362448217516,
      "grad_norm": 0.8639898766860498,
      "learning_rate": 9.243692315399627e-07,
      "loss": 0.1127,
      "step": 27739
    },
    {
      "epoch": 0.8092654180523952,
      "grad_norm": 0.9180130668315164,
      "learning_rate": 9.240955764299053e-07,
      "loss": 0.1187,
      "step": 27740
    },
    {
      "epoch": 0.8092945912830387,
      "grad_norm": 1.0764279828707437,
      "learning_rate": 9.238219577085483e-07,
      "loss": 0.1268,
      "step": 27741
    },
    {
      "epoch": 0.8093237645136823,
      "grad_norm": 0.9050155148591006,
      "learning_rate": 9.235483753783375e-07,
      "loss": 0.1141,
      "step": 27742
    },
    {
      "epoch": 0.8093529377443258,
      "grad_norm": 0.8308343890269729,
      "learning_rate": 9.232748294417132e-07,
      "loss": 0.1148,
      "step": 27743
    },
    {
      "epoch": 0.8093821109749694,
      "grad_norm": 0.8282001548967434,
      "learning_rate": 9.230013199011168e-07,
      "loss": 0.1227,
      "step": 27744
    },
    {
      "epoch": 0.8094112842056129,
      "grad_norm": 1.0009096968741495,
      "learning_rate": 9.227278467589918e-07,
      "loss": 0.1234,
      "step": 27745
    },
    {
      "epoch": 0.8094404574362565,
      "grad_norm": 0.7376641217003067,
      "learning_rate": 9.224544100177801e-07,
      "loss": 0.0985,
      "step": 27746
    },
    {
      "epoch": 0.8094696306669,
      "grad_norm": 0.8762697498729055,
      "learning_rate": 9.221810096799222e-07,
      "loss": 0.0991,
      "step": 27747
    },
    {
      "epoch": 0.8094988038975436,
      "grad_norm": 1.0207785273047654,
      "learning_rate": 9.21907645747857e-07,
      "loss": 0.1134,
      "step": 27748
    },
    {
      "epoch": 0.8095279771281871,
      "grad_norm": 0.8337569521718208,
      "learning_rate": 9.21634318224029e-07,
      "loss": 0.0921,
      "step": 27749
    },
    {
      "epoch": 0.8095571503588307,
      "grad_norm": 0.7603370679586554,
      "learning_rate": 9.213610271108753e-07,
      "loss": 0.0994,
      "step": 27750
    },
    {
      "epoch": 0.8095863235894742,
      "grad_norm": 0.9298924413491145,
      "learning_rate": 9.210877724108347e-07,
      "loss": 0.1153,
      "step": 27751
    },
    {
      "epoch": 0.8096154968201179,
      "grad_norm": 0.765337120934524,
      "learning_rate": 9.208145541263514e-07,
      "loss": 0.1163,
      "step": 27752
    },
    {
      "epoch": 0.8096446700507615,
      "grad_norm": 0.9483243862733962,
      "learning_rate": 9.205413722598616e-07,
      "loss": 0.1175,
      "step": 27753
    },
    {
      "epoch": 0.809673843281405,
      "grad_norm": 0.9522069946984685,
      "learning_rate": 9.202682268138036e-07,
      "loss": 0.1199,
      "step": 27754
    },
    {
      "epoch": 0.8097030165120486,
      "grad_norm": 1.0336830645120536,
      "learning_rate": 9.19995117790618e-07,
      "loss": 0.1015,
      "step": 27755
    },
    {
      "epoch": 0.8097321897426921,
      "grad_norm": 0.8547717435982832,
      "learning_rate": 9.197220451927424e-07,
      "loss": 0.0967,
      "step": 27756
    },
    {
      "epoch": 0.8097613629733357,
      "grad_norm": 0.862362092638936,
      "learning_rate": 9.194490090226127e-07,
      "loss": 0.1078,
      "step": 27757
    },
    {
      "epoch": 0.8097905362039792,
      "grad_norm": 0.787151199295326,
      "learning_rate": 9.191760092826685e-07,
      "loss": 0.0973,
      "step": 27758
    },
    {
      "epoch": 0.8098197094346228,
      "grad_norm": 0.9128466409672559,
      "learning_rate": 9.189030459753473e-07,
      "loss": 0.1148,
      "step": 27759
    },
    {
      "epoch": 0.8098488826652663,
      "grad_norm": 0.8140903532431837,
      "learning_rate": 9.186301191030861e-07,
      "loss": 0.1199,
      "step": 27760
    },
    {
      "epoch": 0.8098780558959099,
      "grad_norm": 0.8068983792589345,
      "learning_rate": 9.183572286683195e-07,
      "loss": 0.1148,
      "step": 27761
    },
    {
      "epoch": 0.8099072291265534,
      "grad_norm": 0.9276602676040361,
      "learning_rate": 9.180843746734863e-07,
      "loss": 0.1135,
      "step": 27762
    },
    {
      "epoch": 0.809936402357197,
      "grad_norm": 0.9907750424037756,
      "learning_rate": 9.178115571210206e-07,
      "loss": 0.1409,
      "step": 27763
    },
    {
      "epoch": 0.8099655755878405,
      "grad_norm": 0.8760567874999488,
      "learning_rate": 9.175387760133591e-07,
      "loss": 0.1075,
      "step": 27764
    },
    {
      "epoch": 0.8099947488184842,
      "grad_norm": 1.0320778962467883,
      "learning_rate": 9.172660313529363e-07,
      "loss": 0.1209,
      "step": 27765
    },
    {
      "epoch": 0.8100239220491278,
      "grad_norm": 1.4146373263182843,
      "learning_rate": 9.16993323142189e-07,
      "loss": 0.1152,
      "step": 27766
    },
    {
      "epoch": 0.8100530952797713,
      "grad_norm": 0.8040419550955294,
      "learning_rate": 9.167206513835508e-07,
      "loss": 0.1067,
      "step": 27767
    },
    {
      "epoch": 0.8100822685104149,
      "grad_norm": 0.875952119479627,
      "learning_rate": 9.164480160794543e-07,
      "loss": 0.1287,
      "step": 27768
    },
    {
      "epoch": 0.8101114417410584,
      "grad_norm": 0.912674442463428,
      "learning_rate": 9.161754172323351e-07,
      "loss": 0.1207,
      "step": 27769
    },
    {
      "epoch": 0.810140614971702,
      "grad_norm": 0.941059971365258,
      "learning_rate": 9.159028548446281e-07,
      "loss": 0.1162,
      "step": 27770
    },
    {
      "epoch": 0.8101697882023455,
      "grad_norm": 0.8365171229245679,
      "learning_rate": 9.15630328918764e-07,
      "loss": 0.1123,
      "step": 27771
    },
    {
      "epoch": 0.8101989614329891,
      "grad_norm": 0.8387735287196747,
      "learning_rate": 9.153578394571788e-07,
      "loss": 0.1018,
      "step": 27772
    },
    {
      "epoch": 0.8102281346636326,
      "grad_norm": 0.8553702571973816,
      "learning_rate": 9.150853864623039e-07,
      "loss": 0.0986,
      "step": 27773
    },
    {
      "epoch": 0.8102573078942762,
      "grad_norm": 0.8284252048007986,
      "learning_rate": 9.148129699365699e-07,
      "loss": 0.1017,
      "step": 27774
    },
    {
      "epoch": 0.8102864811249197,
      "grad_norm": 0.9572269906039605,
      "learning_rate": 9.145405898824106e-07,
      "loss": 0.0964,
      "step": 27775
    },
    {
      "epoch": 0.8103156543555633,
      "grad_norm": 0.8696717750444994,
      "learning_rate": 9.142682463022589e-07,
      "loss": 0.1072,
      "step": 27776
    },
    {
      "epoch": 0.8103448275862069,
      "grad_norm": 0.8478827962096758,
      "learning_rate": 9.139959391985453e-07,
      "loss": 0.1148,
      "step": 27777
    },
    {
      "epoch": 0.8103740008168504,
      "grad_norm": 0.9358642936458822,
      "learning_rate": 9.137236685736988e-07,
      "loss": 0.1179,
      "step": 27778
    },
    {
      "epoch": 0.8104031740474941,
      "grad_norm": 0.8906320175570829,
      "learning_rate": 9.134514344301537e-07,
      "loss": 0.119,
      "step": 27779
    },
    {
      "epoch": 0.8104323472781376,
      "grad_norm": 0.9854779651616952,
      "learning_rate": 9.131792367703385e-07,
      "loss": 0.1043,
      "step": 27780
    },
    {
      "epoch": 0.8104615205087812,
      "grad_norm": 0.9770178275001017,
      "learning_rate": 9.129070755966807e-07,
      "loss": 0.1219,
      "step": 27781
    },
    {
      "epoch": 0.8104906937394247,
      "grad_norm": 0.7729644612952208,
      "learning_rate": 9.126349509116156e-07,
      "loss": 0.1022,
      "step": 27782
    },
    {
      "epoch": 0.8105198669700683,
      "grad_norm": 1.0305427076463296,
      "learning_rate": 9.123628627175696e-07,
      "loss": 0.1196,
      "step": 27783
    },
    {
      "epoch": 0.8105490402007118,
      "grad_norm": 1.077065156779928,
      "learning_rate": 9.120908110169713e-07,
      "loss": 0.1118,
      "step": 27784
    },
    {
      "epoch": 0.8105782134313554,
      "grad_norm": 1.1021114832715748,
      "learning_rate": 9.118187958122515e-07,
      "loss": 0.1193,
      "step": 27785
    },
    {
      "epoch": 0.8106073866619989,
      "grad_norm": 0.9724480263050775,
      "learning_rate": 9.115468171058373e-07,
      "loss": 0.1157,
      "step": 27786
    },
    {
      "epoch": 0.8106365598926425,
      "grad_norm": 0.944353015182522,
      "learning_rate": 9.11274874900156e-07,
      "loss": 0.0942,
      "step": 27787
    },
    {
      "epoch": 0.810665733123286,
      "grad_norm": 1.1658182485253474,
      "learning_rate": 9.110029691976368e-07,
      "loss": 0.092,
      "step": 27788
    },
    {
      "epoch": 0.8106949063539296,
      "grad_norm": 0.8876520200068828,
      "learning_rate": 9.10731100000708e-07,
      "loss": 0.1068,
      "step": 27789
    },
    {
      "epoch": 0.8107240795845732,
      "grad_norm": 0.819044641947194,
      "learning_rate": 9.104592673117956e-07,
      "loss": 0.1288,
      "step": 27790
    },
    {
      "epoch": 0.8107532528152167,
      "grad_norm": 0.8926254486360656,
      "learning_rate": 9.101874711333258e-07,
      "loss": 0.1207,
      "step": 27791
    },
    {
      "epoch": 0.8107824260458604,
      "grad_norm": 0.8922334135057118,
      "learning_rate": 9.09915711467727e-07,
      "loss": 0.1231,
      "step": 27792
    },
    {
      "epoch": 0.8108115992765039,
      "grad_norm": 0.815953010826716,
      "learning_rate": 9.09643988317423e-07,
      "loss": 0.1105,
      "step": 27793
    },
    {
      "epoch": 0.8108407725071475,
      "grad_norm": 0.8055476566901355,
      "learning_rate": 9.093723016848427e-07,
      "loss": 0.1228,
      "step": 27794
    },
    {
      "epoch": 0.810869945737791,
      "grad_norm": 0.9657413088559,
      "learning_rate": 9.091006515724083e-07,
      "loss": 0.1214,
      "step": 27795
    },
    {
      "epoch": 0.8108991189684346,
      "grad_norm": 0.9176982476270428,
      "learning_rate": 9.088290379825481e-07,
      "loss": 0.1097,
      "step": 27796
    },
    {
      "epoch": 0.8109282921990781,
      "grad_norm": 1.1033443173486115,
      "learning_rate": 9.085574609176856e-07,
      "loss": 0.1253,
      "step": 27797
    },
    {
      "epoch": 0.8109574654297217,
      "grad_norm": 0.8570696955032819,
      "learning_rate": 9.082859203802436e-07,
      "loss": 0.1237,
      "step": 27798
    },
    {
      "epoch": 0.8109866386603652,
      "grad_norm": 1.1833928866140564,
      "learning_rate": 9.08014416372649e-07,
      "loss": 0.1124,
      "step": 27799
    },
    {
      "epoch": 0.8110158118910088,
      "grad_norm": 0.8803872613469325,
      "learning_rate": 9.077429488973255e-07,
      "loss": 0.0946,
      "step": 27800
    },
    {
      "epoch": 0.8110449851216524,
      "grad_norm": 0.8849863394553857,
      "learning_rate": 9.07471517956695e-07,
      "loss": 0.1138,
      "step": 27801
    },
    {
      "epoch": 0.8110741583522959,
      "grad_norm": 0.9009437140714899,
      "learning_rate": 9.07200123553183e-07,
      "loss": 0.1063,
      "step": 27802
    },
    {
      "epoch": 0.8111033315829395,
      "grad_norm": 0.8611692773242475,
      "learning_rate": 9.069287656892118e-07,
      "loss": 0.1176,
      "step": 27803
    },
    {
      "epoch": 0.811132504813583,
      "grad_norm": 0.714812702387066,
      "learning_rate": 9.066574443672016e-07,
      "loss": 0.1183,
      "step": 27804
    },
    {
      "epoch": 0.8111616780442266,
      "grad_norm": 0.772792801634389,
      "learning_rate": 9.063861595895767e-07,
      "loss": 0.1088,
      "step": 27805
    },
    {
      "epoch": 0.8111908512748702,
      "grad_norm": 1.0948717600218414,
      "learning_rate": 9.061149113587603e-07,
      "loss": 0.1112,
      "step": 27806
    },
    {
      "epoch": 0.8112200245055138,
      "grad_norm": 0.8362913322933143,
      "learning_rate": 9.058436996771724e-07,
      "loss": 0.106,
      "step": 27807
    },
    {
      "epoch": 0.8112491977361573,
      "grad_norm": 1.2474769822718166,
      "learning_rate": 9.055725245472335e-07,
      "loss": 0.1213,
      "step": 27808
    },
    {
      "epoch": 0.8112783709668009,
      "grad_norm": 0.8080131307207777,
      "learning_rate": 9.053013859713672e-07,
      "loss": 0.1128,
      "step": 27809
    },
    {
      "epoch": 0.8113075441974444,
      "grad_norm": 0.744940244018974,
      "learning_rate": 9.050302839519926e-07,
      "loss": 0.1084,
      "step": 27810
    },
    {
      "epoch": 0.811336717428088,
      "grad_norm": 0.7687660014294762,
      "learning_rate": 9.047592184915272e-07,
      "loss": 0.0974,
      "step": 27811
    },
    {
      "epoch": 0.8113658906587315,
      "grad_norm": 0.8384119192449437,
      "learning_rate": 9.044881895923969e-07,
      "loss": 0.1269,
      "step": 27812
    },
    {
      "epoch": 0.8113950638893751,
      "grad_norm": 0.9045942397770038,
      "learning_rate": 9.042171972570179e-07,
      "loss": 0.1215,
      "step": 27813
    },
    {
      "epoch": 0.8114242371200187,
      "grad_norm": 0.8526699918228667,
      "learning_rate": 9.039462414878092e-07,
      "loss": 0.1143,
      "step": 27814
    },
    {
      "epoch": 0.8114534103506622,
      "grad_norm": 0.8874721617085721,
      "learning_rate": 9.036753222871914e-07,
      "loss": 0.1192,
      "step": 27815
    },
    {
      "epoch": 0.8114825835813058,
      "grad_norm": 0.8227247161142672,
      "learning_rate": 9.034044396575825e-07,
      "loss": 0.1189,
      "step": 27816
    },
    {
      "epoch": 0.8115117568119493,
      "grad_norm": 0.9705319413020947,
      "learning_rate": 9.031335936014001e-07,
      "loss": 0.1076,
      "step": 27817
    },
    {
      "epoch": 0.8115409300425929,
      "grad_norm": 1.1221833081400452,
      "learning_rate": 9.028627841210625e-07,
      "loss": 0.119,
      "step": 27818
    },
    {
      "epoch": 0.8115701032732365,
      "grad_norm": 0.9065303454259425,
      "learning_rate": 9.025920112189895e-07,
      "loss": 0.1022,
      "step": 27819
    },
    {
      "epoch": 0.8115992765038801,
      "grad_norm": 0.8308899240624873,
      "learning_rate": 9.023212748975968e-07,
      "loss": 0.1304,
      "step": 27820
    },
    {
      "epoch": 0.8116284497345236,
      "grad_norm": 0.8682517098380476,
      "learning_rate": 9.020505751593001e-07,
      "loss": 0.1038,
      "step": 27821
    },
    {
      "epoch": 0.8116576229651672,
      "grad_norm": 0.8321078365736718,
      "learning_rate": 9.01779912006519e-07,
      "loss": 0.1128,
      "step": 27822
    },
    {
      "epoch": 0.8116867961958107,
      "grad_norm": 0.9589914766280058,
      "learning_rate": 9.015092854416668e-07,
      "loss": 0.1025,
      "step": 27823
    },
    {
      "epoch": 0.8117159694264543,
      "grad_norm": 0.9256429735490215,
      "learning_rate": 9.012386954671631e-07,
      "loss": 0.1162,
      "step": 27824
    },
    {
      "epoch": 0.8117451426570979,
      "grad_norm": 0.9232161626985291,
      "learning_rate": 9.009681420854205e-07,
      "loss": 0.1126,
      "step": 27825
    },
    {
      "epoch": 0.8117743158877414,
      "grad_norm": 0.8141652835612888,
      "learning_rate": 9.006976252988569e-07,
      "loss": 0.1282,
      "step": 27826
    },
    {
      "epoch": 0.811803489118385,
      "grad_norm": 0.8589456492842638,
      "learning_rate": 9.004271451098867e-07,
      "loss": 0.1413,
      "step": 27827
    },
    {
      "epoch": 0.8118326623490285,
      "grad_norm": 0.8635579234007896,
      "learning_rate": 9.001567015209229e-07,
      "loss": 0.1083,
      "step": 27828
    },
    {
      "epoch": 0.8118618355796721,
      "grad_norm": 0.8168511748696249,
      "learning_rate": 8.998862945343811e-07,
      "loss": 0.114,
      "step": 27829
    },
    {
      "epoch": 0.8118910088103156,
      "grad_norm": 1.0173724138335858,
      "learning_rate": 8.996159241526775e-07,
      "loss": 0.1285,
      "step": 27830
    },
    {
      "epoch": 0.8119201820409592,
      "grad_norm": 1.1457423515664586,
      "learning_rate": 8.993455903782222e-07,
      "loss": 0.1166,
      "step": 27831
    },
    {
      "epoch": 0.8119493552716027,
      "grad_norm": 1.0103305109512088,
      "learning_rate": 8.990752932134322e-07,
      "loss": 0.0882,
      "step": 27832
    },
    {
      "epoch": 0.8119785285022464,
      "grad_norm": 0.8465947302811158,
      "learning_rate": 8.98805032660719e-07,
      "loss": 0.1103,
      "step": 27833
    },
    {
      "epoch": 0.8120077017328899,
      "grad_norm": 0.8989232846716325,
      "learning_rate": 8.985348087224943e-07,
      "loss": 0.1199,
      "step": 27834
    },
    {
      "epoch": 0.8120368749635335,
      "grad_norm": 1.0875244874638665,
      "learning_rate": 8.982646214011715e-07,
      "loss": 0.1047,
      "step": 27835
    },
    {
      "epoch": 0.812066048194177,
      "grad_norm": 0.9733932925803976,
      "learning_rate": 8.979944706991639e-07,
      "loss": 0.0913,
      "step": 27836
    },
    {
      "epoch": 0.8120952214248206,
      "grad_norm": 1.05482053175379,
      "learning_rate": 8.977243566188831e-07,
      "loss": 0.1282,
      "step": 27837
    },
    {
      "epoch": 0.8121243946554642,
      "grad_norm": 0.9503921749161258,
      "learning_rate": 8.974542791627383e-07,
      "loss": 0.1615,
      "step": 27838
    },
    {
      "epoch": 0.8121535678861077,
      "grad_norm": 0.9053194371817254,
      "learning_rate": 8.971842383331436e-07,
      "loss": 0.123,
      "step": 27839
    },
    {
      "epoch": 0.8121827411167513,
      "grad_norm": 0.9312336263357013,
      "learning_rate": 8.969142341325088e-07,
      "loss": 0.1165,
      "step": 27840
    },
    {
      "epoch": 0.8122119143473948,
      "grad_norm": 0.8743210919694165,
      "learning_rate": 8.966442665632418e-07,
      "loss": 0.1173,
      "step": 27841
    },
    {
      "epoch": 0.8122410875780384,
      "grad_norm": 1.1143132195729024,
      "learning_rate": 8.963743356277577e-07,
      "loss": 0.1225,
      "step": 27842
    },
    {
      "epoch": 0.8122702608086819,
      "grad_norm": 0.8115040841378355,
      "learning_rate": 8.961044413284636e-07,
      "loss": 0.1087,
      "step": 27843
    },
    {
      "epoch": 0.8122994340393255,
      "grad_norm": 0.9327315347288102,
      "learning_rate": 8.958345836677684e-07,
      "loss": 0.1204,
      "step": 27844
    },
    {
      "epoch": 0.812328607269969,
      "grad_norm": 1.105081627639045,
      "learning_rate": 8.955647626480835e-07,
      "loss": 0.0988,
      "step": 27845
    },
    {
      "epoch": 0.8123577805006127,
      "grad_norm": 0.9425030223238433,
      "learning_rate": 8.952949782718162e-07,
      "loss": 0.1166,
      "step": 27846
    },
    {
      "epoch": 0.8123869537312562,
      "grad_norm": 0.882574285397809,
      "learning_rate": 8.950252305413748e-07,
      "loss": 0.1379,
      "step": 27847
    },
    {
      "epoch": 0.8124161269618998,
      "grad_norm": 0.7754649697258346,
      "learning_rate": 8.947555194591679e-07,
      "loss": 0.1188,
      "step": 27848
    },
    {
      "epoch": 0.8124453001925434,
      "grad_norm": 1.03417462139561,
      "learning_rate": 8.944858450276051e-07,
      "loss": 0.1215,
      "step": 27849
    },
    {
      "epoch": 0.8124744734231869,
      "grad_norm": 0.9348661378267702,
      "learning_rate": 8.942162072490924e-07,
      "loss": 0.1114,
      "step": 27850
    },
    {
      "epoch": 0.8125036466538305,
      "grad_norm": 0.6796537220442412,
      "learning_rate": 8.93946606126036e-07,
      "loss": 0.1118,
      "step": 27851
    },
    {
      "epoch": 0.812532819884474,
      "grad_norm": 1.1326677677469623,
      "learning_rate": 8.93677041660846e-07,
      "loss": 0.1392,
      "step": 27852
    },
    {
      "epoch": 0.8125619931151176,
      "grad_norm": 0.8264266582231248,
      "learning_rate": 8.93407513855925e-07,
      "loss": 0.1091,
      "step": 27853
    },
    {
      "epoch": 0.8125911663457611,
      "grad_norm": 0.7842864713261196,
      "learning_rate": 8.931380227136832e-07,
      "loss": 0.0898,
      "step": 27854
    },
    {
      "epoch": 0.8126203395764047,
      "grad_norm": 1.0625415526642308,
      "learning_rate": 8.928685682365229e-07,
      "loss": 0.1081,
      "step": 27855
    },
    {
      "epoch": 0.8126495128070482,
      "grad_norm": 0.8304302302014871,
      "learning_rate": 8.925991504268533e-07,
      "loss": 0.1101,
      "step": 27856
    },
    {
      "epoch": 0.8126786860376918,
      "grad_norm": 0.8309523917579732,
      "learning_rate": 8.92329769287078e-07,
      "loss": 0.1093,
      "step": 27857
    },
    {
      "epoch": 0.8127078592683353,
      "grad_norm": 1.4803614141624153,
      "learning_rate": 8.920604248196007e-07,
      "loss": 0.1201,
      "step": 27858
    },
    {
      "epoch": 0.8127370324989789,
      "grad_norm": 1.0780219900720958,
      "learning_rate": 8.917911170268273e-07,
      "loss": 0.1222,
      "step": 27859
    },
    {
      "epoch": 0.8127662057296225,
      "grad_norm": 0.7461002323140512,
      "learning_rate": 8.91521845911163e-07,
      "loss": 0.0937,
      "step": 27860
    },
    {
      "epoch": 0.8127953789602661,
      "grad_norm": 0.8102491491294351,
      "learning_rate": 8.912526114750097e-07,
      "loss": 0.1078,
      "step": 27861
    },
    {
      "epoch": 0.8128245521909097,
      "grad_norm": 1.1440873401470806,
      "learning_rate": 8.90983413720774e-07,
      "loss": 0.1217,
      "step": 27862
    },
    {
      "epoch": 0.8128537254215532,
      "grad_norm": 0.7364765684460183,
      "learning_rate": 8.907142526508572e-07,
      "loss": 0.12,
      "step": 27863
    },
    {
      "epoch": 0.8128828986521968,
      "grad_norm": 1.0929421996467108,
      "learning_rate": 8.904451282676612e-07,
      "loss": 0.1081,
      "step": 27864
    },
    {
      "epoch": 0.8129120718828403,
      "grad_norm": 0.8992288094888158,
      "learning_rate": 8.901760405735898e-07,
      "loss": 0.1081,
      "step": 27865
    },
    {
      "epoch": 0.8129412451134839,
      "grad_norm": 1.0922194465089228,
      "learning_rate": 8.899069895710477e-07,
      "loss": 0.1267,
      "step": 27866
    },
    {
      "epoch": 0.8129704183441274,
      "grad_norm": 0.9395317411324351,
      "learning_rate": 8.89637975262434e-07,
      "loss": 0.1228,
      "step": 27867
    },
    {
      "epoch": 0.812999591574771,
      "grad_norm": 0.7290633268101204,
      "learning_rate": 8.893689976501507e-07,
      "loss": 0.0967,
      "step": 27868
    },
    {
      "epoch": 0.8130287648054145,
      "grad_norm": 0.8547884924335176,
      "learning_rate": 8.891000567366004e-07,
      "loss": 0.1173,
      "step": 27869
    },
    {
      "epoch": 0.8130579380360581,
      "grad_norm": 1.0524216083013698,
      "learning_rate": 8.888311525241822e-07,
      "loss": 0.1247,
      "step": 27870
    },
    {
      "epoch": 0.8130871112667016,
      "grad_norm": 0.9536313632947281,
      "learning_rate": 8.885622850152986e-07,
      "loss": 0.0942,
      "step": 27871
    },
    {
      "epoch": 0.8131162844973452,
      "grad_norm": 0.9656085483237548,
      "learning_rate": 8.8829345421235e-07,
      "loss": 0.1135,
      "step": 27872
    },
    {
      "epoch": 0.8131454577279889,
      "grad_norm": 0.788840840057328,
      "learning_rate": 8.880246601177361e-07,
      "loss": 0.1236,
      "step": 27873
    },
    {
      "epoch": 0.8131746309586324,
      "grad_norm": 0.8092387032392127,
      "learning_rate": 8.877559027338556e-07,
      "loss": 0.108,
      "step": 27874
    },
    {
      "epoch": 0.813203804189276,
      "grad_norm": 1.5655596722106258,
      "learning_rate": 8.874871820631098e-07,
      "loss": 0.1187,
      "step": 27875
    },
    {
      "epoch": 0.8132329774199195,
      "grad_norm": 0.7460362896971863,
      "learning_rate": 8.872184981078952e-07,
      "loss": 0.1205,
      "step": 27876
    },
    {
      "epoch": 0.8132621506505631,
      "grad_norm": 0.924290849914,
      "learning_rate": 8.869498508706137e-07,
      "loss": 0.1047,
      "step": 27877
    },
    {
      "epoch": 0.8132913238812066,
      "grad_norm": 0.7165676662127671,
      "learning_rate": 8.866812403536601e-07,
      "loss": 0.1213,
      "step": 27878
    },
    {
      "epoch": 0.8133204971118502,
      "grad_norm": 0.7409949292685086,
      "learning_rate": 8.864126665594363e-07,
      "loss": 0.1043,
      "step": 27879
    },
    {
      "epoch": 0.8133496703424937,
      "grad_norm": 0.825083620554212,
      "learning_rate": 8.861441294903383e-07,
      "loss": 0.1412,
      "step": 27880
    },
    {
      "epoch": 0.8133788435731373,
      "grad_norm": 0.8182300223152272,
      "learning_rate": 8.858756291487619e-07,
      "loss": 0.1253,
      "step": 27881
    },
    {
      "epoch": 0.8134080168037808,
      "grad_norm": 0.7415768586442154,
      "learning_rate": 8.856071655371057e-07,
      "loss": 0.1178,
      "step": 27882
    },
    {
      "epoch": 0.8134371900344244,
      "grad_norm": 0.8896231090110454,
      "learning_rate": 8.853387386577677e-07,
      "loss": 0.1163,
      "step": 27883
    },
    {
      "epoch": 0.8134663632650679,
      "grad_norm": 0.8696533261783911,
      "learning_rate": 8.85070348513144e-07,
      "loss": 0.1107,
      "step": 27884
    },
    {
      "epoch": 0.8134955364957115,
      "grad_norm": 0.775099061652046,
      "learning_rate": 8.84801995105628e-07,
      "loss": 0.1231,
      "step": 27885
    },
    {
      "epoch": 0.813524709726355,
      "grad_norm": 0.7282668608994736,
      "learning_rate": 8.845336784376185e-07,
      "loss": 0.1067,
      "step": 27886
    },
    {
      "epoch": 0.8135538829569987,
      "grad_norm": 0.5516454272813409,
      "learning_rate": 8.842653985115102e-07,
      "loss": 0.1039,
      "step": 27887
    },
    {
      "epoch": 0.8135830561876423,
      "grad_norm": 0.9617941084820119,
      "learning_rate": 8.839971553296956e-07,
      "loss": 0.1312,
      "step": 27888
    },
    {
      "epoch": 0.8136122294182858,
      "grad_norm": 0.84508873568692,
      "learning_rate": 8.837289488945738e-07,
      "loss": 0.0992,
      "step": 27889
    },
    {
      "epoch": 0.8136414026489294,
      "grad_norm": 0.9907097824580179,
      "learning_rate": 8.834607792085375e-07,
      "loss": 0.109,
      "step": 27890
    },
    {
      "epoch": 0.8136705758795729,
      "grad_norm": 0.9375159128166626,
      "learning_rate": 8.831926462739799e-07,
      "loss": 0.1248,
      "step": 27891
    },
    {
      "epoch": 0.8136997491102165,
      "grad_norm": 0.9239841975052013,
      "learning_rate": 8.829245500932959e-07,
      "loss": 0.1352,
      "step": 27892
    },
    {
      "epoch": 0.81372892234086,
      "grad_norm": 0.8556550843005295,
      "learning_rate": 8.826564906688794e-07,
      "loss": 0.1083,
      "step": 27893
    },
    {
      "epoch": 0.8137580955715036,
      "grad_norm": 0.9812198149961999,
      "learning_rate": 8.823884680031214e-07,
      "loss": 0.1285,
      "step": 27894
    },
    {
      "epoch": 0.8137872688021471,
      "grad_norm": 0.6997671115908596,
      "learning_rate": 8.821204820984164e-07,
      "loss": 0.122,
      "step": 27895
    },
    {
      "epoch": 0.8138164420327907,
      "grad_norm": 0.7670904985101262,
      "learning_rate": 8.818525329571581e-07,
      "loss": 0.0922,
      "step": 27896
    },
    {
      "epoch": 0.8138456152634342,
      "grad_norm": 0.9271316138154613,
      "learning_rate": 8.815846205817369e-07,
      "loss": 0.0938,
      "step": 27897
    },
    {
      "epoch": 0.8138747884940778,
      "grad_norm": 0.8567940019276363,
      "learning_rate": 8.813167449745436e-07,
      "loss": 0.1027,
      "step": 27898
    },
    {
      "epoch": 0.8139039617247213,
      "grad_norm": 0.7482419574374475,
      "learning_rate": 8.810489061379728e-07,
      "loss": 0.1055,
      "step": 27899
    },
    {
      "epoch": 0.813933134955365,
      "grad_norm": 0.8392858184176683,
      "learning_rate": 8.80781104074413e-07,
      "loss": 0.1094,
      "step": 27900
    },
    {
      "epoch": 0.8139623081860086,
      "grad_norm": 0.7941550962089481,
      "learning_rate": 8.805133387862558e-07,
      "loss": 0.103,
      "step": 27901
    },
    {
      "epoch": 0.8139914814166521,
      "grad_norm": 0.979570616426688,
      "learning_rate": 8.802456102758938e-07,
      "loss": 0.1238,
      "step": 27902
    },
    {
      "epoch": 0.8140206546472957,
      "grad_norm": 0.7537379325636532,
      "learning_rate": 8.799779185457153e-07,
      "loss": 0.0994,
      "step": 27903
    },
    {
      "epoch": 0.8140498278779392,
      "grad_norm": 0.803492811338486,
      "learning_rate": 8.797102635981092e-07,
      "loss": 0.1026,
      "step": 27904
    },
    {
      "epoch": 0.8140790011085828,
      "grad_norm": 0.9189270594140458,
      "learning_rate": 8.794426454354671e-07,
      "loss": 0.1066,
      "step": 27905
    },
    {
      "epoch": 0.8141081743392263,
      "grad_norm": 0.9376877902881371,
      "learning_rate": 8.791750640601765e-07,
      "loss": 0.1217,
      "step": 27906
    },
    {
      "epoch": 0.8141373475698699,
      "grad_norm": 0.7900601135877897,
      "learning_rate": 8.789075194746288e-07,
      "loss": 0.1024,
      "step": 27907
    },
    {
      "epoch": 0.8141665208005134,
      "grad_norm": 0.7052171852321076,
      "learning_rate": 8.786400116812093e-07,
      "loss": 0.1132,
      "step": 27908
    },
    {
      "epoch": 0.814195694031157,
      "grad_norm": 0.7881679378547637,
      "learning_rate": 8.783725406823095e-07,
      "loss": 0.1118,
      "step": 27909
    },
    {
      "epoch": 0.8142248672618005,
      "grad_norm": 0.8932581135162265,
      "learning_rate": 8.781051064803153e-07,
      "loss": 0.1019,
      "step": 27910
    },
    {
      "epoch": 0.8142540404924441,
      "grad_norm": 0.7752407613929274,
      "learning_rate": 8.778377090776136e-07,
      "loss": 0.1169,
      "step": 27911
    },
    {
      "epoch": 0.8142832137230877,
      "grad_norm": 0.9243660125538735,
      "learning_rate": 8.775703484765929e-07,
      "loss": 0.1159,
      "step": 27912
    },
    {
      "epoch": 0.8143123869537312,
      "grad_norm": 0.8063009476385474,
      "learning_rate": 8.773030246796416e-07,
      "loss": 0.1161,
      "step": 27913
    },
    {
      "epoch": 0.8143415601843749,
      "grad_norm": 0.8555816929791252,
      "learning_rate": 8.770357376891442e-07,
      "loss": 0.1098,
      "step": 27914
    },
    {
      "epoch": 0.8143707334150184,
      "grad_norm": 0.9909714390437983,
      "learning_rate": 8.767684875074867e-07,
      "loss": 0.1255,
      "step": 27915
    },
    {
      "epoch": 0.814399906645662,
      "grad_norm": 0.769080409173358,
      "learning_rate": 8.765012741370566e-07,
      "loss": 0.1154,
      "step": 27916
    },
    {
      "epoch": 0.8144290798763055,
      "grad_norm": 0.9236014058704317,
      "learning_rate": 8.762340975802392e-07,
      "loss": 0.0981,
      "step": 27917
    },
    {
      "epoch": 0.8144582531069491,
      "grad_norm": 0.7957683463367754,
      "learning_rate": 8.759669578394165e-07,
      "loss": 0.1003,
      "step": 27918
    },
    {
      "epoch": 0.8144874263375926,
      "grad_norm": 0.9393352776753457,
      "learning_rate": 8.756998549169793e-07,
      "loss": 0.1296,
      "step": 27919
    },
    {
      "epoch": 0.8145165995682362,
      "grad_norm": 0.9901589837775178,
      "learning_rate": 8.754327888153085e-07,
      "loss": 0.1265,
      "step": 27920
    },
    {
      "epoch": 0.8145457727988797,
      "grad_norm": 0.9328615769706948,
      "learning_rate": 8.751657595367885e-07,
      "loss": 0.1105,
      "step": 27921
    },
    {
      "epoch": 0.8145749460295233,
      "grad_norm": 0.8225611506888997,
      "learning_rate": 8.74898767083805e-07,
      "loss": 0.1214,
      "step": 27922
    },
    {
      "epoch": 0.8146041192601668,
      "grad_norm": 0.7273666981494035,
      "learning_rate": 8.74631811458741e-07,
      "loss": 0.0917,
      "step": 27923
    },
    {
      "epoch": 0.8146332924908104,
      "grad_norm": 0.7965634618209972,
      "learning_rate": 8.743648926639775e-07,
      "loss": 0.1348,
      "step": 27924
    },
    {
      "epoch": 0.814662465721454,
      "grad_norm": 0.9075342283984179,
      "learning_rate": 8.740980107018998e-07,
      "loss": 0.1138,
      "step": 27925
    },
    {
      "epoch": 0.8146916389520975,
      "grad_norm": 0.958721682505962,
      "learning_rate": 8.73831165574891e-07,
      "loss": 0.1293,
      "step": 27926
    },
    {
      "epoch": 0.8147208121827412,
      "grad_norm": 0.8471685867698534,
      "learning_rate": 8.735643572853325e-07,
      "loss": 0.0987,
      "step": 27927
    },
    {
      "epoch": 0.8147499854133847,
      "grad_norm": 0.9322191934295897,
      "learning_rate": 8.732975858356057e-07,
      "loss": 0.1086,
      "step": 27928
    },
    {
      "epoch": 0.8147791586440283,
      "grad_norm": 0.8270460426510711,
      "learning_rate": 8.730308512280938e-07,
      "loss": 0.1044,
      "step": 27929
    },
    {
      "epoch": 0.8148083318746718,
      "grad_norm": 0.7654599046546438,
      "learning_rate": 8.72764153465176e-07,
      "loss": 0.1168,
      "step": 27930
    },
    {
      "epoch": 0.8148375051053154,
      "grad_norm": 0.953571558305935,
      "learning_rate": 8.724974925492347e-07,
      "loss": 0.1308,
      "step": 27931
    },
    {
      "epoch": 0.8148666783359589,
      "grad_norm": 0.8514589093817848,
      "learning_rate": 8.722308684826514e-07,
      "loss": 0.1106,
      "step": 27932
    },
    {
      "epoch": 0.8148958515666025,
      "grad_norm": 0.8506731842034587,
      "learning_rate": 8.719642812678059e-07,
      "loss": 0.1331,
      "step": 27933
    },
    {
      "epoch": 0.814925024797246,
      "grad_norm": 1.485484176016856,
      "learning_rate": 8.716977309070762e-07,
      "loss": 0.1125,
      "step": 27934
    },
    {
      "epoch": 0.8149541980278896,
      "grad_norm": 1.0000498783709355,
      "learning_rate": 8.714312174028456e-07,
      "loss": 0.1156,
      "step": 27935
    },
    {
      "epoch": 0.8149833712585332,
      "grad_norm": 1.086574623093909,
      "learning_rate": 8.711647407574897e-07,
      "loss": 0.0874,
      "step": 27936
    },
    {
      "epoch": 0.8150125444891767,
      "grad_norm": 1.00993218665719,
      "learning_rate": 8.708983009733906e-07,
      "loss": 0.1451,
      "step": 27937
    },
    {
      "epoch": 0.8150417177198203,
      "grad_norm": 1.927800729165628,
      "learning_rate": 8.706318980529249e-07,
      "loss": 0.1237,
      "step": 27938
    },
    {
      "epoch": 0.8150708909504638,
      "grad_norm": 0.7889614899462662,
      "learning_rate": 8.703655319984728e-07,
      "loss": 0.1027,
      "step": 27939
    },
    {
      "epoch": 0.8151000641811074,
      "grad_norm": 1.124823704480609,
      "learning_rate": 8.700992028124116e-07,
      "loss": 0.1134,
      "step": 27940
    },
    {
      "epoch": 0.815129237411751,
      "grad_norm": 1.026337449686936,
      "learning_rate": 8.698329104971176e-07,
      "loss": 0.116,
      "step": 27941
    },
    {
      "epoch": 0.8151584106423946,
      "grad_norm": 0.8714488107477869,
      "learning_rate": 8.695666550549692e-07,
      "loss": 0.0998,
      "step": 27942
    },
    {
      "epoch": 0.8151875838730381,
      "grad_norm": 0.7970873611052459,
      "learning_rate": 8.693004364883451e-07,
      "loss": 0.117,
      "step": 27943
    },
    {
      "epoch": 0.8152167571036817,
      "grad_norm": 0.9040347466080777,
      "learning_rate": 8.690342547996205e-07,
      "loss": 0.1015,
      "step": 27944
    },
    {
      "epoch": 0.8152459303343252,
      "grad_norm": 1.1349387215847295,
      "learning_rate": 8.687681099911704e-07,
      "loss": 0.1231,
      "step": 27945
    },
    {
      "epoch": 0.8152751035649688,
      "grad_norm": 1.0015897725252845,
      "learning_rate": 8.685020020653745e-07,
      "loss": 0.1299,
      "step": 27946
    },
    {
      "epoch": 0.8153042767956123,
      "grad_norm": 1.0853462300215388,
      "learning_rate": 8.682359310246058e-07,
      "loss": 0.1263,
      "step": 27947
    },
    {
      "epoch": 0.8153334500262559,
      "grad_norm": 1.021627090189694,
      "learning_rate": 8.67969896871238e-07,
      "loss": 0.1222,
      "step": 27948
    },
    {
      "epoch": 0.8153626232568995,
      "grad_norm": 0.9893004089744691,
      "learning_rate": 8.677038996076509e-07,
      "loss": 0.1376,
      "step": 27949
    },
    {
      "epoch": 0.815391796487543,
      "grad_norm": 0.677533835465219,
      "learning_rate": 8.674379392362175e-07,
      "loss": 0.1018,
      "step": 27950
    },
    {
      "epoch": 0.8154209697181866,
      "grad_norm": 0.6357550110472617,
      "learning_rate": 8.671720157593099e-07,
      "loss": 0.1037,
      "step": 27951
    },
    {
      "epoch": 0.8154501429488301,
      "grad_norm": 0.8820956278642804,
      "learning_rate": 8.669061291793051e-07,
      "loss": 0.129,
      "step": 27952
    },
    {
      "epoch": 0.8154793161794737,
      "grad_norm": 0.862788822741437,
      "learning_rate": 8.666402794985762e-07,
      "loss": 0.1071,
      "step": 27953
    },
    {
      "epoch": 0.8155084894101172,
      "grad_norm": 0.9375392918641782,
      "learning_rate": 8.663744667194946e-07,
      "loss": 0.1031,
      "step": 27954
    },
    {
      "epoch": 0.8155376626407609,
      "grad_norm": 0.759271459198332,
      "learning_rate": 8.661086908444349e-07,
      "loss": 0.1047,
      "step": 27955
    },
    {
      "epoch": 0.8155668358714044,
      "grad_norm": 1.0672211803055243,
      "learning_rate": 8.658429518757716e-07,
      "loss": 0.1075,
      "step": 27956
    },
    {
      "epoch": 0.815596009102048,
      "grad_norm": 0.835350576889197,
      "learning_rate": 8.655772498158754e-07,
      "loss": 0.1138,
      "step": 27957
    },
    {
      "epoch": 0.8156251823326915,
      "grad_norm": 0.6703112766894321,
      "learning_rate": 8.653115846671173e-07,
      "loss": 0.1132,
      "step": 27958
    },
    {
      "epoch": 0.8156543555633351,
      "grad_norm": 0.8498449228358369,
      "learning_rate": 8.650459564318714e-07,
      "loss": 0.1214,
      "step": 27959
    },
    {
      "epoch": 0.8156835287939787,
      "grad_norm": 1.0296137250965731,
      "learning_rate": 8.647803651125069e-07,
      "loss": 0.1206,
      "step": 27960
    },
    {
      "epoch": 0.8157127020246222,
      "grad_norm": 0.9620023355296395,
      "learning_rate": 8.645148107113976e-07,
      "loss": 0.1066,
      "step": 27961
    },
    {
      "epoch": 0.8157418752552658,
      "grad_norm": 0.8697029912694123,
      "learning_rate": 8.642492932309116e-07,
      "loss": 0.1154,
      "step": 27962
    },
    {
      "epoch": 0.8157710484859093,
      "grad_norm": 0.872224754036844,
      "learning_rate": 8.639838126734218e-07,
      "loss": 0.1058,
      "step": 27963
    },
    {
      "epoch": 0.8158002217165529,
      "grad_norm": 0.7181466266117666,
      "learning_rate": 8.63718369041296e-07,
      "loss": 0.1086,
      "step": 27964
    },
    {
      "epoch": 0.8158293949471964,
      "grad_norm": 0.7093394701015387,
      "learning_rate": 8.634529623369059e-07,
      "loss": 0.1303,
      "step": 27965
    },
    {
      "epoch": 0.81585856817784,
      "grad_norm": 1.0146880919739127,
      "learning_rate": 8.631875925626193e-07,
      "loss": 0.1167,
      "step": 27966
    },
    {
      "epoch": 0.8158877414084835,
      "grad_norm": 0.6976651318338014,
      "learning_rate": 8.629222597208081e-07,
      "loss": 0.1152,
      "step": 27967
    },
    {
      "epoch": 0.8159169146391272,
      "grad_norm": 0.7889779039807315,
      "learning_rate": 8.626569638138377e-07,
      "loss": 0.0986,
      "step": 27968
    },
    {
      "epoch": 0.8159460878697707,
      "grad_norm": 0.7200946525928537,
      "learning_rate": 8.623917048440794e-07,
      "loss": 0.1368,
      "step": 27969
    },
    {
      "epoch": 0.8159752611004143,
      "grad_norm": 0.8065351815209023,
      "learning_rate": 8.621264828139003e-07,
      "loss": 0.1251,
      "step": 27970
    },
    {
      "epoch": 0.8160044343310578,
      "grad_norm": 0.8627913754233999,
      "learning_rate": 8.618612977256674e-07,
      "loss": 0.1024,
      "step": 27971
    },
    {
      "epoch": 0.8160336075617014,
      "grad_norm": 0.7703068451678616,
      "learning_rate": 8.615961495817482e-07,
      "loss": 0.1271,
      "step": 27972
    },
    {
      "epoch": 0.816062780792345,
      "grad_norm": 0.9032384205047582,
      "learning_rate": 8.613310383845125e-07,
      "loss": 0.1096,
      "step": 27973
    },
    {
      "epoch": 0.8160919540229885,
      "grad_norm": 0.9759871484921885,
      "learning_rate": 8.610659641363251e-07,
      "loss": 0.1051,
      "step": 27974
    },
    {
      "epoch": 0.8161211272536321,
      "grad_norm": 0.8059160076351759,
      "learning_rate": 8.608009268395512e-07,
      "loss": 0.1085,
      "step": 27975
    },
    {
      "epoch": 0.8161503004842756,
      "grad_norm": 0.8301027795536169,
      "learning_rate": 8.605359264965602e-07,
      "loss": 0.1036,
      "step": 27976
    },
    {
      "epoch": 0.8161794737149192,
      "grad_norm": 0.777042585692951,
      "learning_rate": 8.602709631097161e-07,
      "loss": 0.1043,
      "step": 27977
    },
    {
      "epoch": 0.8162086469455627,
      "grad_norm": 0.8054281415233515,
      "learning_rate": 8.600060366813823e-07,
      "loss": 0.1165,
      "step": 27978
    },
    {
      "epoch": 0.8162378201762063,
      "grad_norm": 0.9361815134274353,
      "learning_rate": 8.597411472139288e-07,
      "loss": 0.1392,
      "step": 27979
    },
    {
      "epoch": 0.8162669934068498,
      "grad_norm": 0.7066121911380071,
      "learning_rate": 8.594762947097173e-07,
      "loss": 0.1209,
      "step": 27980
    },
    {
      "epoch": 0.8162961666374934,
      "grad_norm": 1.027407194909586,
      "learning_rate": 8.592114791711126e-07,
      "loss": 0.1014,
      "step": 27981
    },
    {
      "epoch": 0.816325339868137,
      "grad_norm": 0.7264163145303579,
      "learning_rate": 8.589467006004803e-07,
      "loss": 0.1121,
      "step": 27982
    },
    {
      "epoch": 0.8163545130987806,
      "grad_norm": 0.7785567537892137,
      "learning_rate": 8.586819590001833e-07,
      "loss": 0.1018,
      "step": 27983
    },
    {
      "epoch": 0.8163836863294242,
      "grad_norm": 0.7948416690473252,
      "learning_rate": 8.584172543725839e-07,
      "loss": 0.1166,
      "step": 27984
    },
    {
      "epoch": 0.8164128595600677,
      "grad_norm": 0.9166306968610213,
      "learning_rate": 8.581525867200464e-07,
      "loss": 0.1146,
      "step": 27985
    },
    {
      "epoch": 0.8164420327907113,
      "grad_norm": 0.826955882779524,
      "learning_rate": 8.578879560449354e-07,
      "loss": 0.1108,
      "step": 27986
    },
    {
      "epoch": 0.8164712060213548,
      "grad_norm": 0.6589801802365504,
      "learning_rate": 8.576233623496117e-07,
      "loss": 0.1048,
      "step": 27987
    },
    {
      "epoch": 0.8165003792519984,
      "grad_norm": 1.039948581199204,
      "learning_rate": 8.573588056364368e-07,
      "loss": 0.1344,
      "step": 27988
    },
    {
      "epoch": 0.8165295524826419,
      "grad_norm": 0.6468888511256666,
      "learning_rate": 8.570942859077747e-07,
      "loss": 0.0856,
      "step": 27989
    },
    {
      "epoch": 0.8165587257132855,
      "grad_norm": 0.7987498188958376,
      "learning_rate": 8.568298031659844e-07,
      "loss": 0.121,
      "step": 27990
    },
    {
      "epoch": 0.816587898943929,
      "grad_norm": 0.6352220061579698,
      "learning_rate": 8.565653574134297e-07,
      "loss": 0.1071,
      "step": 27991
    },
    {
      "epoch": 0.8166170721745726,
      "grad_norm": 0.9729734228360658,
      "learning_rate": 8.563009486524698e-07,
      "loss": 0.1183,
      "step": 27992
    },
    {
      "epoch": 0.8166462454052161,
      "grad_norm": 0.7873868919153967,
      "learning_rate": 8.560365768854662e-07,
      "loss": 0.1241,
      "step": 27993
    },
    {
      "epoch": 0.8166754186358597,
      "grad_norm": 0.6202144193652498,
      "learning_rate": 8.55772242114778e-07,
      "loss": 0.0951,
      "step": 27994
    },
    {
      "epoch": 0.8167045918665033,
      "grad_norm": 0.7052777954048747,
      "learning_rate": 8.555079443427672e-07,
      "loss": 0.1106,
      "step": 27995
    },
    {
      "epoch": 0.8167337650971469,
      "grad_norm": 0.878384548400134,
      "learning_rate": 8.552436835717909e-07,
      "loss": 0.0756,
      "step": 27996
    },
    {
      "epoch": 0.8167629383277905,
      "grad_norm": 0.9384075757366674,
      "learning_rate": 8.549794598042104e-07,
      "loss": 0.1041,
      "step": 27997
    },
    {
      "epoch": 0.816792111558434,
      "grad_norm": 0.7983972119889551,
      "learning_rate": 8.54715273042383e-07,
      "loss": 0.1188,
      "step": 27998
    },
    {
      "epoch": 0.8168212847890776,
      "grad_norm": 0.8255508273022011,
      "learning_rate": 8.544511232886693e-07,
      "loss": 0.1267,
      "step": 27999
    },
    {
      "epoch": 0.8168504580197211,
      "grad_norm": 0.7603824207116087,
      "learning_rate": 8.541870105454264e-07,
      "loss": 0.0905,
      "step": 28000
    },
    {
      "epoch": 0.8168796312503647,
      "grad_norm": 0.8667780567402311,
      "learning_rate": 8.539229348150107e-07,
      "loss": 0.1176,
      "step": 28001
    },
    {
      "epoch": 0.8169088044810082,
      "grad_norm": 1.0316246110869005,
      "learning_rate": 8.536588960997811e-07,
      "loss": 0.112,
      "step": 28002
    },
    {
      "epoch": 0.8169379777116518,
      "grad_norm": 0.7797496813247969,
      "learning_rate": 8.53394894402097e-07,
      "loss": 0.1251,
      "step": 28003
    },
    {
      "epoch": 0.8169671509422953,
      "grad_norm": 0.9794218367100127,
      "learning_rate": 8.531309297243129e-07,
      "loss": 0.1226,
      "step": 28004
    },
    {
      "epoch": 0.8169963241729389,
      "grad_norm": 0.7280829179655549,
      "learning_rate": 8.528670020687845e-07,
      "loss": 0.1169,
      "step": 28005
    },
    {
      "epoch": 0.8170254974035824,
      "grad_norm": 1.0092566018665785,
      "learning_rate": 8.526031114378713e-07,
      "loss": 0.1391,
      "step": 28006
    },
    {
      "epoch": 0.817054670634226,
      "grad_norm": 0.983347000050237,
      "learning_rate": 8.523392578339268e-07,
      "loss": 0.0923,
      "step": 28007
    },
    {
      "epoch": 0.8170838438648695,
      "grad_norm": 0.7769789828232809,
      "learning_rate": 8.520754412593052e-07,
      "loss": 0.1325,
      "step": 28008
    },
    {
      "epoch": 0.8171130170955132,
      "grad_norm": 0.8762249085199176,
      "learning_rate": 8.518116617163664e-07,
      "loss": 0.104,
      "step": 28009
    },
    {
      "epoch": 0.8171421903261568,
      "grad_norm": 0.7477001410736634,
      "learning_rate": 8.515479192074627e-07,
      "loss": 0.1061,
      "step": 28010
    },
    {
      "epoch": 0.8171713635568003,
      "grad_norm": 0.7165997494065797,
      "learning_rate": 8.512842137349475e-07,
      "loss": 0.1028,
      "step": 28011
    },
    {
      "epoch": 0.8172005367874439,
      "grad_norm": 0.7707761993942428,
      "learning_rate": 8.510205453011783e-07,
      "loss": 0.124,
      "step": 28012
    },
    {
      "epoch": 0.8172297100180874,
      "grad_norm": 0.7468551414347658,
      "learning_rate": 8.507569139085064e-07,
      "loss": 0.1219,
      "step": 28013
    },
    {
      "epoch": 0.817258883248731,
      "grad_norm": 1.0180306974051387,
      "learning_rate": 8.504933195592858e-07,
      "loss": 0.1092,
      "step": 28014
    },
    {
      "epoch": 0.8172880564793745,
      "grad_norm": 0.7306808186430649,
      "learning_rate": 8.502297622558697e-07,
      "loss": 0.1106,
      "step": 28015
    },
    {
      "epoch": 0.8173172297100181,
      "grad_norm": 0.813070514036749,
      "learning_rate": 8.499662420006127e-07,
      "loss": 0.1224,
      "step": 28016
    },
    {
      "epoch": 0.8173464029406616,
      "grad_norm": 0.9905332900506604,
      "learning_rate": 8.497027587958672e-07,
      "loss": 0.1114,
      "step": 28017
    },
    {
      "epoch": 0.8173755761713052,
      "grad_norm": 0.8700338934573142,
      "learning_rate": 8.494393126439831e-07,
      "loss": 0.1199,
      "step": 28018
    },
    {
      "epoch": 0.8174047494019487,
      "grad_norm": 0.9352580233423357,
      "learning_rate": 8.491759035473152e-07,
      "loss": 0.1441,
      "step": 28019
    },
    {
      "epoch": 0.8174339226325923,
      "grad_norm": 0.9657119282357742,
      "learning_rate": 8.489125315082125e-07,
      "loss": 0.1272,
      "step": 28020
    },
    {
      "epoch": 0.8174630958632358,
      "grad_norm": 0.8445003314221814,
      "learning_rate": 8.486491965290294e-07,
      "loss": 0.1174,
      "step": 28021
    },
    {
      "epoch": 0.8174922690938795,
      "grad_norm": 0.9901304891181352,
      "learning_rate": 8.483858986121135e-07,
      "loss": 0.1183,
      "step": 28022
    },
    {
      "epoch": 0.8175214423245231,
      "grad_norm": 1.0211973522370925,
      "learning_rate": 8.48122637759819e-07,
      "loss": 0.1154,
      "step": 28023
    },
    {
      "epoch": 0.8175506155551666,
      "grad_norm": 1.0334704825490604,
      "learning_rate": 8.478594139744928e-07,
      "loss": 0.1232,
      "step": 28024
    },
    {
      "epoch": 0.8175797887858102,
      "grad_norm": 1.0091919402124505,
      "learning_rate": 8.475962272584881e-07,
      "loss": 0.1213,
      "step": 28025
    },
    {
      "epoch": 0.8176089620164537,
      "grad_norm": 1.010854824605726,
      "learning_rate": 8.47333077614152e-07,
      "loss": 0.1386,
      "step": 28026
    },
    {
      "epoch": 0.8176381352470973,
      "grad_norm": 0.8002253611829233,
      "learning_rate": 8.470699650438358e-07,
      "loss": 0.1223,
      "step": 28027
    },
    {
      "epoch": 0.8176673084777408,
      "grad_norm": 0.7758489461325259,
      "learning_rate": 8.468068895498859e-07,
      "loss": 0.1112,
      "step": 28028
    },
    {
      "epoch": 0.8176964817083844,
      "grad_norm": 1.0687019772086732,
      "learning_rate": 8.465438511346546e-07,
      "loss": 0.1709,
      "step": 28029
    },
    {
      "epoch": 0.8177256549390279,
      "grad_norm": 0.8801451995809791,
      "learning_rate": 8.462808498004882e-07,
      "loss": 0.1212,
      "step": 28030
    },
    {
      "epoch": 0.8177548281696715,
      "grad_norm": 0.8419010267623476,
      "learning_rate": 8.460178855497331e-07,
      "loss": 0.1316,
      "step": 28031
    },
    {
      "epoch": 0.817784001400315,
      "grad_norm": 0.8620228436395571,
      "learning_rate": 8.457549583847391e-07,
      "loss": 0.0872,
      "step": 28032
    },
    {
      "epoch": 0.8178131746309586,
      "grad_norm": 0.9043170259284783,
      "learning_rate": 8.454920683078544e-07,
      "loss": 0.1323,
      "step": 28033
    },
    {
      "epoch": 0.8178423478616021,
      "grad_norm": 0.7789213372353636,
      "learning_rate": 8.452292153214242e-07,
      "loss": 0.1269,
      "step": 28034
    },
    {
      "epoch": 0.8178715210922457,
      "grad_norm": 0.905291898223673,
      "learning_rate": 8.449663994277951e-07,
      "loss": 0.1293,
      "step": 28035
    },
    {
      "epoch": 0.8179006943228894,
      "grad_norm": 0.9083828888139835,
      "learning_rate": 8.447036206293152e-07,
      "loss": 0.125,
      "step": 28036
    },
    {
      "epoch": 0.8179298675535329,
      "grad_norm": 0.7955954016926723,
      "learning_rate": 8.444408789283292e-07,
      "loss": 0.1247,
      "step": 28037
    },
    {
      "epoch": 0.8179590407841765,
      "grad_norm": 0.9815952664476065,
      "learning_rate": 8.44178174327181e-07,
      "loss": 0.1119,
      "step": 28038
    },
    {
      "epoch": 0.81798821401482,
      "grad_norm": 0.8610262579032661,
      "learning_rate": 8.439155068282201e-07,
      "loss": 0.1226,
      "step": 28039
    },
    {
      "epoch": 0.8180173872454636,
      "grad_norm": 0.8876390810937166,
      "learning_rate": 8.436528764337892e-07,
      "loss": 0.1318,
      "step": 28040
    },
    {
      "epoch": 0.8180465604761071,
      "grad_norm": 1.015407307769133,
      "learning_rate": 8.43390283146232e-07,
      "loss": 0.1333,
      "step": 28041
    },
    {
      "epoch": 0.8180757337067507,
      "grad_norm": 0.9222701906240001,
      "learning_rate": 8.431277269678961e-07,
      "loss": 0.1041,
      "step": 28042
    },
    {
      "epoch": 0.8181049069373942,
      "grad_norm": 0.8136467820095734,
      "learning_rate": 8.428652079011229e-07,
      "loss": 0.1135,
      "step": 28043
    },
    {
      "epoch": 0.8181340801680378,
      "grad_norm": 0.8692521287609517,
      "learning_rate": 8.426027259482555e-07,
      "loss": 0.1144,
      "step": 28044
    },
    {
      "epoch": 0.8181632533986813,
      "grad_norm": 1.0202616954193195,
      "learning_rate": 8.423402811116388e-07,
      "loss": 0.1025,
      "step": 28045
    },
    {
      "epoch": 0.8181924266293249,
      "grad_norm": 0.7730376981748913,
      "learning_rate": 8.420778733936164e-07,
      "loss": 0.082,
      "step": 28046
    },
    {
      "epoch": 0.8182215998599685,
      "grad_norm": 0.9667693727371081,
      "learning_rate": 8.418155027965302e-07,
      "loss": 0.1,
      "step": 28047
    },
    {
      "epoch": 0.818250773090612,
      "grad_norm": 0.8015330055711267,
      "learning_rate": 8.41553169322722e-07,
      "loss": 0.1074,
      "step": 28048
    },
    {
      "epoch": 0.8182799463212557,
      "grad_norm": 0.8360952802038947,
      "learning_rate": 8.41290872974535e-07,
      "loss": 0.1423,
      "step": 28049
    },
    {
      "epoch": 0.8183091195518992,
      "grad_norm": 1.0510750425775541,
      "learning_rate": 8.410286137543089e-07,
      "loss": 0.1079,
      "step": 28050
    },
    {
      "epoch": 0.8183382927825428,
      "grad_norm": 0.7413942925453786,
      "learning_rate": 8.407663916643882e-07,
      "loss": 0.1062,
      "step": 28051
    },
    {
      "epoch": 0.8183674660131863,
      "grad_norm": 0.9235686785184701,
      "learning_rate": 8.405042067071112e-07,
      "loss": 0.1146,
      "step": 28052
    },
    {
      "epoch": 0.8183966392438299,
      "grad_norm": 0.8694952551010822,
      "learning_rate": 8.402420588848204e-07,
      "loss": 0.1028,
      "step": 28053
    },
    {
      "epoch": 0.8184258124744734,
      "grad_norm": 0.8309566220828405,
      "learning_rate": 8.399799481998555e-07,
      "loss": 0.0997,
      "step": 28054
    },
    {
      "epoch": 0.818454985705117,
      "grad_norm": 0.7166765632882739,
      "learning_rate": 8.397178746545558e-07,
      "loss": 0.1293,
      "step": 28055
    },
    {
      "epoch": 0.8184841589357605,
      "grad_norm": 0.7696646028108041,
      "learning_rate": 8.394558382512613e-07,
      "loss": 0.1124,
      "step": 28056
    },
    {
      "epoch": 0.8185133321664041,
      "grad_norm": 0.7930973318720292,
      "learning_rate": 8.391938389923132e-07,
      "loss": 0.1048,
      "step": 28057
    },
    {
      "epoch": 0.8185425053970476,
      "grad_norm": 0.7494214465106429,
      "learning_rate": 8.389318768800481e-07,
      "loss": 0.0871,
      "step": 28058
    },
    {
      "epoch": 0.8185716786276912,
      "grad_norm": 0.7757831673639287,
      "learning_rate": 8.386699519168074e-07,
      "loss": 0.1177,
      "step": 28059
    },
    {
      "epoch": 0.8186008518583348,
      "grad_norm": 0.8360261929111451,
      "learning_rate": 8.384080641049275e-07,
      "loss": 0.1219,
      "step": 28060
    },
    {
      "epoch": 0.8186300250889783,
      "grad_norm": 1.2544170598866653,
      "learning_rate": 8.38146213446746e-07,
      "loss": 0.118,
      "step": 28061
    },
    {
      "epoch": 0.8186591983196219,
      "grad_norm": 0.8124155581438096,
      "learning_rate": 8.378843999446018e-07,
      "loss": 0.1273,
      "step": 28062
    },
    {
      "epoch": 0.8186883715502655,
      "grad_norm": 0.893277986606559,
      "learning_rate": 8.376226236008328e-07,
      "loss": 0.1079,
      "step": 28063
    },
    {
      "epoch": 0.8187175447809091,
      "grad_norm": 0.7547091523477031,
      "learning_rate": 8.373608844177755e-07,
      "loss": 0.1154,
      "step": 28064
    },
    {
      "epoch": 0.8187467180115526,
      "grad_norm": 0.7725148549140893,
      "learning_rate": 8.370991823977653e-07,
      "loss": 0.1162,
      "step": 28065
    },
    {
      "epoch": 0.8187758912421962,
      "grad_norm": 0.7866570268503806,
      "learning_rate": 8.368375175431415e-07,
      "loss": 0.1197,
      "step": 28066
    },
    {
      "epoch": 0.8188050644728397,
      "grad_norm": 0.9351223328356347,
      "learning_rate": 8.365758898562371e-07,
      "loss": 0.1261,
      "step": 28067
    },
    {
      "epoch": 0.8188342377034833,
      "grad_norm": 0.9229732339443021,
      "learning_rate": 8.363142993393891e-07,
      "loss": 0.1184,
      "step": 28068
    },
    {
      "epoch": 0.8188634109341268,
      "grad_norm": 0.8975736632063561,
      "learning_rate": 8.360527459949341e-07,
      "loss": 0.1325,
      "step": 28069
    },
    {
      "epoch": 0.8188925841647704,
      "grad_norm": 0.7978540709436979,
      "learning_rate": 8.357912298252063e-07,
      "loss": 0.1086,
      "step": 28070
    },
    {
      "epoch": 0.818921757395414,
      "grad_norm": 0.8436162053129328,
      "learning_rate": 8.355297508325394e-07,
      "loss": 0.0938,
      "step": 28071
    },
    {
      "epoch": 0.8189509306260575,
      "grad_norm": 0.9512952476076122,
      "learning_rate": 8.352683090192698e-07,
      "loss": 0.159,
      "step": 28072
    },
    {
      "epoch": 0.8189801038567011,
      "grad_norm": 0.8949113859449455,
      "learning_rate": 8.35006904387729e-07,
      "loss": 0.1217,
      "step": 28073
    },
    {
      "epoch": 0.8190092770873446,
      "grad_norm": 0.9513471759480473,
      "learning_rate": 8.34745536940254e-07,
      "loss": 0.1079,
      "step": 28074
    },
    {
      "epoch": 0.8190384503179882,
      "grad_norm": 0.8236986247300286,
      "learning_rate": 8.344842066791753e-07,
      "loss": 0.1031,
      "step": 28075
    },
    {
      "epoch": 0.8190676235486318,
      "grad_norm": 0.8718510701854801,
      "learning_rate": 8.342229136068281e-07,
      "loss": 0.1315,
      "step": 28076
    },
    {
      "epoch": 0.8190967967792754,
      "grad_norm": 0.7291032550965775,
      "learning_rate": 8.339616577255444e-07,
      "loss": 0.0978,
      "step": 28077
    },
    {
      "epoch": 0.8191259700099189,
      "grad_norm": 0.8902461305151809,
      "learning_rate": 8.337004390376552e-07,
      "loss": 0.1079,
      "step": 28078
    },
    {
      "epoch": 0.8191551432405625,
      "grad_norm": 0.9204624887828335,
      "learning_rate": 8.334392575454941e-07,
      "loss": 0.106,
      "step": 28079
    },
    {
      "epoch": 0.819184316471206,
      "grad_norm": 1.0351279618199487,
      "learning_rate": 8.331781132513939e-07,
      "loss": 0.1142,
      "step": 28080
    },
    {
      "epoch": 0.8192134897018496,
      "grad_norm": 0.8509110671830511,
      "learning_rate": 8.329170061576847e-07,
      "loss": 0.1323,
      "step": 28081
    },
    {
      "epoch": 0.8192426629324931,
      "grad_norm": 0.7621287085683462,
      "learning_rate": 8.326559362666964e-07,
      "loss": 0.1025,
      "step": 28082
    },
    {
      "epoch": 0.8192718361631367,
      "grad_norm": 0.7648280607430413,
      "learning_rate": 8.323949035807621e-07,
      "loss": 0.117,
      "step": 28083
    },
    {
      "epoch": 0.8193010093937803,
      "grad_norm": 0.7945796102385012,
      "learning_rate": 8.321339081022117e-07,
      "loss": 0.1058,
      "step": 28084
    },
    {
      "epoch": 0.8193301826244238,
      "grad_norm": 0.8999082911118238,
      "learning_rate": 8.318729498333722e-07,
      "loss": 0.0838,
      "step": 28085
    },
    {
      "epoch": 0.8193593558550674,
      "grad_norm": 0.950698878684458,
      "learning_rate": 8.316120287765784e-07,
      "loss": 0.1321,
      "step": 28086
    },
    {
      "epoch": 0.8193885290857109,
      "grad_norm": 0.8058811638166943,
      "learning_rate": 8.313511449341572e-07,
      "loss": 0.1081,
      "step": 28087
    },
    {
      "epoch": 0.8194177023163545,
      "grad_norm": 0.8291288706638122,
      "learning_rate": 8.310902983084368e-07,
      "loss": 0.1225,
      "step": 28088
    },
    {
      "epoch": 0.819446875546998,
      "grad_norm": 0.718478792515388,
      "learning_rate": 8.308294889017482e-07,
      "loss": 0.0867,
      "step": 28089
    },
    {
      "epoch": 0.8194760487776417,
      "grad_norm": 0.9614708259651105,
      "learning_rate": 8.305687167164189e-07,
      "loss": 0.1117,
      "step": 28090
    },
    {
      "epoch": 0.8195052220082852,
      "grad_norm": 0.7657402132189794,
      "learning_rate": 8.303079817547749e-07,
      "loss": 0.1058,
      "step": 28091
    },
    {
      "epoch": 0.8195343952389288,
      "grad_norm": 0.9436221345772089,
      "learning_rate": 8.300472840191464e-07,
      "loss": 0.1092,
      "step": 28092
    },
    {
      "epoch": 0.8195635684695723,
      "grad_norm": 0.9302532658571502,
      "learning_rate": 8.297866235118612e-07,
      "loss": 0.1071,
      "step": 28093
    },
    {
      "epoch": 0.8195927417002159,
      "grad_norm": 0.744906274912989,
      "learning_rate": 8.295260002352462e-07,
      "loss": 0.1177,
      "step": 28094
    },
    {
      "epoch": 0.8196219149308595,
      "grad_norm": 0.7902433385563541,
      "learning_rate": 8.292654141916257e-07,
      "loss": 0.111,
      "step": 28095
    },
    {
      "epoch": 0.819651088161503,
      "grad_norm": 0.8382154140304817,
      "learning_rate": 8.290048653833288e-07,
      "loss": 0.1032,
      "step": 28096
    },
    {
      "epoch": 0.8196802613921466,
      "grad_norm": 1.0535307215415357,
      "learning_rate": 8.287443538126805e-07,
      "loss": 0.1179,
      "step": 28097
    },
    {
      "epoch": 0.8197094346227901,
      "grad_norm": 0.9495198768864338,
      "learning_rate": 8.284838794820061e-07,
      "loss": 0.13,
      "step": 28098
    },
    {
      "epoch": 0.8197386078534337,
      "grad_norm": 0.7835200583928362,
      "learning_rate": 8.28223442393633e-07,
      "loss": 0.1196,
      "step": 28099
    },
    {
      "epoch": 0.8197677810840772,
      "grad_norm": 1.00873338218089,
      "learning_rate": 8.279630425498858e-07,
      "loss": 0.1248,
      "step": 28100
    },
    {
      "epoch": 0.8197969543147208,
      "grad_norm": 1.0290157010544827,
      "learning_rate": 8.277026799530869e-07,
      "loss": 0.1043,
      "step": 28101
    },
    {
      "epoch": 0.8198261275453643,
      "grad_norm": 0.8396608742587166,
      "learning_rate": 8.274423546055638e-07,
      "loss": 0.0975,
      "step": 28102
    },
    {
      "epoch": 0.819855300776008,
      "grad_norm": 1.0409349443091456,
      "learning_rate": 8.271820665096381e-07,
      "loss": 0.1415,
      "step": 28103
    },
    {
      "epoch": 0.8198844740066515,
      "grad_norm": 0.7578961483672078,
      "learning_rate": 8.269218156676356e-07,
      "loss": 0.1113,
      "step": 28104
    },
    {
      "epoch": 0.8199136472372951,
      "grad_norm": 0.8745063625424268,
      "learning_rate": 8.266616020818779e-07,
      "loss": 0.1206,
      "step": 28105
    },
    {
      "epoch": 0.8199428204679386,
      "grad_norm": 1.0504859406376101,
      "learning_rate": 8.264014257546909e-07,
      "loss": 0.1223,
      "step": 28106
    },
    {
      "epoch": 0.8199719936985822,
      "grad_norm": 0.7646400680302314,
      "learning_rate": 8.26141286688395e-07,
      "loss": 0.1032,
      "step": 28107
    },
    {
      "epoch": 0.8200011669292258,
      "grad_norm": 0.7847866097437022,
      "learning_rate": 8.258811848853126e-07,
      "loss": 0.1245,
      "step": 28108
    },
    {
      "epoch": 0.8200303401598693,
      "grad_norm": 0.8507550174118889,
      "learning_rate": 8.256211203477659e-07,
      "loss": 0.1199,
      "step": 28109
    },
    {
      "epoch": 0.8200595133905129,
      "grad_norm": 1.0639099007405886,
      "learning_rate": 8.253610930780793e-07,
      "loss": 0.1216,
      "step": 28110
    },
    {
      "epoch": 0.8200886866211564,
      "grad_norm": 0.7559782093999984,
      "learning_rate": 8.251011030785722e-07,
      "loss": 0.1073,
      "step": 28111
    },
    {
      "epoch": 0.8201178598518,
      "grad_norm": 0.6794944671157042,
      "learning_rate": 8.248411503515641e-07,
      "loss": 0.1069,
      "step": 28112
    },
    {
      "epoch": 0.8201470330824435,
      "grad_norm": 1.510744603051289,
      "learning_rate": 8.245812348993793e-07,
      "loss": 0.1256,
      "step": 28113
    },
    {
      "epoch": 0.8201762063130871,
      "grad_norm": 0.9243085518108931,
      "learning_rate": 8.243213567243357e-07,
      "loss": 0.0988,
      "step": 28114
    },
    {
      "epoch": 0.8202053795437306,
      "grad_norm": 0.7633260998978164,
      "learning_rate": 8.240615158287524e-07,
      "loss": 0.1006,
      "step": 28115
    },
    {
      "epoch": 0.8202345527743742,
      "grad_norm": 0.9381278677959661,
      "learning_rate": 8.238017122149533e-07,
      "loss": 0.1101,
      "step": 28116
    },
    {
      "epoch": 0.8202637260050178,
      "grad_norm": 0.9691918239872492,
      "learning_rate": 8.235419458852556e-07,
      "loss": 0.0933,
      "step": 28117
    },
    {
      "epoch": 0.8202928992356614,
      "grad_norm": 0.9012611915988304,
      "learning_rate": 8.232822168419774e-07,
      "loss": 0.1247,
      "step": 28118
    },
    {
      "epoch": 0.820322072466305,
      "grad_norm": 0.7544283074306304,
      "learning_rate": 8.230225250874391e-07,
      "loss": 0.1091,
      "step": 28119
    },
    {
      "epoch": 0.8203512456969485,
      "grad_norm": 0.847683240702552,
      "learning_rate": 8.227628706239593e-07,
      "loss": 0.111,
      "step": 28120
    },
    {
      "epoch": 0.8203804189275921,
      "grad_norm": 1.009555390596786,
      "learning_rate": 8.225032534538535e-07,
      "loss": 0.107,
      "step": 28121
    },
    {
      "epoch": 0.8204095921582356,
      "grad_norm": 0.9268798893189278,
      "learning_rate": 8.22243673579442e-07,
      "loss": 0.1019,
      "step": 28122
    },
    {
      "epoch": 0.8204387653888792,
      "grad_norm": 0.8621761592208615,
      "learning_rate": 8.219841310030424e-07,
      "loss": 0.1083,
      "step": 28123
    },
    {
      "epoch": 0.8204679386195227,
      "grad_norm": 1.209421675021601,
      "learning_rate": 8.217246257269712e-07,
      "loss": 0.1147,
      "step": 28124
    },
    {
      "epoch": 0.8204971118501663,
      "grad_norm": 0.8386429567325634,
      "learning_rate": 8.214651577535442e-07,
      "loss": 0.0829,
      "step": 28125
    },
    {
      "epoch": 0.8205262850808098,
      "grad_norm": 0.915009359660299,
      "learning_rate": 8.212057270850798e-07,
      "loss": 0.0961,
      "step": 28126
    },
    {
      "epoch": 0.8205554583114534,
      "grad_norm": 0.8658750313762614,
      "learning_rate": 8.209463337238921e-07,
      "loss": 0.1111,
      "step": 28127
    },
    {
      "epoch": 0.8205846315420969,
      "grad_norm": 1.661013780257039,
      "learning_rate": 8.206869776722976e-07,
      "loss": 0.1069,
      "step": 28128
    },
    {
      "epoch": 0.8206138047727405,
      "grad_norm": 0.9154463517099505,
      "learning_rate": 8.204276589326132e-07,
      "loss": 0.104,
      "step": 28129
    },
    {
      "epoch": 0.8206429780033841,
      "grad_norm": 0.805309686255245,
      "learning_rate": 8.201683775071534e-07,
      "loss": 0.116,
      "step": 28130
    },
    {
      "epoch": 0.8206721512340277,
      "grad_norm": 0.9345352578915128,
      "learning_rate": 8.199091333982312e-07,
      "loss": 0.1046,
      "step": 28131
    },
    {
      "epoch": 0.8207013244646713,
      "grad_norm": 0.8183592785988367,
      "learning_rate": 8.196499266081631e-07,
      "loss": 0.089,
      "step": 28132
    },
    {
      "epoch": 0.8207304976953148,
      "grad_norm": 0.7325455817803519,
      "learning_rate": 8.193907571392617e-07,
      "loss": 0.0994,
      "step": 28133
    },
    {
      "epoch": 0.8207596709259584,
      "grad_norm": 0.8562035668368565,
      "learning_rate": 8.19131624993843e-07,
      "loss": 0.1379,
      "step": 28134
    },
    {
      "epoch": 0.8207888441566019,
      "grad_norm": 1.034877966516031,
      "learning_rate": 8.188725301742178e-07,
      "loss": 0.1013,
      "step": 28135
    },
    {
      "epoch": 0.8208180173872455,
      "grad_norm": 0.7534523804468917,
      "learning_rate": 8.186134726827016e-07,
      "loss": 0.1121,
      "step": 28136
    },
    {
      "epoch": 0.820847190617889,
      "grad_norm": 0.976663392916067,
      "learning_rate": 8.183544525216059e-07,
      "loss": 0.1151,
      "step": 28137
    },
    {
      "epoch": 0.8208763638485326,
      "grad_norm": 0.8498206421645776,
      "learning_rate": 8.180954696932425e-07,
      "loss": 0.1111,
      "step": 28138
    },
    {
      "epoch": 0.8209055370791761,
      "grad_norm": 0.8489290048667434,
      "learning_rate": 8.178365241999247e-07,
      "loss": 0.0992,
      "step": 28139
    },
    {
      "epoch": 0.8209347103098197,
      "grad_norm": 0.7877822691146962,
      "learning_rate": 8.175776160439646e-07,
      "loss": 0.1064,
      "step": 28140
    },
    {
      "epoch": 0.8209638835404632,
      "grad_norm": 0.8335305962650439,
      "learning_rate": 8.173187452276738e-07,
      "loss": 0.1234,
      "step": 28141
    },
    {
      "epoch": 0.8209930567711068,
      "grad_norm": 1.4482998243119232,
      "learning_rate": 8.170599117533612e-07,
      "loss": 0.1236,
      "step": 28142
    },
    {
      "epoch": 0.8210222300017503,
      "grad_norm": 0.8594430263994742,
      "learning_rate": 8.168011156233402e-07,
      "loss": 0.0965,
      "step": 28143
    },
    {
      "epoch": 0.821051403232394,
      "grad_norm": 0.8527821907224292,
      "learning_rate": 8.165423568399206e-07,
      "loss": 0.1167,
      "step": 28144
    },
    {
      "epoch": 0.8210805764630376,
      "grad_norm": 0.9662625297186198,
      "learning_rate": 8.162836354054093e-07,
      "loss": 0.0954,
      "step": 28145
    },
    {
      "epoch": 0.8211097496936811,
      "grad_norm": 0.870731752773125,
      "learning_rate": 8.160249513221218e-07,
      "loss": 0.1277,
      "step": 28146
    },
    {
      "epoch": 0.8211389229243247,
      "grad_norm": 0.8296927225677133,
      "learning_rate": 8.157663045923647e-07,
      "loss": 0.1056,
      "step": 28147
    },
    {
      "epoch": 0.8211680961549682,
      "grad_norm": 1.1020732115297716,
      "learning_rate": 8.15507695218446e-07,
      "loss": 0.1275,
      "step": 28148
    },
    {
      "epoch": 0.8211972693856118,
      "grad_norm": 0.9708489056716605,
      "learning_rate": 8.152491232026766e-07,
      "loss": 0.1423,
      "step": 28149
    },
    {
      "epoch": 0.8212264426162553,
      "grad_norm": 0.8520652340420806,
      "learning_rate": 8.149905885473641e-07,
      "loss": 0.1178,
      "step": 28150
    },
    {
      "epoch": 0.8212556158468989,
      "grad_norm": 0.9514485146187803,
      "learning_rate": 8.147320912548156e-07,
      "loss": 0.1154,
      "step": 28151
    },
    {
      "epoch": 0.8212847890775424,
      "grad_norm": 0.9543060882194034,
      "learning_rate": 8.1447363132734e-07,
      "loss": 0.0939,
      "step": 28152
    },
    {
      "epoch": 0.821313962308186,
      "grad_norm": 0.7780015591522186,
      "learning_rate": 8.142152087672456e-07,
      "loss": 0.1069,
      "step": 28153
    },
    {
      "epoch": 0.8213431355388295,
      "grad_norm": 0.8282556396543697,
      "learning_rate": 8.139568235768386e-07,
      "loss": 0.1097,
      "step": 28154
    },
    {
      "epoch": 0.8213723087694731,
      "grad_norm": 0.9320926407715043,
      "learning_rate": 8.136984757584243e-07,
      "loss": 0.1249,
      "step": 28155
    },
    {
      "epoch": 0.8214014820001166,
      "grad_norm": 0.8407397356659568,
      "learning_rate": 8.134401653143126e-07,
      "loss": 0.1363,
      "step": 28156
    },
    {
      "epoch": 0.8214306552307603,
      "grad_norm": 0.9340377620301075,
      "learning_rate": 8.13181892246806e-07,
      "loss": 0.0888,
      "step": 28157
    },
    {
      "epoch": 0.8214598284614039,
      "grad_norm": 0.688415117109616,
      "learning_rate": 8.129236565582121e-07,
      "loss": 0.1216,
      "step": 28158
    },
    {
      "epoch": 0.8214890016920474,
      "grad_norm": 1.2667062439300032,
      "learning_rate": 8.12665458250837e-07,
      "loss": 0.1067,
      "step": 28159
    },
    {
      "epoch": 0.821518174922691,
      "grad_norm": 0.9749497627623536,
      "learning_rate": 8.124072973269859e-07,
      "loss": 0.1164,
      "step": 28160
    },
    {
      "epoch": 0.8215473481533345,
      "grad_norm": 0.893241137790369,
      "learning_rate": 8.121491737889609e-07,
      "loss": 0.1099,
      "step": 28161
    },
    {
      "epoch": 0.8215765213839781,
      "grad_norm": 0.7536120269677897,
      "learning_rate": 8.118910876390701e-07,
      "loss": 0.1173,
      "step": 28162
    },
    {
      "epoch": 0.8216056946146216,
      "grad_norm": 0.7341619083776675,
      "learning_rate": 8.116330388796146e-07,
      "loss": 0.1106,
      "step": 28163
    },
    {
      "epoch": 0.8216348678452652,
      "grad_norm": 0.7074439101407712,
      "learning_rate": 8.113750275129001e-07,
      "loss": 0.1123,
      "step": 28164
    },
    {
      "epoch": 0.8216640410759087,
      "grad_norm": 0.7957780752170743,
      "learning_rate": 8.111170535412288e-07,
      "loss": 0.1048,
      "step": 28165
    },
    {
      "epoch": 0.8216932143065523,
      "grad_norm": 1.0663470356321327,
      "learning_rate": 8.108591169669055e-07,
      "loss": 0.1067,
      "step": 28166
    },
    {
      "epoch": 0.8217223875371958,
      "grad_norm": 0.8461527373718306,
      "learning_rate": 8.106012177922323e-07,
      "loss": 0.1075,
      "step": 28167
    },
    {
      "epoch": 0.8217515607678394,
      "grad_norm": 0.7391384934802716,
      "learning_rate": 8.103433560195095e-07,
      "loss": 0.0978,
      "step": 28168
    },
    {
      "epoch": 0.821780733998483,
      "grad_norm": 0.9557676295109226,
      "learning_rate": 8.100855316510414e-07,
      "loss": 0.1519,
      "step": 28169
    },
    {
      "epoch": 0.8218099072291265,
      "grad_norm": 0.8545413239568856,
      "learning_rate": 8.098277446891306e-07,
      "loss": 0.1335,
      "step": 28170
    },
    {
      "epoch": 0.8218390804597702,
      "grad_norm": 0.8413890070645333,
      "learning_rate": 8.095699951360775e-07,
      "loss": 0.1153,
      "step": 28171
    },
    {
      "epoch": 0.8218682536904137,
      "grad_norm": 0.8415007361696992,
      "learning_rate": 8.093122829941824e-07,
      "loss": 0.1111,
      "step": 28172
    },
    {
      "epoch": 0.8218974269210573,
      "grad_norm": 0.8162786862022892,
      "learning_rate": 8.090546082657475e-07,
      "loss": 0.1084,
      "step": 28173
    },
    {
      "epoch": 0.8219266001517008,
      "grad_norm": 0.8794433969944587,
      "learning_rate": 8.087969709530724e-07,
      "loss": 0.0986,
      "step": 28174
    },
    {
      "epoch": 0.8219557733823444,
      "grad_norm": 0.6406696687558314,
      "learning_rate": 8.085393710584555e-07,
      "loss": 0.1084,
      "step": 28175
    },
    {
      "epoch": 0.8219849466129879,
      "grad_norm": 0.8138758942418084,
      "learning_rate": 8.082818085842009e-07,
      "loss": 0.1007,
      "step": 28176
    },
    {
      "epoch": 0.8220141198436315,
      "grad_norm": 0.7493458640073105,
      "learning_rate": 8.080242835326052e-07,
      "loss": 0.1064,
      "step": 28177
    },
    {
      "epoch": 0.822043293074275,
      "grad_norm": 0.8126863787995176,
      "learning_rate": 8.077667959059671e-07,
      "loss": 0.1132,
      "step": 28178
    },
    {
      "epoch": 0.8220724663049186,
      "grad_norm": 1.1214392039574879,
      "learning_rate": 8.075093457065875e-07,
      "loss": 0.0934,
      "step": 28179
    },
    {
      "epoch": 0.8221016395355621,
      "grad_norm": 0.7769434290556544,
      "learning_rate": 8.072519329367634e-07,
      "loss": 0.1237,
      "step": 28180
    },
    {
      "epoch": 0.8221308127662057,
      "grad_norm": 1.1882184696955005,
      "learning_rate": 8.069945575987925e-07,
      "loss": 0.1196,
      "step": 28181
    },
    {
      "epoch": 0.8221599859968493,
      "grad_norm": 0.8913308246801798,
      "learning_rate": 8.06737219694973e-07,
      "loss": 0.0993,
      "step": 28182
    },
    {
      "epoch": 0.8221891592274928,
      "grad_norm": 0.7085190988101954,
      "learning_rate": 8.064799192276035e-07,
      "loss": 0.1173,
      "step": 28183
    },
    {
      "epoch": 0.8222183324581365,
      "grad_norm": 0.7946797222797932,
      "learning_rate": 8.062226561989806e-07,
      "loss": 0.1224,
      "step": 28184
    },
    {
      "epoch": 0.82224750568878,
      "grad_norm": 0.9038636203393499,
      "learning_rate": 8.05965430611399e-07,
      "loss": 0.0989,
      "step": 28185
    },
    {
      "epoch": 0.8222766789194236,
      "grad_norm": 0.8029605503880207,
      "learning_rate": 8.057082424671586e-07,
      "loss": 0.1353,
      "step": 28186
    },
    {
      "epoch": 0.8223058521500671,
      "grad_norm": 1.155784660196701,
      "learning_rate": 8.05451091768552e-07,
      "loss": 0.1096,
      "step": 28187
    },
    {
      "epoch": 0.8223350253807107,
      "grad_norm": 0.9439379323722944,
      "learning_rate": 8.051939785178769e-07,
      "loss": 0.121,
      "step": 28188
    },
    {
      "epoch": 0.8223641986113542,
      "grad_norm": 0.8646942937179766,
      "learning_rate": 8.049369027174303e-07,
      "loss": 0.1036,
      "step": 28189
    },
    {
      "epoch": 0.8223933718419978,
      "grad_norm": 0.8208873320004823,
      "learning_rate": 8.046798643695047e-07,
      "loss": 0.1146,
      "step": 28190
    },
    {
      "epoch": 0.8224225450726413,
      "grad_norm": 0.8275727422519764,
      "learning_rate": 8.044228634763951e-07,
      "loss": 0.1333,
      "step": 28191
    },
    {
      "epoch": 0.8224517183032849,
      "grad_norm": 0.9578165442255402,
      "learning_rate": 8.041659000403979e-07,
      "loss": 0.0911,
      "step": 28192
    },
    {
      "epoch": 0.8224808915339284,
      "grad_norm": 0.9774819388114641,
      "learning_rate": 8.039089740638045e-07,
      "loss": 0.1077,
      "step": 28193
    },
    {
      "epoch": 0.822510064764572,
      "grad_norm": 1.1650693290225729,
      "learning_rate": 8.036520855489116e-07,
      "loss": 0.1145,
      "step": 28194
    },
    {
      "epoch": 0.8225392379952156,
      "grad_norm": 0.7307145376854071,
      "learning_rate": 8.033952344980095e-07,
      "loss": 0.1182,
      "step": 28195
    },
    {
      "epoch": 0.8225684112258591,
      "grad_norm": 1.01455580386506,
      "learning_rate": 8.031384209133941e-07,
      "loss": 0.1035,
      "step": 28196
    },
    {
      "epoch": 0.8225975844565027,
      "grad_norm": 0.975275404336792,
      "learning_rate": 8.028816447973575e-07,
      "loss": 0.1207,
      "step": 28197
    },
    {
      "epoch": 0.8226267576871463,
      "grad_norm": 0.8215151553917331,
      "learning_rate": 8.026249061521901e-07,
      "loss": 0.1081,
      "step": 28198
    },
    {
      "epoch": 0.8226559309177899,
      "grad_norm": 0.9120500380292472,
      "learning_rate": 8.023682049801857e-07,
      "loss": 0.14,
      "step": 28199
    },
    {
      "epoch": 0.8226851041484334,
      "grad_norm": 0.932260325372986,
      "learning_rate": 8.021115412836372e-07,
      "loss": 0.0803,
      "step": 28200
    },
    {
      "epoch": 0.822714277379077,
      "grad_norm": 0.7206661322297229,
      "learning_rate": 8.018549150648342e-07,
      "loss": 0.1321,
      "step": 28201
    },
    {
      "epoch": 0.8227434506097205,
      "grad_norm": 0.9936279788538864,
      "learning_rate": 8.015983263260679e-07,
      "loss": 0.1029,
      "step": 28202
    },
    {
      "epoch": 0.8227726238403641,
      "grad_norm": 1.2841456857653297,
      "learning_rate": 8.013417750696301e-07,
      "loss": 0.1055,
      "step": 28203
    },
    {
      "epoch": 0.8228017970710076,
      "grad_norm": 0.9865292061306443,
      "learning_rate": 8.010852612978109e-07,
      "loss": 0.1093,
      "step": 28204
    },
    {
      "epoch": 0.8228309703016512,
      "grad_norm": 0.8830803277538544,
      "learning_rate": 8.008287850128976e-07,
      "loss": 0.1041,
      "step": 28205
    },
    {
      "epoch": 0.8228601435322948,
      "grad_norm": 0.8307317317359434,
      "learning_rate": 8.005723462171849e-07,
      "loss": 0.1225,
      "step": 28206
    },
    {
      "epoch": 0.8228893167629383,
      "grad_norm": 0.8542356041045629,
      "learning_rate": 8.0031594491296e-07,
      "loss": 0.0983,
      "step": 28207
    },
    {
      "epoch": 0.8229184899935819,
      "grad_norm": 1.0179093888808328,
      "learning_rate": 8.000595811025103e-07,
      "loss": 0.1079,
      "step": 28208
    },
    {
      "epoch": 0.8229476632242254,
      "grad_norm": 0.8032717625037453,
      "learning_rate": 7.998032547881274e-07,
      "loss": 0.1096,
      "step": 28209
    },
    {
      "epoch": 0.822976836454869,
      "grad_norm": 1.074792740723042,
      "learning_rate": 7.995469659720984e-07,
      "loss": 0.1033,
      "step": 28210
    },
    {
      "epoch": 0.8230060096855125,
      "grad_norm": 0.8809499433955115,
      "learning_rate": 7.992907146567103e-07,
      "loss": 0.1106,
      "step": 28211
    },
    {
      "epoch": 0.8230351829161562,
      "grad_norm": 1.1853681386942079,
      "learning_rate": 7.990345008442518e-07,
      "loss": 0.1166,
      "step": 28212
    },
    {
      "epoch": 0.8230643561467997,
      "grad_norm": 1.0374941834845035,
      "learning_rate": 7.987783245370118e-07,
      "loss": 0.1157,
      "step": 28213
    },
    {
      "epoch": 0.8230935293774433,
      "grad_norm": 1.0796943034117419,
      "learning_rate": 7.985221857372754e-07,
      "loss": 0.12,
      "step": 28214
    },
    {
      "epoch": 0.8231227026080868,
      "grad_norm": 0.7717978272424924,
      "learning_rate": 7.982660844473295e-07,
      "loss": 0.0969,
      "step": 28215
    },
    {
      "epoch": 0.8231518758387304,
      "grad_norm": 0.8402358338303498,
      "learning_rate": 7.980100206694613e-07,
      "loss": 0.1374,
      "step": 28216
    },
    {
      "epoch": 0.823181049069374,
      "grad_norm": 0.8793454759852466,
      "learning_rate": 7.977539944059559e-07,
      "loss": 0.0878,
      "step": 28217
    },
    {
      "epoch": 0.8232102223000175,
      "grad_norm": 0.9858176212897587,
      "learning_rate": 7.974980056590997e-07,
      "loss": 0.1203,
      "step": 28218
    },
    {
      "epoch": 0.8232393955306611,
      "grad_norm": 0.9134492697793195,
      "learning_rate": 7.972420544311793e-07,
      "loss": 0.1054,
      "step": 28219
    },
    {
      "epoch": 0.8232685687613046,
      "grad_norm": 0.9446679430183189,
      "learning_rate": 7.969861407244784e-07,
      "loss": 0.12,
      "step": 28220
    },
    {
      "epoch": 0.8232977419919482,
      "grad_norm": 0.9735403902127442,
      "learning_rate": 7.967302645412811e-07,
      "loss": 0.1044,
      "step": 28221
    },
    {
      "epoch": 0.8233269152225917,
      "grad_norm": 0.8884458166026682,
      "learning_rate": 7.96474425883873e-07,
      "loss": 0.0979,
      "step": 28222
    },
    {
      "epoch": 0.8233560884532353,
      "grad_norm": 0.7660160862184069,
      "learning_rate": 7.962186247545373e-07,
      "loss": 0.117,
      "step": 28223
    },
    {
      "epoch": 0.8233852616838788,
      "grad_norm": 0.9650815747394327,
      "learning_rate": 7.959628611555592e-07,
      "loss": 0.1147,
      "step": 28224
    },
    {
      "epoch": 0.8234144349145225,
      "grad_norm": 1.0124418689911145,
      "learning_rate": 7.957071350892198e-07,
      "loss": 0.1039,
      "step": 28225
    },
    {
      "epoch": 0.823443608145166,
      "grad_norm": 0.8996514207594016,
      "learning_rate": 7.954514465578044e-07,
      "loss": 0.1114,
      "step": 28226
    },
    {
      "epoch": 0.8234727813758096,
      "grad_norm": 0.8903856104697206,
      "learning_rate": 7.951957955635953e-07,
      "loss": 0.1135,
      "step": 28227
    },
    {
      "epoch": 0.8235019546064531,
      "grad_norm": 0.8601415069949574,
      "learning_rate": 7.949401821088726e-07,
      "loss": 0.1242,
      "step": 28228
    },
    {
      "epoch": 0.8235311278370967,
      "grad_norm": 0.9129479336715849,
      "learning_rate": 7.946846061959207e-07,
      "loss": 0.1315,
      "step": 28229
    },
    {
      "epoch": 0.8235603010677403,
      "grad_norm": 1.0827694069197806,
      "learning_rate": 7.944290678270216e-07,
      "loss": 0.1142,
      "step": 28230
    },
    {
      "epoch": 0.8235894742983838,
      "grad_norm": 0.8177956924191697,
      "learning_rate": 7.941735670044559e-07,
      "loss": 0.1044,
      "step": 28231
    },
    {
      "epoch": 0.8236186475290274,
      "grad_norm": 0.9078116440610156,
      "learning_rate": 7.939181037305033e-07,
      "loss": 0.1291,
      "step": 28232
    },
    {
      "epoch": 0.8236478207596709,
      "grad_norm": 0.7443518641912515,
      "learning_rate": 7.936626780074475e-07,
      "loss": 0.0973,
      "step": 28233
    },
    {
      "epoch": 0.8236769939903145,
      "grad_norm": 0.8551287825765357,
      "learning_rate": 7.934072898375667e-07,
      "loss": 0.1121,
      "step": 28234
    },
    {
      "epoch": 0.823706167220958,
      "grad_norm": 0.8493242321268851,
      "learning_rate": 7.93151939223139e-07,
      "loss": 0.1203,
      "step": 28235
    },
    {
      "epoch": 0.8237353404516016,
      "grad_norm": 0.6591109536062264,
      "learning_rate": 7.92896626166449e-07,
      "loss": 0.1245,
      "step": 28236
    },
    {
      "epoch": 0.8237645136822451,
      "grad_norm": 0.7665058536724771,
      "learning_rate": 7.926413506697733e-07,
      "loss": 0.1158,
      "step": 28237
    },
    {
      "epoch": 0.8237936869128887,
      "grad_norm": 0.8320924718203171,
      "learning_rate": 7.923861127353905e-07,
      "loss": 0.0953,
      "step": 28238
    },
    {
      "epoch": 0.8238228601435323,
      "grad_norm": 1.051981188820652,
      "learning_rate": 7.921309123655812e-07,
      "loss": 0.1072,
      "step": 28239
    },
    {
      "epoch": 0.8238520333741759,
      "grad_norm": 0.7169976623296156,
      "learning_rate": 7.918757495626228e-07,
      "loss": 0.1117,
      "step": 28240
    },
    {
      "epoch": 0.8238812066048194,
      "grad_norm": 0.8116336118984168,
      "learning_rate": 7.916206243287916e-07,
      "loss": 0.1129,
      "step": 28241
    },
    {
      "epoch": 0.823910379835463,
      "grad_norm": 0.9123025125674905,
      "learning_rate": 7.913655366663669e-07,
      "loss": 0.0958,
      "step": 28242
    },
    {
      "epoch": 0.8239395530661066,
      "grad_norm": 0.8207178203459888,
      "learning_rate": 7.91110486577627e-07,
      "loss": 0.1152,
      "step": 28243
    },
    {
      "epoch": 0.8239687262967501,
      "grad_norm": 0.7823137649557989,
      "learning_rate": 7.908554740648483e-07,
      "loss": 0.0939,
      "step": 28244
    },
    {
      "epoch": 0.8239978995273937,
      "grad_norm": 0.9079250673663318,
      "learning_rate": 7.906004991303057e-07,
      "loss": 0.1139,
      "step": 28245
    },
    {
      "epoch": 0.8240270727580372,
      "grad_norm": 0.8535080065364986,
      "learning_rate": 7.903455617762785e-07,
      "loss": 0.1037,
      "step": 28246
    },
    {
      "epoch": 0.8240562459886808,
      "grad_norm": 0.7694194856557461,
      "learning_rate": 7.900906620050397e-07,
      "loss": 0.126,
      "step": 28247
    },
    {
      "epoch": 0.8240854192193243,
      "grad_norm": 1.024056981269128,
      "learning_rate": 7.898357998188666e-07,
      "loss": 0.1345,
      "step": 28248
    },
    {
      "epoch": 0.8241145924499679,
      "grad_norm": 1.157752385255544,
      "learning_rate": 7.895809752200356e-07,
      "loss": 0.1222,
      "step": 28249
    },
    {
      "epoch": 0.8241437656806114,
      "grad_norm": 1.0403615699776128,
      "learning_rate": 7.893261882108205e-07,
      "loss": 0.0903,
      "step": 28250
    },
    {
      "epoch": 0.824172938911255,
      "grad_norm": 0.8346576357266661,
      "learning_rate": 7.89071438793495e-07,
      "loss": 0.1283,
      "step": 28251
    },
    {
      "epoch": 0.8242021121418986,
      "grad_norm": 1.0467219620973778,
      "learning_rate": 7.888167269703339e-07,
      "loss": 0.1292,
      "step": 28252
    },
    {
      "epoch": 0.8242312853725422,
      "grad_norm": 0.877842756180668,
      "learning_rate": 7.885620527436133e-07,
      "loss": 0.0997,
      "step": 28253
    },
    {
      "epoch": 0.8242604586031858,
      "grad_norm": 0.9062617894548268,
      "learning_rate": 7.883074161156056e-07,
      "loss": 0.0874,
      "step": 28254
    },
    {
      "epoch": 0.8242896318338293,
      "grad_norm": 0.9725653219617851,
      "learning_rate": 7.880528170885826e-07,
      "loss": 0.095,
      "step": 28255
    },
    {
      "epoch": 0.8243188050644729,
      "grad_norm": 1.1414445368593333,
      "learning_rate": 7.877982556648195e-07,
      "loss": 0.1184,
      "step": 28256
    },
    {
      "epoch": 0.8243479782951164,
      "grad_norm": 1.1247366079705248,
      "learning_rate": 7.875437318465884e-07,
      "loss": 0.105,
      "step": 28257
    },
    {
      "epoch": 0.82437715152576,
      "grad_norm": 0.7926035231731909,
      "learning_rate": 7.872892456361597e-07,
      "loss": 0.111,
      "step": 28258
    },
    {
      "epoch": 0.8244063247564035,
      "grad_norm": 0.8265197543925429,
      "learning_rate": 7.870347970358072e-07,
      "loss": 0.1112,
      "step": 28259
    },
    {
      "epoch": 0.8244354979870471,
      "grad_norm": 0.977496687454048,
      "learning_rate": 7.867803860478035e-07,
      "loss": 0.1006,
      "step": 28260
    },
    {
      "epoch": 0.8244646712176906,
      "grad_norm": 0.8298812136312068,
      "learning_rate": 7.86526012674419e-07,
      "loss": 0.0981,
      "step": 28261
    },
    {
      "epoch": 0.8244938444483342,
      "grad_norm": 0.923303082482779,
      "learning_rate": 7.86271676917923e-07,
      "loss": 0.1067,
      "step": 28262
    },
    {
      "epoch": 0.8245230176789777,
      "grad_norm": 0.8738865679941992,
      "learning_rate": 7.860173787805886e-07,
      "loss": 0.1028,
      "step": 28263
    },
    {
      "epoch": 0.8245521909096213,
      "grad_norm": 1.0582842785978028,
      "learning_rate": 7.857631182646835e-07,
      "loss": 0.1153,
      "step": 28264
    },
    {
      "epoch": 0.8245813641402648,
      "grad_norm": 0.8562993519550272,
      "learning_rate": 7.855088953724799e-07,
      "loss": 0.1219,
      "step": 28265
    },
    {
      "epoch": 0.8246105373709085,
      "grad_norm": 0.9664005471330721,
      "learning_rate": 7.85254710106248e-07,
      "loss": 0.1184,
      "step": 28266
    },
    {
      "epoch": 0.8246397106015521,
      "grad_norm": 0.9763949671009416,
      "learning_rate": 7.850005624682555e-07,
      "loss": 0.0945,
      "step": 28267
    },
    {
      "epoch": 0.8246688838321956,
      "grad_norm": 0.8966174290579905,
      "learning_rate": 7.847464524607712e-07,
      "loss": 0.1308,
      "step": 28268
    },
    {
      "epoch": 0.8246980570628392,
      "grad_norm": 0.8161974105240922,
      "learning_rate": 7.844923800860649e-07,
      "loss": 0.1297,
      "step": 28269
    },
    {
      "epoch": 0.8247272302934827,
      "grad_norm": 0.817094311582936,
      "learning_rate": 7.842383453464037e-07,
      "loss": 0.1091,
      "step": 28270
    },
    {
      "epoch": 0.8247564035241263,
      "grad_norm": 1.1028670740452124,
      "learning_rate": 7.839843482440568e-07,
      "loss": 0.0993,
      "step": 28271
    },
    {
      "epoch": 0.8247855767547698,
      "grad_norm": 0.9547552172157876,
      "learning_rate": 7.837303887812903e-07,
      "loss": 0.1278,
      "step": 28272
    },
    {
      "epoch": 0.8248147499854134,
      "grad_norm": 0.8519816492596898,
      "learning_rate": 7.834764669603733e-07,
      "loss": 0.1466,
      "step": 28273
    },
    {
      "epoch": 0.8248439232160569,
      "grad_norm": 1.3415020570432128,
      "learning_rate": 7.832225827835721e-07,
      "loss": 0.1201,
      "step": 28274
    },
    {
      "epoch": 0.8248730964467005,
      "grad_norm": 1.135995784586104,
      "learning_rate": 7.829687362531518e-07,
      "loss": 0.1079,
      "step": 28275
    },
    {
      "epoch": 0.824902269677344,
      "grad_norm": 0.7050498967393973,
      "learning_rate": 7.827149273713797e-07,
      "loss": 0.0952,
      "step": 28276
    },
    {
      "epoch": 0.8249314429079876,
      "grad_norm": 0.7523177070831721,
      "learning_rate": 7.824611561405238e-07,
      "loss": 0.1256,
      "step": 28277
    },
    {
      "epoch": 0.8249606161386311,
      "grad_norm": 0.7396224695897348,
      "learning_rate": 7.822074225628462e-07,
      "loss": 0.1192,
      "step": 28278
    },
    {
      "epoch": 0.8249897893692748,
      "grad_norm": 1.1024753633581628,
      "learning_rate": 7.819537266406152e-07,
      "loss": 0.1076,
      "step": 28279
    },
    {
      "epoch": 0.8250189625999184,
      "grad_norm": 0.8183379302789136,
      "learning_rate": 7.81700068376094e-07,
      "loss": 0.1262,
      "step": 28280
    },
    {
      "epoch": 0.8250481358305619,
      "grad_norm": 1.1095814611202723,
      "learning_rate": 7.814464477715466e-07,
      "loss": 0.1288,
      "step": 28281
    },
    {
      "epoch": 0.8250773090612055,
      "grad_norm": 0.8442212664368564,
      "learning_rate": 7.811928648292389e-07,
      "loss": 0.1186,
      "step": 28282
    },
    {
      "epoch": 0.825106482291849,
      "grad_norm": 0.8772915047651296,
      "learning_rate": 7.809393195514348e-07,
      "loss": 0.125,
      "step": 28283
    },
    {
      "epoch": 0.8251356555224926,
      "grad_norm": 0.813365149645741,
      "learning_rate": 7.806858119403976e-07,
      "loss": 0.0965,
      "step": 28284
    },
    {
      "epoch": 0.8251648287531361,
      "grad_norm": 0.9009509604505839,
      "learning_rate": 7.804323419983884e-07,
      "loss": 0.0976,
      "step": 28285
    },
    {
      "epoch": 0.8251940019837797,
      "grad_norm": 0.9275007492531333,
      "learning_rate": 7.801789097276735e-07,
      "loss": 0.0937,
      "step": 28286
    },
    {
      "epoch": 0.8252231752144232,
      "grad_norm": 0.9853359204091033,
      "learning_rate": 7.799255151305141e-07,
      "loss": 0.1149,
      "step": 28287
    },
    {
      "epoch": 0.8252523484450668,
      "grad_norm": 0.8139979900434158,
      "learning_rate": 7.79672158209171e-07,
      "loss": 0.1243,
      "step": 28288
    },
    {
      "epoch": 0.8252815216757103,
      "grad_norm": 1.0410753243598199,
      "learning_rate": 7.794188389659074e-07,
      "loss": 0.1094,
      "step": 28289
    },
    {
      "epoch": 0.8253106949063539,
      "grad_norm": 0.9731979014587019,
      "learning_rate": 7.791655574029866e-07,
      "loss": 0.1214,
      "step": 28290
    },
    {
      "epoch": 0.8253398681369974,
      "grad_norm": 0.7663712521466084,
      "learning_rate": 7.789123135226672e-07,
      "loss": 0.102,
      "step": 28291
    },
    {
      "epoch": 0.825369041367641,
      "grad_norm": 0.9390017754213866,
      "learning_rate": 7.786591073272104e-07,
      "loss": 0.1176,
      "step": 28292
    },
    {
      "epoch": 0.8253982145982847,
      "grad_norm": 0.8612843747806949,
      "learning_rate": 7.784059388188786e-07,
      "loss": 0.1048,
      "step": 28293
    },
    {
      "epoch": 0.8254273878289282,
      "grad_norm": 0.8004162637113328,
      "learning_rate": 7.78152807999929e-07,
      "loss": 0.1109,
      "step": 28294
    },
    {
      "epoch": 0.8254565610595718,
      "grad_norm": 0.9804348695270982,
      "learning_rate": 7.778997148726236e-07,
      "loss": 0.1222,
      "step": 28295
    },
    {
      "epoch": 0.8254857342902153,
      "grad_norm": 1.0311878124273608,
      "learning_rate": 7.776466594392229e-07,
      "loss": 0.1052,
      "step": 28296
    },
    {
      "epoch": 0.8255149075208589,
      "grad_norm": 0.8249509708880214,
      "learning_rate": 7.773936417019851e-07,
      "loss": 0.1189,
      "step": 28297
    },
    {
      "epoch": 0.8255440807515024,
      "grad_norm": 0.8672929515206617,
      "learning_rate": 7.771406616631677e-07,
      "loss": 0.1211,
      "step": 28298
    },
    {
      "epoch": 0.825573253982146,
      "grad_norm": 0.8216202561782774,
      "learning_rate": 7.768877193250313e-07,
      "loss": 0.1076,
      "step": 28299
    },
    {
      "epoch": 0.8256024272127895,
      "grad_norm": 1.0132895677276117,
      "learning_rate": 7.766348146898317e-07,
      "loss": 0.0942,
      "step": 28300
    },
    {
      "epoch": 0.8256316004434331,
      "grad_norm": 0.7401115594171007,
      "learning_rate": 7.7638194775983e-07,
      "loss": 0.1026,
      "step": 28301
    },
    {
      "epoch": 0.8256607736740766,
      "grad_norm": 1.1460493593402705,
      "learning_rate": 7.761291185372804e-07,
      "loss": 0.1073,
      "step": 28302
    },
    {
      "epoch": 0.8256899469047202,
      "grad_norm": 0.93550391849133,
      "learning_rate": 7.758763270244435e-07,
      "loss": 0.1057,
      "step": 28303
    },
    {
      "epoch": 0.8257191201353637,
      "grad_norm": 0.68782910234542,
      "learning_rate": 7.756235732235739e-07,
      "loss": 0.1132,
      "step": 28304
    },
    {
      "epoch": 0.8257482933660073,
      "grad_norm": 0.7713530952904493,
      "learning_rate": 7.753708571369273e-07,
      "loss": 0.1097,
      "step": 28305
    },
    {
      "epoch": 0.825777466596651,
      "grad_norm": 3.2181783992514017,
      "learning_rate": 7.751181787667616e-07,
      "loss": 0.1162,
      "step": 28306
    },
    {
      "epoch": 0.8258066398272945,
      "grad_norm": 0.9646956568179892,
      "learning_rate": 7.748655381153331e-07,
      "loss": 0.1285,
      "step": 28307
    },
    {
      "epoch": 0.8258358130579381,
      "grad_norm": 0.7694065702105926,
      "learning_rate": 7.746129351848957e-07,
      "loss": 0.1013,
      "step": 28308
    },
    {
      "epoch": 0.8258649862885816,
      "grad_norm": 0.9747134014101453,
      "learning_rate": 7.743603699777064e-07,
      "loss": 0.0955,
      "step": 28309
    },
    {
      "epoch": 0.8258941595192252,
      "grad_norm": 0.9093358083483087,
      "learning_rate": 7.741078424960186e-07,
      "loss": 0.1085,
      "step": 28310
    },
    {
      "epoch": 0.8259233327498687,
      "grad_norm": 0.9312594654961549,
      "learning_rate": 7.738553527420861e-07,
      "loss": 0.1365,
      "step": 28311
    },
    {
      "epoch": 0.8259525059805123,
      "grad_norm": 0.9024985437063421,
      "learning_rate": 7.736029007181644e-07,
      "loss": 0.1296,
      "step": 28312
    },
    {
      "epoch": 0.8259816792111558,
      "grad_norm": 0.676235094909886,
      "learning_rate": 7.733504864265079e-07,
      "loss": 0.1071,
      "step": 28313
    },
    {
      "epoch": 0.8260108524417994,
      "grad_norm": 1.0936149343330415,
      "learning_rate": 7.730981098693696e-07,
      "loss": 0.1107,
      "step": 28314
    },
    {
      "epoch": 0.826040025672443,
      "grad_norm": 0.7801232048413431,
      "learning_rate": 7.728457710490011e-07,
      "loss": 0.1191,
      "step": 28315
    },
    {
      "epoch": 0.8260691989030865,
      "grad_norm": 0.8271271730496492,
      "learning_rate": 7.725934699676574e-07,
      "loss": 0.1175,
      "step": 28316
    },
    {
      "epoch": 0.82609837213373,
      "grad_norm": 0.832033601569443,
      "learning_rate": 7.723412066275904e-07,
      "loss": 0.114,
      "step": 28317
    },
    {
      "epoch": 0.8261275453643736,
      "grad_norm": 0.7192394984985441,
      "learning_rate": 7.720889810310506e-07,
      "loss": 0.1147,
      "step": 28318
    },
    {
      "epoch": 0.8261567185950172,
      "grad_norm": 0.8398302146468973,
      "learning_rate": 7.718367931802906e-07,
      "loss": 0.1047,
      "step": 28319
    },
    {
      "epoch": 0.8261858918256608,
      "grad_norm": 0.8094270219011735,
      "learning_rate": 7.715846430775642e-07,
      "loss": 0.0997,
      "step": 28320
    },
    {
      "epoch": 0.8262150650563044,
      "grad_norm": 0.8033581687404708,
      "learning_rate": 7.713325307251201e-07,
      "loss": 0.1192,
      "step": 28321
    },
    {
      "epoch": 0.8262442382869479,
      "grad_norm": 0.7806039180298726,
      "learning_rate": 7.710804561252089e-07,
      "loss": 0.1096,
      "step": 28322
    },
    {
      "epoch": 0.8262734115175915,
      "grad_norm": 0.713031695630493,
      "learning_rate": 7.70828419280083e-07,
      "loss": 0.0981,
      "step": 28323
    },
    {
      "epoch": 0.826302584748235,
      "grad_norm": 0.9534880108445638,
      "learning_rate": 7.705764201919902e-07,
      "loss": 0.119,
      "step": 28324
    },
    {
      "epoch": 0.8263317579788786,
      "grad_norm": 0.9174454231049807,
      "learning_rate": 7.70324458863182e-07,
      "loss": 0.1009,
      "step": 28325
    },
    {
      "epoch": 0.8263609312095221,
      "grad_norm": 0.7142890473135709,
      "learning_rate": 7.700725352959076e-07,
      "loss": 0.1174,
      "step": 28326
    },
    {
      "epoch": 0.8263901044401657,
      "grad_norm": 0.8562145201052048,
      "learning_rate": 7.698206494924165e-07,
      "loss": 0.1552,
      "step": 28327
    },
    {
      "epoch": 0.8264192776708092,
      "grad_norm": 0.7943972321512707,
      "learning_rate": 7.695688014549552e-07,
      "loss": 0.1111,
      "step": 28328
    },
    {
      "epoch": 0.8264484509014528,
      "grad_norm": 0.8320070793705426,
      "learning_rate": 7.693169911857751e-07,
      "loss": 0.11,
      "step": 28329
    },
    {
      "epoch": 0.8264776241320964,
      "grad_norm": 0.6725606630061371,
      "learning_rate": 7.690652186871217e-07,
      "loss": 0.101,
      "step": 28330
    },
    {
      "epoch": 0.8265067973627399,
      "grad_norm": 1.0683871223509236,
      "learning_rate": 7.688134839612454e-07,
      "loss": 0.1383,
      "step": 28331
    },
    {
      "epoch": 0.8265359705933835,
      "grad_norm": 0.8267029761207979,
      "learning_rate": 7.685617870103912e-07,
      "loss": 0.0912,
      "step": 28332
    },
    {
      "epoch": 0.8265651438240271,
      "grad_norm": 0.7858096230308077,
      "learning_rate": 7.683101278368077e-07,
      "loss": 0.1146,
      "step": 28333
    },
    {
      "epoch": 0.8265943170546707,
      "grad_norm": 0.8060557571466852,
      "learning_rate": 7.68058506442742e-07,
      "loss": 0.1435,
      "step": 28334
    },
    {
      "epoch": 0.8266234902853142,
      "grad_norm": 0.9064266915483007,
      "learning_rate": 7.67806922830438e-07,
      "loss": 0.0946,
      "step": 28335
    },
    {
      "epoch": 0.8266526635159578,
      "grad_norm": 1.0676803527842267,
      "learning_rate": 7.675553770021438e-07,
      "loss": 0.1296,
      "step": 28336
    },
    {
      "epoch": 0.8266818367466013,
      "grad_norm": 0.812890709765754,
      "learning_rate": 7.673038689601059e-07,
      "loss": 0.1345,
      "step": 28337
    },
    {
      "epoch": 0.8267110099772449,
      "grad_norm": 0.85359666285905,
      "learning_rate": 7.670523987065675e-07,
      "loss": 0.1196,
      "step": 28338
    },
    {
      "epoch": 0.8267401832078884,
      "grad_norm": 1.0561929947567026,
      "learning_rate": 7.668009662437759e-07,
      "loss": 0.0988,
      "step": 28339
    },
    {
      "epoch": 0.826769356438532,
      "grad_norm": 0.8477233441117852,
      "learning_rate": 7.665495715739745e-07,
      "loss": 0.1302,
      "step": 28340
    },
    {
      "epoch": 0.8267985296691756,
      "grad_norm": 1.1954087286146777,
      "learning_rate": 7.662982146994074e-07,
      "loss": 0.1227,
      "step": 28341
    },
    {
      "epoch": 0.8268277028998191,
      "grad_norm": 0.7946048547454742,
      "learning_rate": 7.660468956223188e-07,
      "loss": 0.1163,
      "step": 28342
    },
    {
      "epoch": 0.8268568761304627,
      "grad_norm": 0.8484589925491341,
      "learning_rate": 7.657956143449535e-07,
      "loss": 0.1123,
      "step": 28343
    },
    {
      "epoch": 0.8268860493611062,
      "grad_norm": 0.9025290487939114,
      "learning_rate": 7.655443708695548e-07,
      "loss": 0.1266,
      "step": 28344
    },
    {
      "epoch": 0.8269152225917498,
      "grad_norm": 1.1840698900904802,
      "learning_rate": 7.652931651983636e-07,
      "loss": 0.1127,
      "step": 28345
    },
    {
      "epoch": 0.8269443958223933,
      "grad_norm": 0.7510775826390401,
      "learning_rate": 7.650419973336254e-07,
      "loss": 0.1074,
      "step": 28346
    },
    {
      "epoch": 0.826973569053037,
      "grad_norm": 1.043439873566343,
      "learning_rate": 7.647908672775817e-07,
      "loss": 0.1289,
      "step": 28347
    },
    {
      "epoch": 0.8270027422836805,
      "grad_norm": 0.8373814056569358,
      "learning_rate": 7.645397750324723e-07,
      "loss": 0.115,
      "step": 28348
    },
    {
      "epoch": 0.8270319155143241,
      "grad_norm": 1.0559775862677445,
      "learning_rate": 7.642887206005412e-07,
      "loss": 0.1226,
      "step": 28349
    },
    {
      "epoch": 0.8270610887449676,
      "grad_norm": 0.7475334772010874,
      "learning_rate": 7.640377039840302e-07,
      "loss": 0.0901,
      "step": 28350
    },
    {
      "epoch": 0.8270902619756112,
      "grad_norm": 1.031190810352983,
      "learning_rate": 7.637867251851794e-07,
      "loss": 0.1258,
      "step": 28351
    },
    {
      "epoch": 0.8271194352062547,
      "grad_norm": 0.7248062253201605,
      "learning_rate": 7.635357842062279e-07,
      "loss": 0.0978,
      "step": 28352
    },
    {
      "epoch": 0.8271486084368983,
      "grad_norm": 0.8071471013482424,
      "learning_rate": 7.632848810494193e-07,
      "loss": 0.0878,
      "step": 28353
    },
    {
      "epoch": 0.8271777816675419,
      "grad_norm": 0.6077534103469545,
      "learning_rate": 7.630340157169902e-07,
      "loss": 0.105,
      "step": 28354
    },
    {
      "epoch": 0.8272069548981854,
      "grad_norm": 0.7467996251164862,
      "learning_rate": 7.627831882111825e-07,
      "loss": 0.1185,
      "step": 28355
    },
    {
      "epoch": 0.827236128128829,
      "grad_norm": 1.3510960449703084,
      "learning_rate": 7.625323985342359e-07,
      "loss": 0.1043,
      "step": 28356
    },
    {
      "epoch": 0.8272653013594725,
      "grad_norm": 0.9039522451332882,
      "learning_rate": 7.622816466883887e-07,
      "loss": 0.0866,
      "step": 28357
    },
    {
      "epoch": 0.8272944745901161,
      "grad_norm": 0.9638217489673516,
      "learning_rate": 7.620309326758779e-07,
      "loss": 0.1101,
      "step": 28358
    },
    {
      "epoch": 0.8273236478207596,
      "grad_norm": 0.8155457194286022,
      "learning_rate": 7.617802564989446e-07,
      "loss": 0.1231,
      "step": 28359
    },
    {
      "epoch": 0.8273528210514033,
      "grad_norm": 0.7700458546478592,
      "learning_rate": 7.615296181598242e-07,
      "loss": 0.1051,
      "step": 28360
    },
    {
      "epoch": 0.8273819942820468,
      "grad_norm": 0.8312943947121507,
      "learning_rate": 7.612790176607566e-07,
      "loss": 0.1067,
      "step": 28361
    },
    {
      "epoch": 0.8274111675126904,
      "grad_norm": 1.020073930862499,
      "learning_rate": 7.61028455003977e-07,
      "loss": 0.1432,
      "step": 28362
    },
    {
      "epoch": 0.827440340743334,
      "grad_norm": 0.814031831138474,
      "learning_rate": 7.607779301917245e-07,
      "loss": 0.0982,
      "step": 28363
    },
    {
      "epoch": 0.8274695139739775,
      "grad_norm": 1.0610711076104806,
      "learning_rate": 7.60527443226235e-07,
      "loss": 0.0922,
      "step": 28364
    },
    {
      "epoch": 0.827498687204621,
      "grad_norm": 0.9477252653121643,
      "learning_rate": 7.602769941097427e-07,
      "loss": 0.1193,
      "step": 28365
    },
    {
      "epoch": 0.8275278604352646,
      "grad_norm": 0.7560137550405617,
      "learning_rate": 7.600265828444858e-07,
      "loss": 0.1206,
      "step": 28366
    },
    {
      "epoch": 0.8275570336659082,
      "grad_norm": 0.706287256192517,
      "learning_rate": 7.597762094327004e-07,
      "loss": 0.1111,
      "step": 28367
    },
    {
      "epoch": 0.8275862068965517,
      "grad_norm": 1.1547158265989943,
      "learning_rate": 7.595258738766192e-07,
      "loss": 0.1025,
      "step": 28368
    },
    {
      "epoch": 0.8276153801271953,
      "grad_norm": 0.6871472421710532,
      "learning_rate": 7.592755761784803e-07,
      "loss": 0.1125,
      "step": 28369
    },
    {
      "epoch": 0.8276445533578388,
      "grad_norm": 0.8584336200980527,
      "learning_rate": 7.59025316340517e-07,
      "loss": 0.1152,
      "step": 28370
    },
    {
      "epoch": 0.8276737265884824,
      "grad_norm": 1.0571173083816197,
      "learning_rate": 7.587750943649618e-07,
      "loss": 0.118,
      "step": 28371
    },
    {
      "epoch": 0.8277028998191259,
      "grad_norm": 0.8932959152501599,
      "learning_rate": 7.585249102540498e-07,
      "loss": 0.1206,
      "step": 28372
    },
    {
      "epoch": 0.8277320730497695,
      "grad_norm": 0.6940551219872615,
      "learning_rate": 7.582747640100168e-07,
      "loss": 0.1036,
      "step": 28373
    },
    {
      "epoch": 0.8277612462804131,
      "grad_norm": 0.7990764587298337,
      "learning_rate": 7.580246556350934e-07,
      "loss": 0.1088,
      "step": 28374
    },
    {
      "epoch": 0.8277904195110567,
      "grad_norm": 0.7090968228632207,
      "learning_rate": 7.577745851315127e-07,
      "loss": 0.0993,
      "step": 28375
    },
    {
      "epoch": 0.8278195927417002,
      "grad_norm": 0.689448746294295,
      "learning_rate": 7.575245525015085e-07,
      "loss": 0.0948,
      "step": 28376
    },
    {
      "epoch": 0.8278487659723438,
      "grad_norm": 0.8383856371408687,
      "learning_rate": 7.572745577473123e-07,
      "loss": 0.128,
      "step": 28377
    },
    {
      "epoch": 0.8278779392029874,
      "grad_norm": 1.0460968061717753,
      "learning_rate": 7.570246008711552e-07,
      "loss": 0.1143,
      "step": 28378
    },
    {
      "epoch": 0.8279071124336309,
      "grad_norm": 0.8701828394876752,
      "learning_rate": 7.567746818752692e-07,
      "loss": 0.1155,
      "step": 28379
    },
    {
      "epoch": 0.8279362856642745,
      "grad_norm": 0.7056535521867008,
      "learning_rate": 7.565248007618875e-07,
      "loss": 0.1285,
      "step": 28380
    },
    {
      "epoch": 0.827965458894918,
      "grad_norm": 1.0685582939700338,
      "learning_rate": 7.56274957533239e-07,
      "loss": 0.0988,
      "step": 28381
    },
    {
      "epoch": 0.8279946321255616,
      "grad_norm": 0.8933234757039856,
      "learning_rate": 7.560251521915534e-07,
      "loss": 0.1244,
      "step": 28382
    },
    {
      "epoch": 0.8280238053562051,
      "grad_norm": 0.7656982208089511,
      "learning_rate": 7.557753847390637e-07,
      "loss": 0.1076,
      "step": 28383
    },
    {
      "epoch": 0.8280529785868487,
      "grad_norm": 0.7442566120946866,
      "learning_rate": 7.555256551779966e-07,
      "loss": 0.1127,
      "step": 28384
    },
    {
      "epoch": 0.8280821518174922,
      "grad_norm": 0.9755368554092401,
      "learning_rate": 7.552759635105832e-07,
      "loss": 0.1139,
      "step": 28385
    },
    {
      "epoch": 0.8281113250481358,
      "grad_norm": 0.8705401918820534,
      "learning_rate": 7.550263097390543e-07,
      "loss": 0.1158,
      "step": 28386
    },
    {
      "epoch": 0.8281404982787794,
      "grad_norm": 0.7506131579224751,
      "learning_rate": 7.54776693865637e-07,
      "loss": 0.1143,
      "step": 28387
    },
    {
      "epoch": 0.828169671509423,
      "grad_norm": 0.8008476601379567,
      "learning_rate": 7.545271158925588e-07,
      "loss": 0.1052,
      "step": 28388
    },
    {
      "epoch": 0.8281988447400666,
      "grad_norm": 0.8488696894521165,
      "learning_rate": 7.542775758220499e-07,
      "loss": 0.1243,
      "step": 28389
    },
    {
      "epoch": 0.8282280179707101,
      "grad_norm": 0.7289801411330107,
      "learning_rate": 7.540280736563366e-07,
      "loss": 0.1159,
      "step": 28390
    },
    {
      "epoch": 0.8282571912013537,
      "grad_norm": 0.7703778206123986,
      "learning_rate": 7.537786093976479e-07,
      "loss": 0.124,
      "step": 28391
    },
    {
      "epoch": 0.8282863644319972,
      "grad_norm": 0.9645049119459579,
      "learning_rate": 7.535291830482088e-07,
      "loss": 0.1437,
      "step": 28392
    },
    {
      "epoch": 0.8283155376626408,
      "grad_norm": 0.8649477174827838,
      "learning_rate": 7.532797946102488e-07,
      "loss": 0.1335,
      "step": 28393
    },
    {
      "epoch": 0.8283447108932843,
      "grad_norm": 0.7159959822166435,
      "learning_rate": 7.530304440859932e-07,
      "loss": 0.1023,
      "step": 28394
    },
    {
      "epoch": 0.8283738841239279,
      "grad_norm": 0.8603737686192554,
      "learning_rate": 7.527811314776667e-07,
      "loss": 0.1196,
      "step": 28395
    },
    {
      "epoch": 0.8284030573545714,
      "grad_norm": 0.9851910441865979,
      "learning_rate": 7.525318567874962e-07,
      "loss": 0.1158,
      "step": 28396
    },
    {
      "epoch": 0.828432230585215,
      "grad_norm": 0.8980323382883142,
      "learning_rate": 7.522826200177085e-07,
      "loss": 0.122,
      "step": 28397
    },
    {
      "epoch": 0.8284614038158585,
      "grad_norm": 0.7215558665966592,
      "learning_rate": 7.520334211705265e-07,
      "loss": 0.1099,
      "step": 28398
    },
    {
      "epoch": 0.8284905770465021,
      "grad_norm": 0.6656369779731551,
      "learning_rate": 7.517842602481773e-07,
      "loss": 0.0843,
      "step": 28399
    },
    {
      "epoch": 0.8285197502771456,
      "grad_norm": 1.0008539603737099,
      "learning_rate": 7.515351372528839e-07,
      "loss": 0.1196,
      "step": 28400
    },
    {
      "epoch": 0.8285489235077893,
      "grad_norm": 0.8997140369278976,
      "learning_rate": 7.512860521868693e-07,
      "loss": 0.1255,
      "step": 28401
    },
    {
      "epoch": 0.8285780967384329,
      "grad_norm": 0.8186369579234746,
      "learning_rate": 7.510370050523591e-07,
      "loss": 0.1262,
      "step": 28402
    },
    {
      "epoch": 0.8286072699690764,
      "grad_norm": 0.9062082953019484,
      "learning_rate": 7.507879958515768e-07,
      "loss": 0.1302,
      "step": 28403
    },
    {
      "epoch": 0.82863644319972,
      "grad_norm": 0.7423631168798381,
      "learning_rate": 7.505390245867455e-07,
      "loss": 0.1106,
      "step": 28404
    },
    {
      "epoch": 0.8286656164303635,
      "grad_norm": 0.8827426244144843,
      "learning_rate": 7.502900912600858e-07,
      "loss": 0.1131,
      "step": 28405
    },
    {
      "epoch": 0.8286947896610071,
      "grad_norm": 0.8395354430181127,
      "learning_rate": 7.50041195873823e-07,
      "loss": 0.1241,
      "step": 28406
    },
    {
      "epoch": 0.8287239628916506,
      "grad_norm": 0.946227806949324,
      "learning_rate": 7.497923384301775e-07,
      "loss": 0.0881,
      "step": 28407
    },
    {
      "epoch": 0.8287531361222942,
      "grad_norm": 0.8016717106451814,
      "learning_rate": 7.495435189313704e-07,
      "loss": 0.1367,
      "step": 28408
    },
    {
      "epoch": 0.8287823093529377,
      "grad_norm": 0.8166243097367427,
      "learning_rate": 7.492947373796244e-07,
      "loss": 0.1323,
      "step": 28409
    },
    {
      "epoch": 0.8288114825835813,
      "grad_norm": 1.3640015216564052,
      "learning_rate": 7.49045993777161e-07,
      "loss": 0.1214,
      "step": 28410
    },
    {
      "epoch": 0.8288406558142248,
      "grad_norm": 0.8039236960400502,
      "learning_rate": 7.487972881262006e-07,
      "loss": 0.1399,
      "step": 28411
    },
    {
      "epoch": 0.8288698290448684,
      "grad_norm": 0.7848311756178701,
      "learning_rate": 7.485486204289616e-07,
      "loss": 0.1247,
      "step": 28412
    },
    {
      "epoch": 0.8288990022755119,
      "grad_norm": 0.7767831778621073,
      "learning_rate": 7.48299990687667e-07,
      "loss": 0.1125,
      "step": 28413
    },
    {
      "epoch": 0.8289281755061556,
      "grad_norm": 0.758910254961333,
      "learning_rate": 7.480513989045341e-07,
      "loss": 0.1005,
      "step": 28414
    },
    {
      "epoch": 0.8289573487367992,
      "grad_norm": 0.7087555708668387,
      "learning_rate": 7.478028450817832e-07,
      "loss": 0.1061,
      "step": 28415
    },
    {
      "epoch": 0.8289865219674427,
      "grad_norm": 1.0047912122330391,
      "learning_rate": 7.475543292216347e-07,
      "loss": 0.1199,
      "step": 28416
    },
    {
      "epoch": 0.8290156951980863,
      "grad_norm": 1.527544903022475,
      "learning_rate": 7.473058513263054e-07,
      "loss": 0.1127,
      "step": 28417
    },
    {
      "epoch": 0.8290448684287298,
      "grad_norm": 0.9787763908199145,
      "learning_rate": 7.470574113980139e-07,
      "loss": 0.1093,
      "step": 28418
    },
    {
      "epoch": 0.8290740416593734,
      "grad_norm": 0.7679047763089842,
      "learning_rate": 7.46809009438979e-07,
      "loss": 0.1215,
      "step": 28419
    },
    {
      "epoch": 0.8291032148900169,
      "grad_norm": 0.9601933188428016,
      "learning_rate": 7.465606454514174e-07,
      "loss": 0.1043,
      "step": 28420
    },
    {
      "epoch": 0.8291323881206605,
      "grad_norm": 0.7876307216321129,
      "learning_rate": 7.463123194375476e-07,
      "loss": 0.1112,
      "step": 28421
    },
    {
      "epoch": 0.829161561351304,
      "grad_norm": 0.7313640996354795,
      "learning_rate": 7.460640313995854e-07,
      "loss": 0.1209,
      "step": 28422
    },
    {
      "epoch": 0.8291907345819476,
      "grad_norm": 0.7396376101369221,
      "learning_rate": 7.458157813397487e-07,
      "loss": 0.105,
      "step": 28423
    },
    {
      "epoch": 0.8292199078125911,
      "grad_norm": 0.8886354246380967,
      "learning_rate": 7.455675692602532e-07,
      "loss": 0.1176,
      "step": 28424
    },
    {
      "epoch": 0.8292490810432347,
      "grad_norm": 1.0904938595718519,
      "learning_rate": 7.453193951633142e-07,
      "loss": 0.1045,
      "step": 28425
    },
    {
      "epoch": 0.8292782542738782,
      "grad_norm": 0.7608969060337386,
      "learning_rate": 7.450712590511472e-07,
      "loss": 0.1128,
      "step": 28426
    },
    {
      "epoch": 0.8293074275045218,
      "grad_norm": 0.7658006455646321,
      "learning_rate": 7.448231609259699e-07,
      "loss": 0.0987,
      "step": 28427
    },
    {
      "epoch": 0.8293366007351655,
      "grad_norm": 0.8597849271304524,
      "learning_rate": 7.445751007899943e-07,
      "loss": 0.1226,
      "step": 28428
    },
    {
      "epoch": 0.829365773965809,
      "grad_norm": 0.6598982366190425,
      "learning_rate": 7.443270786454376e-07,
      "loss": 0.1004,
      "step": 28429
    },
    {
      "epoch": 0.8293949471964526,
      "grad_norm": 0.9080284371942519,
      "learning_rate": 7.440790944945131e-07,
      "loss": 0.1058,
      "step": 28430
    },
    {
      "epoch": 0.8294241204270961,
      "grad_norm": 1.4034088887775558,
      "learning_rate": 7.438311483394328e-07,
      "loss": 0.1247,
      "step": 28431
    },
    {
      "epoch": 0.8294532936577397,
      "grad_norm": 0.7383696981856201,
      "learning_rate": 7.435832401824122e-07,
      "loss": 0.096,
      "step": 28432
    },
    {
      "epoch": 0.8294824668883832,
      "grad_norm": 0.7879472713030384,
      "learning_rate": 7.433353700256651e-07,
      "loss": 0.1043,
      "step": 28433
    },
    {
      "epoch": 0.8295116401190268,
      "grad_norm": 0.784470214110543,
      "learning_rate": 7.430875378714042e-07,
      "loss": 0.115,
      "step": 28434
    },
    {
      "epoch": 0.8295408133496703,
      "grad_norm": 0.7232544424523933,
      "learning_rate": 7.428397437218404e-07,
      "loss": 0.1079,
      "step": 28435
    },
    {
      "epoch": 0.8295699865803139,
      "grad_norm": 0.7159335182674081,
      "learning_rate": 7.425919875791881e-07,
      "loss": 0.104,
      "step": 28436
    },
    {
      "epoch": 0.8295991598109574,
      "grad_norm": 0.7607774667796043,
      "learning_rate": 7.423442694456584e-07,
      "loss": 0.114,
      "step": 28437
    },
    {
      "epoch": 0.829628333041601,
      "grad_norm": 0.8795922124661933,
      "learning_rate": 7.420965893234611e-07,
      "loss": 0.1102,
      "step": 28438
    },
    {
      "epoch": 0.8296575062722445,
      "grad_norm": 0.8882395773551321,
      "learning_rate": 7.418489472148094e-07,
      "loss": 0.121,
      "step": 28439
    },
    {
      "epoch": 0.8296866795028881,
      "grad_norm": 0.907455358333316,
      "learning_rate": 7.416013431219149e-07,
      "loss": 0.1074,
      "step": 28440
    },
    {
      "epoch": 0.8297158527335317,
      "grad_norm": 0.7848234078597283,
      "learning_rate": 7.41353777046987e-07,
      "loss": 0.112,
      "step": 28441
    },
    {
      "epoch": 0.8297450259641753,
      "grad_norm": 0.9077419057850704,
      "learning_rate": 7.411062489922344e-07,
      "loss": 0.1009,
      "step": 28442
    },
    {
      "epoch": 0.8297741991948189,
      "grad_norm": 0.863099550723619,
      "learning_rate": 7.408587589598704e-07,
      "loss": 0.125,
      "step": 28443
    },
    {
      "epoch": 0.8298033724254624,
      "grad_norm": 0.8234409787337859,
      "learning_rate": 7.406113069521009e-07,
      "loss": 0.1115,
      "step": 28444
    },
    {
      "epoch": 0.829832545656106,
      "grad_norm": 0.9249257401348212,
      "learning_rate": 7.403638929711371e-07,
      "loss": 0.1137,
      "step": 28445
    },
    {
      "epoch": 0.8298617188867495,
      "grad_norm": 0.9947568607836801,
      "learning_rate": 7.401165170191887e-07,
      "loss": 0.1254,
      "step": 28446
    },
    {
      "epoch": 0.8298908921173931,
      "grad_norm": 0.7455723298912932,
      "learning_rate": 7.39869179098463e-07,
      "loss": 0.0924,
      "step": 28447
    },
    {
      "epoch": 0.8299200653480366,
      "grad_norm": 0.7798383077405091,
      "learning_rate": 7.396218792111676e-07,
      "loss": 0.1023,
      "step": 28448
    },
    {
      "epoch": 0.8299492385786802,
      "grad_norm": 0.8745388904463481,
      "learning_rate": 7.393746173595106e-07,
      "loss": 0.1106,
      "step": 28449
    },
    {
      "epoch": 0.8299784118093237,
      "grad_norm": 1.0526718887968298,
      "learning_rate": 7.391273935457016e-07,
      "loss": 0.1324,
      "step": 28450
    },
    {
      "epoch": 0.8300075850399673,
      "grad_norm": 0.716904351557592,
      "learning_rate": 7.388802077719454e-07,
      "loss": 0.1236,
      "step": 28451
    },
    {
      "epoch": 0.8300367582706109,
      "grad_norm": 0.9323271799953796,
      "learning_rate": 7.386330600404484e-07,
      "loss": 0.1218,
      "step": 28452
    },
    {
      "epoch": 0.8300659315012544,
      "grad_norm": 1.0813662179731656,
      "learning_rate": 7.383859503534197e-07,
      "loss": 0.113,
      "step": 28453
    },
    {
      "epoch": 0.830095104731898,
      "grad_norm": 0.787413754439722,
      "learning_rate": 7.381388787130639e-07,
      "loss": 0.1015,
      "step": 28454
    },
    {
      "epoch": 0.8301242779625416,
      "grad_norm": 0.7645144026430417,
      "learning_rate": 7.378918451215844e-07,
      "loss": 0.1067,
      "step": 28455
    },
    {
      "epoch": 0.8301534511931852,
      "grad_norm": 0.9540995963289592,
      "learning_rate": 7.376448495811911e-07,
      "loss": 0.108,
      "step": 28456
    },
    {
      "epoch": 0.8301826244238287,
      "grad_norm": 1.043364353745594,
      "learning_rate": 7.373978920940878e-07,
      "loss": 0.1079,
      "step": 28457
    },
    {
      "epoch": 0.8302117976544723,
      "grad_norm": 0.9012451293447941,
      "learning_rate": 7.371509726624765e-07,
      "loss": 0.1326,
      "step": 28458
    },
    {
      "epoch": 0.8302409708851158,
      "grad_norm": 0.8397749882451837,
      "learning_rate": 7.369040912885656e-07,
      "loss": 0.1267,
      "step": 28459
    },
    {
      "epoch": 0.8302701441157594,
      "grad_norm": 0.7710792796993645,
      "learning_rate": 7.366572479745565e-07,
      "loss": 0.1121,
      "step": 28460
    },
    {
      "epoch": 0.8302993173464029,
      "grad_norm": 1.0625839752446422,
      "learning_rate": 7.364104427226532e-07,
      "loss": 0.1063,
      "step": 28461
    },
    {
      "epoch": 0.8303284905770465,
      "grad_norm": 0.8124520740403229,
      "learning_rate": 7.361636755350593e-07,
      "loss": 0.1366,
      "step": 28462
    },
    {
      "epoch": 0.83035766380769,
      "grad_norm": 1.019349418711587,
      "learning_rate": 7.359169464139798e-07,
      "loss": 0.1123,
      "step": 28463
    },
    {
      "epoch": 0.8303868370383336,
      "grad_norm": 0.6286083303873796,
      "learning_rate": 7.356702553616157e-07,
      "loss": 0.1103,
      "step": 28464
    },
    {
      "epoch": 0.8304160102689772,
      "grad_norm": 0.7864544122825847,
      "learning_rate": 7.354236023801687e-07,
      "loss": 0.1179,
      "step": 28465
    },
    {
      "epoch": 0.8304451834996207,
      "grad_norm": 0.8116342155865951,
      "learning_rate": 7.351769874718423e-07,
      "loss": 0.1124,
      "step": 28466
    },
    {
      "epoch": 0.8304743567302643,
      "grad_norm": 1.110231707576032,
      "learning_rate": 7.349304106388366e-07,
      "loss": 0.1167,
      "step": 28467
    },
    {
      "epoch": 0.8305035299609078,
      "grad_norm": 0.7610493832294706,
      "learning_rate": 7.34683871883356e-07,
      "loss": 0.1048,
      "step": 28468
    },
    {
      "epoch": 0.8305327031915515,
      "grad_norm": 0.769739157904961,
      "learning_rate": 7.344373712075976e-07,
      "loss": 0.1166,
      "step": 28469
    },
    {
      "epoch": 0.830561876422195,
      "grad_norm": 0.9835055826127647,
      "learning_rate": 7.341909086137655e-07,
      "loss": 0.1197,
      "step": 28470
    },
    {
      "epoch": 0.8305910496528386,
      "grad_norm": 0.6848325801545342,
      "learning_rate": 7.339444841040583e-07,
      "loss": 0.1031,
      "step": 28471
    },
    {
      "epoch": 0.8306202228834821,
      "grad_norm": 0.8539488257851424,
      "learning_rate": 7.336980976806757e-07,
      "loss": 0.1098,
      "step": 28472
    },
    {
      "epoch": 0.8306493961141257,
      "grad_norm": 0.9767131282538098,
      "learning_rate": 7.334517493458176e-07,
      "loss": 0.1091,
      "step": 28473
    },
    {
      "epoch": 0.8306785693447692,
      "grad_norm": 0.8913909267721453,
      "learning_rate": 7.332054391016852e-07,
      "loss": 0.1029,
      "step": 28474
    },
    {
      "epoch": 0.8307077425754128,
      "grad_norm": 0.9429812227510052,
      "learning_rate": 7.329591669504748e-07,
      "loss": 0.1085,
      "step": 28475
    },
    {
      "epoch": 0.8307369158060564,
      "grad_norm": 0.7317002307282251,
      "learning_rate": 7.327129328943877e-07,
      "loss": 0.1227,
      "step": 28476
    },
    {
      "epoch": 0.8307660890366999,
      "grad_norm": 0.8530850221167933,
      "learning_rate": 7.324667369356209e-07,
      "loss": 0.1042,
      "step": 28477
    },
    {
      "epoch": 0.8307952622673435,
      "grad_norm": 1.2876696314117573,
      "learning_rate": 7.322205790763709e-07,
      "loss": 0.1223,
      "step": 28478
    },
    {
      "epoch": 0.830824435497987,
      "grad_norm": 0.9535450140788281,
      "learning_rate": 7.319744593188371e-07,
      "loss": 0.0964,
      "step": 28479
    },
    {
      "epoch": 0.8308536087286306,
      "grad_norm": 0.9078480178805935,
      "learning_rate": 7.317283776652173e-07,
      "loss": 0.105,
      "step": 28480
    },
    {
      "epoch": 0.8308827819592741,
      "grad_norm": 1.0122084769082274,
      "learning_rate": 7.31482334117708e-07,
      "loss": 0.1058,
      "step": 28481
    },
    {
      "epoch": 0.8309119551899178,
      "grad_norm": 1.1380085633167951,
      "learning_rate": 7.31236328678504e-07,
      "loss": 0.0953,
      "step": 28482
    },
    {
      "epoch": 0.8309411284205613,
      "grad_norm": 1.2484178967668609,
      "learning_rate": 7.309903613498037e-07,
      "loss": 0.1333,
      "step": 28483
    },
    {
      "epoch": 0.8309703016512049,
      "grad_norm": 0.9646207965847657,
      "learning_rate": 7.307444321338031e-07,
      "loss": 0.1051,
      "step": 28484
    },
    {
      "epoch": 0.8309994748818484,
      "grad_norm": 1.0211502583035859,
      "learning_rate": 7.304985410326942e-07,
      "loss": 0.1078,
      "step": 28485
    },
    {
      "epoch": 0.831028648112492,
      "grad_norm": 0.6957929901096648,
      "learning_rate": 7.302526880486782e-07,
      "loss": 0.0964,
      "step": 28486
    },
    {
      "epoch": 0.8310578213431355,
      "grad_norm": 0.7550524268357655,
      "learning_rate": 7.300068731839461e-07,
      "loss": 0.1153,
      "step": 28487
    },
    {
      "epoch": 0.8310869945737791,
      "grad_norm": 0.754587778508101,
      "learning_rate": 7.297610964406926e-07,
      "loss": 0.1045,
      "step": 28488
    },
    {
      "epoch": 0.8311161678044227,
      "grad_norm": 0.7728547608988162,
      "learning_rate": 7.295153578211139e-07,
      "loss": 0.1108,
      "step": 28489
    },
    {
      "epoch": 0.8311453410350662,
      "grad_norm": 0.8260731903976712,
      "learning_rate": 7.292696573274022e-07,
      "loss": 0.1014,
      "step": 28490
    },
    {
      "epoch": 0.8311745142657098,
      "grad_norm": 0.776247445470421,
      "learning_rate": 7.290239949617506e-07,
      "loss": 0.1014,
      "step": 28491
    },
    {
      "epoch": 0.8312036874963533,
      "grad_norm": 0.960356033116326,
      "learning_rate": 7.287783707263535e-07,
      "loss": 0.1173,
      "step": 28492
    },
    {
      "epoch": 0.8312328607269969,
      "grad_norm": 0.6693668087549213,
      "learning_rate": 7.285327846234042e-07,
      "loss": 0.1096,
      "step": 28493
    },
    {
      "epoch": 0.8312620339576404,
      "grad_norm": 0.7847598719804603,
      "learning_rate": 7.282872366550947e-07,
      "loss": 0.1076,
      "step": 28494
    },
    {
      "epoch": 0.831291207188284,
      "grad_norm": 0.8124027714812048,
      "learning_rate": 7.280417268236157e-07,
      "loss": 0.1241,
      "step": 28495
    },
    {
      "epoch": 0.8313203804189276,
      "grad_norm": 0.8112382677692418,
      "learning_rate": 7.277962551311613e-07,
      "loss": 0.0886,
      "step": 28496
    },
    {
      "epoch": 0.8313495536495712,
      "grad_norm": 0.8637708261310176,
      "learning_rate": 7.275508215799216e-07,
      "loss": 0.1072,
      "step": 28497
    },
    {
      "epoch": 0.8313787268802147,
      "grad_norm": 0.8126661809103095,
      "learning_rate": 7.273054261720891e-07,
      "loss": 0.0964,
      "step": 28498
    },
    {
      "epoch": 0.8314079001108583,
      "grad_norm": 1.1081717731744656,
      "learning_rate": 7.270600689098523e-07,
      "loss": 0.1114,
      "step": 28499
    },
    {
      "epoch": 0.8314370733415019,
      "grad_norm": 1.0169366316178285,
      "learning_rate": 7.268147497954048e-07,
      "loss": 0.1013,
      "step": 28500
    },
    {
      "epoch": 0.8314662465721454,
      "grad_norm": 0.8697696658821369,
      "learning_rate": 7.265694688309349e-07,
      "loss": 0.1141,
      "step": 28501
    },
    {
      "epoch": 0.831495419802789,
      "grad_norm": 0.8830334202234175,
      "learning_rate": 7.263242260186315e-07,
      "loss": 0.1095,
      "step": 28502
    },
    {
      "epoch": 0.8315245930334325,
      "grad_norm": 0.9855130207278656,
      "learning_rate": 7.26079021360685e-07,
      "loss": 0.1219,
      "step": 28503
    },
    {
      "epoch": 0.8315537662640761,
      "grad_norm": 0.6709105986819079,
      "learning_rate": 7.258338548592858e-07,
      "loss": 0.0972,
      "step": 28504
    },
    {
      "epoch": 0.8315829394947196,
      "grad_norm": 0.7081321681791498,
      "learning_rate": 7.255887265166211e-07,
      "loss": 0.113,
      "step": 28505
    },
    {
      "epoch": 0.8316121127253632,
      "grad_norm": 1.1133234397478888,
      "learning_rate": 7.253436363348804e-07,
      "loss": 0.1133,
      "step": 28506
    },
    {
      "epoch": 0.8316412859560067,
      "grad_norm": 1.0341565117238025,
      "learning_rate": 7.250985843162517e-07,
      "loss": 0.1146,
      "step": 28507
    },
    {
      "epoch": 0.8316704591866503,
      "grad_norm": 1.0097485894293217,
      "learning_rate": 7.248535704629211e-07,
      "loss": 0.1001,
      "step": 28508
    },
    {
      "epoch": 0.8316996324172939,
      "grad_norm": 0.8673098107296484,
      "learning_rate": 7.24608594777077e-07,
      "loss": 0.1183,
      "step": 28509
    },
    {
      "epoch": 0.8317288056479375,
      "grad_norm": 0.8743674749189494,
      "learning_rate": 7.24363657260908e-07,
      "loss": 0.1216,
      "step": 28510
    },
    {
      "epoch": 0.831757978878581,
      "grad_norm": 0.821606492559704,
      "learning_rate": 7.241187579165998e-07,
      "loss": 0.1215,
      "step": 28511
    },
    {
      "epoch": 0.8317871521092246,
      "grad_norm": 1.0138135264361696,
      "learning_rate": 7.238738967463372e-07,
      "loss": 0.1017,
      "step": 28512
    },
    {
      "epoch": 0.8318163253398682,
      "grad_norm": 0.9482798737416205,
      "learning_rate": 7.236290737523089e-07,
      "loss": 0.1171,
      "step": 28513
    },
    {
      "epoch": 0.8318454985705117,
      "grad_norm": 0.8324205475097127,
      "learning_rate": 7.233842889366993e-07,
      "loss": 0.0775,
      "step": 28514
    },
    {
      "epoch": 0.8318746718011553,
      "grad_norm": 0.8960529727392906,
      "learning_rate": 7.231395423016918e-07,
      "loss": 0.1208,
      "step": 28515
    },
    {
      "epoch": 0.8319038450317988,
      "grad_norm": 0.9782130382983932,
      "learning_rate": 7.228948338494757e-07,
      "loss": 0.1251,
      "step": 28516
    },
    {
      "epoch": 0.8319330182624424,
      "grad_norm": 0.9436371391172321,
      "learning_rate": 7.226501635822337e-07,
      "loss": 0.1176,
      "step": 28517
    },
    {
      "epoch": 0.8319621914930859,
      "grad_norm": 1.0025269470149112,
      "learning_rate": 7.224055315021484e-07,
      "loss": 0.1165,
      "step": 28518
    },
    {
      "epoch": 0.8319913647237295,
      "grad_norm": 1.2910502736752922,
      "learning_rate": 7.221609376114069e-07,
      "loss": 0.1367,
      "step": 28519
    },
    {
      "epoch": 0.832020537954373,
      "grad_norm": 0.7938050409417399,
      "learning_rate": 7.21916381912191e-07,
      "loss": 0.1186,
      "step": 28520
    },
    {
      "epoch": 0.8320497111850166,
      "grad_norm": 0.8765039578690622,
      "learning_rate": 7.216718644066834e-07,
      "loss": 0.1342,
      "step": 28521
    },
    {
      "epoch": 0.8320788844156601,
      "grad_norm": 0.7935005075097074,
      "learning_rate": 7.214273850970677e-07,
      "loss": 0.1083,
      "step": 28522
    },
    {
      "epoch": 0.8321080576463038,
      "grad_norm": 0.8121681822286977,
      "learning_rate": 7.211829439855284e-07,
      "loss": 0.1123,
      "step": 28523
    },
    {
      "epoch": 0.8321372308769474,
      "grad_norm": 0.7256000871820524,
      "learning_rate": 7.209385410742465e-07,
      "loss": 0.1016,
      "step": 28524
    },
    {
      "epoch": 0.8321664041075909,
      "grad_norm": 0.918670339112401,
      "learning_rate": 7.206941763654024e-07,
      "loss": 0.0928,
      "step": 28525
    },
    {
      "epoch": 0.8321955773382345,
      "grad_norm": 0.7244585269610889,
      "learning_rate": 7.204498498611806e-07,
      "loss": 0.1038,
      "step": 28526
    },
    {
      "epoch": 0.832224750568878,
      "grad_norm": 1.2410483979623603,
      "learning_rate": 7.202055615637594e-07,
      "loss": 0.1294,
      "step": 28527
    },
    {
      "epoch": 0.8322539237995216,
      "grad_norm": 1.0056864448028837,
      "learning_rate": 7.199613114753228e-07,
      "loss": 0.1168,
      "step": 28528
    },
    {
      "epoch": 0.8322830970301651,
      "grad_norm": 0.914242684557372,
      "learning_rate": 7.197170995980485e-07,
      "loss": 0.1154,
      "step": 28529
    },
    {
      "epoch": 0.8323122702608087,
      "grad_norm": 0.7913292705219948,
      "learning_rate": 7.194729259341194e-07,
      "loss": 0.1113,
      "step": 28530
    },
    {
      "epoch": 0.8323414434914522,
      "grad_norm": 1.0144165012259367,
      "learning_rate": 7.192287904857138e-07,
      "loss": 0.0951,
      "step": 28531
    },
    {
      "epoch": 0.8323706167220958,
      "grad_norm": 0.6879136500285986,
      "learning_rate": 7.18984693255011e-07,
      "loss": 0.0979,
      "step": 28532
    },
    {
      "epoch": 0.8323997899527393,
      "grad_norm": 0.7006082843911513,
      "learning_rate": 7.187406342441905e-07,
      "loss": 0.0871,
      "step": 28533
    },
    {
      "epoch": 0.8324289631833829,
      "grad_norm": 0.815068687722015,
      "learning_rate": 7.184966134554333e-07,
      "loss": 0.1152,
      "step": 28534
    },
    {
      "epoch": 0.8324581364140264,
      "grad_norm": 0.8554049222639807,
      "learning_rate": 7.182526308909149e-07,
      "loss": 0.1244,
      "step": 28535
    },
    {
      "epoch": 0.8324873096446701,
      "grad_norm": 0.7513868656783036,
      "learning_rate": 7.180086865528157e-07,
      "loss": 0.1048,
      "step": 28536
    },
    {
      "epoch": 0.8325164828753137,
      "grad_norm": 0.6375296876128865,
      "learning_rate": 7.17764780443313e-07,
      "loss": 0.1205,
      "step": 28537
    },
    {
      "epoch": 0.8325456561059572,
      "grad_norm": 0.8367052356876804,
      "learning_rate": 7.175209125645827e-07,
      "loss": 0.0938,
      "step": 28538
    },
    {
      "epoch": 0.8325748293366008,
      "grad_norm": 0.7349705041397777,
      "learning_rate": 7.172770829188036e-07,
      "loss": 0.0966,
      "step": 28539
    },
    {
      "epoch": 0.8326040025672443,
      "grad_norm": 1.0130061093738332,
      "learning_rate": 7.170332915081535e-07,
      "loss": 0.1151,
      "step": 28540
    },
    {
      "epoch": 0.8326331757978879,
      "grad_norm": 1.0458129887242262,
      "learning_rate": 7.167895383348078e-07,
      "loss": 0.1209,
      "step": 28541
    },
    {
      "epoch": 0.8326623490285314,
      "grad_norm": 0.9933350286807865,
      "learning_rate": 7.165458234009415e-07,
      "loss": 0.1053,
      "step": 28542
    },
    {
      "epoch": 0.832691522259175,
      "grad_norm": 0.7640996831201017,
      "learning_rate": 7.163021467087322e-07,
      "loss": 0.0778,
      "step": 28543
    },
    {
      "epoch": 0.8327206954898185,
      "grad_norm": 0.9615357386913754,
      "learning_rate": 7.160585082603549e-07,
      "loss": 0.1144,
      "step": 28544
    },
    {
      "epoch": 0.8327498687204621,
      "grad_norm": 0.7987610593492653,
      "learning_rate": 7.15814908057983e-07,
      "loss": 0.1051,
      "step": 28545
    },
    {
      "epoch": 0.8327790419511056,
      "grad_norm": 1.015639203436034,
      "learning_rate": 7.155713461037944e-07,
      "loss": 0.0999,
      "step": 28546
    },
    {
      "epoch": 0.8328082151817492,
      "grad_norm": 0.7743864515395077,
      "learning_rate": 7.153278223999622e-07,
      "loss": 0.1014,
      "step": 28547
    },
    {
      "epoch": 0.8328373884123927,
      "grad_norm": 0.8477173807154128,
      "learning_rate": 7.150843369486593e-07,
      "loss": 0.1058,
      "step": 28548
    },
    {
      "epoch": 0.8328665616430363,
      "grad_norm": 0.8275212261359371,
      "learning_rate": 7.14840889752062e-07,
      "loss": 0.1286,
      "step": 28549
    },
    {
      "epoch": 0.83289573487368,
      "grad_norm": 0.6775216173110621,
      "learning_rate": 7.145974808123418e-07,
      "loss": 0.0743,
      "step": 28550
    },
    {
      "epoch": 0.8329249081043235,
      "grad_norm": 0.8947868673066094,
      "learning_rate": 7.143541101316715e-07,
      "loss": 0.1197,
      "step": 28551
    },
    {
      "epoch": 0.8329540813349671,
      "grad_norm": 1.1815004802146287,
      "learning_rate": 7.141107777122242e-07,
      "loss": 0.1286,
      "step": 28552
    },
    {
      "epoch": 0.8329832545656106,
      "grad_norm": 0.8162810130464536,
      "learning_rate": 7.138674835561743e-07,
      "loss": 0.1211,
      "step": 28553
    },
    {
      "epoch": 0.8330124277962542,
      "grad_norm": 0.731954929166307,
      "learning_rate": 7.136242276656924e-07,
      "loss": 0.1077,
      "step": 28554
    },
    {
      "epoch": 0.8330416010268977,
      "grad_norm": 0.6193620227743445,
      "learning_rate": 7.133810100429489e-07,
      "loss": 0.1164,
      "step": 28555
    },
    {
      "epoch": 0.8330707742575413,
      "grad_norm": 1.0063182105702355,
      "learning_rate": 7.131378306901171e-07,
      "loss": 0.1186,
      "step": 28556
    },
    {
      "epoch": 0.8330999474881848,
      "grad_norm": 1.1640924578124354,
      "learning_rate": 7.128946896093669e-07,
      "loss": 0.1063,
      "step": 28557
    },
    {
      "epoch": 0.8331291207188284,
      "grad_norm": 0.7617651428155251,
      "learning_rate": 7.126515868028705e-07,
      "loss": 0.1291,
      "step": 28558
    },
    {
      "epoch": 0.8331582939494719,
      "grad_norm": 0.7278757571787543,
      "learning_rate": 7.124085222727956e-07,
      "loss": 0.1187,
      "step": 28559
    },
    {
      "epoch": 0.8331874671801155,
      "grad_norm": 0.7965604064954516,
      "learning_rate": 7.121654960213159e-07,
      "loss": 0.1071,
      "step": 28560
    },
    {
      "epoch": 0.833216640410759,
      "grad_norm": 0.908660796583881,
      "learning_rate": 7.119225080505982e-07,
      "loss": 0.115,
      "step": 28561
    },
    {
      "epoch": 0.8332458136414026,
      "grad_norm": 0.9092177506653578,
      "learning_rate": 7.116795583628122e-07,
      "loss": 0.0974,
      "step": 28562
    },
    {
      "epoch": 0.8332749868720463,
      "grad_norm": 0.8069928837933115,
      "learning_rate": 7.114366469601269e-07,
      "loss": 0.1212,
      "step": 28563
    },
    {
      "epoch": 0.8333041601026898,
      "grad_norm": 0.8121585361316815,
      "learning_rate": 7.111937738447127e-07,
      "loss": 0.0945,
      "step": 28564
    },
    {
      "epoch": 0.8333333333333334,
      "grad_norm": 0.7402716149894177,
      "learning_rate": 7.109509390187358e-07,
      "loss": 0.0928,
      "step": 28565
    },
    {
      "epoch": 0.8333625065639769,
      "grad_norm": 0.8233873005469028,
      "learning_rate": 7.107081424843665e-07,
      "loss": 0.1142,
      "step": 28566
    },
    {
      "epoch": 0.8333916797946205,
      "grad_norm": 0.8461371407414695,
      "learning_rate": 7.104653842437703e-07,
      "loss": 0.1124,
      "step": 28567
    },
    {
      "epoch": 0.833420853025264,
      "grad_norm": 0.8776924496588524,
      "learning_rate": 7.10222664299114e-07,
      "loss": 0.1131,
      "step": 28568
    },
    {
      "epoch": 0.8334500262559076,
      "grad_norm": 0.7461887739325114,
      "learning_rate": 7.09979982652566e-07,
      "loss": 0.1015,
      "step": 28569
    },
    {
      "epoch": 0.8334791994865511,
      "grad_norm": 1.1116031844994403,
      "learning_rate": 7.09737339306294e-07,
      "loss": 0.1304,
      "step": 28570
    },
    {
      "epoch": 0.8335083727171947,
      "grad_norm": 0.8107043254877853,
      "learning_rate": 7.094947342624625e-07,
      "loss": 0.089,
      "step": 28571
    },
    {
      "epoch": 0.8335375459478382,
      "grad_norm": 0.7352173617171506,
      "learning_rate": 7.092521675232367e-07,
      "loss": 0.1032,
      "step": 28572
    },
    {
      "epoch": 0.8335667191784818,
      "grad_norm": 1.0507661539210595,
      "learning_rate": 7.090096390907842e-07,
      "loss": 0.1445,
      "step": 28573
    },
    {
      "epoch": 0.8335958924091253,
      "grad_norm": 1.0960354019228602,
      "learning_rate": 7.087671489672693e-07,
      "loss": 0.1124,
      "step": 28574
    },
    {
      "epoch": 0.8336250656397689,
      "grad_norm": 0.9175672214080708,
      "learning_rate": 7.085246971548549e-07,
      "loss": 0.1031,
      "step": 28575
    },
    {
      "epoch": 0.8336542388704125,
      "grad_norm": 0.8436657160071062,
      "learning_rate": 7.082822836557097e-07,
      "loss": 0.1119,
      "step": 28576
    },
    {
      "epoch": 0.8336834121010561,
      "grad_norm": 0.9998008421766619,
      "learning_rate": 7.080399084719957e-07,
      "loss": 0.1209,
      "step": 28577
    },
    {
      "epoch": 0.8337125853316997,
      "grad_norm": 0.8883943602055342,
      "learning_rate": 7.07797571605876e-07,
      "loss": 0.1448,
      "step": 28578
    },
    {
      "epoch": 0.8337417585623432,
      "grad_norm": 0.6888493874528057,
      "learning_rate": 7.075552730595159e-07,
      "loss": 0.124,
      "step": 28579
    },
    {
      "epoch": 0.8337709317929868,
      "grad_norm": 0.7837341038410314,
      "learning_rate": 7.073130128350775e-07,
      "loss": 0.1377,
      "step": 28580
    },
    {
      "epoch": 0.8338001050236303,
      "grad_norm": 0.928583318848546,
      "learning_rate": 7.07070790934723e-07,
      "loss": 0.1233,
      "step": 28581
    },
    {
      "epoch": 0.8338292782542739,
      "grad_norm": 0.9859291086138892,
      "learning_rate": 7.068286073606151e-07,
      "loss": 0.0833,
      "step": 28582
    },
    {
      "epoch": 0.8338584514849174,
      "grad_norm": 0.7622521249094621,
      "learning_rate": 7.065864621149182e-07,
      "loss": 0.1083,
      "step": 28583
    },
    {
      "epoch": 0.833887624715561,
      "grad_norm": 0.8917953768939532,
      "learning_rate": 7.063443551997923e-07,
      "loss": 0.1309,
      "step": 28584
    },
    {
      "epoch": 0.8339167979462045,
      "grad_norm": 0.7811004491707488,
      "learning_rate": 7.061022866173978e-07,
      "loss": 0.1166,
      "step": 28585
    },
    {
      "epoch": 0.8339459711768481,
      "grad_norm": 1.0898986487645377,
      "learning_rate": 7.058602563698979e-07,
      "loss": 0.1164,
      "step": 28586
    },
    {
      "epoch": 0.8339751444074917,
      "grad_norm": 0.9799969989463303,
      "learning_rate": 7.056182644594517e-07,
      "loss": 0.109,
      "step": 28587
    },
    {
      "epoch": 0.8340043176381352,
      "grad_norm": 0.8211650187716997,
      "learning_rate": 7.053763108882217e-07,
      "loss": 0.117,
      "step": 28588
    },
    {
      "epoch": 0.8340334908687788,
      "grad_norm": 1.0052000997715311,
      "learning_rate": 7.051343956583656e-07,
      "loss": 0.1026,
      "step": 28589
    },
    {
      "epoch": 0.8340626640994224,
      "grad_norm": 1.0879712078598416,
      "learning_rate": 7.048925187720451e-07,
      "loss": 0.1214,
      "step": 28590
    },
    {
      "epoch": 0.834091837330066,
      "grad_norm": 0.975316319835064,
      "learning_rate": 7.046506802314196e-07,
      "loss": 0.1092,
      "step": 28591
    },
    {
      "epoch": 0.8341210105607095,
      "grad_norm": 1.0447357396549664,
      "learning_rate": 7.044088800386456e-07,
      "loss": 0.1008,
      "step": 28592
    },
    {
      "epoch": 0.8341501837913531,
      "grad_norm": 0.8410676282452567,
      "learning_rate": 7.041671181958842e-07,
      "loss": 0.1031,
      "step": 28593
    },
    {
      "epoch": 0.8341793570219966,
      "grad_norm": 0.7683735529820956,
      "learning_rate": 7.039253947052943e-07,
      "loss": 0.137,
      "step": 28594
    },
    {
      "epoch": 0.8342085302526402,
      "grad_norm": 0.792190140443723,
      "learning_rate": 7.036837095690314e-07,
      "loss": 0.1138,
      "step": 28595
    },
    {
      "epoch": 0.8342377034832837,
      "grad_norm": 0.8358270075550457,
      "learning_rate": 7.034420627892563e-07,
      "loss": 0.1176,
      "step": 28596
    },
    {
      "epoch": 0.8342668767139273,
      "grad_norm": 0.8197322268176289,
      "learning_rate": 7.032004543681248e-07,
      "loss": 0.108,
      "step": 28597
    },
    {
      "epoch": 0.8342960499445708,
      "grad_norm": 1.1150473509834575,
      "learning_rate": 7.029588843077922e-07,
      "loss": 0.1358,
      "step": 28598
    },
    {
      "epoch": 0.8343252231752144,
      "grad_norm": 0.7481916598519731,
      "learning_rate": 7.027173526104175e-07,
      "loss": 0.1005,
      "step": 28599
    },
    {
      "epoch": 0.834354396405858,
      "grad_norm": 0.6620424226777286,
      "learning_rate": 7.024758592781577e-07,
      "loss": 0.0926,
      "step": 28600
    },
    {
      "epoch": 0.8343835696365015,
      "grad_norm": 0.7548615752952624,
      "learning_rate": 7.022344043131668e-07,
      "loss": 0.1049,
      "step": 28601
    },
    {
      "epoch": 0.8344127428671451,
      "grad_norm": 0.672330819651534,
      "learning_rate": 7.019929877176007e-07,
      "loss": 0.1168,
      "step": 28602
    },
    {
      "epoch": 0.8344419160977886,
      "grad_norm": 0.8648402334944325,
      "learning_rate": 7.017516094936161e-07,
      "loss": 0.0999,
      "step": 28603
    },
    {
      "epoch": 0.8344710893284323,
      "grad_norm": 0.8599387318696812,
      "learning_rate": 7.015102696433668e-07,
      "loss": 0.116,
      "step": 28604
    },
    {
      "epoch": 0.8345002625590758,
      "grad_norm": 0.866340663535028,
      "learning_rate": 7.01268968169006e-07,
      "loss": 0.1092,
      "step": 28605
    },
    {
      "epoch": 0.8345294357897194,
      "grad_norm": 0.9427175962869279,
      "learning_rate": 7.010277050726916e-07,
      "loss": 0.131,
      "step": 28606
    },
    {
      "epoch": 0.8345586090203629,
      "grad_norm": 0.9318603541137698,
      "learning_rate": 7.007864803565756e-07,
      "loss": 0.1418,
      "step": 28607
    },
    {
      "epoch": 0.8345877822510065,
      "grad_norm": 0.8435687070765879,
      "learning_rate": 7.005452940228103e-07,
      "loss": 0.1241,
      "step": 28608
    },
    {
      "epoch": 0.83461695548165,
      "grad_norm": 0.9562018194829276,
      "learning_rate": 7.003041460735516e-07,
      "loss": 0.1257,
      "step": 28609
    },
    {
      "epoch": 0.8346461287122936,
      "grad_norm": 0.9332906271173597,
      "learning_rate": 7.000630365109506e-07,
      "loss": 0.0974,
      "step": 28610
    },
    {
      "epoch": 0.8346753019429372,
      "grad_norm": 0.9706392694115391,
      "learning_rate": 6.998219653371597e-07,
      "loss": 0.1262,
      "step": 28611
    },
    {
      "epoch": 0.8347044751735807,
      "grad_norm": 0.7587883368411257,
      "learning_rate": 6.995809325543318e-07,
      "loss": 0.1283,
      "step": 28612
    },
    {
      "epoch": 0.8347336484042243,
      "grad_norm": 1.0124911720343897,
      "learning_rate": 6.993399381646198e-07,
      "loss": 0.1336,
      "step": 28613
    },
    {
      "epoch": 0.8347628216348678,
      "grad_norm": 0.990090424597686,
      "learning_rate": 6.990989821701738e-07,
      "loss": 0.0891,
      "step": 28614
    },
    {
      "epoch": 0.8347919948655114,
      "grad_norm": 0.8324856108831794,
      "learning_rate": 6.988580645731446e-07,
      "loss": 0.1154,
      "step": 28615
    },
    {
      "epoch": 0.8348211680961549,
      "grad_norm": 0.8441041372041478,
      "learning_rate": 6.986171853756851e-07,
      "loss": 0.1084,
      "step": 28616
    },
    {
      "epoch": 0.8348503413267986,
      "grad_norm": 0.9744669453409137,
      "learning_rate": 6.983763445799429e-07,
      "loss": 0.1214,
      "step": 28617
    },
    {
      "epoch": 0.8348795145574421,
      "grad_norm": 0.8003730853774177,
      "learning_rate": 6.981355421880715e-07,
      "loss": 0.0847,
      "step": 28618
    },
    {
      "epoch": 0.8349086877880857,
      "grad_norm": 0.919042214983713,
      "learning_rate": 6.978947782022177e-07,
      "loss": 0.1117,
      "step": 28619
    },
    {
      "epoch": 0.8349378610187292,
      "grad_norm": 1.0268314494193647,
      "learning_rate": 6.976540526245335e-07,
      "loss": 0.1355,
      "step": 28620
    },
    {
      "epoch": 0.8349670342493728,
      "grad_norm": 1.0784022321210935,
      "learning_rate": 6.974133654571668e-07,
      "loss": 0.1227,
      "step": 28621
    },
    {
      "epoch": 0.8349962074800164,
      "grad_norm": 0.8930931507470183,
      "learning_rate": 6.971727167022652e-07,
      "loss": 0.0935,
      "step": 28622
    },
    {
      "epoch": 0.8350253807106599,
      "grad_norm": 0.8160656435612694,
      "learning_rate": 6.969321063619788e-07,
      "loss": 0.1242,
      "step": 28623
    },
    {
      "epoch": 0.8350545539413035,
      "grad_norm": 0.6302078749308491,
      "learning_rate": 6.966915344384562e-07,
      "loss": 0.0843,
      "step": 28624
    },
    {
      "epoch": 0.835083727171947,
      "grad_norm": 1.0298737572591399,
      "learning_rate": 6.964510009338432e-07,
      "loss": 0.1219,
      "step": 28625
    },
    {
      "epoch": 0.8351129004025906,
      "grad_norm": 0.8240990607958188,
      "learning_rate": 6.962105058502894e-07,
      "loss": 0.1127,
      "step": 28626
    },
    {
      "epoch": 0.8351420736332341,
      "grad_norm": 0.7479982485384717,
      "learning_rate": 6.959700491899408e-07,
      "loss": 0.1204,
      "step": 28627
    },
    {
      "epoch": 0.8351712468638777,
      "grad_norm": 0.8899750430844123,
      "learning_rate": 6.957296309549432e-07,
      "loss": 0.0914,
      "step": 28628
    },
    {
      "epoch": 0.8352004200945212,
      "grad_norm": 0.659858287058075,
      "learning_rate": 6.954892511474437e-07,
      "loss": 0.114,
      "step": 28629
    },
    {
      "epoch": 0.8352295933251648,
      "grad_norm": 0.9106006986190525,
      "learning_rate": 6.952489097695897e-07,
      "loss": 0.1175,
      "step": 28630
    },
    {
      "epoch": 0.8352587665558084,
      "grad_norm": 0.9145590676697298,
      "learning_rate": 6.950086068235262e-07,
      "loss": 0.1229,
      "step": 28631
    },
    {
      "epoch": 0.835287939786452,
      "grad_norm": 0.6906119242767612,
      "learning_rate": 6.947683423113966e-07,
      "loss": 0.0883,
      "step": 28632
    },
    {
      "epoch": 0.8353171130170955,
      "grad_norm": 0.8851635895069407,
      "learning_rate": 6.94528116235349e-07,
      "loss": 0.1021,
      "step": 28633
    },
    {
      "epoch": 0.8353462862477391,
      "grad_norm": 0.8354837908061795,
      "learning_rate": 6.942879285975263e-07,
      "loss": 0.0986,
      "step": 28634
    },
    {
      "epoch": 0.8353754594783827,
      "grad_norm": 0.8622551654046755,
      "learning_rate": 6.940477794000711e-07,
      "loss": 0.1011,
      "step": 28635
    },
    {
      "epoch": 0.8354046327090262,
      "grad_norm": 1.2199028664512792,
      "learning_rate": 6.938076686451312e-07,
      "loss": 0.1133,
      "step": 28636
    },
    {
      "epoch": 0.8354338059396698,
      "grad_norm": 0.7976686518802866,
      "learning_rate": 6.935675963348487e-07,
      "loss": 0.1042,
      "step": 28637
    },
    {
      "epoch": 0.8354629791703133,
      "grad_norm": 0.8755527093589413,
      "learning_rate": 6.933275624713659e-07,
      "loss": 0.116,
      "step": 28638
    },
    {
      "epoch": 0.8354921524009569,
      "grad_norm": 0.8567428163946965,
      "learning_rate": 6.930875670568271e-07,
      "loss": 0.1204,
      "step": 28639
    },
    {
      "epoch": 0.8355213256316004,
      "grad_norm": 0.7874076456759808,
      "learning_rate": 6.92847610093374e-07,
      "loss": 0.1327,
      "step": 28640
    },
    {
      "epoch": 0.835550498862244,
      "grad_norm": 0.8660864449366423,
      "learning_rate": 6.926076915831498e-07,
      "loss": 0.1299,
      "step": 28641
    },
    {
      "epoch": 0.8355796720928875,
      "grad_norm": 0.8542416222610314,
      "learning_rate": 6.923678115282945e-07,
      "loss": 0.1121,
      "step": 28642
    },
    {
      "epoch": 0.8356088453235311,
      "grad_norm": 0.9936849820137602,
      "learning_rate": 6.921279699309525e-07,
      "loss": 0.1304,
      "step": 28643
    },
    {
      "epoch": 0.8356380185541747,
      "grad_norm": 0.8727545532687586,
      "learning_rate": 6.918881667932637e-07,
      "loss": 0.0936,
      "step": 28644
    },
    {
      "epoch": 0.8356671917848183,
      "grad_norm": 0.7728314756224676,
      "learning_rate": 6.916484021173681e-07,
      "loss": 0.1354,
      "step": 28645
    },
    {
      "epoch": 0.8356963650154619,
      "grad_norm": 0.8778921020468223,
      "learning_rate": 6.914086759054062e-07,
      "loss": 0.1112,
      "step": 28646
    },
    {
      "epoch": 0.8357255382461054,
      "grad_norm": 0.6652813785176614,
      "learning_rate": 6.911689881595208e-07,
      "loss": 0.0941,
      "step": 28647
    },
    {
      "epoch": 0.835754711476749,
      "grad_norm": 0.7999509435973636,
      "learning_rate": 6.9092933888185e-07,
      "loss": 0.1247,
      "step": 28648
    },
    {
      "epoch": 0.8357838847073925,
      "grad_norm": 0.8654250706204483,
      "learning_rate": 6.906897280745322e-07,
      "loss": 0.1163,
      "step": 28649
    },
    {
      "epoch": 0.8358130579380361,
      "grad_norm": 0.9017734917226445,
      "learning_rate": 6.904501557397092e-07,
      "loss": 0.0887,
      "step": 28650
    },
    {
      "epoch": 0.8358422311686796,
      "grad_norm": 0.7807807142597256,
      "learning_rate": 6.902106218795185e-07,
      "loss": 0.1143,
      "step": 28651
    },
    {
      "epoch": 0.8358714043993232,
      "grad_norm": 0.9637840937523848,
      "learning_rate": 6.899711264960957e-07,
      "loss": 0.1182,
      "step": 28652
    },
    {
      "epoch": 0.8359005776299667,
      "grad_norm": 0.8176700269805882,
      "learning_rate": 6.897316695915846e-07,
      "loss": 0.0901,
      "step": 28653
    },
    {
      "epoch": 0.8359297508606103,
      "grad_norm": 0.9653718107085448,
      "learning_rate": 6.894922511681196e-07,
      "loss": 0.1326,
      "step": 28654
    },
    {
      "epoch": 0.8359589240912538,
      "grad_norm": 0.7124793179035528,
      "learning_rate": 6.892528712278385e-07,
      "loss": 0.1109,
      "step": 28655
    },
    {
      "epoch": 0.8359880973218974,
      "grad_norm": 1.3319571241127004,
      "learning_rate": 6.89013529772879e-07,
      "loss": 0.1338,
      "step": 28656
    },
    {
      "epoch": 0.8360172705525409,
      "grad_norm": 0.9256128810957058,
      "learning_rate": 6.887742268053782e-07,
      "loss": 0.1128,
      "step": 28657
    },
    {
      "epoch": 0.8360464437831846,
      "grad_norm": 0.7285361643222902,
      "learning_rate": 6.885349623274706e-07,
      "loss": 0.1223,
      "step": 28658
    },
    {
      "epoch": 0.8360756170138282,
      "grad_norm": 1.2547232560492405,
      "learning_rate": 6.882957363412934e-07,
      "loss": 0.1158,
      "step": 28659
    },
    {
      "epoch": 0.8361047902444717,
      "grad_norm": 0.950298002722007,
      "learning_rate": 6.880565488489837e-07,
      "loss": 0.1141,
      "step": 28660
    },
    {
      "epoch": 0.8361339634751153,
      "grad_norm": 0.7211726500922024,
      "learning_rate": 6.87817399852676e-07,
      "loss": 0.1033,
      "step": 28661
    },
    {
      "epoch": 0.8361631367057588,
      "grad_norm": 0.9019731638988802,
      "learning_rate": 6.875782893545042e-07,
      "loss": 0.1038,
      "step": 28662
    },
    {
      "epoch": 0.8361923099364024,
      "grad_norm": 0.9445192804347247,
      "learning_rate": 6.873392173566051e-07,
      "loss": 0.126,
      "step": 28663
    },
    {
      "epoch": 0.8362214831670459,
      "grad_norm": 1.0122184251294524,
      "learning_rate": 6.871001838611102e-07,
      "loss": 0.1162,
      "step": 28664
    },
    {
      "epoch": 0.8362506563976895,
      "grad_norm": 0.777162636737857,
      "learning_rate": 6.86861188870156e-07,
      "loss": 0.1099,
      "step": 28665
    },
    {
      "epoch": 0.836279829628333,
      "grad_norm": 0.8608607181598449,
      "learning_rate": 6.866222323858762e-07,
      "loss": 0.1315,
      "step": 28666
    },
    {
      "epoch": 0.8363090028589766,
      "grad_norm": 0.8781240607716954,
      "learning_rate": 6.863833144104037e-07,
      "loss": 0.1152,
      "step": 28667
    },
    {
      "epoch": 0.8363381760896201,
      "grad_norm": 0.6626272101503468,
      "learning_rate": 6.861444349458702e-07,
      "loss": 0.0938,
      "step": 28668
    },
    {
      "epoch": 0.8363673493202637,
      "grad_norm": 0.7128834338638651,
      "learning_rate": 6.859055939944098e-07,
      "loss": 0.1088,
      "step": 28669
    },
    {
      "epoch": 0.8363965225509072,
      "grad_norm": 0.6701056151114528,
      "learning_rate": 6.856667915581538e-07,
      "loss": 0.1087,
      "step": 28670
    },
    {
      "epoch": 0.8364256957815509,
      "grad_norm": 0.7778893549588168,
      "learning_rate": 6.854280276392361e-07,
      "loss": 0.1027,
      "step": 28671
    },
    {
      "epoch": 0.8364548690121945,
      "grad_norm": 0.9109236468173831,
      "learning_rate": 6.851893022397855e-07,
      "loss": 0.1345,
      "step": 28672
    },
    {
      "epoch": 0.836484042242838,
      "grad_norm": 0.9414579629667669,
      "learning_rate": 6.849506153619356e-07,
      "loss": 0.1168,
      "step": 28673
    },
    {
      "epoch": 0.8365132154734816,
      "grad_norm": 0.9713870553892409,
      "learning_rate": 6.847119670078173e-07,
      "loss": 0.1127,
      "step": 28674
    },
    {
      "epoch": 0.8365423887041251,
      "grad_norm": 0.8368938544227067,
      "learning_rate": 6.844733571795587e-07,
      "loss": 0.1246,
      "step": 28675
    },
    {
      "epoch": 0.8365715619347687,
      "grad_norm": 0.7175759144395739,
      "learning_rate": 6.842347858792919e-07,
      "loss": 0.1108,
      "step": 28676
    },
    {
      "epoch": 0.8366007351654122,
      "grad_norm": 0.9315659849847924,
      "learning_rate": 6.839962531091482e-07,
      "loss": 0.1034,
      "step": 28677
    },
    {
      "epoch": 0.8366299083960558,
      "grad_norm": 0.9265283512957927,
      "learning_rate": 6.837577588712551e-07,
      "loss": 0.1021,
      "step": 28678
    },
    {
      "epoch": 0.8366590816266993,
      "grad_norm": 0.7840001819232951,
      "learning_rate": 6.835193031677418e-07,
      "loss": 0.1124,
      "step": 28679
    },
    {
      "epoch": 0.8366882548573429,
      "grad_norm": 0.7422903185418308,
      "learning_rate": 6.832808860007384e-07,
      "loss": 0.0994,
      "step": 28680
    },
    {
      "epoch": 0.8367174280879864,
      "grad_norm": 0.8006926436386917,
      "learning_rate": 6.830425073723728e-07,
      "loss": 0.1197,
      "step": 28681
    },
    {
      "epoch": 0.83674660131863,
      "grad_norm": 0.7505425422294162,
      "learning_rate": 6.828041672847707e-07,
      "loss": 0.0911,
      "step": 28682
    },
    {
      "epoch": 0.8367757745492735,
      "grad_norm": 0.7827003981639024,
      "learning_rate": 6.825658657400653e-07,
      "loss": 0.1224,
      "step": 28683
    },
    {
      "epoch": 0.8368049477799171,
      "grad_norm": 0.806682899445697,
      "learning_rate": 6.823276027403808e-07,
      "loss": 0.1044,
      "step": 28684
    },
    {
      "epoch": 0.8368341210105608,
      "grad_norm": 1.0913688915087802,
      "learning_rate": 6.820893782878435e-07,
      "loss": 0.119,
      "step": 28685
    },
    {
      "epoch": 0.8368632942412043,
      "grad_norm": 0.7646187400611799,
      "learning_rate": 6.818511923845828e-07,
      "loss": 0.0914,
      "step": 28686
    },
    {
      "epoch": 0.8368924674718479,
      "grad_norm": 0.9484808691312467,
      "learning_rate": 6.816130450327235e-07,
      "loss": 0.1129,
      "step": 28687
    },
    {
      "epoch": 0.8369216407024914,
      "grad_norm": 1.0347871767578014,
      "learning_rate": 6.813749362343914e-07,
      "loss": 0.1108,
      "step": 28688
    },
    {
      "epoch": 0.836950813933135,
      "grad_norm": 0.9270575124098639,
      "learning_rate": 6.811368659917128e-07,
      "loss": 0.1118,
      "step": 28689
    },
    {
      "epoch": 0.8369799871637785,
      "grad_norm": 0.9329141279665811,
      "learning_rate": 6.808988343068146e-07,
      "loss": 0.132,
      "step": 28690
    },
    {
      "epoch": 0.8370091603944221,
      "grad_norm": 1.13431048449298,
      "learning_rate": 6.8066084118182e-07,
      "loss": 0.1086,
      "step": 28691
    },
    {
      "epoch": 0.8370383336250656,
      "grad_norm": 0.9504017432662675,
      "learning_rate": 6.804228866188534e-07,
      "loss": 0.0996,
      "step": 28692
    },
    {
      "epoch": 0.8370675068557092,
      "grad_norm": 0.7452738299748206,
      "learning_rate": 6.801849706200414e-07,
      "loss": 0.103,
      "step": 28693
    },
    {
      "epoch": 0.8370966800863527,
      "grad_norm": 0.7461686281640708,
      "learning_rate": 6.799470931875051e-07,
      "loss": 0.1085,
      "step": 28694
    },
    {
      "epoch": 0.8371258533169963,
      "grad_norm": 0.8425623946505637,
      "learning_rate": 6.797092543233719e-07,
      "loss": 0.1244,
      "step": 28695
    },
    {
      "epoch": 0.8371550265476398,
      "grad_norm": 0.9280261705744823,
      "learning_rate": 6.794714540297615e-07,
      "loss": 0.1075,
      "step": 28696
    },
    {
      "epoch": 0.8371841997782834,
      "grad_norm": 0.853209451411559,
      "learning_rate": 6.792336923087994e-07,
      "loss": 0.1131,
      "step": 28697
    },
    {
      "epoch": 0.837213373008927,
      "grad_norm": 0.8637432778009454,
      "learning_rate": 6.789959691626069e-07,
      "loss": 0.1373,
      "step": 28698
    },
    {
      "epoch": 0.8372425462395706,
      "grad_norm": 0.8197077682181532,
      "learning_rate": 6.787582845933078e-07,
      "loss": 0.101,
      "step": 28699
    },
    {
      "epoch": 0.8372717194702142,
      "grad_norm": 1.0369316022870985,
      "learning_rate": 6.785206386030219e-07,
      "loss": 0.0987,
      "step": 28700
    },
    {
      "epoch": 0.8373008927008577,
      "grad_norm": 0.7420193447575757,
      "learning_rate": 6.782830311938731e-07,
      "loss": 0.1012,
      "step": 28701
    },
    {
      "epoch": 0.8373300659315013,
      "grad_norm": 1.0916979403699227,
      "learning_rate": 6.78045462367981e-07,
      "loss": 0.1111,
      "step": 28702
    },
    {
      "epoch": 0.8373592391621448,
      "grad_norm": 0.8667817266571434,
      "learning_rate": 6.778079321274683e-07,
      "loss": 0.1054,
      "step": 28703
    },
    {
      "epoch": 0.8373884123927884,
      "grad_norm": 0.8127985451597882,
      "learning_rate": 6.775704404744543e-07,
      "loss": 0.0948,
      "step": 28704
    },
    {
      "epoch": 0.8374175856234319,
      "grad_norm": 0.945295115772799,
      "learning_rate": 6.77332987411059e-07,
      "loss": 0.1382,
      "step": 28705
    },
    {
      "epoch": 0.8374467588540755,
      "grad_norm": 0.9591986280141048,
      "learning_rate": 6.770955729394024e-07,
      "loss": 0.1178,
      "step": 28706
    },
    {
      "epoch": 0.837475932084719,
      "grad_norm": 0.9707874551440682,
      "learning_rate": 6.768581970616056e-07,
      "loss": 0.0897,
      "step": 28707
    },
    {
      "epoch": 0.8375051053153626,
      "grad_norm": 0.5864343240786279,
      "learning_rate": 6.766208597797874e-07,
      "loss": 0.1081,
      "step": 28708
    },
    {
      "epoch": 0.8375342785460062,
      "grad_norm": 0.929317101844393,
      "learning_rate": 6.763835610960645e-07,
      "loss": 0.1371,
      "step": 28709
    },
    {
      "epoch": 0.8375634517766497,
      "grad_norm": 0.7194146484129015,
      "learning_rate": 6.76146301012558e-07,
      "loss": 0.1138,
      "step": 28710
    },
    {
      "epoch": 0.8375926250072933,
      "grad_norm": 0.8593105085498757,
      "learning_rate": 6.759090795313856e-07,
      "loss": 0.1377,
      "step": 28711
    },
    {
      "epoch": 0.8376217982379369,
      "grad_norm": 0.8303128010172225,
      "learning_rate": 6.756718966546622e-07,
      "loss": 0.1122,
      "step": 28712
    },
    {
      "epoch": 0.8376509714685805,
      "grad_norm": 0.9188633021514161,
      "learning_rate": 6.754347523845101e-07,
      "loss": 0.1058,
      "step": 28713
    },
    {
      "epoch": 0.837680144699224,
      "grad_norm": 0.8497070208353793,
      "learning_rate": 6.751976467230442e-07,
      "loss": 0.1181,
      "step": 28714
    },
    {
      "epoch": 0.8377093179298676,
      "grad_norm": 0.9130469832679831,
      "learning_rate": 6.749605796723802e-07,
      "loss": 0.1136,
      "step": 28715
    },
    {
      "epoch": 0.8377384911605111,
      "grad_norm": 0.8934997775976855,
      "learning_rate": 6.747235512346368e-07,
      "loss": 0.1219,
      "step": 28716
    },
    {
      "epoch": 0.8377676643911547,
      "grad_norm": 0.8038434375759682,
      "learning_rate": 6.744865614119289e-07,
      "loss": 0.1038,
      "step": 28717
    },
    {
      "epoch": 0.8377968376217982,
      "grad_norm": 0.8213997320272479,
      "learning_rate": 6.742496102063711e-07,
      "loss": 0.1127,
      "step": 28718
    },
    {
      "epoch": 0.8378260108524418,
      "grad_norm": 0.7663259946596382,
      "learning_rate": 6.740126976200806e-07,
      "loss": 0.1193,
      "step": 28719
    },
    {
      "epoch": 0.8378551840830853,
      "grad_norm": 0.7842153966592031,
      "learning_rate": 6.737758236551728e-07,
      "loss": 0.0954,
      "step": 28720
    },
    {
      "epoch": 0.8378843573137289,
      "grad_norm": 0.9729076057399066,
      "learning_rate": 6.735389883137616e-07,
      "loss": 0.0939,
      "step": 28721
    },
    {
      "epoch": 0.8379135305443725,
      "grad_norm": 0.909854706876548,
      "learning_rate": 6.73302191597961e-07,
      "loss": 0.1236,
      "step": 28722
    },
    {
      "epoch": 0.837942703775016,
      "grad_norm": 0.8856700792705716,
      "learning_rate": 6.730654335098857e-07,
      "loss": 0.1431,
      "step": 28723
    },
    {
      "epoch": 0.8379718770056596,
      "grad_norm": 1.1578674171675276,
      "learning_rate": 6.728287140516487e-07,
      "loss": 0.1113,
      "step": 28724
    },
    {
      "epoch": 0.8380010502363031,
      "grad_norm": 0.9210415753413871,
      "learning_rate": 6.725920332253654e-07,
      "loss": 0.1201,
      "step": 28725
    },
    {
      "epoch": 0.8380302234669468,
      "grad_norm": 0.8147646490799825,
      "learning_rate": 6.72355391033146e-07,
      "loss": 0.1157,
      "step": 28726
    },
    {
      "epoch": 0.8380593966975903,
      "grad_norm": 0.8097996550992647,
      "learning_rate": 6.721187874771057e-07,
      "loss": 0.1085,
      "step": 28727
    },
    {
      "epoch": 0.8380885699282339,
      "grad_norm": 0.7883422579529276,
      "learning_rate": 6.718822225593547e-07,
      "loss": 0.1182,
      "step": 28728
    },
    {
      "epoch": 0.8381177431588774,
      "grad_norm": 0.8299692523240949,
      "learning_rate": 6.716456962820067e-07,
      "loss": 0.1399,
      "step": 28729
    },
    {
      "epoch": 0.838146916389521,
      "grad_norm": 0.6813307692782673,
      "learning_rate": 6.714092086471718e-07,
      "loss": 0.1046,
      "step": 28730
    },
    {
      "epoch": 0.8381760896201645,
      "grad_norm": 0.9338365931502542,
      "learning_rate": 6.711727596569639e-07,
      "loss": 0.1224,
      "step": 28731
    },
    {
      "epoch": 0.8382052628508081,
      "grad_norm": 0.8818751665529719,
      "learning_rate": 6.709363493134902e-07,
      "loss": 0.1017,
      "step": 28732
    },
    {
      "epoch": 0.8382344360814517,
      "grad_norm": 0.7867298737998444,
      "learning_rate": 6.706999776188649e-07,
      "loss": 0.1068,
      "step": 28733
    },
    {
      "epoch": 0.8382636093120952,
      "grad_norm": 0.8124559165017454,
      "learning_rate": 6.704636445751966e-07,
      "loss": 0.1138,
      "step": 28734
    },
    {
      "epoch": 0.8382927825427388,
      "grad_norm": 0.8695400580267945,
      "learning_rate": 6.702273501845946e-07,
      "loss": 0.103,
      "step": 28735
    },
    {
      "epoch": 0.8383219557733823,
      "grad_norm": 0.9273491181602084,
      "learning_rate": 6.699910944491689e-07,
      "loss": 0.1379,
      "step": 28736
    },
    {
      "epoch": 0.8383511290040259,
      "grad_norm": 0.7142323570772079,
      "learning_rate": 6.6975487737103e-07,
      "loss": 0.1187,
      "step": 28737
    },
    {
      "epoch": 0.8383803022346694,
      "grad_norm": 0.768054204273176,
      "learning_rate": 6.695186989522856e-07,
      "loss": 0.1004,
      "step": 28738
    },
    {
      "epoch": 0.8384094754653131,
      "grad_norm": 0.9379413389553742,
      "learning_rate": 6.692825591950441e-07,
      "loss": 0.1177,
      "step": 28739
    },
    {
      "epoch": 0.8384386486959566,
      "grad_norm": 0.8190572260259622,
      "learning_rate": 6.69046458101415e-07,
      "loss": 0.118,
      "step": 28740
    },
    {
      "epoch": 0.8384678219266002,
      "grad_norm": 0.8160379824822651,
      "learning_rate": 6.688103956735048e-07,
      "loss": 0.1163,
      "step": 28741
    },
    {
      "epoch": 0.8384969951572437,
      "grad_norm": 1.0185656110838428,
      "learning_rate": 6.685743719134197e-07,
      "loss": 0.1283,
      "step": 28742
    },
    {
      "epoch": 0.8385261683878873,
      "grad_norm": 1.1103838702868496,
      "learning_rate": 6.683383868232706e-07,
      "loss": 0.0913,
      "step": 28743
    },
    {
      "epoch": 0.8385553416185308,
      "grad_norm": 0.7070828225170954,
      "learning_rate": 6.681024404051623e-07,
      "loss": 0.0981,
      "step": 28744
    },
    {
      "epoch": 0.8385845148491744,
      "grad_norm": 0.9366319569932889,
      "learning_rate": 6.678665326612005e-07,
      "loss": 0.1358,
      "step": 28745
    },
    {
      "epoch": 0.838613688079818,
      "grad_norm": 0.9124516318112963,
      "learning_rate": 6.676306635934926e-07,
      "loss": 0.1194,
      "step": 28746
    },
    {
      "epoch": 0.8386428613104615,
      "grad_norm": 0.8135006791237972,
      "learning_rate": 6.673948332041446e-07,
      "loss": 0.107,
      "step": 28747
    },
    {
      "epoch": 0.8386720345411051,
      "grad_norm": 0.6885575632321572,
      "learning_rate": 6.6715904149526e-07,
      "loss": 0.1078,
      "step": 28748
    },
    {
      "epoch": 0.8387012077717486,
      "grad_norm": 0.7077370536667261,
      "learning_rate": 6.669232884689448e-07,
      "loss": 0.0977,
      "step": 28749
    },
    {
      "epoch": 0.8387303810023922,
      "grad_norm": 0.8249751267352112,
      "learning_rate": 6.666875741273055e-07,
      "loss": 0.0833,
      "step": 28750
    },
    {
      "epoch": 0.8387595542330357,
      "grad_norm": 0.8238758026084099,
      "learning_rate": 6.66451898472445e-07,
      "loss": 0.1151,
      "step": 28751
    },
    {
      "epoch": 0.8387887274636793,
      "grad_norm": 0.7147478588553577,
      "learning_rate": 6.662162615064666e-07,
      "loss": 0.093,
      "step": 28752
    },
    {
      "epoch": 0.8388179006943229,
      "grad_norm": 0.7429986693398549,
      "learning_rate": 6.659806632314753e-07,
      "loss": 0.0861,
      "step": 28753
    },
    {
      "epoch": 0.8388470739249665,
      "grad_norm": 0.7751296716191228,
      "learning_rate": 6.657451036495738e-07,
      "loss": 0.1197,
      "step": 28754
    },
    {
      "epoch": 0.83887624715561,
      "grad_norm": 0.7673811335053209,
      "learning_rate": 6.65509582762866e-07,
      "loss": 0.1165,
      "step": 28755
    },
    {
      "epoch": 0.8389054203862536,
      "grad_norm": 1.0133916581262266,
      "learning_rate": 6.652741005734525e-07,
      "loss": 0.1292,
      "step": 28756
    },
    {
      "epoch": 0.8389345936168972,
      "grad_norm": 0.8180041015398647,
      "learning_rate": 6.650386570834383e-07,
      "loss": 0.0914,
      "step": 28757
    },
    {
      "epoch": 0.8389637668475407,
      "grad_norm": 0.7476366684534999,
      "learning_rate": 6.648032522949232e-07,
      "loss": 0.1249,
      "step": 28758
    },
    {
      "epoch": 0.8389929400781843,
      "grad_norm": 0.8546995788144813,
      "learning_rate": 6.645678862100114e-07,
      "loss": 0.1323,
      "step": 28759
    },
    {
      "epoch": 0.8390221133088278,
      "grad_norm": 0.7531425851554054,
      "learning_rate": 6.643325588308008e-07,
      "loss": 0.1014,
      "step": 28760
    },
    {
      "epoch": 0.8390512865394714,
      "grad_norm": 0.9611365207426606,
      "learning_rate": 6.64097270159395e-07,
      "loss": 0.097,
      "step": 28761
    },
    {
      "epoch": 0.8390804597701149,
      "grad_norm": 0.6552977587311081,
      "learning_rate": 6.638620201978929e-07,
      "loss": 0.1244,
      "step": 28762
    },
    {
      "epoch": 0.8391096330007585,
      "grad_norm": 0.7306251649323,
      "learning_rate": 6.636268089483971e-07,
      "loss": 0.1107,
      "step": 28763
    },
    {
      "epoch": 0.839138806231402,
      "grad_norm": 0.8586736099696834,
      "learning_rate": 6.633916364130056e-07,
      "loss": 0.1058,
      "step": 28764
    },
    {
      "epoch": 0.8391679794620456,
      "grad_norm": 1.0360708242033867,
      "learning_rate": 6.631565025938169e-07,
      "loss": 0.1135,
      "step": 28765
    },
    {
      "epoch": 0.8391971526926892,
      "grad_norm": 0.948705596794399,
      "learning_rate": 6.629214074929319e-07,
      "loss": 0.1187,
      "step": 28766
    },
    {
      "epoch": 0.8392263259233328,
      "grad_norm": 0.8846009342703027,
      "learning_rate": 6.626863511124504e-07,
      "loss": 0.1144,
      "step": 28767
    },
    {
      "epoch": 0.8392554991539763,
      "grad_norm": 0.8417190637982709,
      "learning_rate": 6.624513334544697e-07,
      "loss": 0.1015,
      "step": 28768
    },
    {
      "epoch": 0.8392846723846199,
      "grad_norm": 0.7031646678553474,
      "learning_rate": 6.622163545210875e-07,
      "loss": 0.1209,
      "step": 28769
    },
    {
      "epoch": 0.8393138456152635,
      "grad_norm": 0.8554940765743669,
      "learning_rate": 6.619814143144026e-07,
      "loss": 0.1413,
      "step": 28770
    },
    {
      "epoch": 0.839343018845907,
      "grad_norm": 0.8921323870672512,
      "learning_rate": 6.61746512836512e-07,
      "loss": 0.0912,
      "step": 28771
    },
    {
      "epoch": 0.8393721920765506,
      "grad_norm": 0.7582561652777967,
      "learning_rate": 6.615116500895113e-07,
      "loss": 0.1007,
      "step": 28772
    },
    {
      "epoch": 0.8394013653071941,
      "grad_norm": 0.9667372846011172,
      "learning_rate": 6.612768260755004e-07,
      "loss": 0.1223,
      "step": 28773
    },
    {
      "epoch": 0.8394305385378377,
      "grad_norm": 0.8493696642065195,
      "learning_rate": 6.610420407965745e-07,
      "loss": 0.0737,
      "step": 28774
    },
    {
      "epoch": 0.8394597117684812,
      "grad_norm": 0.9896193161742983,
      "learning_rate": 6.608072942548288e-07,
      "loss": 0.1174,
      "step": 28775
    },
    {
      "epoch": 0.8394888849991248,
      "grad_norm": 0.8209049064261055,
      "learning_rate": 6.605725864523604e-07,
      "loss": 0.1345,
      "step": 28776
    },
    {
      "epoch": 0.8395180582297683,
      "grad_norm": 0.8505099019023185,
      "learning_rate": 6.603379173912644e-07,
      "loss": 0.1197,
      "step": 28777
    },
    {
      "epoch": 0.8395472314604119,
      "grad_norm": 0.8435687476471607,
      "learning_rate": 6.601032870736341e-07,
      "loss": 0.095,
      "step": 28778
    },
    {
      "epoch": 0.8395764046910554,
      "grad_norm": 0.8049885646169743,
      "learning_rate": 6.598686955015654e-07,
      "loss": 0.1135,
      "step": 28779
    },
    {
      "epoch": 0.8396055779216991,
      "grad_norm": 0.8305185722291034,
      "learning_rate": 6.596341426771546e-07,
      "loss": 0.1203,
      "step": 28780
    },
    {
      "epoch": 0.8396347511523427,
      "grad_norm": 1.6308803784404653,
      "learning_rate": 6.593996286024934e-07,
      "loss": 0.1166,
      "step": 28781
    },
    {
      "epoch": 0.8396639243829862,
      "grad_norm": 0.8362606748811074,
      "learning_rate": 6.591651532796755e-07,
      "loss": 0.1029,
      "step": 28782
    },
    {
      "epoch": 0.8396930976136298,
      "grad_norm": 0.8043625644606235,
      "learning_rate": 6.589307167107962e-07,
      "loss": 0.1072,
      "step": 28783
    },
    {
      "epoch": 0.8397222708442733,
      "grad_norm": 0.7346860174215263,
      "learning_rate": 6.586963188979456e-07,
      "loss": 0.1369,
      "step": 28784
    },
    {
      "epoch": 0.8397514440749169,
      "grad_norm": 0.8034760711491574,
      "learning_rate": 6.584619598432191e-07,
      "loss": 0.1306,
      "step": 28785
    },
    {
      "epoch": 0.8397806173055604,
      "grad_norm": 0.9681865502098288,
      "learning_rate": 6.58227639548707e-07,
      "loss": 0.1322,
      "step": 28786
    },
    {
      "epoch": 0.839809790536204,
      "grad_norm": 0.8605951304570972,
      "learning_rate": 6.579933580165027e-07,
      "loss": 0.1088,
      "step": 28787
    },
    {
      "epoch": 0.8398389637668475,
      "grad_norm": 0.9754889755454179,
      "learning_rate": 6.577591152486972e-07,
      "loss": 0.1163,
      "step": 28788
    },
    {
      "epoch": 0.8398681369974911,
      "grad_norm": 0.9073355830069386,
      "learning_rate": 6.575249112473808e-07,
      "loss": 0.1017,
      "step": 28789
    },
    {
      "epoch": 0.8398973102281346,
      "grad_norm": 0.7126419293482791,
      "learning_rate": 6.572907460146454e-07,
      "loss": 0.1243,
      "step": 28790
    },
    {
      "epoch": 0.8399264834587782,
      "grad_norm": 0.6861012492374159,
      "learning_rate": 6.570566195525829e-07,
      "loss": 0.1029,
      "step": 28791
    },
    {
      "epoch": 0.8399556566894217,
      "grad_norm": 0.9680994117995753,
      "learning_rate": 6.568225318632804e-07,
      "loss": 0.1054,
      "step": 28792
    },
    {
      "epoch": 0.8399848299200654,
      "grad_norm": 0.7902755960454412,
      "learning_rate": 6.565884829488312e-07,
      "loss": 0.1102,
      "step": 28793
    },
    {
      "epoch": 0.840014003150709,
      "grad_norm": 0.7962644914448704,
      "learning_rate": 6.56354472811323e-07,
      "loss": 0.119,
      "step": 28794
    },
    {
      "epoch": 0.8400431763813525,
      "grad_norm": 0.9050618458773342,
      "learning_rate": 6.561205014528443e-07,
      "loss": 0.0997,
      "step": 28795
    },
    {
      "epoch": 0.8400723496119961,
      "grad_norm": 0.7665868849262667,
      "learning_rate": 6.558865688754845e-07,
      "loss": 0.0844,
      "step": 28796
    },
    {
      "epoch": 0.8401015228426396,
      "grad_norm": 0.822595988242091,
      "learning_rate": 6.556526750813336e-07,
      "loss": 0.1064,
      "step": 28797
    },
    {
      "epoch": 0.8401306960732832,
      "grad_norm": 0.8578890853877902,
      "learning_rate": 6.554188200724782e-07,
      "loss": 0.1158,
      "step": 28798
    },
    {
      "epoch": 0.8401598693039267,
      "grad_norm": 0.9404242245978279,
      "learning_rate": 6.551850038510054e-07,
      "loss": 0.114,
      "step": 28799
    },
    {
      "epoch": 0.8401890425345703,
      "grad_norm": 0.8845685891665658,
      "learning_rate": 6.54951226419005e-07,
      "loss": 0.1284,
      "step": 28800
    },
    {
      "epoch": 0.8402182157652138,
      "grad_norm": 0.8490759394963238,
      "learning_rate": 6.547174877785628e-07,
      "loss": 0.1224,
      "step": 28801
    },
    {
      "epoch": 0.8402473889958574,
      "grad_norm": 0.8678591029861535,
      "learning_rate": 6.54483787931764e-07,
      "loss": 0.1058,
      "step": 28802
    },
    {
      "epoch": 0.8402765622265009,
      "grad_norm": 0.9107735500306756,
      "learning_rate": 6.542501268806978e-07,
      "loss": 0.1079,
      "step": 28803
    },
    {
      "epoch": 0.8403057354571445,
      "grad_norm": 0.7782338705327391,
      "learning_rate": 6.540165046274493e-07,
      "loss": 0.1166,
      "step": 28804
    },
    {
      "epoch": 0.840334908687788,
      "grad_norm": 0.7252490773874591,
      "learning_rate": 6.537829211741032e-07,
      "loss": 0.1214,
      "step": 28805
    },
    {
      "epoch": 0.8403640819184316,
      "grad_norm": 0.8941322386607158,
      "learning_rate": 6.535493765227463e-07,
      "loss": 0.1138,
      "step": 28806
    },
    {
      "epoch": 0.8403932551490753,
      "grad_norm": 0.8353832920115603,
      "learning_rate": 6.533158706754633e-07,
      "loss": 0.1018,
      "step": 28807
    },
    {
      "epoch": 0.8404224283797188,
      "grad_norm": 0.7030860241272633,
      "learning_rate": 6.530824036343375e-07,
      "loss": 0.1051,
      "step": 28808
    },
    {
      "epoch": 0.8404516016103624,
      "grad_norm": 0.8681428224464733,
      "learning_rate": 6.528489754014545e-07,
      "loss": 0.0976,
      "step": 28809
    },
    {
      "epoch": 0.8404807748410059,
      "grad_norm": 0.9715518332274773,
      "learning_rate": 6.526155859788985e-07,
      "loss": 0.1011,
      "step": 28810
    },
    {
      "epoch": 0.8405099480716495,
      "grad_norm": 0.7589110212252026,
      "learning_rate": 6.523822353687531e-07,
      "loss": 0.1148,
      "step": 28811
    },
    {
      "epoch": 0.840539121302293,
      "grad_norm": 0.8907199418658908,
      "learning_rate": 6.521489235731005e-07,
      "loss": 0.1172,
      "step": 28812
    },
    {
      "epoch": 0.8405682945329366,
      "grad_norm": 0.8646787870284771,
      "learning_rate": 6.519156505940249e-07,
      "loss": 0.115,
      "step": 28813
    },
    {
      "epoch": 0.8405974677635801,
      "grad_norm": 0.8748720508629039,
      "learning_rate": 6.516824164336077e-07,
      "loss": 0.1362,
      "step": 28814
    },
    {
      "epoch": 0.8406266409942237,
      "grad_norm": 0.7613177884739198,
      "learning_rate": 6.514492210939327e-07,
      "loss": 0.0978,
      "step": 28815
    },
    {
      "epoch": 0.8406558142248672,
      "grad_norm": 0.96189654180933,
      "learning_rate": 6.512160645770799e-07,
      "loss": 0.1513,
      "step": 28816
    },
    {
      "epoch": 0.8406849874555108,
      "grad_norm": 0.8913120494635219,
      "learning_rate": 6.509829468851336e-07,
      "loss": 0.1031,
      "step": 28817
    },
    {
      "epoch": 0.8407141606861543,
      "grad_norm": 0.6909259859319695,
      "learning_rate": 6.50749868020173e-07,
      "loss": 0.0985,
      "step": 28818
    },
    {
      "epoch": 0.8407433339167979,
      "grad_norm": 0.6436536944911209,
      "learning_rate": 6.505168279842777e-07,
      "loss": 0.1226,
      "step": 28819
    },
    {
      "epoch": 0.8407725071474416,
      "grad_norm": 0.7144179993522757,
      "learning_rate": 6.502838267795303e-07,
      "loss": 0.1128,
      "step": 28820
    },
    {
      "epoch": 0.8408016803780851,
      "grad_norm": 0.8841992410911953,
      "learning_rate": 6.500508644080117e-07,
      "loss": 0.1044,
      "step": 28821
    },
    {
      "epoch": 0.8408308536087287,
      "grad_norm": 0.7712873026638355,
      "learning_rate": 6.498179408717992e-07,
      "loss": 0.1162,
      "step": 28822
    },
    {
      "epoch": 0.8408600268393722,
      "grad_norm": 0.7415406444945843,
      "learning_rate": 6.495850561729749e-07,
      "loss": 0.1158,
      "step": 28823
    },
    {
      "epoch": 0.8408892000700158,
      "grad_norm": 0.8610246489611074,
      "learning_rate": 6.493522103136169e-07,
      "loss": 0.102,
      "step": 28824
    },
    {
      "epoch": 0.8409183733006593,
      "grad_norm": 0.6609714171145477,
      "learning_rate": 6.491194032958026e-07,
      "loss": 0.0973,
      "step": 28825
    },
    {
      "epoch": 0.8409475465313029,
      "grad_norm": 0.7704983458435415,
      "learning_rate": 6.488866351216116e-07,
      "loss": 0.1128,
      "step": 28826
    },
    {
      "epoch": 0.8409767197619464,
      "grad_norm": 0.7011720107836845,
      "learning_rate": 6.486539057931229e-07,
      "loss": 0.1119,
      "step": 28827
    },
    {
      "epoch": 0.84100589299259,
      "grad_norm": 0.8783664330634164,
      "learning_rate": 6.484212153124137e-07,
      "loss": 0.1091,
      "step": 28828
    },
    {
      "epoch": 0.8410350662232335,
      "grad_norm": 0.9538451986776406,
      "learning_rate": 6.481885636815599e-07,
      "loss": 0.1047,
      "step": 28829
    },
    {
      "epoch": 0.8410642394538771,
      "grad_norm": 0.811201426363696,
      "learning_rate": 6.479559509026406e-07,
      "loss": 0.1258,
      "step": 28830
    },
    {
      "epoch": 0.8410934126845206,
      "grad_norm": 0.9946951198396704,
      "learning_rate": 6.477233769777319e-07,
      "loss": 0.1382,
      "step": 28831
    },
    {
      "epoch": 0.8411225859151642,
      "grad_norm": 0.8552353285760814,
      "learning_rate": 6.474908419089076e-07,
      "loss": 0.1221,
      "step": 28832
    },
    {
      "epoch": 0.8411517591458078,
      "grad_norm": 0.7219144875927064,
      "learning_rate": 6.472583456982485e-07,
      "loss": 0.0966,
      "step": 28833
    },
    {
      "epoch": 0.8411809323764514,
      "grad_norm": 1.2242165187967142,
      "learning_rate": 6.470258883478275e-07,
      "loss": 0.12,
      "step": 28834
    },
    {
      "epoch": 0.841210105607095,
      "grad_norm": 1.1793211548741807,
      "learning_rate": 6.467934698597189e-07,
      "loss": 0.1456,
      "step": 28835
    },
    {
      "epoch": 0.8412392788377385,
      "grad_norm": 0.9663827119742197,
      "learning_rate": 6.465610902360009e-07,
      "loss": 0.1191,
      "step": 28836
    },
    {
      "epoch": 0.8412684520683821,
      "grad_norm": 0.8042727150124604,
      "learning_rate": 6.463287494787446e-07,
      "loss": 0.1278,
      "step": 28837
    },
    {
      "epoch": 0.8412976252990256,
      "grad_norm": 1.083381842465957,
      "learning_rate": 6.460964475900266e-07,
      "loss": 0.1202,
      "step": 28838
    },
    {
      "epoch": 0.8413267985296692,
      "grad_norm": 0.9834436853846289,
      "learning_rate": 6.4586418457192e-07,
      "loss": 0.098,
      "step": 28839
    },
    {
      "epoch": 0.8413559717603127,
      "grad_norm": 0.6787730454878973,
      "learning_rate": 6.456319604264988e-07,
      "loss": 0.1179,
      "step": 28840
    },
    {
      "epoch": 0.8413851449909563,
      "grad_norm": 0.8495841239183446,
      "learning_rate": 6.453997751558366e-07,
      "loss": 0.1191,
      "step": 28841
    },
    {
      "epoch": 0.8414143182215998,
      "grad_norm": 1.1290615715573953,
      "learning_rate": 6.451676287620046e-07,
      "loss": 0.1132,
      "step": 28842
    },
    {
      "epoch": 0.8414434914522434,
      "grad_norm": 0.8453385437174072,
      "learning_rate": 6.44935521247076e-07,
      "loss": 0.114,
      "step": 28843
    },
    {
      "epoch": 0.841472664682887,
      "grad_norm": 0.9923323922290838,
      "learning_rate": 6.447034526131247e-07,
      "loss": 0.1179,
      "step": 28844
    },
    {
      "epoch": 0.8415018379135305,
      "grad_norm": 0.8784270460169328,
      "learning_rate": 6.444714228622212e-07,
      "loss": 0.1197,
      "step": 28845
    },
    {
      "epoch": 0.8415310111441741,
      "grad_norm": 0.8813807687323639,
      "learning_rate": 6.442394319964362e-07,
      "loss": 0.1099,
      "step": 28846
    },
    {
      "epoch": 0.8415601843748177,
      "grad_norm": 0.7550142799237555,
      "learning_rate": 6.440074800178426e-07,
      "loss": 0.1135,
      "step": 28847
    },
    {
      "epoch": 0.8415893576054613,
      "grad_norm": 0.7698178913705608,
      "learning_rate": 6.437755669285106e-07,
      "loss": 0.1148,
      "step": 28848
    },
    {
      "epoch": 0.8416185308361048,
      "grad_norm": 0.7689855145974284,
      "learning_rate": 6.435436927305077e-07,
      "loss": 0.1112,
      "step": 28849
    },
    {
      "epoch": 0.8416477040667484,
      "grad_norm": 0.8660549598806329,
      "learning_rate": 6.433118574259095e-07,
      "loss": 0.1002,
      "step": 28850
    },
    {
      "epoch": 0.8416768772973919,
      "grad_norm": 0.8977184197766986,
      "learning_rate": 6.430800610167831e-07,
      "loss": 0.1298,
      "step": 28851
    },
    {
      "epoch": 0.8417060505280355,
      "grad_norm": 0.9849880656802578,
      "learning_rate": 6.428483035051963e-07,
      "loss": 0.1067,
      "step": 28852
    },
    {
      "epoch": 0.841735223758679,
      "grad_norm": 0.8153342556747524,
      "learning_rate": 6.426165848932208e-07,
      "loss": 0.1035,
      "step": 28853
    },
    {
      "epoch": 0.8417643969893226,
      "grad_norm": 0.9065290418267716,
      "learning_rate": 6.423849051829246e-07,
      "loss": 0.0998,
      "step": 28854
    },
    {
      "epoch": 0.8417935702199661,
      "grad_norm": 0.7417628350177068,
      "learning_rate": 6.421532643763745e-07,
      "loss": 0.0989,
      "step": 28855
    },
    {
      "epoch": 0.8418227434506097,
      "grad_norm": 0.8666948957920616,
      "learning_rate": 6.419216624756397e-07,
      "loss": 0.1439,
      "step": 28856
    },
    {
      "epoch": 0.8418519166812533,
      "grad_norm": 0.8978597665981048,
      "learning_rate": 6.41690099482789e-07,
      "loss": 0.103,
      "step": 28857
    },
    {
      "epoch": 0.8418810899118968,
      "grad_norm": 0.7343306993336536,
      "learning_rate": 6.414585753998887e-07,
      "loss": 0.0889,
      "step": 28858
    },
    {
      "epoch": 0.8419102631425404,
      "grad_norm": 0.8811943569068373,
      "learning_rate": 6.412270902290047e-07,
      "loss": 0.1351,
      "step": 28859
    },
    {
      "epoch": 0.8419394363731839,
      "grad_norm": 0.9439589139156926,
      "learning_rate": 6.40995643972206e-07,
      "loss": 0.1295,
      "step": 28860
    },
    {
      "epoch": 0.8419686096038276,
      "grad_norm": 0.789718850268482,
      "learning_rate": 6.407642366315564e-07,
      "loss": 0.1274,
      "step": 28861
    },
    {
      "epoch": 0.8419977828344711,
      "grad_norm": 0.7178593481303421,
      "learning_rate": 6.405328682091228e-07,
      "loss": 0.1152,
      "step": 28862
    },
    {
      "epoch": 0.8420269560651147,
      "grad_norm": 0.9482712353650073,
      "learning_rate": 6.403015387069722e-07,
      "loss": 0.12,
      "step": 28863
    },
    {
      "epoch": 0.8420561292957582,
      "grad_norm": 0.8814999547315627,
      "learning_rate": 6.400702481271692e-07,
      "loss": 0.0973,
      "step": 28864
    },
    {
      "epoch": 0.8420853025264018,
      "grad_norm": 0.9815439228739841,
      "learning_rate": 6.398389964717766e-07,
      "loss": 0.1252,
      "step": 28865
    },
    {
      "epoch": 0.8421144757570453,
      "grad_norm": 0.9033488606404223,
      "learning_rate": 6.396077837428621e-07,
      "loss": 0.1454,
      "step": 28866
    },
    {
      "epoch": 0.8421436489876889,
      "grad_norm": 0.7170638165342558,
      "learning_rate": 6.393766099424869e-07,
      "loss": 0.1095,
      "step": 28867
    },
    {
      "epoch": 0.8421728222183325,
      "grad_norm": 1.0315142844876628,
      "learning_rate": 6.391454750727177e-07,
      "loss": 0.1149,
      "step": 28868
    },
    {
      "epoch": 0.842201995448976,
      "grad_norm": 0.7692115385358217,
      "learning_rate": 6.389143791356156e-07,
      "loss": 0.1185,
      "step": 28869
    },
    {
      "epoch": 0.8422311686796196,
      "grad_norm": 0.8417598144992364,
      "learning_rate": 6.386833221332456e-07,
      "loss": 0.1369,
      "step": 28870
    },
    {
      "epoch": 0.8422603419102631,
      "grad_norm": 0.8156129188232211,
      "learning_rate": 6.384523040676704e-07,
      "loss": 0.1071,
      "step": 28871
    },
    {
      "epoch": 0.8422895151409067,
      "grad_norm": 0.7775657771437443,
      "learning_rate": 6.382213249409502e-07,
      "loss": 0.1181,
      "step": 28872
    },
    {
      "epoch": 0.8423186883715502,
      "grad_norm": 0.9616514026711401,
      "learning_rate": 6.379903847551489e-07,
      "loss": 0.1025,
      "step": 28873
    },
    {
      "epoch": 0.8423478616021939,
      "grad_norm": 0.9075184500056969,
      "learning_rate": 6.377594835123296e-07,
      "loss": 0.1177,
      "step": 28874
    },
    {
      "epoch": 0.8423770348328374,
      "grad_norm": 1.0023575745946811,
      "learning_rate": 6.375286212145521e-07,
      "loss": 0.0963,
      "step": 28875
    },
    {
      "epoch": 0.842406208063481,
      "grad_norm": 0.8121770301739846,
      "learning_rate": 6.372977978638762e-07,
      "loss": 0.1237,
      "step": 28876
    },
    {
      "epoch": 0.8424353812941245,
      "grad_norm": 0.8369541291810983,
      "learning_rate": 6.370670134623652e-07,
      "loss": 0.0942,
      "step": 28877
    },
    {
      "epoch": 0.8424645545247681,
      "grad_norm": 1.0474218154312502,
      "learning_rate": 6.368362680120787e-07,
      "loss": 0.0904,
      "step": 28878
    },
    {
      "epoch": 0.8424937277554116,
      "grad_norm": 0.8552151482340444,
      "learning_rate": 6.366055615150746e-07,
      "loss": 0.0898,
      "step": 28879
    },
    {
      "epoch": 0.8425229009860552,
      "grad_norm": 0.6530577140130184,
      "learning_rate": 6.36374893973416e-07,
      "loss": 0.1051,
      "step": 28880
    },
    {
      "epoch": 0.8425520742166988,
      "grad_norm": 1.2912320210230699,
      "learning_rate": 6.361442653891608e-07,
      "loss": 0.0954,
      "step": 28881
    },
    {
      "epoch": 0.8425812474473423,
      "grad_norm": 0.8506607381697783,
      "learning_rate": 6.35913675764367e-07,
      "loss": 0.0853,
      "step": 28882
    },
    {
      "epoch": 0.8426104206779859,
      "grad_norm": 0.8377732292276981,
      "learning_rate": 6.356831251010948e-07,
      "loss": 0.0926,
      "step": 28883
    },
    {
      "epoch": 0.8426395939086294,
      "grad_norm": 0.8006864847043814,
      "learning_rate": 6.354526134014022e-07,
      "loss": 0.12,
      "step": 28884
    },
    {
      "epoch": 0.842668767139273,
      "grad_norm": 0.8652466306530592,
      "learning_rate": 6.352221406673453e-07,
      "loss": 0.1278,
      "step": 28885
    },
    {
      "epoch": 0.8426979403699165,
      "grad_norm": 0.8295095284993502,
      "learning_rate": 6.349917069009837e-07,
      "loss": 0.1077,
      "step": 28886
    },
    {
      "epoch": 0.8427271136005601,
      "grad_norm": 0.8357083084260215,
      "learning_rate": 6.347613121043745e-07,
      "loss": 0.1279,
      "step": 28887
    },
    {
      "epoch": 0.8427562868312037,
      "grad_norm": 0.8238125863301013,
      "learning_rate": 6.345309562795748e-07,
      "loss": 0.1479,
      "step": 28888
    },
    {
      "epoch": 0.8427854600618473,
      "grad_norm": 0.9650659380382625,
      "learning_rate": 6.343006394286394e-07,
      "loss": 0.1171,
      "step": 28889
    },
    {
      "epoch": 0.8428146332924908,
      "grad_norm": 0.71697884241401,
      "learning_rate": 6.340703615536264e-07,
      "loss": 0.1078,
      "step": 28890
    },
    {
      "epoch": 0.8428438065231344,
      "grad_norm": 0.8489867200630906,
      "learning_rate": 6.338401226565904e-07,
      "loss": 0.1126,
      "step": 28891
    },
    {
      "epoch": 0.842872979753778,
      "grad_norm": 0.9940494807831788,
      "learning_rate": 6.336099227395875e-07,
      "loss": 0.1168,
      "step": 28892
    },
    {
      "epoch": 0.8429021529844215,
      "grad_norm": 0.9758747411540146,
      "learning_rate": 6.333797618046739e-07,
      "loss": 0.0921,
      "step": 28893
    },
    {
      "epoch": 0.8429313262150651,
      "grad_norm": 0.7884703811383246,
      "learning_rate": 6.331496398539033e-07,
      "loss": 0.0993,
      "step": 28894
    },
    {
      "epoch": 0.8429604994457086,
      "grad_norm": 0.8741558895470134,
      "learning_rate": 6.329195568893292e-07,
      "loss": 0.1189,
      "step": 28895
    },
    {
      "epoch": 0.8429896726763522,
      "grad_norm": 0.784373689885044,
      "learning_rate": 6.326895129130079e-07,
      "loss": 0.1095,
      "step": 28896
    },
    {
      "epoch": 0.8430188459069957,
      "grad_norm": 0.9079845061376804,
      "learning_rate": 6.324595079269907e-07,
      "loss": 0.1144,
      "step": 28897
    },
    {
      "epoch": 0.8430480191376393,
      "grad_norm": 0.773522031670242,
      "learning_rate": 6.322295419333335e-07,
      "loss": 0.1287,
      "step": 28898
    },
    {
      "epoch": 0.8430771923682828,
      "grad_norm": 1.0060439764611728,
      "learning_rate": 6.319996149340873e-07,
      "loss": 0.1115,
      "step": 28899
    },
    {
      "epoch": 0.8431063655989264,
      "grad_norm": 0.849708583536126,
      "learning_rate": 6.317697269313072e-07,
      "loss": 0.0998,
      "step": 28900
    },
    {
      "epoch": 0.84313553882957,
      "grad_norm": 0.6597539800178907,
      "learning_rate": 6.315398779270443e-07,
      "loss": 0.116,
      "step": 28901
    },
    {
      "epoch": 0.8431647120602136,
      "grad_norm": 0.7636676781210086,
      "learning_rate": 6.313100679233491e-07,
      "loss": 0.1026,
      "step": 28902
    },
    {
      "epoch": 0.8431938852908571,
      "grad_norm": 0.9582051504730216,
      "learning_rate": 6.310802969222745e-07,
      "loss": 0.1268,
      "step": 28903
    },
    {
      "epoch": 0.8432230585215007,
      "grad_norm": 0.802010833128608,
      "learning_rate": 6.308505649258734e-07,
      "loss": 0.1331,
      "step": 28904
    },
    {
      "epoch": 0.8432522317521443,
      "grad_norm": 0.6908653461538637,
      "learning_rate": 6.306208719361956e-07,
      "loss": 0.1057,
      "step": 28905
    },
    {
      "epoch": 0.8432814049827878,
      "grad_norm": 0.9270209884296832,
      "learning_rate": 6.303912179552902e-07,
      "loss": 0.1208,
      "step": 28906
    },
    {
      "epoch": 0.8433105782134314,
      "grad_norm": 0.821447517787425,
      "learning_rate": 6.301616029852103e-07,
      "loss": 0.1123,
      "step": 28907
    },
    {
      "epoch": 0.8433397514440749,
      "grad_norm": 0.7620011929699494,
      "learning_rate": 6.299320270280046e-07,
      "loss": 0.1154,
      "step": 28908
    },
    {
      "epoch": 0.8433689246747185,
      "grad_norm": 0.7979724597079998,
      "learning_rate": 6.297024900857196e-07,
      "loss": 0.1114,
      "step": 28909
    },
    {
      "epoch": 0.843398097905362,
      "grad_norm": 0.912165101893905,
      "learning_rate": 6.294729921604104e-07,
      "loss": 0.1277,
      "step": 28910
    },
    {
      "epoch": 0.8434272711360056,
      "grad_norm": 0.8670551328968326,
      "learning_rate": 6.29243533254123e-07,
      "loss": 0.102,
      "step": 28911
    },
    {
      "epoch": 0.8434564443666491,
      "grad_norm": 0.9780130220486077,
      "learning_rate": 6.290141133689043e-07,
      "loss": 0.0969,
      "step": 28912
    },
    {
      "epoch": 0.8434856175972927,
      "grad_norm": 1.0302144398091853,
      "learning_rate": 6.287847325068059e-07,
      "loss": 0.1178,
      "step": 28913
    },
    {
      "epoch": 0.8435147908279362,
      "grad_norm": 0.7133364137578695,
      "learning_rate": 6.285553906698732e-07,
      "loss": 0.1008,
      "step": 28914
    },
    {
      "epoch": 0.8435439640585799,
      "grad_norm": 0.9786751119061714,
      "learning_rate": 6.283260878601538e-07,
      "loss": 0.0976,
      "step": 28915
    },
    {
      "epoch": 0.8435731372892235,
      "grad_norm": 0.9970447061315006,
      "learning_rate": 6.280968240796953e-07,
      "loss": 0.1148,
      "step": 28916
    },
    {
      "epoch": 0.843602310519867,
      "grad_norm": 0.7532731327396806,
      "learning_rate": 6.278675993305461e-07,
      "loss": 0.1119,
      "step": 28917
    },
    {
      "epoch": 0.8436314837505106,
      "grad_norm": 0.9077873625714724,
      "learning_rate": 6.276384136147512e-07,
      "loss": 0.1159,
      "step": 28918
    },
    {
      "epoch": 0.8436606569811541,
      "grad_norm": 0.8612141675562002,
      "learning_rate": 6.274092669343551e-07,
      "loss": 0.0998,
      "step": 28919
    },
    {
      "epoch": 0.8436898302117977,
      "grad_norm": 1.2176983928374796,
      "learning_rate": 6.271801592914068e-07,
      "loss": 0.1136,
      "step": 28920
    },
    {
      "epoch": 0.8437190034424412,
      "grad_norm": 0.9229084585460244,
      "learning_rate": 6.269510906879489e-07,
      "loss": 0.1134,
      "step": 28921
    },
    {
      "epoch": 0.8437481766730848,
      "grad_norm": 0.8544153087523163,
      "learning_rate": 6.267220611260283e-07,
      "loss": 0.1189,
      "step": 28922
    },
    {
      "epoch": 0.8437773499037283,
      "grad_norm": 1.08151919459781,
      "learning_rate": 6.264930706076894e-07,
      "loss": 0.1027,
      "step": 28923
    },
    {
      "epoch": 0.8438065231343719,
      "grad_norm": 0.8685751850216777,
      "learning_rate": 6.262641191349773e-07,
      "loss": 0.1052,
      "step": 28924
    },
    {
      "epoch": 0.8438356963650154,
      "grad_norm": 0.9392648152313195,
      "learning_rate": 6.260352067099329e-07,
      "loss": 0.1011,
      "step": 28925
    },
    {
      "epoch": 0.843864869595659,
      "grad_norm": 0.678042428996098,
      "learning_rate": 6.258063333346037e-07,
      "loss": 0.1097,
      "step": 28926
    },
    {
      "epoch": 0.8438940428263025,
      "grad_norm": 0.9806532946104165,
      "learning_rate": 6.255774990110303e-07,
      "loss": 0.1035,
      "step": 28927
    },
    {
      "epoch": 0.8439232160569462,
      "grad_norm": 0.9535940013267304,
      "learning_rate": 6.253487037412575e-07,
      "loss": 0.0961,
      "step": 28928
    },
    {
      "epoch": 0.8439523892875898,
      "grad_norm": 0.9213404933200429,
      "learning_rate": 6.251199475273262e-07,
      "loss": 0.0942,
      "step": 28929
    },
    {
      "epoch": 0.8439815625182333,
      "grad_norm": 0.7984942682764017,
      "learning_rate": 6.248912303712812e-07,
      "loss": 0.1244,
      "step": 28930
    },
    {
      "epoch": 0.8440107357488769,
      "grad_norm": 0.8499461483793341,
      "learning_rate": 6.246625522751621e-07,
      "loss": 0.1154,
      "step": 28931
    },
    {
      "epoch": 0.8440399089795204,
      "grad_norm": 0.7866133750070303,
      "learning_rate": 6.244339132410104e-07,
      "loss": 0.0921,
      "step": 28932
    },
    {
      "epoch": 0.844069082210164,
      "grad_norm": 0.742497463167279,
      "learning_rate": 6.242053132708686e-07,
      "loss": 0.1212,
      "step": 28933
    },
    {
      "epoch": 0.8440982554408075,
      "grad_norm": 0.8884821717707002,
      "learning_rate": 6.239767523667778e-07,
      "loss": 0.101,
      "step": 28934
    },
    {
      "epoch": 0.8441274286714511,
      "grad_norm": 0.9183989534593693,
      "learning_rate": 6.237482305307785e-07,
      "loss": 0.0791,
      "step": 28935
    },
    {
      "epoch": 0.8441566019020946,
      "grad_norm": 0.946605435463055,
      "learning_rate": 6.235197477649085e-07,
      "loss": 0.1147,
      "step": 28936
    },
    {
      "epoch": 0.8441857751327382,
      "grad_norm": 0.7782264256580208,
      "learning_rate": 6.232913040712107e-07,
      "loss": 0.1029,
      "step": 28937
    },
    {
      "epoch": 0.8442149483633817,
      "grad_norm": 0.9038559295240802,
      "learning_rate": 6.230628994517235e-07,
      "loss": 0.1046,
      "step": 28938
    },
    {
      "epoch": 0.8442441215940253,
      "grad_norm": 1.011085336469972,
      "learning_rate": 6.22834533908484e-07,
      "loss": 0.1062,
      "step": 28939
    },
    {
      "epoch": 0.8442732948246688,
      "grad_norm": 0.898068911669527,
      "learning_rate": 6.226062074435347e-07,
      "loss": 0.1139,
      "step": 28940
    },
    {
      "epoch": 0.8443024680553124,
      "grad_norm": 0.7582485393852866,
      "learning_rate": 6.22377920058912e-07,
      "loss": 0.1064,
      "step": 28941
    },
    {
      "epoch": 0.8443316412859561,
      "grad_norm": 0.8052222974506925,
      "learning_rate": 6.221496717566533e-07,
      "loss": 0.1142,
      "step": 28942
    },
    {
      "epoch": 0.8443608145165996,
      "grad_norm": 0.8742033596095347,
      "learning_rate": 6.219214625387987e-07,
      "loss": 0.0949,
      "step": 28943
    },
    {
      "epoch": 0.8443899877472432,
      "grad_norm": 1.0294737368038258,
      "learning_rate": 6.216932924073837e-07,
      "loss": 0.1574,
      "step": 28944
    },
    {
      "epoch": 0.8444191609778867,
      "grad_norm": 0.8464921950502365,
      "learning_rate": 6.214651613644445e-07,
      "loss": 0.0981,
      "step": 28945
    },
    {
      "epoch": 0.8444483342085303,
      "grad_norm": 1.0762773562063659,
      "learning_rate": 6.212370694120196e-07,
      "loss": 0.0903,
      "step": 28946
    },
    {
      "epoch": 0.8444775074391738,
      "grad_norm": 0.7914913901208562,
      "learning_rate": 6.21009016552146e-07,
      "loss": 0.1177,
      "step": 28947
    },
    {
      "epoch": 0.8445066806698174,
      "grad_norm": 0.7304359791593851,
      "learning_rate": 6.207810027868583e-07,
      "loss": 0.0968,
      "step": 28948
    },
    {
      "epoch": 0.8445358539004609,
      "grad_norm": 0.887361182023445,
      "learning_rate": 6.205530281181915e-07,
      "loss": 0.1259,
      "step": 28949
    },
    {
      "epoch": 0.8445650271311045,
      "grad_norm": 0.8771738497094493,
      "learning_rate": 6.203250925481824e-07,
      "loss": 0.1092,
      "step": 28950
    },
    {
      "epoch": 0.844594200361748,
      "grad_norm": 0.9470336523330438,
      "learning_rate": 6.200971960788649e-07,
      "loss": 0.1156,
      "step": 28951
    },
    {
      "epoch": 0.8446233735923916,
      "grad_norm": 0.949541291182677,
      "learning_rate": 6.19869338712274e-07,
      "loss": 0.1268,
      "step": 28952
    },
    {
      "epoch": 0.8446525468230351,
      "grad_norm": 0.8543379562142469,
      "learning_rate": 6.196415204504447e-07,
      "loss": 0.1099,
      "step": 28953
    },
    {
      "epoch": 0.8446817200536787,
      "grad_norm": 0.852058767861762,
      "learning_rate": 6.194137412954104e-07,
      "loss": 0.1063,
      "step": 28954
    },
    {
      "epoch": 0.8447108932843223,
      "grad_norm": 0.7605570777580096,
      "learning_rate": 6.191860012492034e-07,
      "loss": 0.1026,
      "step": 28955
    },
    {
      "epoch": 0.8447400665149659,
      "grad_norm": 0.9689081783921539,
      "learning_rate": 6.189583003138588e-07,
      "loss": 0.1123,
      "step": 28956
    },
    {
      "epoch": 0.8447692397456095,
      "grad_norm": 0.7616158465788677,
      "learning_rate": 6.187306384914082e-07,
      "loss": 0.1023,
      "step": 28957
    },
    {
      "epoch": 0.844798412976253,
      "grad_norm": 0.8094604344402836,
      "learning_rate": 6.185030157838851e-07,
      "loss": 0.1169,
      "step": 28958
    },
    {
      "epoch": 0.8448275862068966,
      "grad_norm": 0.7705378157013021,
      "learning_rate": 6.182754321933204e-07,
      "loss": 0.1298,
      "step": 28959
    },
    {
      "epoch": 0.8448567594375401,
      "grad_norm": 1.1339312308593168,
      "learning_rate": 6.180478877217477e-07,
      "loss": 0.0925,
      "step": 28960
    },
    {
      "epoch": 0.8448859326681837,
      "grad_norm": 0.9206239070075416,
      "learning_rate": 6.17820382371197e-07,
      "loss": 0.1178,
      "step": 28961
    },
    {
      "epoch": 0.8449151058988272,
      "grad_norm": 0.8088148687671737,
      "learning_rate": 6.175929161436994e-07,
      "loss": 0.108,
      "step": 28962
    },
    {
      "epoch": 0.8449442791294708,
      "grad_norm": 0.8366321837183356,
      "learning_rate": 6.173654890412855e-07,
      "loss": 0.1118,
      "step": 28963
    },
    {
      "epoch": 0.8449734523601143,
      "grad_norm": 0.7771326612400864,
      "learning_rate": 6.171381010659877e-07,
      "loss": 0.1103,
      "step": 28964
    },
    {
      "epoch": 0.8450026255907579,
      "grad_norm": 0.95233919865386,
      "learning_rate": 6.169107522198348e-07,
      "loss": 0.0969,
      "step": 28965
    },
    {
      "epoch": 0.8450317988214014,
      "grad_norm": 0.8560933825205206,
      "learning_rate": 6.166834425048545e-07,
      "loss": 0.1113,
      "step": 28966
    },
    {
      "epoch": 0.845060972052045,
      "grad_norm": 0.9462886365787525,
      "learning_rate": 6.1645617192308e-07,
      "loss": 0.1134,
      "step": 28967
    },
    {
      "epoch": 0.8450901452826886,
      "grad_norm": 0.8744992932669856,
      "learning_rate": 6.162289404765382e-07,
      "loss": 0.1297,
      "step": 28968
    },
    {
      "epoch": 0.8451193185133322,
      "grad_norm": 1.0338832755218144,
      "learning_rate": 6.160017481672553e-07,
      "loss": 0.1231,
      "step": 28969
    },
    {
      "epoch": 0.8451484917439758,
      "grad_norm": 1.1384718410664043,
      "learning_rate": 6.157745949972649e-07,
      "loss": 0.1184,
      "step": 28970
    },
    {
      "epoch": 0.8451776649746193,
      "grad_norm": 0.8257966487801969,
      "learning_rate": 6.155474809685919e-07,
      "loss": 0.1207,
      "step": 28971
    },
    {
      "epoch": 0.8452068382052629,
      "grad_norm": 1.1902644605351222,
      "learning_rate": 6.153204060832635e-07,
      "loss": 0.126,
      "step": 28972
    },
    {
      "epoch": 0.8452360114359064,
      "grad_norm": 0.9941656567051275,
      "learning_rate": 6.150933703433087e-07,
      "loss": 0.1029,
      "step": 28973
    },
    {
      "epoch": 0.84526518466655,
      "grad_norm": 0.8267997592841663,
      "learning_rate": 6.148663737507537e-07,
      "loss": 0.1198,
      "step": 28974
    },
    {
      "epoch": 0.8452943578971935,
      "grad_norm": 0.8514516111687394,
      "learning_rate": 6.146394163076241e-07,
      "loss": 0.1205,
      "step": 28975
    },
    {
      "epoch": 0.8453235311278371,
      "grad_norm": 0.794370338561396,
      "learning_rate": 6.144124980159466e-07,
      "loss": 0.1073,
      "step": 28976
    },
    {
      "epoch": 0.8453527043584806,
      "grad_norm": 0.8941573073639782,
      "learning_rate": 6.141856188777484e-07,
      "loss": 0.115,
      "step": 28977
    },
    {
      "epoch": 0.8453818775891242,
      "grad_norm": 0.9754947754227669,
      "learning_rate": 6.13958778895054e-07,
      "loss": 0.0951,
      "step": 28978
    },
    {
      "epoch": 0.8454110508197678,
      "grad_norm": 0.8238535281379181,
      "learning_rate": 6.137319780698881e-07,
      "loss": 0.1168,
      "step": 28979
    },
    {
      "epoch": 0.8454402240504113,
      "grad_norm": 0.6944915606910066,
      "learning_rate": 6.135052164042765e-07,
      "loss": 0.0883,
      "step": 28980
    },
    {
      "epoch": 0.8454693972810549,
      "grad_norm": 0.9429286371602528,
      "learning_rate": 6.132784939002423e-07,
      "loss": 0.0976,
      "step": 28981
    },
    {
      "epoch": 0.8454985705116984,
      "grad_norm": 0.7588408904627072,
      "learning_rate": 6.130518105598104e-07,
      "loss": 0.1065,
      "step": 28982
    },
    {
      "epoch": 0.8455277437423421,
      "grad_norm": 0.8980012058379951,
      "learning_rate": 6.128251663850055e-07,
      "loss": 0.0994,
      "step": 28983
    },
    {
      "epoch": 0.8455569169729856,
      "grad_norm": 1.234274059590637,
      "learning_rate": 6.125985613778506e-07,
      "loss": 0.1242,
      "step": 28984
    },
    {
      "epoch": 0.8455860902036292,
      "grad_norm": 0.9430980757964642,
      "learning_rate": 6.123719955403673e-07,
      "loss": 0.1086,
      "step": 28985
    },
    {
      "epoch": 0.8456152634342727,
      "grad_norm": 0.8886403987846055,
      "learning_rate": 6.121454688745804e-07,
      "loss": 0.0884,
      "step": 28986
    },
    {
      "epoch": 0.8456444366649163,
      "grad_norm": 0.7905198450456818,
      "learning_rate": 6.119189813825105e-07,
      "loss": 0.1259,
      "step": 28987
    },
    {
      "epoch": 0.8456736098955598,
      "grad_norm": 0.8787226332974925,
      "learning_rate": 6.11692533066181e-07,
      "loss": 0.1236,
      "step": 28988
    },
    {
      "epoch": 0.8457027831262034,
      "grad_norm": 0.9199138971677424,
      "learning_rate": 6.114661239276121e-07,
      "loss": 0.1005,
      "step": 28989
    },
    {
      "epoch": 0.845731956356847,
      "grad_norm": 1.2170396444377143,
      "learning_rate": 6.112397539688269e-07,
      "loss": 0.107,
      "step": 28990
    },
    {
      "epoch": 0.8457611295874905,
      "grad_norm": 0.9045354533634172,
      "learning_rate": 6.110134231918458e-07,
      "loss": 0.1182,
      "step": 28991
    },
    {
      "epoch": 0.845790302818134,
      "grad_norm": 0.7436760800768687,
      "learning_rate": 6.107871315986879e-07,
      "loss": 0.101,
      "step": 28992
    },
    {
      "epoch": 0.8458194760487776,
      "grad_norm": 0.7470511240234238,
      "learning_rate": 6.105608791913747e-07,
      "loss": 0.097,
      "step": 28993
    },
    {
      "epoch": 0.8458486492794212,
      "grad_norm": 0.9281311415140583,
      "learning_rate": 6.103346659719278e-07,
      "loss": 0.105,
      "step": 28994
    },
    {
      "epoch": 0.8458778225100647,
      "grad_norm": 0.8024697346946731,
      "learning_rate": 6.101084919423645e-07,
      "loss": 0.1032,
      "step": 28995
    },
    {
      "epoch": 0.8459069957407084,
      "grad_norm": 0.772975732299244,
      "learning_rate": 6.098823571047036e-07,
      "loss": 0.1081,
      "step": 28996
    },
    {
      "epoch": 0.8459361689713519,
      "grad_norm": 0.9238825247852168,
      "learning_rate": 6.096562614609658e-07,
      "loss": 0.0956,
      "step": 28997
    },
    {
      "epoch": 0.8459653422019955,
      "grad_norm": 0.898659153580409,
      "learning_rate": 6.094302050131695e-07,
      "loss": 0.1478,
      "step": 28998
    },
    {
      "epoch": 0.845994515432639,
      "grad_norm": 1.5643563022734468,
      "learning_rate": 6.092041877633298e-07,
      "loss": 0.1151,
      "step": 28999
    },
    {
      "epoch": 0.8460236886632826,
      "grad_norm": 0.8831666935557358,
      "learning_rate": 6.089782097134689e-07,
      "loss": 0.1099,
      "step": 29000
    },
    {
      "epoch": 0.8460528618939261,
      "grad_norm": 0.8419068549251498,
      "learning_rate": 6.087522708656024e-07,
      "loss": 0.1242,
      "step": 29001
    },
    {
      "epoch": 0.8460820351245697,
      "grad_norm": 0.6877823378891237,
      "learning_rate": 6.085263712217465e-07,
      "loss": 0.1057,
      "step": 29002
    },
    {
      "epoch": 0.8461112083552133,
      "grad_norm": 0.9322222929739505,
      "learning_rate": 6.083005107839196e-07,
      "loss": 0.0968,
      "step": 29003
    },
    {
      "epoch": 0.8461403815858568,
      "grad_norm": 1.1745189573711559,
      "learning_rate": 6.080746895541372e-07,
      "loss": 0.1306,
      "step": 29004
    },
    {
      "epoch": 0.8461695548165004,
      "grad_norm": 0.9126592759386749,
      "learning_rate": 6.078489075344152e-07,
      "loss": 0.116,
      "step": 29005
    },
    {
      "epoch": 0.8461987280471439,
      "grad_norm": 0.919260997371977,
      "learning_rate": 6.076231647267689e-07,
      "loss": 0.1192,
      "step": 29006
    },
    {
      "epoch": 0.8462279012777875,
      "grad_norm": 0.888339511523776,
      "learning_rate": 6.073974611332156e-07,
      "loss": 0.1072,
      "step": 29007
    },
    {
      "epoch": 0.846257074508431,
      "grad_norm": 0.9077946045990275,
      "learning_rate": 6.071717967557694e-07,
      "loss": 0.1007,
      "step": 29008
    },
    {
      "epoch": 0.8462862477390746,
      "grad_norm": 0.8680779547275776,
      "learning_rate": 6.069461715964436e-07,
      "loss": 0.1135,
      "step": 29009
    },
    {
      "epoch": 0.8463154209697182,
      "grad_norm": 1.0610882156130454,
      "learning_rate": 6.06720585657255e-07,
      "loss": 0.1028,
      "step": 29010
    },
    {
      "epoch": 0.8463445942003618,
      "grad_norm": 1.1003124603593972,
      "learning_rate": 6.064950389402152e-07,
      "loss": 0.0991,
      "step": 29011
    },
    {
      "epoch": 0.8463737674310053,
      "grad_norm": 1.0180243383169565,
      "learning_rate": 6.062695314473383e-07,
      "loss": 0.1175,
      "step": 29012
    },
    {
      "epoch": 0.8464029406616489,
      "grad_norm": 0.9826360830307576,
      "learning_rate": 6.060440631806397e-07,
      "loss": 0.1146,
      "step": 29013
    },
    {
      "epoch": 0.8464321138922924,
      "grad_norm": 0.8642952642862485,
      "learning_rate": 6.058186341421307e-07,
      "loss": 0.112,
      "step": 29014
    },
    {
      "epoch": 0.846461287122936,
      "grad_norm": 0.9622097652971888,
      "learning_rate": 6.05593244333823e-07,
      "loss": 0.1192,
      "step": 29015
    },
    {
      "epoch": 0.8464904603535796,
      "grad_norm": 0.7831867065941874,
      "learning_rate": 6.053678937577306e-07,
      "loss": 0.1062,
      "step": 29016
    },
    {
      "epoch": 0.8465196335842231,
      "grad_norm": 0.9394779054339935,
      "learning_rate": 6.051425824158636e-07,
      "loss": 0.0972,
      "step": 29017
    },
    {
      "epoch": 0.8465488068148667,
      "grad_norm": 1.0162801455659933,
      "learning_rate": 6.049173103102357e-07,
      "loss": 0.1355,
      "step": 29018
    },
    {
      "epoch": 0.8465779800455102,
      "grad_norm": 0.8091046858537951,
      "learning_rate": 6.046920774428555e-07,
      "loss": 0.1249,
      "step": 29019
    },
    {
      "epoch": 0.8466071532761538,
      "grad_norm": 0.9962912216737734,
      "learning_rate": 6.044668838157364e-07,
      "loss": 0.1096,
      "step": 29020
    },
    {
      "epoch": 0.8466363265067973,
      "grad_norm": 0.965340154386159,
      "learning_rate": 6.042417294308878e-07,
      "loss": 0.1179,
      "step": 29021
    },
    {
      "epoch": 0.8466654997374409,
      "grad_norm": 0.7149561802564454,
      "learning_rate": 6.040166142903186e-07,
      "loss": 0.0928,
      "step": 29022
    },
    {
      "epoch": 0.8466946729680845,
      "grad_norm": 0.8466089184322997,
      "learning_rate": 6.037915383960391e-07,
      "loss": 0.1121,
      "step": 29023
    },
    {
      "epoch": 0.8467238461987281,
      "grad_norm": 1.3270229358566914,
      "learning_rate": 6.035665017500609e-07,
      "loss": 0.134,
      "step": 29024
    },
    {
      "epoch": 0.8467530194293716,
      "grad_norm": 1.099903479306349,
      "learning_rate": 6.033415043543916e-07,
      "loss": 0.1099,
      "step": 29025
    },
    {
      "epoch": 0.8467821926600152,
      "grad_norm": 0.7118010932488361,
      "learning_rate": 6.031165462110383e-07,
      "loss": 0.1156,
      "step": 29026
    },
    {
      "epoch": 0.8468113658906588,
      "grad_norm": 1.0062081837952301,
      "learning_rate": 6.02891627322012e-07,
      "loss": 0.1156,
      "step": 29027
    },
    {
      "epoch": 0.8468405391213023,
      "grad_norm": 0.9127783006903045,
      "learning_rate": 6.0266674768932e-07,
      "loss": 0.1033,
      "step": 29028
    },
    {
      "epoch": 0.8468697123519459,
      "grad_norm": 0.9014088640034168,
      "learning_rate": 6.024419073149668e-07,
      "loss": 0.1145,
      "step": 29029
    },
    {
      "epoch": 0.8468988855825894,
      "grad_norm": 1.0071058171151983,
      "learning_rate": 6.022171062009652e-07,
      "loss": 0.1179,
      "step": 29030
    },
    {
      "epoch": 0.846928058813233,
      "grad_norm": 0.9446048324157715,
      "learning_rate": 6.019923443493192e-07,
      "loss": 0.1213,
      "step": 29031
    },
    {
      "epoch": 0.8469572320438765,
      "grad_norm": 0.8361849720046881,
      "learning_rate": 6.017676217620344e-07,
      "loss": 0.1067,
      "step": 29032
    },
    {
      "epoch": 0.8469864052745201,
      "grad_norm": 0.9667808011358108,
      "learning_rate": 6.015429384411192e-07,
      "loss": 0.1115,
      "step": 29033
    },
    {
      "epoch": 0.8470155785051636,
      "grad_norm": 1.0742950683483192,
      "learning_rate": 6.013182943885781e-07,
      "loss": 0.1079,
      "step": 29034
    },
    {
      "epoch": 0.8470447517358072,
      "grad_norm": 1.1633351695493122,
      "learning_rate": 6.010936896064184e-07,
      "loss": 0.1199,
      "step": 29035
    },
    {
      "epoch": 0.8470739249664507,
      "grad_norm": 0.8533109669312983,
      "learning_rate": 6.008691240966425e-07,
      "loss": 0.108,
      "step": 29036
    },
    {
      "epoch": 0.8471030981970944,
      "grad_norm": 0.8061642141837491,
      "learning_rate": 6.006445978612585e-07,
      "loss": 0.1057,
      "step": 29037
    },
    {
      "epoch": 0.847132271427738,
      "grad_norm": 0.8820922330996973,
      "learning_rate": 6.004201109022689e-07,
      "loss": 0.0857,
      "step": 29038
    },
    {
      "epoch": 0.8471614446583815,
      "grad_norm": 0.9703849497398688,
      "learning_rate": 6.001956632216771e-07,
      "loss": 0.1074,
      "step": 29039
    },
    {
      "epoch": 0.847190617889025,
      "grad_norm": 0.8490460378689196,
      "learning_rate": 5.999712548214886e-07,
      "loss": 0.1062,
      "step": 29040
    },
    {
      "epoch": 0.8472197911196686,
      "grad_norm": 0.9537004209596786,
      "learning_rate": 5.99746885703707e-07,
      "loss": 0.1173,
      "step": 29041
    },
    {
      "epoch": 0.8472489643503122,
      "grad_norm": 0.9381791552270514,
      "learning_rate": 5.995225558703344e-07,
      "loss": 0.1304,
      "step": 29042
    },
    {
      "epoch": 0.8472781375809557,
      "grad_norm": 1.2164745413556348,
      "learning_rate": 5.992982653233742e-07,
      "loss": 0.1075,
      "step": 29043
    },
    {
      "epoch": 0.8473073108115993,
      "grad_norm": 0.7229966815710739,
      "learning_rate": 5.990740140648288e-07,
      "loss": 0.1095,
      "step": 29044
    },
    {
      "epoch": 0.8473364840422428,
      "grad_norm": 0.8851650160487878,
      "learning_rate": 5.988498020966993e-07,
      "loss": 0.1282,
      "step": 29045
    },
    {
      "epoch": 0.8473656572728864,
      "grad_norm": 3.132183776134045,
      "learning_rate": 5.986256294209874e-07,
      "loss": 0.1191,
      "step": 29046
    },
    {
      "epoch": 0.8473948305035299,
      "grad_norm": 0.8087015027414471,
      "learning_rate": 5.984014960396972e-07,
      "loss": 0.1082,
      "step": 29047
    },
    {
      "epoch": 0.8474240037341735,
      "grad_norm": 0.9006617659695598,
      "learning_rate": 5.98177401954827e-07,
      "loss": 0.1113,
      "step": 29048
    },
    {
      "epoch": 0.847453176964817,
      "grad_norm": 0.8753993150754519,
      "learning_rate": 5.979533471683773e-07,
      "loss": 0.1255,
      "step": 29049
    },
    {
      "epoch": 0.8474823501954607,
      "grad_norm": 0.8116099502956838,
      "learning_rate": 5.977293316823502e-07,
      "loss": 0.1078,
      "step": 29050
    },
    {
      "epoch": 0.8475115234261043,
      "grad_norm": 0.8751003965449491,
      "learning_rate": 5.975053554987448e-07,
      "loss": 0.1115,
      "step": 29051
    },
    {
      "epoch": 0.8475406966567478,
      "grad_norm": 0.9368127147652712,
      "learning_rate": 5.972814186195597e-07,
      "loss": 0.116,
      "step": 29052
    },
    {
      "epoch": 0.8475698698873914,
      "grad_norm": 0.847932109256919,
      "learning_rate": 5.970575210467949e-07,
      "loss": 0.0994,
      "step": 29053
    },
    {
      "epoch": 0.8475990431180349,
      "grad_norm": 0.7030066060440572,
      "learning_rate": 5.968336627824506e-07,
      "loss": 0.1091,
      "step": 29054
    },
    {
      "epoch": 0.8476282163486785,
      "grad_norm": 0.660917535734426,
      "learning_rate": 5.966098438285245e-07,
      "loss": 0.1247,
      "step": 29055
    },
    {
      "epoch": 0.847657389579322,
      "grad_norm": 0.7533538106123413,
      "learning_rate": 5.963860641870134e-07,
      "loss": 0.1049,
      "step": 29056
    },
    {
      "epoch": 0.8476865628099656,
      "grad_norm": 0.9245893197000575,
      "learning_rate": 5.961623238599168e-07,
      "loss": 0.0943,
      "step": 29057
    },
    {
      "epoch": 0.8477157360406091,
      "grad_norm": 0.7611611264482083,
      "learning_rate": 5.959386228492314e-07,
      "loss": 0.1128,
      "step": 29058
    },
    {
      "epoch": 0.8477449092712527,
      "grad_norm": 0.9054708090581463,
      "learning_rate": 5.957149611569541e-07,
      "loss": 0.1033,
      "step": 29059
    },
    {
      "epoch": 0.8477740825018962,
      "grad_norm": 0.7867854167182577,
      "learning_rate": 5.954913387850836e-07,
      "loss": 0.1185,
      "step": 29060
    },
    {
      "epoch": 0.8478032557325398,
      "grad_norm": 0.9561588506171079,
      "learning_rate": 5.952677557356146e-07,
      "loss": 0.1009,
      "step": 29061
    },
    {
      "epoch": 0.8478324289631833,
      "grad_norm": 0.8367745657832105,
      "learning_rate": 5.950442120105432e-07,
      "loss": 0.1215,
      "step": 29062
    },
    {
      "epoch": 0.8478616021938269,
      "grad_norm": 0.7966824773289977,
      "learning_rate": 5.948207076118662e-07,
      "loss": 0.1464,
      "step": 29063
    },
    {
      "epoch": 0.8478907754244706,
      "grad_norm": 0.9122979652791569,
      "learning_rate": 5.945972425415769e-07,
      "loss": 0.1128,
      "step": 29064
    },
    {
      "epoch": 0.8479199486551141,
      "grad_norm": 0.7476498141301914,
      "learning_rate": 5.943738168016732e-07,
      "loss": 0.1098,
      "step": 29065
    },
    {
      "epoch": 0.8479491218857577,
      "grad_norm": 1.0757619620389016,
      "learning_rate": 5.941504303941475e-07,
      "loss": 0.1206,
      "step": 29066
    },
    {
      "epoch": 0.8479782951164012,
      "grad_norm": 1.0973587077063143,
      "learning_rate": 5.939270833209959e-07,
      "loss": 0.1108,
      "step": 29067
    },
    {
      "epoch": 0.8480074683470448,
      "grad_norm": 0.8395425331530202,
      "learning_rate": 5.937037755842112e-07,
      "loss": 0.0973,
      "step": 29068
    },
    {
      "epoch": 0.8480366415776883,
      "grad_norm": 0.8268829295462345,
      "learning_rate": 5.934805071857863e-07,
      "loss": 0.1319,
      "step": 29069
    },
    {
      "epoch": 0.8480658148083319,
      "grad_norm": 0.7921914210874075,
      "learning_rate": 5.932572781277158e-07,
      "loss": 0.1068,
      "step": 29070
    },
    {
      "epoch": 0.8480949880389754,
      "grad_norm": 0.8860279687171584,
      "learning_rate": 5.930340884119934e-07,
      "loss": 0.1045,
      "step": 29071
    },
    {
      "epoch": 0.848124161269619,
      "grad_norm": 1.0539568396441068,
      "learning_rate": 5.928109380406094e-07,
      "loss": 0.1192,
      "step": 29072
    },
    {
      "epoch": 0.8481533345002625,
      "grad_norm": 0.776371118999152,
      "learning_rate": 5.925878270155582e-07,
      "loss": 0.1015,
      "step": 29073
    },
    {
      "epoch": 0.8481825077309061,
      "grad_norm": 0.8651158857965905,
      "learning_rate": 5.923647553388312e-07,
      "loss": 0.1196,
      "step": 29074
    },
    {
      "epoch": 0.8482116809615496,
      "grad_norm": 0.8464081537327086,
      "learning_rate": 5.921417230124177e-07,
      "loss": 0.1063,
      "step": 29075
    },
    {
      "epoch": 0.8482408541921932,
      "grad_norm": 0.9301961433279016,
      "learning_rate": 5.919187300383112e-07,
      "loss": 0.1133,
      "step": 29076
    },
    {
      "epoch": 0.8482700274228369,
      "grad_norm": 0.9654583692499626,
      "learning_rate": 5.916957764185033e-07,
      "loss": 0.1393,
      "step": 29077
    },
    {
      "epoch": 0.8482992006534804,
      "grad_norm": 0.7106699956578029,
      "learning_rate": 5.914728621549826e-07,
      "loss": 0.0997,
      "step": 29078
    },
    {
      "epoch": 0.848328373884124,
      "grad_norm": 0.8398080467262697,
      "learning_rate": 5.91249987249739e-07,
      "loss": 0.1002,
      "step": 29079
    },
    {
      "epoch": 0.8483575471147675,
      "grad_norm": 1.082671993972656,
      "learning_rate": 5.910271517047639e-07,
      "loss": 0.115,
      "step": 29080
    },
    {
      "epoch": 0.8483867203454111,
      "grad_norm": 0.732134523449264,
      "learning_rate": 5.90804355522046e-07,
      "loss": 0.1049,
      "step": 29081
    },
    {
      "epoch": 0.8484158935760546,
      "grad_norm": 0.8603765456311879,
      "learning_rate": 5.905815987035735e-07,
      "loss": 0.1001,
      "step": 29082
    },
    {
      "epoch": 0.8484450668066982,
      "grad_norm": 0.8973437925039011,
      "learning_rate": 5.903588812513356e-07,
      "loss": 0.1394,
      "step": 29083
    },
    {
      "epoch": 0.8484742400373417,
      "grad_norm": 0.8189177716404316,
      "learning_rate": 5.901362031673219e-07,
      "loss": 0.1304,
      "step": 29084
    },
    {
      "epoch": 0.8485034132679853,
      "grad_norm": 0.8655269686689555,
      "learning_rate": 5.899135644535193e-07,
      "loss": 0.1104,
      "step": 29085
    },
    {
      "epoch": 0.8485325864986288,
      "grad_norm": 0.8718371207005509,
      "learning_rate": 5.896909651119149e-07,
      "loss": 0.1005,
      "step": 29086
    },
    {
      "epoch": 0.8485617597292724,
      "grad_norm": 0.7441187259645008,
      "learning_rate": 5.894684051444977e-07,
      "loss": 0.1259,
      "step": 29087
    },
    {
      "epoch": 0.848590932959916,
      "grad_norm": 0.8453179416188262,
      "learning_rate": 5.892458845532528e-07,
      "loss": 0.1064,
      "step": 29088
    },
    {
      "epoch": 0.8486201061905595,
      "grad_norm": 0.885642919851678,
      "learning_rate": 5.890234033401676e-07,
      "loss": 0.1155,
      "step": 29089
    },
    {
      "epoch": 0.848649279421203,
      "grad_norm": 1.123644647537821,
      "learning_rate": 5.888009615072293e-07,
      "loss": 0.1318,
      "step": 29090
    },
    {
      "epoch": 0.8486784526518467,
      "grad_norm": 0.9043374872091943,
      "learning_rate": 5.88578559056423e-07,
      "loss": 0.129,
      "step": 29091
    },
    {
      "epoch": 0.8487076258824903,
      "grad_norm": 1.048869038299374,
      "learning_rate": 5.883561959897338e-07,
      "loss": 0.1224,
      "step": 29092
    },
    {
      "epoch": 0.8487367991131338,
      "grad_norm": 0.7790492693780142,
      "learning_rate": 5.881338723091478e-07,
      "loss": 0.0853,
      "step": 29093
    },
    {
      "epoch": 0.8487659723437774,
      "grad_norm": 0.8720151642317396,
      "learning_rate": 5.879115880166486e-07,
      "loss": 0.1216,
      "step": 29094
    },
    {
      "epoch": 0.8487951455744209,
      "grad_norm": 0.7905411336337679,
      "learning_rate": 5.876893431142222e-07,
      "loss": 0.1265,
      "step": 29095
    },
    {
      "epoch": 0.8488243188050645,
      "grad_norm": 0.9281130594392916,
      "learning_rate": 5.874671376038516e-07,
      "loss": 0.1101,
      "step": 29096
    },
    {
      "epoch": 0.848853492035708,
      "grad_norm": 0.8018721161052225,
      "learning_rate": 5.872449714875217e-07,
      "loss": 0.1089,
      "step": 29097
    },
    {
      "epoch": 0.8488826652663516,
      "grad_norm": 0.8999977959568195,
      "learning_rate": 5.870228447672149e-07,
      "loss": 0.0836,
      "step": 29098
    },
    {
      "epoch": 0.8489118384969951,
      "grad_norm": 1.014786378940118,
      "learning_rate": 5.868007574449141e-07,
      "loss": 0.1261,
      "step": 29099
    },
    {
      "epoch": 0.8489410117276387,
      "grad_norm": 0.9986420432333819,
      "learning_rate": 5.865787095226028e-07,
      "loss": 0.0987,
      "step": 29100
    },
    {
      "epoch": 0.8489701849582822,
      "grad_norm": 0.7767268360772743,
      "learning_rate": 5.863567010022637e-07,
      "loss": 0.1271,
      "step": 29101
    },
    {
      "epoch": 0.8489993581889258,
      "grad_norm": 0.9771046731481287,
      "learning_rate": 5.861347318858779e-07,
      "loss": 0.1224,
      "step": 29102
    },
    {
      "epoch": 0.8490285314195694,
      "grad_norm": 0.7944777870991566,
      "learning_rate": 5.859128021754279e-07,
      "loss": 0.1122,
      "step": 29103
    },
    {
      "epoch": 0.849057704650213,
      "grad_norm": 0.7144871424506061,
      "learning_rate": 5.856909118728954e-07,
      "loss": 0.1138,
      "step": 29104
    },
    {
      "epoch": 0.8490868778808566,
      "grad_norm": 0.7735633126790501,
      "learning_rate": 5.854690609802593e-07,
      "loss": 0.1159,
      "step": 29105
    },
    {
      "epoch": 0.8491160511115001,
      "grad_norm": 0.9817189047356594,
      "learning_rate": 5.852472494995015e-07,
      "loss": 0.1034,
      "step": 29106
    },
    {
      "epoch": 0.8491452243421437,
      "grad_norm": 0.8178974331985509,
      "learning_rate": 5.850254774326037e-07,
      "loss": 0.109,
      "step": 29107
    },
    {
      "epoch": 0.8491743975727872,
      "grad_norm": 1.000510735201703,
      "learning_rate": 5.848037447815441e-07,
      "loss": 0.1211,
      "step": 29108
    },
    {
      "epoch": 0.8492035708034308,
      "grad_norm": 0.9691590387254037,
      "learning_rate": 5.84582051548302e-07,
      "loss": 0.1213,
      "step": 29109
    },
    {
      "epoch": 0.8492327440340743,
      "grad_norm": 1.0165256421212807,
      "learning_rate": 5.843603977348577e-07,
      "loss": 0.1086,
      "step": 29110
    },
    {
      "epoch": 0.8492619172647179,
      "grad_norm": 0.752107434507686,
      "learning_rate": 5.841387833431906e-07,
      "loss": 0.125,
      "step": 29111
    },
    {
      "epoch": 0.8492910904953614,
      "grad_norm": 0.722377620849228,
      "learning_rate": 5.839172083752765e-07,
      "loss": 0.1099,
      "step": 29112
    },
    {
      "epoch": 0.849320263726005,
      "grad_norm": 1.157240859714747,
      "learning_rate": 5.836956728330955e-07,
      "loss": 0.1158,
      "step": 29113
    },
    {
      "epoch": 0.8493494369566486,
      "grad_norm": 0.9839289493941649,
      "learning_rate": 5.834741767186264e-07,
      "loss": 0.1229,
      "step": 29114
    },
    {
      "epoch": 0.8493786101872921,
      "grad_norm": 0.9185839013973843,
      "learning_rate": 5.832527200338455e-07,
      "loss": 0.1161,
      "step": 29115
    },
    {
      "epoch": 0.8494077834179357,
      "grad_norm": 0.7015963374246615,
      "learning_rate": 5.830313027807294e-07,
      "loss": 0.1145,
      "step": 29116
    },
    {
      "epoch": 0.8494369566485792,
      "grad_norm": 0.7002043277821434,
      "learning_rate": 5.828099249612556e-07,
      "loss": 0.1176,
      "step": 29117
    },
    {
      "epoch": 0.8494661298792229,
      "grad_norm": 0.8103848443445484,
      "learning_rate": 5.825885865774001e-07,
      "loss": 0.0924,
      "step": 29118
    },
    {
      "epoch": 0.8494953031098664,
      "grad_norm": 0.6318928773755528,
      "learning_rate": 5.823672876311387e-07,
      "loss": 0.1167,
      "step": 29119
    },
    {
      "epoch": 0.84952447634051,
      "grad_norm": 0.8146860468507399,
      "learning_rate": 5.821460281244489e-07,
      "loss": 0.1168,
      "step": 29120
    },
    {
      "epoch": 0.8495536495711535,
      "grad_norm": 0.9467762026272764,
      "learning_rate": 5.819248080593043e-07,
      "loss": 0.133,
      "step": 29121
    },
    {
      "epoch": 0.8495828228017971,
      "grad_norm": 0.6215715522082205,
      "learning_rate": 5.817036274376797e-07,
      "loss": 0.0927,
      "step": 29122
    },
    {
      "epoch": 0.8496119960324406,
      "grad_norm": 1.9880019343072017,
      "learning_rate": 5.814824862615514e-07,
      "loss": 0.1337,
      "step": 29123
    },
    {
      "epoch": 0.8496411692630842,
      "grad_norm": 0.7421360015741368,
      "learning_rate": 5.812613845328912e-07,
      "loss": 0.1097,
      "step": 29124
    },
    {
      "epoch": 0.8496703424937277,
      "grad_norm": 0.7819558106184666,
      "learning_rate": 5.810403222536759e-07,
      "loss": 0.1272,
      "step": 29125
    },
    {
      "epoch": 0.8496995157243713,
      "grad_norm": 2.3119527103291024,
      "learning_rate": 5.808192994258771e-07,
      "loss": 0.1193,
      "step": 29126
    },
    {
      "epoch": 0.8497286889550149,
      "grad_norm": 0.8243265048202371,
      "learning_rate": 5.805983160514689e-07,
      "loss": 0.0942,
      "step": 29127
    },
    {
      "epoch": 0.8497578621856584,
      "grad_norm": 0.7023324635356789,
      "learning_rate": 5.803773721324247e-07,
      "loss": 0.0879,
      "step": 29128
    },
    {
      "epoch": 0.849787035416302,
      "grad_norm": 0.5917597332407699,
      "learning_rate": 5.801564676707144e-07,
      "loss": 0.1061,
      "step": 29129
    },
    {
      "epoch": 0.8498162086469455,
      "grad_norm": 0.7443615849355073,
      "learning_rate": 5.799356026683128e-07,
      "loss": 0.1144,
      "step": 29130
    },
    {
      "epoch": 0.8498453818775892,
      "grad_norm": 0.8423593937147441,
      "learning_rate": 5.797147771271916e-07,
      "loss": 0.1046,
      "step": 29131
    },
    {
      "epoch": 0.8498745551082327,
      "grad_norm": 0.9524970229024283,
      "learning_rate": 5.794939910493208e-07,
      "loss": 0.1036,
      "step": 29132
    },
    {
      "epoch": 0.8499037283388763,
      "grad_norm": 0.8264582787598,
      "learning_rate": 5.792732444366734e-07,
      "loss": 0.1114,
      "step": 29133
    },
    {
      "epoch": 0.8499329015695198,
      "grad_norm": 0.7869697019267445,
      "learning_rate": 5.790525372912192e-07,
      "loss": 0.1102,
      "step": 29134
    },
    {
      "epoch": 0.8499620748001634,
      "grad_norm": 0.9496161098997309,
      "learning_rate": 5.78831869614927e-07,
      "loss": 0.1052,
      "step": 29135
    },
    {
      "epoch": 0.849991248030807,
      "grad_norm": 0.7375757422015793,
      "learning_rate": 5.786112414097689e-07,
      "loss": 0.1191,
      "step": 29136
    },
    {
      "epoch": 0.8500204212614505,
      "grad_norm": 0.7762538525516017,
      "learning_rate": 5.783906526777155e-07,
      "loss": 0.1233,
      "step": 29137
    },
    {
      "epoch": 0.850049594492094,
      "grad_norm": 0.8092677835828772,
      "learning_rate": 5.781701034207343e-07,
      "loss": 0.1214,
      "step": 29138
    },
    {
      "epoch": 0.8500787677227376,
      "grad_norm": 0.933973821805636,
      "learning_rate": 5.779495936407942e-07,
      "loss": 0.1021,
      "step": 29139
    },
    {
      "epoch": 0.8501079409533812,
      "grad_norm": 0.9931670136317993,
      "learning_rate": 5.777291233398652e-07,
      "loss": 0.1106,
      "step": 29140
    },
    {
      "epoch": 0.8501371141840247,
      "grad_norm": 0.9863575881144226,
      "learning_rate": 5.775086925199152e-07,
      "loss": 0.1032,
      "step": 29141
    },
    {
      "epoch": 0.8501662874146683,
      "grad_norm": 1.396160627450317,
      "learning_rate": 5.772883011829106e-07,
      "loss": 0.1191,
      "step": 29142
    },
    {
      "epoch": 0.8501954606453118,
      "grad_norm": 0.9039831491364916,
      "learning_rate": 5.770679493308206e-07,
      "loss": 0.1148,
      "step": 29143
    },
    {
      "epoch": 0.8502246338759554,
      "grad_norm": 0.7585223572741104,
      "learning_rate": 5.768476369656128e-07,
      "loss": 0.1146,
      "step": 29144
    },
    {
      "epoch": 0.850253807106599,
      "grad_norm": 0.8947061008193511,
      "learning_rate": 5.766273640892539e-07,
      "loss": 0.0994,
      "step": 29145
    },
    {
      "epoch": 0.8502829803372426,
      "grad_norm": 0.9824931411171514,
      "learning_rate": 5.764071307037083e-07,
      "loss": 0.1101,
      "step": 29146
    },
    {
      "epoch": 0.8503121535678861,
      "grad_norm": 0.8736909814841786,
      "learning_rate": 5.761869368109451e-07,
      "loss": 0.1086,
      "step": 29147
    },
    {
      "epoch": 0.8503413267985297,
      "grad_norm": 0.9175574839728452,
      "learning_rate": 5.759667824129278e-07,
      "loss": 0.1266,
      "step": 29148
    },
    {
      "epoch": 0.8503705000291732,
      "grad_norm": 0.712944830383276,
      "learning_rate": 5.75746667511623e-07,
      "loss": 0.1255,
      "step": 29149
    },
    {
      "epoch": 0.8503996732598168,
      "grad_norm": 0.8898608437365731,
      "learning_rate": 5.75526592108997e-07,
      "loss": 0.1163,
      "step": 29150
    },
    {
      "epoch": 0.8504288464904604,
      "grad_norm": 0.9883291396051264,
      "learning_rate": 5.753065562070131e-07,
      "loss": 0.1288,
      "step": 29151
    },
    {
      "epoch": 0.8504580197211039,
      "grad_norm": 0.8090705015277261,
      "learning_rate": 5.75086559807635e-07,
      "loss": 0.1093,
      "step": 29152
    },
    {
      "epoch": 0.8504871929517475,
      "grad_norm": 0.7690992370566142,
      "learning_rate": 5.748666029128292e-07,
      "loss": 0.1064,
      "step": 29153
    },
    {
      "epoch": 0.850516366182391,
      "grad_norm": 0.729171642889137,
      "learning_rate": 5.746466855245564e-07,
      "loss": 0.1278,
      "step": 29154
    },
    {
      "epoch": 0.8505455394130346,
      "grad_norm": 0.7557397395067963,
      "learning_rate": 5.744268076447829e-07,
      "loss": 0.1128,
      "step": 29155
    },
    {
      "epoch": 0.8505747126436781,
      "grad_norm": 0.7387186636381406,
      "learning_rate": 5.742069692754692e-07,
      "loss": 0.1202,
      "step": 29156
    },
    {
      "epoch": 0.8506038858743217,
      "grad_norm": 0.8967285687017305,
      "learning_rate": 5.739871704185807e-07,
      "loss": 0.1042,
      "step": 29157
    },
    {
      "epoch": 0.8506330591049653,
      "grad_norm": 0.6574592588010976,
      "learning_rate": 5.737674110760777e-07,
      "loss": 0.0932,
      "step": 29158
    },
    {
      "epoch": 0.8506622323356089,
      "grad_norm": 0.7309849420674775,
      "learning_rate": 5.735476912499216e-07,
      "loss": 0.0916,
      "step": 29159
    },
    {
      "epoch": 0.8506914055662524,
      "grad_norm": 0.7636606999105496,
      "learning_rate": 5.733280109420753e-07,
      "loss": 0.1137,
      "step": 29160
    },
    {
      "epoch": 0.850720578796896,
      "grad_norm": 0.895668734767981,
      "learning_rate": 5.731083701545003e-07,
      "loss": 0.1118,
      "step": 29161
    },
    {
      "epoch": 0.8507497520275396,
      "grad_norm": 0.9338595039586092,
      "learning_rate": 5.728887688891566e-07,
      "loss": 0.1279,
      "step": 29162
    },
    {
      "epoch": 0.8507789252581831,
      "grad_norm": 0.9975607117987849,
      "learning_rate": 5.726692071480061e-07,
      "loss": 0.1198,
      "step": 29163
    },
    {
      "epoch": 0.8508080984888267,
      "grad_norm": 1.03145749253362,
      "learning_rate": 5.724496849330075e-07,
      "loss": 0.1095,
      "step": 29164
    },
    {
      "epoch": 0.8508372717194702,
      "grad_norm": 0.9783715480649365,
      "learning_rate": 5.722302022461206e-07,
      "loss": 0.1096,
      "step": 29165
    },
    {
      "epoch": 0.8508664449501138,
      "grad_norm": 0.8587857273281144,
      "learning_rate": 5.720107590893054e-07,
      "loss": 0.1263,
      "step": 29166
    },
    {
      "epoch": 0.8508956181807573,
      "grad_norm": 0.9195945072151114,
      "learning_rate": 5.717913554645221e-07,
      "loss": 0.102,
      "step": 29167
    },
    {
      "epoch": 0.8509247914114009,
      "grad_norm": 0.8380710786381557,
      "learning_rate": 5.715719913737283e-07,
      "loss": 0.1133,
      "step": 29168
    },
    {
      "epoch": 0.8509539646420444,
      "grad_norm": 0.7497282590627513,
      "learning_rate": 5.713526668188818e-07,
      "loss": 0.1194,
      "step": 29169
    },
    {
      "epoch": 0.850983137872688,
      "grad_norm": 0.9293399746740582,
      "learning_rate": 5.711333818019421e-07,
      "loss": 0.1134,
      "step": 29170
    },
    {
      "epoch": 0.8510123111033315,
      "grad_norm": 0.8645695735950689,
      "learning_rate": 5.709141363248666e-07,
      "loss": 0.0919,
      "step": 29171
    },
    {
      "epoch": 0.8510414843339752,
      "grad_norm": 0.7521536442160687,
      "learning_rate": 5.706949303896115e-07,
      "loss": 0.1092,
      "step": 29172
    },
    {
      "epoch": 0.8510706575646187,
      "grad_norm": 0.7737213973962924,
      "learning_rate": 5.704757639981346e-07,
      "loss": 0.1224,
      "step": 29173
    },
    {
      "epoch": 0.8510998307952623,
      "grad_norm": 1.0631300616216923,
      "learning_rate": 5.702566371523937e-07,
      "loss": 0.1334,
      "step": 29174
    },
    {
      "epoch": 0.8511290040259059,
      "grad_norm": 0.8543005343058528,
      "learning_rate": 5.700375498543442e-07,
      "loss": 0.1037,
      "step": 29175
    },
    {
      "epoch": 0.8511581772565494,
      "grad_norm": 0.8937262599219347,
      "learning_rate": 5.698185021059404e-07,
      "loss": 0.1138,
      "step": 29176
    },
    {
      "epoch": 0.851187350487193,
      "grad_norm": 0.7674303809737473,
      "learning_rate": 5.69599493909141e-07,
      "loss": 0.0949,
      "step": 29177
    },
    {
      "epoch": 0.8512165237178365,
      "grad_norm": 0.7625463589295192,
      "learning_rate": 5.693805252658984e-07,
      "loss": 0.1123,
      "step": 29178
    },
    {
      "epoch": 0.8512456969484801,
      "grad_norm": 0.8400415243150124,
      "learning_rate": 5.691615961781694e-07,
      "loss": 0.1069,
      "step": 29179
    },
    {
      "epoch": 0.8512748701791236,
      "grad_norm": 0.7553999497936758,
      "learning_rate": 5.689427066479081e-07,
      "loss": 0.1042,
      "step": 29180
    },
    {
      "epoch": 0.8513040434097672,
      "grad_norm": 1.9784624781982205,
      "learning_rate": 5.687238566770692e-07,
      "loss": 0.1156,
      "step": 29181
    },
    {
      "epoch": 0.8513332166404107,
      "grad_norm": 0.990666517949037,
      "learning_rate": 5.685050462676045e-07,
      "loss": 0.1046,
      "step": 29182
    },
    {
      "epoch": 0.8513623898710543,
      "grad_norm": 0.7638324941083079,
      "learning_rate": 5.682862754214696e-07,
      "loss": 0.1085,
      "step": 29183
    },
    {
      "epoch": 0.8513915631016978,
      "grad_norm": 0.8732438449305735,
      "learning_rate": 5.680675441406164e-07,
      "loss": 0.1168,
      "step": 29184
    },
    {
      "epoch": 0.8514207363323415,
      "grad_norm": 0.8059618409293302,
      "learning_rate": 5.678488524269993e-07,
      "loss": 0.1185,
      "step": 29185
    },
    {
      "epoch": 0.851449909562985,
      "grad_norm": 0.9910972610017613,
      "learning_rate": 5.676302002825679e-07,
      "loss": 0.1026,
      "step": 29186
    },
    {
      "epoch": 0.8514790827936286,
      "grad_norm": 0.950750253391563,
      "learning_rate": 5.674115877092773e-07,
      "loss": 0.1188,
      "step": 29187
    },
    {
      "epoch": 0.8515082560242722,
      "grad_norm": 0.8802836415266408,
      "learning_rate": 5.671930147090782e-07,
      "loss": 0.1192,
      "step": 29188
    },
    {
      "epoch": 0.8515374292549157,
      "grad_norm": 0.984339877038203,
      "learning_rate": 5.669744812839207e-07,
      "loss": 0.1,
      "step": 29189
    },
    {
      "epoch": 0.8515666024855593,
      "grad_norm": 0.8359904446673222,
      "learning_rate": 5.667559874357564e-07,
      "loss": 0.1205,
      "step": 29190
    },
    {
      "epoch": 0.8515957757162028,
      "grad_norm": 0.8618322585781738,
      "learning_rate": 5.665375331665374e-07,
      "loss": 0.1327,
      "step": 29191
    },
    {
      "epoch": 0.8516249489468464,
      "grad_norm": 0.9559095642731167,
      "learning_rate": 5.663191184782118e-07,
      "loss": 0.1171,
      "step": 29192
    },
    {
      "epoch": 0.8516541221774899,
      "grad_norm": 0.853620565237487,
      "learning_rate": 5.661007433727322e-07,
      "loss": 0.1026,
      "step": 29193
    },
    {
      "epoch": 0.8516832954081335,
      "grad_norm": 1.006325333801254,
      "learning_rate": 5.658824078520464e-07,
      "loss": 0.1141,
      "step": 29194
    },
    {
      "epoch": 0.851712468638777,
      "grad_norm": 1.0933564406113303,
      "learning_rate": 5.656641119181033e-07,
      "loss": 0.1073,
      "step": 29195
    },
    {
      "epoch": 0.8517416418694206,
      "grad_norm": 0.8362935999240677,
      "learning_rate": 5.65445855572852e-07,
      "loss": 0.1064,
      "step": 29196
    },
    {
      "epoch": 0.8517708151000641,
      "grad_norm": 0.7974708708563673,
      "learning_rate": 5.652276388182426e-07,
      "loss": 0.1009,
      "step": 29197
    },
    {
      "epoch": 0.8517999883307077,
      "grad_norm": 0.8415166654775664,
      "learning_rate": 5.650094616562224e-07,
      "loss": 0.1204,
      "step": 29198
    },
    {
      "epoch": 0.8518291615613514,
      "grad_norm": 0.815356017037793,
      "learning_rate": 5.647913240887376e-07,
      "loss": 0.1104,
      "step": 29199
    },
    {
      "epoch": 0.8518583347919949,
      "grad_norm": 0.7089991203220957,
      "learning_rate": 5.645732261177384e-07,
      "loss": 0.0861,
      "step": 29200
    },
    {
      "epoch": 0.8518875080226385,
      "grad_norm": 0.762593618453199,
      "learning_rate": 5.643551677451703e-07,
      "loss": 0.1023,
      "step": 29201
    },
    {
      "epoch": 0.851916681253282,
      "grad_norm": 0.7838990712235988,
      "learning_rate": 5.641371489729797e-07,
      "loss": 0.1186,
      "step": 29202
    },
    {
      "epoch": 0.8519458544839256,
      "grad_norm": 0.9898869616111717,
      "learning_rate": 5.639191698031137e-07,
      "loss": 0.1036,
      "step": 29203
    },
    {
      "epoch": 0.8519750277145691,
      "grad_norm": 0.8933480439330093,
      "learning_rate": 5.637012302375195e-07,
      "loss": 0.0961,
      "step": 29204
    },
    {
      "epoch": 0.8520042009452127,
      "grad_norm": 0.9400445190726446,
      "learning_rate": 5.634833302781411e-07,
      "loss": 0.1377,
      "step": 29205
    },
    {
      "epoch": 0.8520333741758562,
      "grad_norm": 0.8242725405017413,
      "learning_rate": 5.632654699269241e-07,
      "loss": 0.1228,
      "step": 29206
    },
    {
      "epoch": 0.8520625474064998,
      "grad_norm": 0.8976782201804555,
      "learning_rate": 5.630476491858145e-07,
      "loss": 0.1244,
      "step": 29207
    },
    {
      "epoch": 0.8520917206371433,
      "grad_norm": 0.82601834719666,
      "learning_rate": 5.628298680567556e-07,
      "loss": 0.1274,
      "step": 29208
    },
    {
      "epoch": 0.8521208938677869,
      "grad_norm": 0.9804919939828782,
      "learning_rate": 5.626121265416917e-07,
      "loss": 0.0974,
      "step": 29209
    },
    {
      "epoch": 0.8521500670984304,
      "grad_norm": 1.145379006702492,
      "learning_rate": 5.623944246425695e-07,
      "loss": 0.1192,
      "step": 29210
    },
    {
      "epoch": 0.852179240329074,
      "grad_norm": 0.7038569782151766,
      "learning_rate": 5.621767623613294e-07,
      "loss": 0.0943,
      "step": 29211
    },
    {
      "epoch": 0.8522084135597175,
      "grad_norm": 0.9213798575557912,
      "learning_rate": 5.619591396999158e-07,
      "loss": 0.1392,
      "step": 29212
    },
    {
      "epoch": 0.8522375867903612,
      "grad_norm": 0.9035633900811643,
      "learning_rate": 5.617415566602718e-07,
      "loss": 0.1199,
      "step": 29213
    },
    {
      "epoch": 0.8522667600210048,
      "grad_norm": 0.8680700646262012,
      "learning_rate": 5.61524013244339e-07,
      "loss": 0.1093,
      "step": 29214
    },
    {
      "epoch": 0.8522959332516483,
      "grad_norm": 0.8398015135678619,
      "learning_rate": 5.613065094540615e-07,
      "loss": 0.1124,
      "step": 29215
    },
    {
      "epoch": 0.8523251064822919,
      "grad_norm": 0.9560727325836809,
      "learning_rate": 5.610890452913787e-07,
      "loss": 0.1163,
      "step": 29216
    },
    {
      "epoch": 0.8523542797129354,
      "grad_norm": 0.8763948693257041,
      "learning_rate": 5.608716207582338e-07,
      "loss": 0.0945,
      "step": 29217
    },
    {
      "epoch": 0.852383452943579,
      "grad_norm": 1.0084153223099461,
      "learning_rate": 5.606542358565681e-07,
      "loss": 0.1143,
      "step": 29218
    },
    {
      "epoch": 0.8524126261742225,
      "grad_norm": 0.7695030500440252,
      "learning_rate": 5.6043689058832e-07,
      "loss": 0.1286,
      "step": 29219
    },
    {
      "epoch": 0.8524417994048661,
      "grad_norm": 0.8144531799096898,
      "learning_rate": 5.60219584955432e-07,
      "loss": 0.1279,
      "step": 29220
    },
    {
      "epoch": 0.8524709726355096,
      "grad_norm": 1.0118076535744631,
      "learning_rate": 5.600023189598442e-07,
      "loss": 0.1185,
      "step": 29221
    },
    {
      "epoch": 0.8525001458661532,
      "grad_norm": 0.895815978752632,
      "learning_rate": 5.597850926034954e-07,
      "loss": 0.1265,
      "step": 29222
    },
    {
      "epoch": 0.8525293190967967,
      "grad_norm": 0.9172450629512476,
      "learning_rate": 5.595679058883257e-07,
      "loss": 0.1147,
      "step": 29223
    },
    {
      "epoch": 0.8525584923274403,
      "grad_norm": 0.7653174122887514,
      "learning_rate": 5.593507588162739e-07,
      "loss": 0.1096,
      "step": 29224
    },
    {
      "epoch": 0.8525876655580839,
      "grad_norm": 0.7425071369559749,
      "learning_rate": 5.591336513892776e-07,
      "loss": 0.102,
      "step": 29225
    },
    {
      "epoch": 0.8526168387887275,
      "grad_norm": 0.6504027414778139,
      "learning_rate": 5.589165836092759e-07,
      "loss": 0.1053,
      "step": 29226
    },
    {
      "epoch": 0.8526460120193711,
      "grad_norm": 1.1333888888884196,
      "learning_rate": 5.586995554782076e-07,
      "loss": 0.132,
      "step": 29227
    },
    {
      "epoch": 0.8526751852500146,
      "grad_norm": 0.9105737631709079,
      "learning_rate": 5.584825669980098e-07,
      "loss": 0.12,
      "step": 29228
    },
    {
      "epoch": 0.8527043584806582,
      "grad_norm": 0.9115820786569032,
      "learning_rate": 5.582656181706181e-07,
      "loss": 0.089,
      "step": 29229
    },
    {
      "epoch": 0.8527335317113017,
      "grad_norm": 0.968959144874335,
      "learning_rate": 5.58048708997972e-07,
      "loss": 0.1097,
      "step": 29230
    },
    {
      "epoch": 0.8527627049419453,
      "grad_norm": 1.0005124495851303,
      "learning_rate": 5.578318394820053e-07,
      "loss": 0.1077,
      "step": 29231
    },
    {
      "epoch": 0.8527918781725888,
      "grad_norm": 0.9823464381116843,
      "learning_rate": 5.576150096246563e-07,
      "loss": 0.1147,
      "step": 29232
    },
    {
      "epoch": 0.8528210514032324,
      "grad_norm": 0.7421311343553169,
      "learning_rate": 5.573982194278594e-07,
      "loss": 0.1203,
      "step": 29233
    },
    {
      "epoch": 0.8528502246338759,
      "grad_norm": 0.9075214868246244,
      "learning_rate": 5.571814688935517e-07,
      "loss": 0.1131,
      "step": 29234
    },
    {
      "epoch": 0.8528793978645195,
      "grad_norm": 0.9861239596102009,
      "learning_rate": 5.56964758023667e-07,
      "loss": 0.1174,
      "step": 29235
    },
    {
      "epoch": 0.852908571095163,
      "grad_norm": 1.2044479152443401,
      "learning_rate": 5.567480868201397e-07,
      "loss": 0.1481,
      "step": 29236
    },
    {
      "epoch": 0.8529377443258066,
      "grad_norm": 0.72138023383901,
      "learning_rate": 5.565314552849044e-07,
      "loss": 0.1024,
      "step": 29237
    },
    {
      "epoch": 0.8529669175564502,
      "grad_norm": 0.9495747437271533,
      "learning_rate": 5.563148634198967e-07,
      "loss": 0.1459,
      "step": 29238
    },
    {
      "epoch": 0.8529960907870937,
      "grad_norm": 1.001649971644289,
      "learning_rate": 5.560983112270479e-07,
      "loss": 0.1078,
      "step": 29239
    },
    {
      "epoch": 0.8530252640177374,
      "grad_norm": 0.9833936050822271,
      "learning_rate": 5.558817987082937e-07,
      "loss": 0.1111,
      "step": 29240
    },
    {
      "epoch": 0.8530544372483809,
      "grad_norm": 0.7548229031177789,
      "learning_rate": 5.556653258655659e-07,
      "loss": 0.11,
      "step": 29241
    },
    {
      "epoch": 0.8530836104790245,
      "grad_norm": 0.9911272248553084,
      "learning_rate": 5.554488927007961e-07,
      "loss": 0.0984,
      "step": 29242
    },
    {
      "epoch": 0.853112783709668,
      "grad_norm": 0.7981739377756809,
      "learning_rate": 5.552324992159175e-07,
      "loss": 0.0982,
      "step": 29243
    },
    {
      "epoch": 0.8531419569403116,
      "grad_norm": 0.7470635185418462,
      "learning_rate": 5.550161454128633e-07,
      "loss": 0.1243,
      "step": 29244
    },
    {
      "epoch": 0.8531711301709551,
      "grad_norm": 0.884584534357942,
      "learning_rate": 5.547998312935637e-07,
      "loss": 0.1326,
      "step": 29245
    },
    {
      "epoch": 0.8532003034015987,
      "grad_norm": 1.1726197074415614,
      "learning_rate": 5.545835568599489e-07,
      "loss": 0.1056,
      "step": 29246
    },
    {
      "epoch": 0.8532294766322422,
      "grad_norm": 0.7488514684875713,
      "learning_rate": 5.543673221139517e-07,
      "loss": 0.1449,
      "step": 29247
    },
    {
      "epoch": 0.8532586498628858,
      "grad_norm": 0.8045346970530131,
      "learning_rate": 5.541511270575023e-07,
      "loss": 0.1172,
      "step": 29248
    },
    {
      "epoch": 0.8532878230935294,
      "grad_norm": 1.257946166363082,
      "learning_rate": 5.539349716925285e-07,
      "loss": 0.0989,
      "step": 29249
    },
    {
      "epoch": 0.8533169963241729,
      "grad_norm": 0.8544562436703373,
      "learning_rate": 5.537188560209633e-07,
      "loss": 0.1055,
      "step": 29250
    },
    {
      "epoch": 0.8533461695548165,
      "grad_norm": 0.7590419976067295,
      "learning_rate": 5.535027800447351e-07,
      "loss": 0.1372,
      "step": 29251
    },
    {
      "epoch": 0.85337534278546,
      "grad_norm": 0.9760112363389173,
      "learning_rate": 5.532867437657718e-07,
      "loss": 0.092,
      "step": 29252
    },
    {
      "epoch": 0.8534045160161037,
      "grad_norm": 1.0380472022167764,
      "learning_rate": 5.530707471860036e-07,
      "loss": 0.1023,
      "step": 29253
    },
    {
      "epoch": 0.8534336892467472,
      "grad_norm": 1.066616929378313,
      "learning_rate": 5.528547903073583e-07,
      "loss": 0.1117,
      "step": 29254
    },
    {
      "epoch": 0.8534628624773908,
      "grad_norm": 0.6603745490932088,
      "learning_rate": 5.526388731317627e-07,
      "loss": 0.0984,
      "step": 29255
    },
    {
      "epoch": 0.8534920357080343,
      "grad_norm": 0.7552520615868892,
      "learning_rate": 5.524229956611454e-07,
      "loss": 0.1125,
      "step": 29256
    },
    {
      "epoch": 0.8535212089386779,
      "grad_norm": 0.8156985411177039,
      "learning_rate": 5.522071578974353e-07,
      "loss": 0.1077,
      "step": 29257
    },
    {
      "epoch": 0.8535503821693214,
      "grad_norm": 0.6509190477659933,
      "learning_rate": 5.51991359842558e-07,
      "loss": 0.11,
      "step": 29258
    },
    {
      "epoch": 0.853579555399965,
      "grad_norm": 0.81201383586702,
      "learning_rate": 5.517756014984388e-07,
      "loss": 0.1196,
      "step": 29259
    },
    {
      "epoch": 0.8536087286306085,
      "grad_norm": 0.7430861923950558,
      "learning_rate": 5.51559882867006e-07,
      "loss": 0.135,
      "step": 29260
    },
    {
      "epoch": 0.8536379018612521,
      "grad_norm": 1.1002617405827813,
      "learning_rate": 5.513442039501837e-07,
      "loss": 0.1079,
      "step": 29261
    },
    {
      "epoch": 0.8536670750918957,
      "grad_norm": 1.135539187412405,
      "learning_rate": 5.511285647498993e-07,
      "loss": 0.1157,
      "step": 29262
    },
    {
      "epoch": 0.8536962483225392,
      "grad_norm": 1.1742054621140416,
      "learning_rate": 5.509129652680761e-07,
      "loss": 0.1466,
      "step": 29263
    },
    {
      "epoch": 0.8537254215531828,
      "grad_norm": 0.8570858075514588,
      "learning_rate": 5.506974055066411e-07,
      "loss": 0.0934,
      "step": 29264
    },
    {
      "epoch": 0.8537545947838263,
      "grad_norm": 0.6999114656213908,
      "learning_rate": 5.504818854675176e-07,
      "loss": 0.118,
      "step": 29265
    },
    {
      "epoch": 0.8537837680144699,
      "grad_norm": 0.7353575055464785,
      "learning_rate": 5.502664051526285e-07,
      "loss": 0.0964,
      "step": 29266
    },
    {
      "epoch": 0.8538129412451135,
      "grad_norm": 0.8843302451918854,
      "learning_rate": 5.500509645638985e-07,
      "loss": 0.0923,
      "step": 29267
    },
    {
      "epoch": 0.8538421144757571,
      "grad_norm": 0.8845497856311907,
      "learning_rate": 5.498355637032521e-07,
      "loss": 0.0977,
      "step": 29268
    },
    {
      "epoch": 0.8538712877064006,
      "grad_norm": 0.8000443880087532,
      "learning_rate": 5.49620202572611e-07,
      "loss": 0.1068,
      "step": 29269
    },
    {
      "epoch": 0.8539004609370442,
      "grad_norm": 0.7903037974342727,
      "learning_rate": 5.494048811738989e-07,
      "loss": 0.1145,
      "step": 29270
    },
    {
      "epoch": 0.8539296341676877,
      "grad_norm": 0.9523686122631562,
      "learning_rate": 5.491895995090374e-07,
      "loss": 0.0961,
      "step": 29271
    },
    {
      "epoch": 0.8539588073983313,
      "grad_norm": 1.2468134396418666,
      "learning_rate": 5.489743575799483e-07,
      "loss": 0.1321,
      "step": 29272
    },
    {
      "epoch": 0.8539879806289749,
      "grad_norm": 0.9434175407452533,
      "learning_rate": 5.48759155388553e-07,
      "loss": 0.1158,
      "step": 29273
    },
    {
      "epoch": 0.8540171538596184,
      "grad_norm": 1.1145933180924381,
      "learning_rate": 5.485439929367748e-07,
      "loss": 0.104,
      "step": 29274
    },
    {
      "epoch": 0.854046327090262,
      "grad_norm": 0.7761601853374593,
      "learning_rate": 5.483288702265327e-07,
      "loss": 0.1093,
      "step": 29275
    },
    {
      "epoch": 0.8540755003209055,
      "grad_norm": 0.6857080858149806,
      "learning_rate": 5.481137872597469e-07,
      "loss": 0.1158,
      "step": 29276
    },
    {
      "epoch": 0.8541046735515491,
      "grad_norm": 0.9622116621490467,
      "learning_rate": 5.478987440383399e-07,
      "loss": 0.1361,
      "step": 29277
    },
    {
      "epoch": 0.8541338467821926,
      "grad_norm": 0.8826381949857007,
      "learning_rate": 5.476837405642299e-07,
      "loss": 0.103,
      "step": 29278
    },
    {
      "epoch": 0.8541630200128362,
      "grad_norm": 0.8971376210726807,
      "learning_rate": 5.474687768393344e-07,
      "loss": 0.1357,
      "step": 29279
    },
    {
      "epoch": 0.8541921932434798,
      "grad_norm": 1.140063921540892,
      "learning_rate": 5.472538528655769e-07,
      "loss": 0.1181,
      "step": 29280
    },
    {
      "epoch": 0.8542213664741234,
      "grad_norm": 0.7401249554485791,
      "learning_rate": 5.47038968644874e-07,
      "loss": 0.0968,
      "step": 29281
    },
    {
      "epoch": 0.8542505397047669,
      "grad_norm": 0.69552968201201,
      "learning_rate": 5.468241241791428e-07,
      "loss": 0.0871,
      "step": 29282
    },
    {
      "epoch": 0.8542797129354105,
      "grad_norm": 0.9130804690861541,
      "learning_rate": 5.466093194703043e-07,
      "loss": 0.1062,
      "step": 29283
    },
    {
      "epoch": 0.854308886166054,
      "grad_norm": 1.1165299293809114,
      "learning_rate": 5.463945545202748e-07,
      "loss": 0.1248,
      "step": 29284
    },
    {
      "epoch": 0.8543380593966976,
      "grad_norm": 1.119356456817356,
      "learning_rate": 5.4617982933097e-07,
      "loss": 0.1211,
      "step": 29285
    },
    {
      "epoch": 0.8543672326273412,
      "grad_norm": 0.7619026129903873,
      "learning_rate": 5.45965143904309e-07,
      "loss": 0.1175,
      "step": 29286
    },
    {
      "epoch": 0.8543964058579847,
      "grad_norm": 1.115115578102341,
      "learning_rate": 5.457504982422085e-07,
      "loss": 0.1192,
      "step": 29287
    },
    {
      "epoch": 0.8544255790886283,
      "grad_norm": 0.967813089467639,
      "learning_rate": 5.455358923465843e-07,
      "loss": 0.1106,
      "step": 29288
    },
    {
      "epoch": 0.8544547523192718,
      "grad_norm": 2.7302747049786613,
      "learning_rate": 5.453213262193513e-07,
      "loss": 0.1132,
      "step": 29289
    },
    {
      "epoch": 0.8544839255499154,
      "grad_norm": 0.9810793593185674,
      "learning_rate": 5.451067998624276e-07,
      "loss": 0.1049,
      "step": 29290
    },
    {
      "epoch": 0.8545130987805589,
      "grad_norm": 0.8378819714164876,
      "learning_rate": 5.448923132777256e-07,
      "loss": 0.1206,
      "step": 29291
    },
    {
      "epoch": 0.8545422720112025,
      "grad_norm": 1.0451784392384882,
      "learning_rate": 5.44677866467162e-07,
      "loss": 0.1264,
      "step": 29292
    },
    {
      "epoch": 0.854571445241846,
      "grad_norm": 0.8355508850984268,
      "learning_rate": 5.444634594326503e-07,
      "loss": 0.0946,
      "step": 29293
    },
    {
      "epoch": 0.8546006184724897,
      "grad_norm": 0.8607237234297561,
      "learning_rate": 5.442490921761062e-07,
      "loss": 0.1121,
      "step": 29294
    },
    {
      "epoch": 0.8546297917031332,
      "grad_norm": 0.7746667125089692,
      "learning_rate": 5.440347646994426e-07,
      "loss": 0.1262,
      "step": 29295
    },
    {
      "epoch": 0.8546589649337768,
      "grad_norm": 0.9918799982651284,
      "learning_rate": 5.438204770045719e-07,
      "loss": 0.1231,
      "step": 29296
    },
    {
      "epoch": 0.8546881381644204,
      "grad_norm": 0.8050739299835314,
      "learning_rate": 5.43606229093408e-07,
      "loss": 0.1094,
      "step": 29297
    },
    {
      "epoch": 0.8547173113950639,
      "grad_norm": 0.6753780555823767,
      "learning_rate": 5.433920209678651e-07,
      "loss": 0.1074,
      "step": 29298
    },
    {
      "epoch": 0.8547464846257075,
      "grad_norm": 0.7884651155454309,
      "learning_rate": 5.431778526298531e-07,
      "loss": 0.1059,
      "step": 29299
    },
    {
      "epoch": 0.854775657856351,
      "grad_norm": 0.8300237753886387,
      "learning_rate": 5.429637240812863e-07,
      "loss": 0.1112,
      "step": 29300
    },
    {
      "epoch": 0.8548048310869946,
      "grad_norm": 0.9567400843568022,
      "learning_rate": 5.427496353240757e-07,
      "loss": 0.1101,
      "step": 29301
    },
    {
      "epoch": 0.8548340043176381,
      "grad_norm": 0.9788802429972455,
      "learning_rate": 5.425355863601311e-07,
      "loss": 0.1172,
      "step": 29302
    },
    {
      "epoch": 0.8548631775482817,
      "grad_norm": 0.7857087629895083,
      "learning_rate": 5.423215771913648e-07,
      "loss": 0.0997,
      "step": 29303
    },
    {
      "epoch": 0.8548923507789252,
      "grad_norm": 1.191233660332849,
      "learning_rate": 5.42107607819688e-07,
      "loss": 0.1066,
      "step": 29304
    },
    {
      "epoch": 0.8549215240095688,
      "grad_norm": 0.9282773544238075,
      "learning_rate": 5.418936782470108e-07,
      "loss": 0.1357,
      "step": 29305
    },
    {
      "epoch": 0.8549506972402123,
      "grad_norm": 0.8048667103505373,
      "learning_rate": 5.416797884752412e-07,
      "loss": 0.0964,
      "step": 29306
    },
    {
      "epoch": 0.854979870470856,
      "grad_norm": 0.6759841812231248,
      "learning_rate": 5.414659385062915e-07,
      "loss": 0.0757,
      "step": 29307
    },
    {
      "epoch": 0.8550090437014995,
      "grad_norm": 0.8143535130999393,
      "learning_rate": 5.412521283420691e-07,
      "loss": 0.1249,
      "step": 29308
    },
    {
      "epoch": 0.8550382169321431,
      "grad_norm": 1.0261914213636394,
      "learning_rate": 5.410383579844819e-07,
      "loss": 0.1434,
      "step": 29309
    },
    {
      "epoch": 0.8550673901627867,
      "grad_norm": 0.9932610365277973,
      "learning_rate": 5.408246274354412e-07,
      "loss": 0.1141,
      "step": 29310
    },
    {
      "epoch": 0.8550965633934302,
      "grad_norm": 0.8289037734445469,
      "learning_rate": 5.406109366968542e-07,
      "loss": 0.0952,
      "step": 29311
    },
    {
      "epoch": 0.8551257366240738,
      "grad_norm": 0.707035527951989,
      "learning_rate": 5.403972857706269e-07,
      "loss": 0.0987,
      "step": 29312
    },
    {
      "epoch": 0.8551549098547173,
      "grad_norm": 1.27919610258187,
      "learning_rate": 5.401836746586691e-07,
      "loss": 0.121,
      "step": 29313
    },
    {
      "epoch": 0.8551840830853609,
      "grad_norm": 0.8770398345237028,
      "learning_rate": 5.399701033628873e-07,
      "loss": 0.0863,
      "step": 29314
    },
    {
      "epoch": 0.8552132563160044,
      "grad_norm": 1.0098450048278378,
      "learning_rate": 5.397565718851861e-07,
      "loss": 0.1365,
      "step": 29315
    },
    {
      "epoch": 0.855242429546648,
      "grad_norm": 0.7429027275631229,
      "learning_rate": 5.395430802274737e-07,
      "loss": 0.0928,
      "step": 29316
    },
    {
      "epoch": 0.8552716027772915,
      "grad_norm": 1.0558971372613972,
      "learning_rate": 5.393296283916571e-07,
      "loss": 0.1118,
      "step": 29317
    },
    {
      "epoch": 0.8553007760079351,
      "grad_norm": 0.872179606238544,
      "learning_rate": 5.391162163796404e-07,
      "loss": 0.1204,
      "step": 29318
    },
    {
      "epoch": 0.8553299492385786,
      "grad_norm": 0.7364807336666261,
      "learning_rate": 5.38902844193328e-07,
      "loss": 0.1309,
      "step": 29319
    },
    {
      "epoch": 0.8553591224692222,
      "grad_norm": 1.0647888786797322,
      "learning_rate": 5.386895118346275e-07,
      "loss": 0.1439,
      "step": 29320
    },
    {
      "epoch": 0.8553882956998659,
      "grad_norm": 0.8659584860814815,
      "learning_rate": 5.384762193054411e-07,
      "loss": 0.0887,
      "step": 29321
    },
    {
      "epoch": 0.8554174689305094,
      "grad_norm": 0.716367057438113,
      "learning_rate": 5.38262966607675e-07,
      "loss": 0.0905,
      "step": 29322
    },
    {
      "epoch": 0.855446642161153,
      "grad_norm": 0.8314557367445821,
      "learning_rate": 5.380497537432306e-07,
      "loss": 0.1015,
      "step": 29323
    },
    {
      "epoch": 0.8554758153917965,
      "grad_norm": 0.935713220005702,
      "learning_rate": 5.37836580714014e-07,
      "loss": 0.1101,
      "step": 29324
    },
    {
      "epoch": 0.8555049886224401,
      "grad_norm": 0.8077184679024814,
      "learning_rate": 5.376234475219272e-07,
      "loss": 0.1062,
      "step": 29325
    },
    {
      "epoch": 0.8555341618530836,
      "grad_norm": 0.7880069280216613,
      "learning_rate": 5.374103541688724e-07,
      "loss": 0.1079,
      "step": 29326
    },
    {
      "epoch": 0.8555633350837272,
      "grad_norm": 0.7149049891944173,
      "learning_rate": 5.371973006567521e-07,
      "loss": 0.0921,
      "step": 29327
    },
    {
      "epoch": 0.8555925083143707,
      "grad_norm": 0.7817950072121391,
      "learning_rate": 5.369842869874703e-07,
      "loss": 0.0931,
      "step": 29328
    },
    {
      "epoch": 0.8556216815450143,
      "grad_norm": 0.8093934164300488,
      "learning_rate": 5.367713131629259e-07,
      "loss": 0.1161,
      "step": 29329
    },
    {
      "epoch": 0.8556508547756578,
      "grad_norm": 0.8492960270568929,
      "learning_rate": 5.365583791850232e-07,
      "loss": 0.1023,
      "step": 29330
    },
    {
      "epoch": 0.8556800280063014,
      "grad_norm": 0.8401120955569392,
      "learning_rate": 5.363454850556621e-07,
      "loss": 0.1195,
      "step": 29331
    },
    {
      "epoch": 0.8557092012369449,
      "grad_norm": 0.6950237996322621,
      "learning_rate": 5.361326307767411e-07,
      "loss": 0.1113,
      "step": 29332
    },
    {
      "epoch": 0.8557383744675885,
      "grad_norm": 0.7877889633338129,
      "learning_rate": 5.359198163501628e-07,
      "loss": 0.105,
      "step": 29333
    },
    {
      "epoch": 0.8557675476982322,
      "grad_norm": 0.7789335755700192,
      "learning_rate": 5.357070417778282e-07,
      "loss": 0.1108,
      "step": 29334
    },
    {
      "epoch": 0.8557967209288757,
      "grad_norm": 0.8458783956068391,
      "learning_rate": 5.354943070616348e-07,
      "loss": 0.1081,
      "step": 29335
    },
    {
      "epoch": 0.8558258941595193,
      "grad_norm": 0.8052134834191995,
      "learning_rate": 5.352816122034815e-07,
      "loss": 0.1087,
      "step": 29336
    },
    {
      "epoch": 0.8558550673901628,
      "grad_norm": 0.766542615685328,
      "learning_rate": 5.350689572052692e-07,
      "loss": 0.1099,
      "step": 29337
    },
    {
      "epoch": 0.8558842406208064,
      "grad_norm": 0.864832166392014,
      "learning_rate": 5.348563420688951e-07,
      "loss": 0.0988,
      "step": 29338
    },
    {
      "epoch": 0.8559134138514499,
      "grad_norm": 0.7109001278436873,
      "learning_rate": 5.346437667962562e-07,
      "loss": 0.0895,
      "step": 29339
    },
    {
      "epoch": 0.8559425870820935,
      "grad_norm": 0.7544914283330012,
      "learning_rate": 5.344312313892536e-07,
      "loss": 0.1045,
      "step": 29340
    },
    {
      "epoch": 0.855971760312737,
      "grad_norm": 0.7666375340072559,
      "learning_rate": 5.34218735849783e-07,
      "loss": 0.1037,
      "step": 29341
    },
    {
      "epoch": 0.8560009335433806,
      "grad_norm": 0.7521466008665737,
      "learning_rate": 5.340062801797402e-07,
      "loss": 0.1083,
      "step": 29342
    },
    {
      "epoch": 0.8560301067740241,
      "grad_norm": 0.8834062688206936,
      "learning_rate": 5.337938643810248e-07,
      "loss": 0.1275,
      "step": 29343
    },
    {
      "epoch": 0.8560592800046677,
      "grad_norm": 0.8681128326531021,
      "learning_rate": 5.335814884555313e-07,
      "loss": 0.0979,
      "step": 29344
    },
    {
      "epoch": 0.8560884532353112,
      "grad_norm": 0.77129467969646,
      "learning_rate": 5.333691524051549e-07,
      "loss": 0.1254,
      "step": 29345
    },
    {
      "epoch": 0.8561176264659548,
      "grad_norm": 0.7712736447209378,
      "learning_rate": 5.331568562317924e-07,
      "loss": 0.1112,
      "step": 29346
    },
    {
      "epoch": 0.8561467996965983,
      "grad_norm": 0.9815960137582008,
      "learning_rate": 5.3294459993734e-07,
      "loss": 0.1491,
      "step": 29347
    },
    {
      "epoch": 0.856175972927242,
      "grad_norm": 0.7662818827976515,
      "learning_rate": 5.327323835236919e-07,
      "loss": 0.1138,
      "step": 29348
    },
    {
      "epoch": 0.8562051461578856,
      "grad_norm": 1.0146229817022845,
      "learning_rate": 5.325202069927421e-07,
      "loss": 0.1066,
      "step": 29349
    },
    {
      "epoch": 0.8562343193885291,
      "grad_norm": 1.1253188556326577,
      "learning_rate": 5.323080703463862e-07,
      "loss": 0.13,
      "step": 29350
    },
    {
      "epoch": 0.8562634926191727,
      "grad_norm": 0.9550917774014132,
      "learning_rate": 5.320959735865161e-07,
      "loss": 0.1288,
      "step": 29351
    },
    {
      "epoch": 0.8562926658498162,
      "grad_norm": 1.6951356957353556,
      "learning_rate": 5.31883916715028e-07,
      "loss": 0.1206,
      "step": 29352
    },
    {
      "epoch": 0.8563218390804598,
      "grad_norm": 0.7042063336446921,
      "learning_rate": 5.316718997338128e-07,
      "loss": 0.0904,
      "step": 29353
    },
    {
      "epoch": 0.8563510123111033,
      "grad_norm": 0.703958415806529,
      "learning_rate": 5.314599226447648e-07,
      "loss": 0.0926,
      "step": 29354
    },
    {
      "epoch": 0.8563801855417469,
      "grad_norm": 0.7579356122279145,
      "learning_rate": 5.312479854497754e-07,
      "loss": 0.0997,
      "step": 29355
    },
    {
      "epoch": 0.8564093587723904,
      "grad_norm": 0.986485784109621,
      "learning_rate": 5.31036088150737e-07,
      "loss": 0.1041,
      "step": 29356
    },
    {
      "epoch": 0.856438532003034,
      "grad_norm": 1.199734024618942,
      "learning_rate": 5.308242307495414e-07,
      "loss": 0.0954,
      "step": 29357
    },
    {
      "epoch": 0.8564677052336775,
      "grad_norm": 0.7066274475070856,
      "learning_rate": 5.306124132480811e-07,
      "loss": 0.1054,
      "step": 29358
    },
    {
      "epoch": 0.8564968784643211,
      "grad_norm": 0.8040161117063696,
      "learning_rate": 5.304006356482449e-07,
      "loss": 0.1134,
      "step": 29359
    },
    {
      "epoch": 0.8565260516949647,
      "grad_norm": 0.8210433065501983,
      "learning_rate": 5.301888979519265e-07,
      "loss": 0.1115,
      "step": 29360
    },
    {
      "epoch": 0.8565552249256083,
      "grad_norm": 0.7970420691389978,
      "learning_rate": 5.299772001610143e-07,
      "loss": 0.135,
      "step": 29361
    },
    {
      "epoch": 0.8565843981562519,
      "grad_norm": 0.6669580327023281,
      "learning_rate": 5.297655422773973e-07,
      "loss": 0.1014,
      "step": 29362
    },
    {
      "epoch": 0.8566135713868954,
      "grad_norm": 0.964123335847811,
      "learning_rate": 5.295539243029668e-07,
      "loss": 0.1128,
      "step": 29363
    },
    {
      "epoch": 0.856642744617539,
      "grad_norm": 0.7834984730795249,
      "learning_rate": 5.293423462396124e-07,
      "loss": 0.0842,
      "step": 29364
    },
    {
      "epoch": 0.8566719178481825,
      "grad_norm": 0.8848024773925179,
      "learning_rate": 5.291308080892226e-07,
      "loss": 0.0944,
      "step": 29365
    },
    {
      "epoch": 0.8567010910788261,
      "grad_norm": 0.6919986244584313,
      "learning_rate": 5.289193098536844e-07,
      "loss": 0.1162,
      "step": 29366
    },
    {
      "epoch": 0.8567302643094696,
      "grad_norm": 0.861378097059738,
      "learning_rate": 5.287078515348887e-07,
      "loss": 0.1364,
      "step": 29367
    },
    {
      "epoch": 0.8567594375401132,
      "grad_norm": 0.8173164390501377,
      "learning_rate": 5.284964331347214e-07,
      "loss": 0.0966,
      "step": 29368
    },
    {
      "epoch": 0.8567886107707567,
      "grad_norm": 0.885406075990331,
      "learning_rate": 5.282850546550689e-07,
      "loss": 0.1085,
      "step": 29369
    },
    {
      "epoch": 0.8568177840014003,
      "grad_norm": 0.9307121622400922,
      "learning_rate": 5.280737160978216e-07,
      "loss": 0.133,
      "step": 29370
    },
    {
      "epoch": 0.8568469572320438,
      "grad_norm": 0.749292932707458,
      "learning_rate": 5.27862417464865e-07,
      "loss": 0.1077,
      "step": 29371
    },
    {
      "epoch": 0.8568761304626874,
      "grad_norm": 0.7245720534743995,
      "learning_rate": 5.276511587580835e-07,
      "loss": 0.1093,
      "step": 29372
    },
    {
      "epoch": 0.856905303693331,
      "grad_norm": 0.7584360200431304,
      "learning_rate": 5.274399399793667e-07,
      "loss": 0.1209,
      "step": 29373
    },
    {
      "epoch": 0.8569344769239745,
      "grad_norm": 1.0455436498297157,
      "learning_rate": 5.272287611305976e-07,
      "loss": 0.1208,
      "step": 29374
    },
    {
      "epoch": 0.8569636501546182,
      "grad_norm": 0.8819058019966554,
      "learning_rate": 5.270176222136619e-07,
      "loss": 0.1199,
      "step": 29375
    },
    {
      "epoch": 0.8569928233852617,
      "grad_norm": 0.7700924778430094,
      "learning_rate": 5.268065232304448e-07,
      "loss": 0.1113,
      "step": 29376
    },
    {
      "epoch": 0.8570219966159053,
      "grad_norm": 1.129022640684438,
      "learning_rate": 5.265954641828325e-07,
      "loss": 0.1218,
      "step": 29377
    },
    {
      "epoch": 0.8570511698465488,
      "grad_norm": 0.8689289119023054,
      "learning_rate": 5.263844450727079e-07,
      "loss": 0.1053,
      "step": 29378
    },
    {
      "epoch": 0.8570803430771924,
      "grad_norm": 0.8711379810428355,
      "learning_rate": 5.261734659019541e-07,
      "loss": 0.1232,
      "step": 29379
    },
    {
      "epoch": 0.8571095163078359,
      "grad_norm": 0.8130489438871341,
      "learning_rate": 5.259625266724566e-07,
      "loss": 0.1019,
      "step": 29380
    },
    {
      "epoch": 0.8571386895384795,
      "grad_norm": 0.9020637647946841,
      "learning_rate": 5.257516273860963e-07,
      "loss": 0.1222,
      "step": 29381
    },
    {
      "epoch": 0.857167862769123,
      "grad_norm": 1.0732804808961944,
      "learning_rate": 5.255407680447589e-07,
      "loss": 0.1331,
      "step": 29382
    },
    {
      "epoch": 0.8571970359997666,
      "grad_norm": 0.8667888667591757,
      "learning_rate": 5.253299486503238e-07,
      "loss": 0.1082,
      "step": 29383
    },
    {
      "epoch": 0.8572262092304102,
      "grad_norm": 0.8101618514340407,
      "learning_rate": 5.251191692046764e-07,
      "loss": 0.1161,
      "step": 29384
    },
    {
      "epoch": 0.8572553824610537,
      "grad_norm": 0.70625914522278,
      "learning_rate": 5.249084297096962e-07,
      "loss": 0.108,
      "step": 29385
    },
    {
      "epoch": 0.8572845556916973,
      "grad_norm": 0.9503020444073192,
      "learning_rate": 5.246977301672645e-07,
      "loss": 0.1103,
      "step": 29386
    },
    {
      "epoch": 0.8573137289223408,
      "grad_norm": 0.933888693705328,
      "learning_rate": 5.244870705792632e-07,
      "loss": 0.1096,
      "step": 29387
    },
    {
      "epoch": 0.8573429021529845,
      "grad_norm": 0.8162679424213231,
      "learning_rate": 5.24276450947574e-07,
      "loss": 0.0949,
      "step": 29388
    },
    {
      "epoch": 0.857372075383628,
      "grad_norm": 0.8624342590855032,
      "learning_rate": 5.240658712740748e-07,
      "loss": 0.0991,
      "step": 29389
    },
    {
      "epoch": 0.8574012486142716,
      "grad_norm": 0.7814906469522114,
      "learning_rate": 5.238553315606482e-07,
      "loss": 0.1017,
      "step": 29390
    },
    {
      "epoch": 0.8574304218449151,
      "grad_norm": 0.7770873719225593,
      "learning_rate": 5.236448318091731e-07,
      "loss": 0.0958,
      "step": 29391
    },
    {
      "epoch": 0.8574595950755587,
      "grad_norm": 0.9589446488203188,
      "learning_rate": 5.234343720215268e-07,
      "loss": 0.1103,
      "step": 29392
    },
    {
      "epoch": 0.8574887683062022,
      "grad_norm": 0.8069289270927945,
      "learning_rate": 5.232239521995902e-07,
      "loss": 0.1151,
      "step": 29393
    },
    {
      "epoch": 0.8575179415368458,
      "grad_norm": 0.7421129880346099,
      "learning_rate": 5.230135723452423e-07,
      "loss": 0.1071,
      "step": 29394
    },
    {
      "epoch": 0.8575471147674893,
      "grad_norm": 1.084033587602726,
      "learning_rate": 5.228032324603605e-07,
      "loss": 0.1159,
      "step": 29395
    },
    {
      "epoch": 0.8575762879981329,
      "grad_norm": 0.8517420453028375,
      "learning_rate": 5.225929325468216e-07,
      "loss": 0.0996,
      "step": 29396
    },
    {
      "epoch": 0.8576054612287765,
      "grad_norm": 0.7296119333962274,
      "learning_rate": 5.223826726065045e-07,
      "loss": 0.1074,
      "step": 29397
    },
    {
      "epoch": 0.85763463445942,
      "grad_norm": 0.9196641582945607,
      "learning_rate": 5.221724526412869e-07,
      "loss": 0.1288,
      "step": 29398
    },
    {
      "epoch": 0.8576638076900636,
      "grad_norm": 1.0408563371365043,
      "learning_rate": 5.219622726530427e-07,
      "loss": 0.1,
      "step": 29399
    },
    {
      "epoch": 0.8576929809207071,
      "grad_norm": 0.8169846588627468,
      "learning_rate": 5.21752132643652e-07,
      "loss": 0.1215,
      "step": 29400
    },
    {
      "epoch": 0.8577221541513507,
      "grad_norm": 0.8105030774802362,
      "learning_rate": 5.215420326149889e-07,
      "loss": 0.133,
      "step": 29401
    },
    {
      "epoch": 0.8577513273819943,
      "grad_norm": 0.7658042004307059,
      "learning_rate": 5.213319725689292e-07,
      "loss": 0.1055,
      "step": 29402
    },
    {
      "epoch": 0.8577805006126379,
      "grad_norm": 0.8088666126180049,
      "learning_rate": 5.211219525073491e-07,
      "loss": 0.105,
      "step": 29403
    },
    {
      "epoch": 0.8578096738432814,
      "grad_norm": 0.7757079944570077,
      "learning_rate": 5.209119724321226e-07,
      "loss": 0.1079,
      "step": 29404
    },
    {
      "epoch": 0.857838847073925,
      "grad_norm": 0.7172074945031245,
      "learning_rate": 5.207020323451245e-07,
      "loss": 0.1189,
      "step": 29405
    },
    {
      "epoch": 0.8578680203045685,
      "grad_norm": 0.9422097405586951,
      "learning_rate": 5.204921322482292e-07,
      "loss": 0.1313,
      "step": 29406
    },
    {
      "epoch": 0.8578971935352121,
      "grad_norm": 0.8578235439343572,
      "learning_rate": 5.202822721433115e-07,
      "loss": 0.1045,
      "step": 29407
    },
    {
      "epoch": 0.8579263667658557,
      "grad_norm": 0.8219734969697102,
      "learning_rate": 5.200724520322448e-07,
      "loss": 0.0942,
      "step": 29408
    },
    {
      "epoch": 0.8579555399964992,
      "grad_norm": 0.8307940094535435,
      "learning_rate": 5.198626719169004e-07,
      "loss": 0.1394,
      "step": 29409
    },
    {
      "epoch": 0.8579847132271428,
      "grad_norm": 0.9403933662185611,
      "learning_rate": 5.196529317991534e-07,
      "loss": 0.0993,
      "step": 29410
    },
    {
      "epoch": 0.8580138864577863,
      "grad_norm": 0.743426296762458,
      "learning_rate": 5.194432316808745e-07,
      "loss": 0.1053,
      "step": 29411
    },
    {
      "epoch": 0.8580430596884299,
      "grad_norm": 0.7736891008032006,
      "learning_rate": 5.19233571563938e-07,
      "loss": 0.1329,
      "step": 29412
    },
    {
      "epoch": 0.8580722329190734,
      "grad_norm": 0.7645620492073064,
      "learning_rate": 5.190239514502138e-07,
      "loss": 0.1067,
      "step": 29413
    },
    {
      "epoch": 0.858101406149717,
      "grad_norm": 1.0641400675618282,
      "learning_rate": 5.18814371341575e-07,
      "loss": 0.1352,
      "step": 29414
    },
    {
      "epoch": 0.8581305793803606,
      "grad_norm": 0.897047553575903,
      "learning_rate": 5.186048312398911e-07,
      "loss": 0.1247,
      "step": 29415
    },
    {
      "epoch": 0.8581597526110042,
      "grad_norm": 0.7767238626142258,
      "learning_rate": 5.18395331147033e-07,
      "loss": 0.1234,
      "step": 29416
    },
    {
      "epoch": 0.8581889258416477,
      "grad_norm": 0.9954592295334918,
      "learning_rate": 5.181858710648719e-07,
      "loss": 0.1156,
      "step": 29417
    },
    {
      "epoch": 0.8582180990722913,
      "grad_norm": 0.8472966942564119,
      "learning_rate": 5.179764509952779e-07,
      "loss": 0.1121,
      "step": 29418
    },
    {
      "epoch": 0.8582472723029348,
      "grad_norm": 0.6922976041201744,
      "learning_rate": 5.177670709401195e-07,
      "loss": 0.1179,
      "step": 29419
    },
    {
      "epoch": 0.8582764455335784,
      "grad_norm": 0.8547285959502339,
      "learning_rate": 5.175577309012675e-07,
      "loss": 0.1235,
      "step": 29420
    },
    {
      "epoch": 0.858305618764222,
      "grad_norm": 0.8427890899975256,
      "learning_rate": 5.173484308805899e-07,
      "loss": 0.0913,
      "step": 29421
    },
    {
      "epoch": 0.8583347919948655,
      "grad_norm": 0.8593176410266,
      "learning_rate": 5.171391708799545e-07,
      "loss": 0.1306,
      "step": 29422
    },
    {
      "epoch": 0.8583639652255091,
      "grad_norm": 0.6973922847440359,
      "learning_rate": 5.169299509012304e-07,
      "loss": 0.0893,
      "step": 29423
    },
    {
      "epoch": 0.8583931384561526,
      "grad_norm": 0.8347714708783674,
      "learning_rate": 5.167207709462868e-07,
      "loss": 0.1275,
      "step": 29424
    },
    {
      "epoch": 0.8584223116867962,
      "grad_norm": 0.7919908106576974,
      "learning_rate": 5.165116310169899e-07,
      "loss": 0.1037,
      "step": 29425
    },
    {
      "epoch": 0.8584514849174397,
      "grad_norm": 0.8015650500711422,
      "learning_rate": 5.163025311152054e-07,
      "loss": 0.1129,
      "step": 29426
    },
    {
      "epoch": 0.8584806581480833,
      "grad_norm": 0.7722056462631454,
      "learning_rate": 5.160934712428029e-07,
      "loss": 0.1266,
      "step": 29427
    },
    {
      "epoch": 0.8585098313787268,
      "grad_norm": 0.9812732596639206,
      "learning_rate": 5.158844514016464e-07,
      "loss": 0.1156,
      "step": 29428
    },
    {
      "epoch": 0.8585390046093705,
      "grad_norm": 0.751644355534073,
      "learning_rate": 5.156754715936041e-07,
      "loss": 0.1065,
      "step": 29429
    },
    {
      "epoch": 0.858568177840014,
      "grad_norm": 0.7564942418699732,
      "learning_rate": 5.154665318205399e-07,
      "loss": 0.1209,
      "step": 29430
    },
    {
      "epoch": 0.8585973510706576,
      "grad_norm": 0.7916462788596871,
      "learning_rate": 5.152576320843206e-07,
      "loss": 0.1113,
      "step": 29431
    },
    {
      "epoch": 0.8586265243013012,
      "grad_norm": 0.8626458932377982,
      "learning_rate": 5.150487723868097e-07,
      "loss": 0.0907,
      "step": 29432
    },
    {
      "epoch": 0.8586556975319447,
      "grad_norm": 0.784768920987382,
      "learning_rate": 5.148399527298737e-07,
      "loss": 0.1249,
      "step": 29433
    },
    {
      "epoch": 0.8586848707625883,
      "grad_norm": 0.8921925899855412,
      "learning_rate": 5.146311731153752e-07,
      "loss": 0.1036,
      "step": 29434
    },
    {
      "epoch": 0.8587140439932318,
      "grad_norm": 0.9137812493085843,
      "learning_rate": 5.144224335451792e-07,
      "loss": 0.1109,
      "step": 29435
    },
    {
      "epoch": 0.8587432172238754,
      "grad_norm": 1.0167966710404392,
      "learning_rate": 5.142137340211483e-07,
      "loss": 0.1152,
      "step": 29436
    },
    {
      "epoch": 0.8587723904545189,
      "grad_norm": 0.7613544011294333,
      "learning_rate": 5.140050745451475e-07,
      "loss": 0.1214,
      "step": 29437
    },
    {
      "epoch": 0.8588015636851625,
      "grad_norm": 0.8576052611696953,
      "learning_rate": 5.137964551190383e-07,
      "loss": 0.119,
      "step": 29438
    },
    {
      "epoch": 0.858830736915806,
      "grad_norm": 1.0403343300488277,
      "learning_rate": 5.135878757446827e-07,
      "loss": 0.0913,
      "step": 29439
    },
    {
      "epoch": 0.8588599101464496,
      "grad_norm": 0.7258185632604269,
      "learning_rate": 5.133793364239431e-07,
      "loss": 0.0994,
      "step": 29440
    },
    {
      "epoch": 0.8588890833770931,
      "grad_norm": 1.0186480626788514,
      "learning_rate": 5.131708371586829e-07,
      "loss": 0.1223,
      "step": 29441
    },
    {
      "epoch": 0.8589182566077367,
      "grad_norm": 0.8462446482007799,
      "learning_rate": 5.129623779507625e-07,
      "loss": 0.1289,
      "step": 29442
    },
    {
      "epoch": 0.8589474298383803,
      "grad_norm": 0.9467615585489157,
      "learning_rate": 5.127539588020419e-07,
      "loss": 0.0982,
      "step": 29443
    },
    {
      "epoch": 0.8589766030690239,
      "grad_norm": 0.8042426959877547,
      "learning_rate": 5.125455797143836e-07,
      "loss": 0.11,
      "step": 29444
    },
    {
      "epoch": 0.8590057762996675,
      "grad_norm": 0.7695714351117898,
      "learning_rate": 5.123372406896471e-07,
      "loss": 0.1172,
      "step": 29445
    },
    {
      "epoch": 0.859034949530311,
      "grad_norm": 1.2927717821152398,
      "learning_rate": 5.121289417296904e-07,
      "loss": 0.1035,
      "step": 29446
    },
    {
      "epoch": 0.8590641227609546,
      "grad_norm": 0.781705172671196,
      "learning_rate": 5.119206828363777e-07,
      "loss": 0.12,
      "step": 29447
    },
    {
      "epoch": 0.8590932959915981,
      "grad_norm": 0.9423321120858826,
      "learning_rate": 5.117124640115651e-07,
      "loss": 0.1116,
      "step": 29448
    },
    {
      "epoch": 0.8591224692222417,
      "grad_norm": 0.878780722775897,
      "learning_rate": 5.115042852571111e-07,
      "loss": 0.1077,
      "step": 29449
    },
    {
      "epoch": 0.8591516424528852,
      "grad_norm": 0.9486237045265276,
      "learning_rate": 5.112961465748767e-07,
      "loss": 0.1201,
      "step": 29450
    },
    {
      "epoch": 0.8591808156835288,
      "grad_norm": 0.8706977732052311,
      "learning_rate": 5.11088047966719e-07,
      "loss": 0.1344,
      "step": 29451
    },
    {
      "epoch": 0.8592099889141723,
      "grad_norm": 0.7262858207686353,
      "learning_rate": 5.10879989434494e-07,
      "loss": 0.0984,
      "step": 29452
    },
    {
      "epoch": 0.8592391621448159,
      "grad_norm": 0.9363907226557692,
      "learning_rate": 5.106719709800612e-07,
      "loss": 0.1225,
      "step": 29453
    },
    {
      "epoch": 0.8592683353754594,
      "grad_norm": 0.7580962212275943,
      "learning_rate": 5.104639926052785e-07,
      "loss": 0.0964,
      "step": 29454
    },
    {
      "epoch": 0.859297508606103,
      "grad_norm": 0.8527587779765261,
      "learning_rate": 5.102560543120011e-07,
      "loss": 0.1116,
      "step": 29455
    },
    {
      "epoch": 0.8593266818367467,
      "grad_norm": 0.8396394476457001,
      "learning_rate": 5.100481561020853e-07,
      "loss": 0.1236,
      "step": 29456
    },
    {
      "epoch": 0.8593558550673902,
      "grad_norm": 0.8179078502682865,
      "learning_rate": 5.098402979773886e-07,
      "loss": 0.1001,
      "step": 29457
    },
    {
      "epoch": 0.8593850282980338,
      "grad_norm": 0.9643860638380869,
      "learning_rate": 5.096324799397645e-07,
      "loss": 0.1241,
      "step": 29458
    },
    {
      "epoch": 0.8594142015286773,
      "grad_norm": 0.8908851752833253,
      "learning_rate": 5.094247019910709e-07,
      "loss": 0.1392,
      "step": 29459
    },
    {
      "epoch": 0.8594433747593209,
      "grad_norm": 0.8365052768357152,
      "learning_rate": 5.092169641331607e-07,
      "loss": 0.1018,
      "step": 29460
    },
    {
      "epoch": 0.8594725479899644,
      "grad_norm": 1.0732215328561312,
      "learning_rate": 5.090092663678903e-07,
      "loss": 0.095,
      "step": 29461
    },
    {
      "epoch": 0.859501721220608,
      "grad_norm": 0.8282863012039224,
      "learning_rate": 5.08801608697112e-07,
      "loss": 0.1072,
      "step": 29462
    },
    {
      "epoch": 0.8595308944512515,
      "grad_norm": 1.095662547070976,
      "learning_rate": 5.085939911226822e-07,
      "loss": 0.1489,
      "step": 29463
    },
    {
      "epoch": 0.8595600676818951,
      "grad_norm": 1.1262591698820914,
      "learning_rate": 5.083864136464517e-07,
      "loss": 0.1046,
      "step": 29464
    },
    {
      "epoch": 0.8595892409125386,
      "grad_norm": 0.7123270198738395,
      "learning_rate": 5.08178876270276e-07,
      "loss": 0.1164,
      "step": 29465
    },
    {
      "epoch": 0.8596184141431822,
      "grad_norm": 1.1177619069218627,
      "learning_rate": 5.079713789960061e-07,
      "loss": 0.116,
      "step": 29466
    },
    {
      "epoch": 0.8596475873738257,
      "grad_norm": 1.0782613105866432,
      "learning_rate": 5.077639218254965e-07,
      "loss": 0.1242,
      "step": 29467
    },
    {
      "epoch": 0.8596767606044693,
      "grad_norm": 1.2186038789684839,
      "learning_rate": 5.075565047605979e-07,
      "loss": 0.1009,
      "step": 29468
    },
    {
      "epoch": 0.8597059338351128,
      "grad_norm": 0.7794700839204736,
      "learning_rate": 5.073491278031617e-07,
      "loss": 0.1076,
      "step": 29469
    },
    {
      "epoch": 0.8597351070657565,
      "grad_norm": 0.870849339425482,
      "learning_rate": 5.071417909550402e-07,
      "loss": 0.1396,
      "step": 29470
    },
    {
      "epoch": 0.8597642802964001,
      "grad_norm": 0.7510726306428365,
      "learning_rate": 5.069344942180848e-07,
      "loss": 0.1076,
      "step": 29471
    },
    {
      "epoch": 0.8597934535270436,
      "grad_norm": 0.6740657251828723,
      "learning_rate": 5.067272375941463e-07,
      "loss": 0.0957,
      "step": 29472
    },
    {
      "epoch": 0.8598226267576872,
      "grad_norm": 0.891263377532334,
      "learning_rate": 5.065200210850723e-07,
      "loss": 0.1336,
      "step": 29473
    },
    {
      "epoch": 0.8598517999883307,
      "grad_norm": 0.8615194734568703,
      "learning_rate": 5.063128446927168e-07,
      "loss": 0.1181,
      "step": 29474
    },
    {
      "epoch": 0.8598809732189743,
      "grad_norm": 0.855735698394919,
      "learning_rate": 5.061057084189274e-07,
      "loss": 0.098,
      "step": 29475
    },
    {
      "epoch": 0.8599101464496178,
      "grad_norm": 0.9202547256040262,
      "learning_rate": 5.058986122655512e-07,
      "loss": 0.1092,
      "step": 29476
    },
    {
      "epoch": 0.8599393196802614,
      "grad_norm": 0.8691794033323256,
      "learning_rate": 5.056915562344411e-07,
      "loss": 0.1379,
      "step": 29477
    },
    {
      "epoch": 0.8599684929109049,
      "grad_norm": 0.7190411998033227,
      "learning_rate": 5.054845403274444e-07,
      "loss": 0.1002,
      "step": 29478
    },
    {
      "epoch": 0.8599976661415485,
      "grad_norm": 0.9201431554442078,
      "learning_rate": 5.052775645464075e-07,
      "loss": 0.1047,
      "step": 29479
    },
    {
      "epoch": 0.860026839372192,
      "grad_norm": 0.8565354245381157,
      "learning_rate": 5.050706288931806e-07,
      "loss": 0.1208,
      "step": 29480
    },
    {
      "epoch": 0.8600560126028356,
      "grad_norm": 1.1982432707796746,
      "learning_rate": 5.048637333696105e-07,
      "loss": 0.1142,
      "step": 29481
    },
    {
      "epoch": 0.8600851858334791,
      "grad_norm": 0.7490489965000526,
      "learning_rate": 5.046568779775424e-07,
      "loss": 0.0932,
      "step": 29482
    },
    {
      "epoch": 0.8601143590641228,
      "grad_norm": 0.8452700790680486,
      "learning_rate": 5.044500627188248e-07,
      "loss": 0.1063,
      "step": 29483
    },
    {
      "epoch": 0.8601435322947664,
      "grad_norm": 0.8475220682439937,
      "learning_rate": 5.042432875953046e-07,
      "loss": 0.1211,
      "step": 29484
    },
    {
      "epoch": 0.8601727055254099,
      "grad_norm": 0.8374258136534395,
      "learning_rate": 5.040365526088276e-07,
      "loss": 0.1239,
      "step": 29485
    },
    {
      "epoch": 0.8602018787560535,
      "grad_norm": 0.6981846126698772,
      "learning_rate": 5.038298577612378e-07,
      "loss": 0.0896,
      "step": 29486
    },
    {
      "epoch": 0.860231051986697,
      "grad_norm": 0.920102923102032,
      "learning_rate": 5.036232030543825e-07,
      "loss": 0.1133,
      "step": 29487
    },
    {
      "epoch": 0.8602602252173406,
      "grad_norm": 0.8147794332701817,
      "learning_rate": 5.034165884901049e-07,
      "loss": 0.1276,
      "step": 29488
    },
    {
      "epoch": 0.8602893984479841,
      "grad_norm": 0.8643020393761554,
      "learning_rate": 5.032100140702518e-07,
      "loss": 0.1059,
      "step": 29489
    },
    {
      "epoch": 0.8603185716786277,
      "grad_norm": 1.0273624491597337,
      "learning_rate": 5.030034797966649e-07,
      "loss": 0.1211,
      "step": 29490
    },
    {
      "epoch": 0.8603477449092712,
      "grad_norm": 1.0993653441757587,
      "learning_rate": 5.027969856711907e-07,
      "loss": 0.1197,
      "step": 29491
    },
    {
      "epoch": 0.8603769181399148,
      "grad_norm": 0.8674785008953878,
      "learning_rate": 5.025905316956703e-07,
      "loss": 0.117,
      "step": 29492
    },
    {
      "epoch": 0.8604060913705583,
      "grad_norm": 0.8134897664006652,
      "learning_rate": 5.023841178719491e-07,
      "loss": 0.1089,
      "step": 29493
    },
    {
      "epoch": 0.8604352646012019,
      "grad_norm": 0.740252608636668,
      "learning_rate": 5.021777442018677e-07,
      "loss": 0.106,
      "step": 29494
    },
    {
      "epoch": 0.8604644378318455,
      "grad_norm": 0.6740421975228598,
      "learning_rate": 5.019714106872709e-07,
      "loss": 0.0992,
      "step": 29495
    },
    {
      "epoch": 0.860493611062489,
      "grad_norm": 0.7712131246928646,
      "learning_rate": 5.017651173299981e-07,
      "loss": 0.0991,
      "step": 29496
    },
    {
      "epoch": 0.8605227842931327,
      "grad_norm": 0.6535127532044942,
      "learning_rate": 5.015588641318941e-07,
      "loss": 0.0951,
      "step": 29497
    },
    {
      "epoch": 0.8605519575237762,
      "grad_norm": 0.8457754040744747,
      "learning_rate": 5.013526510947986e-07,
      "loss": 0.1152,
      "step": 29498
    },
    {
      "epoch": 0.8605811307544198,
      "grad_norm": 0.9662646942978871,
      "learning_rate": 5.011464782205511e-07,
      "loss": 0.102,
      "step": 29499
    },
    {
      "epoch": 0.8606103039850633,
      "grad_norm": 1.363511336845176,
      "learning_rate": 5.009403455109946e-07,
      "loss": 0.106,
      "step": 29500
    },
    {
      "epoch": 0.8606394772157069,
      "grad_norm": 0.7929904353469331,
      "learning_rate": 5.007342529679693e-07,
      "loss": 0.1108,
      "step": 29501
    },
    {
      "epoch": 0.8606686504463504,
      "grad_norm": 0.7810159069157689,
      "learning_rate": 5.005282005933148e-07,
      "loss": 0.1041,
      "step": 29502
    },
    {
      "epoch": 0.860697823676994,
      "grad_norm": 0.9411311208768363,
      "learning_rate": 5.003221883888692e-07,
      "loss": 0.1139,
      "step": 29503
    },
    {
      "epoch": 0.8607269969076375,
      "grad_norm": 0.901259545584759,
      "learning_rate": 5.001162163564738e-07,
      "loss": 0.1218,
      "step": 29504
    },
    {
      "epoch": 0.8607561701382811,
      "grad_norm": 0.7228034390535022,
      "learning_rate": 4.99910284497967e-07,
      "loss": 0.1082,
      "step": 29505
    },
    {
      "epoch": 0.8607853433689246,
      "grad_norm": 0.9359231392219072,
      "learning_rate": 4.997043928151851e-07,
      "loss": 0.1067,
      "step": 29506
    },
    {
      "epoch": 0.8608145165995682,
      "grad_norm": 0.7750835167395633,
      "learning_rate": 4.994985413099695e-07,
      "loss": 0.0999,
      "step": 29507
    },
    {
      "epoch": 0.8608436898302118,
      "grad_norm": 0.9282258491681282,
      "learning_rate": 4.992927299841566e-07,
      "loss": 0.1321,
      "step": 29508
    },
    {
      "epoch": 0.8608728630608553,
      "grad_norm": 0.8180139497002444,
      "learning_rate": 4.99086958839583e-07,
      "loss": 0.1138,
      "step": 29509
    },
    {
      "epoch": 0.860902036291499,
      "grad_norm": 1.0599843840977938,
      "learning_rate": 4.988812278780875e-07,
      "loss": 0.1194,
      "step": 29510
    },
    {
      "epoch": 0.8609312095221425,
      "grad_norm": 0.7357831173012828,
      "learning_rate": 4.986755371015062e-07,
      "loss": 0.1146,
      "step": 29511
    },
    {
      "epoch": 0.8609603827527861,
      "grad_norm": 0.7365994357681691,
      "learning_rate": 4.984698865116739e-07,
      "loss": 0.1137,
      "step": 29512
    },
    {
      "epoch": 0.8609895559834296,
      "grad_norm": 0.8982152257623877,
      "learning_rate": 4.982642761104279e-07,
      "loss": 0.1135,
      "step": 29513
    },
    {
      "epoch": 0.8610187292140732,
      "grad_norm": 0.7699343204703459,
      "learning_rate": 4.980587058996044e-07,
      "loss": 0.1015,
      "step": 29514
    },
    {
      "epoch": 0.8610479024447167,
      "grad_norm": 0.7885245394304854,
      "learning_rate": 4.978531758810385e-07,
      "loss": 0.1134,
      "step": 29515
    },
    {
      "epoch": 0.8610770756753603,
      "grad_norm": 0.7656299112574463,
      "learning_rate": 4.976476860565638e-07,
      "loss": 0.0949,
      "step": 29516
    },
    {
      "epoch": 0.8611062489060038,
      "grad_norm": 0.9263050205599165,
      "learning_rate": 4.974422364280169e-07,
      "loss": 0.1144,
      "step": 29517
    },
    {
      "epoch": 0.8611354221366474,
      "grad_norm": 0.8468985385132534,
      "learning_rate": 4.972368269972294e-07,
      "loss": 0.1032,
      "step": 29518
    },
    {
      "epoch": 0.861164595367291,
      "grad_norm": 0.823224175773147,
      "learning_rate": 4.970314577660379e-07,
      "loss": 0.1014,
      "step": 29519
    },
    {
      "epoch": 0.8611937685979345,
      "grad_norm": 0.8047470667383144,
      "learning_rate": 4.96826128736273e-07,
      "loss": 0.1455,
      "step": 29520
    },
    {
      "epoch": 0.8612229418285781,
      "grad_norm": 0.9622720918984243,
      "learning_rate": 4.96620839909771e-07,
      "loss": 0.0958,
      "step": 29521
    },
    {
      "epoch": 0.8612521150592216,
      "grad_norm": 0.6545674953297864,
      "learning_rate": 4.964155912883628e-07,
      "loss": 0.1074,
      "step": 29522
    },
    {
      "epoch": 0.8612812882898652,
      "grad_norm": 0.781662532258794,
      "learning_rate": 4.962103828738807e-07,
      "loss": 0.0923,
      "step": 29523
    },
    {
      "epoch": 0.8613104615205088,
      "grad_norm": 0.8537351550173269,
      "learning_rate": 4.960052146681566e-07,
      "loss": 0.1015,
      "step": 29524
    },
    {
      "epoch": 0.8613396347511524,
      "grad_norm": 0.9227109810282786,
      "learning_rate": 4.95800086673024e-07,
      "loss": 0.0995,
      "step": 29525
    },
    {
      "epoch": 0.8613688079817959,
      "grad_norm": 0.6084352556263708,
      "learning_rate": 4.955949988903119e-07,
      "loss": 0.0862,
      "step": 29526
    },
    {
      "epoch": 0.8613979812124395,
      "grad_norm": 0.9855506075943639,
      "learning_rate": 4.953899513218535e-07,
      "loss": 0.1151,
      "step": 29527
    },
    {
      "epoch": 0.861427154443083,
      "grad_norm": 0.8146833029360177,
      "learning_rate": 4.951849439694778e-07,
      "loss": 0.1124,
      "step": 29528
    },
    {
      "epoch": 0.8614563276737266,
      "grad_norm": 1.0698802220578514,
      "learning_rate": 4.94979976835015e-07,
      "loss": 0.1176,
      "step": 29529
    },
    {
      "epoch": 0.8614855009043701,
      "grad_norm": 0.8454586330671141,
      "learning_rate": 4.947750499202952e-07,
      "loss": 0.1153,
      "step": 29530
    },
    {
      "epoch": 0.8615146741350137,
      "grad_norm": 0.8488563654978626,
      "learning_rate": 4.945701632271499e-07,
      "loss": 0.0999,
      "step": 29531
    },
    {
      "epoch": 0.8615438473656573,
      "grad_norm": 0.8152915120222931,
      "learning_rate": 4.943653167574058e-07,
      "loss": 0.0987,
      "step": 29532
    },
    {
      "epoch": 0.8615730205963008,
      "grad_norm": 0.8512444655778405,
      "learning_rate": 4.941605105128922e-07,
      "loss": 0.1129,
      "step": 29533
    },
    {
      "epoch": 0.8616021938269444,
      "grad_norm": 0.7923319732939569,
      "learning_rate": 4.93955744495439e-07,
      "loss": 0.1134,
      "step": 29534
    },
    {
      "epoch": 0.8616313670575879,
      "grad_norm": 0.990599679297401,
      "learning_rate": 4.937510187068728e-07,
      "loss": 0.0937,
      "step": 29535
    },
    {
      "epoch": 0.8616605402882315,
      "grad_norm": 0.9879391746123323,
      "learning_rate": 4.935463331490198e-07,
      "loss": 0.1152,
      "step": 29536
    },
    {
      "epoch": 0.8616897135188751,
      "grad_norm": 0.8191392081185264,
      "learning_rate": 4.933416878237118e-07,
      "loss": 0.1311,
      "step": 29537
    },
    {
      "epoch": 0.8617188867495187,
      "grad_norm": 0.9034065550039516,
      "learning_rate": 4.93137082732773e-07,
      "loss": 0.1108,
      "step": 29538
    },
    {
      "epoch": 0.8617480599801622,
      "grad_norm": 0.879997117093169,
      "learning_rate": 4.929325178780293e-07,
      "loss": 0.1106,
      "step": 29539
    },
    {
      "epoch": 0.8617772332108058,
      "grad_norm": 0.8021533510115464,
      "learning_rate": 4.927279932613094e-07,
      "loss": 0.102,
      "step": 29540
    },
    {
      "epoch": 0.8618064064414493,
      "grad_norm": 1.1201269405766916,
      "learning_rate": 4.925235088844382e-07,
      "loss": 0.1085,
      "step": 29541
    },
    {
      "epoch": 0.8618355796720929,
      "grad_norm": 1.093923757410142,
      "learning_rate": 4.923190647492399e-07,
      "loss": 0.1399,
      "step": 29542
    },
    {
      "epoch": 0.8618647529027365,
      "grad_norm": 0.8121647255800227,
      "learning_rate": 4.921146608575405e-07,
      "loss": 0.1077,
      "step": 29543
    },
    {
      "epoch": 0.86189392613338,
      "grad_norm": 0.959957420606875,
      "learning_rate": 4.919102972111667e-07,
      "loss": 0.1349,
      "step": 29544
    },
    {
      "epoch": 0.8619230993640236,
      "grad_norm": 0.9910972412210832,
      "learning_rate": 4.917059738119417e-07,
      "loss": 0.1277,
      "step": 29545
    },
    {
      "epoch": 0.8619522725946671,
      "grad_norm": 0.9460236678096623,
      "learning_rate": 4.915016906616888e-07,
      "loss": 0.0984,
      "step": 29546
    },
    {
      "epoch": 0.8619814458253107,
      "grad_norm": 0.8204929894094617,
      "learning_rate": 4.912974477622329e-07,
      "loss": 0.101,
      "step": 29547
    },
    {
      "epoch": 0.8620106190559542,
      "grad_norm": 1.071794228341519,
      "learning_rate": 4.910932451153966e-07,
      "loss": 0.1287,
      "step": 29548
    },
    {
      "epoch": 0.8620397922865978,
      "grad_norm": 1.0222057992768019,
      "learning_rate": 4.90889082723004e-07,
      "loss": 0.1319,
      "step": 29549
    },
    {
      "epoch": 0.8620689655172413,
      "grad_norm": 0.7890094777152389,
      "learning_rate": 4.906849605868763e-07,
      "loss": 0.1249,
      "step": 29550
    },
    {
      "epoch": 0.862098138747885,
      "grad_norm": 0.8086902933476409,
      "learning_rate": 4.904808787088383e-07,
      "loss": 0.1096,
      "step": 29551
    },
    {
      "epoch": 0.8621273119785285,
      "grad_norm": 0.7286563718766472,
      "learning_rate": 4.902768370907102e-07,
      "loss": 0.1083,
      "step": 29552
    },
    {
      "epoch": 0.8621564852091721,
      "grad_norm": 1.0063736550386313,
      "learning_rate": 4.900728357343127e-07,
      "loss": 0.1085,
      "step": 29553
    },
    {
      "epoch": 0.8621856584398156,
      "grad_norm": 0.898909238721518,
      "learning_rate": 4.898688746414687e-07,
      "loss": 0.1047,
      "step": 29554
    },
    {
      "epoch": 0.8622148316704592,
      "grad_norm": 0.9043671477518233,
      "learning_rate": 4.896649538139992e-07,
      "loss": 0.1001,
      "step": 29555
    },
    {
      "epoch": 0.8622440049011028,
      "grad_norm": 0.9191083316418673,
      "learning_rate": 4.894610732537241e-07,
      "loss": 0.1051,
      "step": 29556
    },
    {
      "epoch": 0.8622731781317463,
      "grad_norm": 0.8197405853127391,
      "learning_rate": 4.892572329624639e-07,
      "loss": 0.0833,
      "step": 29557
    },
    {
      "epoch": 0.8623023513623899,
      "grad_norm": 0.711686348154552,
      "learning_rate": 4.890534329420388e-07,
      "loss": 0.1055,
      "step": 29558
    },
    {
      "epoch": 0.8623315245930334,
      "grad_norm": 0.7694761334203983,
      "learning_rate": 4.888496731942671e-07,
      "loss": 0.1095,
      "step": 29559
    },
    {
      "epoch": 0.862360697823677,
      "grad_norm": 1.086513979497295,
      "learning_rate": 4.886459537209681e-07,
      "loss": 0.146,
      "step": 29560
    },
    {
      "epoch": 0.8623898710543205,
      "grad_norm": 0.8444544898854911,
      "learning_rate": 4.884422745239625e-07,
      "loss": 0.1104,
      "step": 29561
    },
    {
      "epoch": 0.8624190442849641,
      "grad_norm": 0.9671191569305575,
      "learning_rate": 4.882386356050667e-07,
      "loss": 0.1252,
      "step": 29562
    },
    {
      "epoch": 0.8624482175156076,
      "grad_norm": 0.8935100670661668,
      "learning_rate": 4.880350369660985e-07,
      "loss": 0.1355,
      "step": 29563
    },
    {
      "epoch": 0.8624773907462513,
      "grad_norm": 0.772155745151359,
      "learning_rate": 4.878314786088778e-07,
      "loss": 0.1052,
      "step": 29564
    },
    {
      "epoch": 0.8625065639768948,
      "grad_norm": 0.8498414873168953,
      "learning_rate": 4.876279605352202e-07,
      "loss": 0.1172,
      "step": 29565
    },
    {
      "epoch": 0.8625357372075384,
      "grad_norm": 0.7325778536929377,
      "learning_rate": 4.87424482746941e-07,
      "loss": 0.1088,
      "step": 29566
    },
    {
      "epoch": 0.862564910438182,
      "grad_norm": 0.982890890838626,
      "learning_rate": 4.872210452458609e-07,
      "loss": 0.0994,
      "step": 29567
    },
    {
      "epoch": 0.8625940836688255,
      "grad_norm": 0.7980004055946873,
      "learning_rate": 4.87017648033794e-07,
      "loss": 0.0958,
      "step": 29568
    },
    {
      "epoch": 0.8626232568994691,
      "grad_norm": 0.9001500264776446,
      "learning_rate": 4.868142911125551e-07,
      "loss": 0.1077,
      "step": 29569
    },
    {
      "epoch": 0.8626524301301126,
      "grad_norm": 0.8083634707055541,
      "learning_rate": 4.86610974483962e-07,
      "loss": 0.1135,
      "step": 29570
    },
    {
      "epoch": 0.8626816033607562,
      "grad_norm": 1.0745313282423126,
      "learning_rate": 4.864076981498284e-07,
      "loss": 0.1129,
      "step": 29571
    },
    {
      "epoch": 0.8627107765913997,
      "grad_norm": 0.9907363144617128,
      "learning_rate": 4.862044621119688e-07,
      "loss": 0.1273,
      "step": 29572
    },
    {
      "epoch": 0.8627399498220433,
      "grad_norm": 1.0432915768941344,
      "learning_rate": 4.860012663721981e-07,
      "loss": 0.131,
      "step": 29573
    },
    {
      "epoch": 0.8627691230526868,
      "grad_norm": 0.8546023685608694,
      "learning_rate": 4.857981109323312e-07,
      "loss": 0.1071,
      "step": 29574
    },
    {
      "epoch": 0.8627982962833304,
      "grad_norm": 0.9118591567924467,
      "learning_rate": 4.855949957941814e-07,
      "loss": 0.0988,
      "step": 29575
    },
    {
      "epoch": 0.8628274695139739,
      "grad_norm": 0.7762545627015762,
      "learning_rate": 4.853919209595604e-07,
      "loss": 0.1196,
      "step": 29576
    },
    {
      "epoch": 0.8628566427446175,
      "grad_norm": 1.0126079495030087,
      "learning_rate": 4.851888864302839e-07,
      "loss": 0.122,
      "step": 29577
    },
    {
      "epoch": 0.8628858159752612,
      "grad_norm": 1.0261197469856702,
      "learning_rate": 4.849858922081623e-07,
      "loss": 0.1097,
      "step": 29578
    },
    {
      "epoch": 0.8629149892059047,
      "grad_norm": 0.8383641536878149,
      "learning_rate": 4.847829382950098e-07,
      "loss": 0.1282,
      "step": 29579
    },
    {
      "epoch": 0.8629441624365483,
      "grad_norm": 0.867578677704776,
      "learning_rate": 4.845800246926369e-07,
      "loss": 0.1197,
      "step": 29580
    },
    {
      "epoch": 0.8629733356671918,
      "grad_norm": 0.9841259215689497,
      "learning_rate": 4.843771514028555e-07,
      "loss": 0.1044,
      "step": 29581
    },
    {
      "epoch": 0.8630025088978354,
      "grad_norm": 0.868658120493819,
      "learning_rate": 4.841743184274778e-07,
      "loss": 0.1045,
      "step": 29582
    },
    {
      "epoch": 0.8630316821284789,
      "grad_norm": 0.7679781412591147,
      "learning_rate": 4.839715257683125e-07,
      "loss": 0.1215,
      "step": 29583
    },
    {
      "epoch": 0.8630608553591225,
      "grad_norm": 0.7238086362854077,
      "learning_rate": 4.837687734271713e-07,
      "loss": 0.1178,
      "step": 29584
    },
    {
      "epoch": 0.863090028589766,
      "grad_norm": 0.8831568359416163,
      "learning_rate": 4.835660614058657e-07,
      "loss": 0.1146,
      "step": 29585
    },
    {
      "epoch": 0.8631192018204096,
      "grad_norm": 1.0227267616933795,
      "learning_rate": 4.833633897062029e-07,
      "loss": 0.1314,
      "step": 29586
    },
    {
      "epoch": 0.8631483750510531,
      "grad_norm": 1.3691232071072594,
      "learning_rate": 4.831607583299941e-07,
      "loss": 0.1041,
      "step": 29587
    },
    {
      "epoch": 0.8631775482816967,
      "grad_norm": 0.9266690092284912,
      "learning_rate": 4.829581672790484e-07,
      "loss": 0.1326,
      "step": 29588
    },
    {
      "epoch": 0.8632067215123402,
      "grad_norm": 0.6945943866312693,
      "learning_rate": 4.827556165551728e-07,
      "loss": 0.0837,
      "step": 29589
    },
    {
      "epoch": 0.8632358947429838,
      "grad_norm": 0.7280800558592375,
      "learning_rate": 4.825531061601768e-07,
      "loss": 0.1126,
      "step": 29590
    },
    {
      "epoch": 0.8632650679736275,
      "grad_norm": 1.0866269712261198,
      "learning_rate": 4.823506360958691e-07,
      "loss": 0.108,
      "step": 29591
    },
    {
      "epoch": 0.863294241204271,
      "grad_norm": 0.9334877491028227,
      "learning_rate": 4.821482063640559e-07,
      "loss": 0.1049,
      "step": 29592
    },
    {
      "epoch": 0.8633234144349146,
      "grad_norm": 0.8746056086522032,
      "learning_rate": 4.819458169665447e-07,
      "loss": 0.115,
      "step": 29593
    },
    {
      "epoch": 0.8633525876655581,
      "grad_norm": 0.8324723328818899,
      "learning_rate": 4.817434679051436e-07,
      "loss": 0.1243,
      "step": 29594
    },
    {
      "epoch": 0.8633817608962017,
      "grad_norm": 0.7242512479396693,
      "learning_rate": 4.815411591816583e-07,
      "loss": 0.0997,
      "step": 29595
    },
    {
      "epoch": 0.8634109341268452,
      "grad_norm": 0.7987187858506292,
      "learning_rate": 4.813388907978927e-07,
      "loss": 0.117,
      "step": 29596
    },
    {
      "epoch": 0.8634401073574888,
      "grad_norm": 0.8758749388026807,
      "learning_rate": 4.811366627556569e-07,
      "loss": 0.12,
      "step": 29597
    },
    {
      "epoch": 0.8634692805881323,
      "grad_norm": 0.8615853750316675,
      "learning_rate": 4.809344750567541e-07,
      "loss": 0.1227,
      "step": 29598
    },
    {
      "epoch": 0.8634984538187759,
      "grad_norm": 0.9269933404487796,
      "learning_rate": 4.807323277029885e-07,
      "loss": 0.1303,
      "step": 29599
    },
    {
      "epoch": 0.8635276270494194,
      "grad_norm": 0.8515560207390576,
      "learning_rate": 4.805302206961671e-07,
      "loss": 0.1002,
      "step": 29600
    },
    {
      "epoch": 0.863556800280063,
      "grad_norm": 0.8271251257469049,
      "learning_rate": 4.803281540380927e-07,
      "loss": 0.1053,
      "step": 29601
    },
    {
      "epoch": 0.8635859735107065,
      "grad_norm": 0.8991172528992981,
      "learning_rate": 4.801261277305691e-07,
      "loss": 0.1464,
      "step": 29602
    },
    {
      "epoch": 0.8636151467413501,
      "grad_norm": 0.8219771993122964,
      "learning_rate": 4.799241417754003e-07,
      "loss": 0.1001,
      "step": 29603
    },
    {
      "epoch": 0.8636443199719936,
      "grad_norm": 0.7661951768747031,
      "learning_rate": 4.797221961743903e-07,
      "loss": 0.1231,
      "step": 29604
    },
    {
      "epoch": 0.8636734932026373,
      "grad_norm": 0.800394710623368,
      "learning_rate": 4.795202909293417e-07,
      "loss": 0.0881,
      "step": 29605
    },
    {
      "epoch": 0.8637026664332809,
      "grad_norm": 0.957192883285935,
      "learning_rate": 4.793184260420558e-07,
      "loss": 0.122,
      "step": 29606
    },
    {
      "epoch": 0.8637318396639244,
      "grad_norm": 0.8945823312298447,
      "learning_rate": 4.791166015143367e-07,
      "loss": 0.1059,
      "step": 29607
    },
    {
      "epoch": 0.863761012894568,
      "grad_norm": 0.8919861071128578,
      "learning_rate": 4.789148173479846e-07,
      "loss": 0.1174,
      "step": 29608
    },
    {
      "epoch": 0.8637901861252115,
      "grad_norm": 0.8249856781036057,
      "learning_rate": 4.787130735448025e-07,
      "loss": 0.1166,
      "step": 29609
    },
    {
      "epoch": 0.8638193593558551,
      "grad_norm": 0.8328722605491482,
      "learning_rate": 4.785113701065902e-07,
      "loss": 0.1027,
      "step": 29610
    },
    {
      "epoch": 0.8638485325864986,
      "grad_norm": 0.826765544122123,
      "learning_rate": 4.783097070351494e-07,
      "loss": 0.0985,
      "step": 29611
    },
    {
      "epoch": 0.8638777058171422,
      "grad_norm": 0.8292172219427822,
      "learning_rate": 4.781080843322805e-07,
      "loss": 0.1188,
      "step": 29612
    },
    {
      "epoch": 0.8639068790477857,
      "grad_norm": 0.8156842541071601,
      "learning_rate": 4.779065019997813e-07,
      "loss": 0.1044,
      "step": 29613
    },
    {
      "epoch": 0.8639360522784293,
      "grad_norm": 0.6863972599258233,
      "learning_rate": 4.777049600394551e-07,
      "loss": 0.0988,
      "step": 29614
    },
    {
      "epoch": 0.8639652255090728,
      "grad_norm": 0.8723736753961623,
      "learning_rate": 4.775034584530997e-07,
      "loss": 0.1236,
      "step": 29615
    },
    {
      "epoch": 0.8639943987397164,
      "grad_norm": 0.7932184896501067,
      "learning_rate": 4.773019972425124e-07,
      "loss": 0.1085,
      "step": 29616
    },
    {
      "epoch": 0.86402357197036,
      "grad_norm": 0.8837860027059837,
      "learning_rate": 4.771005764094944e-07,
      "loss": 0.1008,
      "step": 29617
    },
    {
      "epoch": 0.8640527452010036,
      "grad_norm": 0.9156822763571928,
      "learning_rate": 4.768991959558428e-07,
      "loss": 0.0891,
      "step": 29618
    },
    {
      "epoch": 0.8640819184316472,
      "grad_norm": 0.7655104960309999,
      "learning_rate": 4.766978558833546e-07,
      "loss": 0.0967,
      "step": 29619
    },
    {
      "epoch": 0.8641110916622907,
      "grad_norm": 0.9262543272787156,
      "learning_rate": 4.7649655619382783e-07,
      "loss": 0.1254,
      "step": 29620
    },
    {
      "epoch": 0.8641402648929343,
      "grad_norm": 0.7676951827197807,
      "learning_rate": 4.7629529688906106e-07,
      "loss": 0.1097,
      "step": 29621
    },
    {
      "epoch": 0.8641694381235778,
      "grad_norm": 0.8639825347892992,
      "learning_rate": 4.7609407797085004e-07,
      "loss": 0.1322,
      "step": 29622
    },
    {
      "epoch": 0.8641986113542214,
      "grad_norm": 0.6632079328706527,
      "learning_rate": 4.7589289944099006e-07,
      "loss": 0.1022,
      "step": 29623
    },
    {
      "epoch": 0.8642277845848649,
      "grad_norm": 0.8810722908815922,
      "learning_rate": 4.756917613012796e-07,
      "loss": 0.1171,
      "step": 29624
    },
    {
      "epoch": 0.8642569578155085,
      "grad_norm": 0.8158237493702719,
      "learning_rate": 4.754906635535117e-07,
      "loss": 0.0895,
      "step": 29625
    },
    {
      "epoch": 0.864286131046152,
      "grad_norm": 0.7865349197095143,
      "learning_rate": 4.7528960619948326e-07,
      "loss": 0.1252,
      "step": 29626
    },
    {
      "epoch": 0.8643153042767956,
      "grad_norm": 0.8034154361529194,
      "learning_rate": 4.7508858924098957e-07,
      "loss": 0.1103,
      "step": 29627
    },
    {
      "epoch": 0.8643444775074391,
      "grad_norm": 0.9944795265877507,
      "learning_rate": 4.748876126798252e-07,
      "loss": 0.0968,
      "step": 29628
    },
    {
      "epoch": 0.8643736507380827,
      "grad_norm": 0.803210162221882,
      "learning_rate": 4.7468667651778323e-07,
      "loss": 0.1093,
      "step": 29629
    },
    {
      "epoch": 0.8644028239687263,
      "grad_norm": 0.7230637311969597,
      "learning_rate": 4.7448578075665887e-07,
      "loss": 0.1339,
      "step": 29630
    },
    {
      "epoch": 0.8644319971993698,
      "grad_norm": 0.763966666019329,
      "learning_rate": 4.7428492539824456e-07,
      "loss": 0.1086,
      "step": 29631
    },
    {
      "epoch": 0.8644611704300135,
      "grad_norm": 0.826078356683304,
      "learning_rate": 4.74084110444335e-07,
      "loss": 0.114,
      "step": 29632
    },
    {
      "epoch": 0.864490343660657,
      "grad_norm": 0.700532019841836,
      "learning_rate": 4.738833358967204e-07,
      "loss": 0.0972,
      "step": 29633
    },
    {
      "epoch": 0.8645195168913006,
      "grad_norm": 0.8568305123509733,
      "learning_rate": 4.736826017571966e-07,
      "loss": 0.1099,
      "step": 29634
    },
    {
      "epoch": 0.8645486901219441,
      "grad_norm": 0.8350213993387027,
      "learning_rate": 4.734819080275538e-07,
      "loss": 0.0964,
      "step": 29635
    },
    {
      "epoch": 0.8645778633525877,
      "grad_norm": 0.7811801257511575,
      "learning_rate": 4.732812547095833e-07,
      "loss": 0.1063,
      "step": 29636
    },
    {
      "epoch": 0.8646070365832312,
      "grad_norm": 0.7120592483117036,
      "learning_rate": 4.730806418050765e-07,
      "loss": 0.1155,
      "step": 29637
    },
    {
      "epoch": 0.8646362098138748,
      "grad_norm": 0.798759867045699,
      "learning_rate": 4.728800693158264e-07,
      "loss": 0.12,
      "step": 29638
    },
    {
      "epoch": 0.8646653830445183,
      "grad_norm": 1.1690973989697988,
      "learning_rate": 4.726795372436227e-07,
      "loss": 0.1087,
      "step": 29639
    },
    {
      "epoch": 0.8646945562751619,
      "grad_norm": 0.7437712717524874,
      "learning_rate": 4.7247904559025394e-07,
      "loss": 0.1404,
      "step": 29640
    },
    {
      "epoch": 0.8647237295058054,
      "grad_norm": 0.7925820616934592,
      "learning_rate": 4.7227859435751257e-07,
      "loss": 0.1112,
      "step": 29641
    },
    {
      "epoch": 0.864752902736449,
      "grad_norm": 0.7629146577327641,
      "learning_rate": 4.720781835471866e-07,
      "loss": 0.1117,
      "step": 29642
    },
    {
      "epoch": 0.8647820759670926,
      "grad_norm": 0.7356041591264301,
      "learning_rate": 4.718778131610641e-07,
      "loss": 0.109,
      "step": 29643
    },
    {
      "epoch": 0.8648112491977361,
      "grad_norm": 0.7391439129785692,
      "learning_rate": 4.716774832009374e-07,
      "loss": 0.1199,
      "step": 29644
    },
    {
      "epoch": 0.8648404224283798,
      "grad_norm": 0.9699460332926753,
      "learning_rate": 4.7147719366859356e-07,
      "loss": 0.1194,
      "step": 29645
    },
    {
      "epoch": 0.8648695956590233,
      "grad_norm": 0.9004880669962468,
      "learning_rate": 4.7127694456581886e-07,
      "loss": 0.1124,
      "step": 29646
    },
    {
      "epoch": 0.8648987688896669,
      "grad_norm": 0.8018383910546009,
      "learning_rate": 4.710767358944035e-07,
      "loss": 0.1194,
      "step": 29647
    },
    {
      "epoch": 0.8649279421203104,
      "grad_norm": 0.8808434508993854,
      "learning_rate": 4.708765676561339e-07,
      "loss": 0.1025,
      "step": 29648
    },
    {
      "epoch": 0.864957115350954,
      "grad_norm": 0.9759817118866354,
      "learning_rate": 4.706764398527963e-07,
      "loss": 0.1395,
      "step": 29649
    },
    {
      "epoch": 0.8649862885815975,
      "grad_norm": 1.155416398124548,
      "learning_rate": 4.704763524861783e-07,
      "loss": 0.0977,
      "step": 29650
    },
    {
      "epoch": 0.8650154618122411,
      "grad_norm": 0.895229972793927,
      "learning_rate": 4.702763055580672e-07,
      "loss": 0.1054,
      "step": 29651
    },
    {
      "epoch": 0.8650446350428846,
      "grad_norm": 0.8855127487464511,
      "learning_rate": 4.700762990702473e-07,
      "loss": 0.1126,
      "step": 29652
    },
    {
      "epoch": 0.8650738082735282,
      "grad_norm": 0.8177305592994144,
      "learning_rate": 4.698763330245043e-07,
      "loss": 0.102,
      "step": 29653
    },
    {
      "epoch": 0.8651029815041718,
      "grad_norm": 0.812140178505874,
      "learning_rate": 4.6967640742262513e-07,
      "loss": 0.0984,
      "step": 29654
    },
    {
      "epoch": 0.8651321547348153,
      "grad_norm": 0.8331334389322026,
      "learning_rate": 4.6947652226639216e-07,
      "loss": 0.1312,
      "step": 29655
    },
    {
      "epoch": 0.8651613279654589,
      "grad_norm": 1.0606635692666215,
      "learning_rate": 4.692766775575913e-07,
      "loss": 0.1145,
      "step": 29656
    },
    {
      "epoch": 0.8651905011961024,
      "grad_norm": 0.912480668116238,
      "learning_rate": 4.690768732980078e-07,
      "loss": 0.1161,
      "step": 29657
    },
    {
      "epoch": 0.865219674426746,
      "grad_norm": 0.8693959965482749,
      "learning_rate": 4.688771094894246e-07,
      "loss": 0.1201,
      "step": 29658
    },
    {
      "epoch": 0.8652488476573896,
      "grad_norm": 1.1410749152867425,
      "learning_rate": 4.6867738613362356e-07,
      "loss": 0.125,
      "step": 29659
    },
    {
      "epoch": 0.8652780208880332,
      "grad_norm": 0.9557606060725321,
      "learning_rate": 4.6847770323239006e-07,
      "loss": 0.1197,
      "step": 29660
    },
    {
      "epoch": 0.8653071941186767,
      "grad_norm": 0.9258297153682862,
      "learning_rate": 4.682780607875048e-07,
      "loss": 0.1102,
      "step": 29661
    },
    {
      "epoch": 0.8653363673493203,
      "grad_norm": 0.9021782178302932,
      "learning_rate": 4.680784588007525e-07,
      "loss": 0.1157,
      "step": 29662
    },
    {
      "epoch": 0.8653655405799638,
      "grad_norm": 0.912579263470195,
      "learning_rate": 4.678788972739129e-07,
      "loss": 0.0917,
      "step": 29663
    },
    {
      "epoch": 0.8653947138106074,
      "grad_norm": 0.7514401759654336,
      "learning_rate": 4.6767937620876946e-07,
      "loss": 0.1161,
      "step": 29664
    },
    {
      "epoch": 0.865423887041251,
      "grad_norm": 0.7817265586761414,
      "learning_rate": 4.674798956071025e-07,
      "loss": 0.1068,
      "step": 29665
    },
    {
      "epoch": 0.8654530602718945,
      "grad_norm": 0.7489309934528932,
      "learning_rate": 4.6728045547069223e-07,
      "loss": 0.1015,
      "step": 29666
    },
    {
      "epoch": 0.8654822335025381,
      "grad_norm": 0.7725362704533929,
      "learning_rate": 4.6708105580132e-07,
      "loss": 0.1026,
      "step": 29667
    },
    {
      "epoch": 0.8655114067331816,
      "grad_norm": 0.8417707708156441,
      "learning_rate": 4.6688169660076666e-07,
      "loss": 0.1124,
      "step": 29668
    },
    {
      "epoch": 0.8655405799638252,
      "grad_norm": 0.9401182149442621,
      "learning_rate": 4.6668237787081185e-07,
      "loss": 0.1365,
      "step": 29669
    },
    {
      "epoch": 0.8655697531944687,
      "grad_norm": 0.912303103831374,
      "learning_rate": 4.66483099613233e-07,
      "loss": 0.1218,
      "step": 29670
    },
    {
      "epoch": 0.8655989264251123,
      "grad_norm": 0.8884869685940538,
      "learning_rate": 4.662838618298121e-07,
      "loss": 0.126,
      "step": 29671
    },
    {
      "epoch": 0.8656280996557559,
      "grad_norm": 1.057071481354854,
      "learning_rate": 4.6608466452232713e-07,
      "loss": 0.1138,
      "step": 29672
    },
    {
      "epoch": 0.8656572728863995,
      "grad_norm": 0.8261209164621662,
      "learning_rate": 4.6588550769255336e-07,
      "loss": 0.1201,
      "step": 29673
    },
    {
      "epoch": 0.865686446117043,
      "grad_norm": 1.0436250607599036,
      "learning_rate": 4.656863913422732e-07,
      "loss": 0.1232,
      "step": 29674
    },
    {
      "epoch": 0.8657156193476866,
      "grad_norm": 0.7731383788619942,
      "learning_rate": 4.654873154732631e-07,
      "loss": 0.1106,
      "step": 29675
    },
    {
      "epoch": 0.8657447925783301,
      "grad_norm": 0.9086950245842619,
      "learning_rate": 4.652882800872982e-07,
      "loss": 0.1059,
      "step": 29676
    },
    {
      "epoch": 0.8657739658089737,
      "grad_norm": 0.6931387976822992,
      "learning_rate": 4.6508928518615883e-07,
      "loss": 0.1187,
      "step": 29677
    },
    {
      "epoch": 0.8658031390396173,
      "grad_norm": 0.8971935488584154,
      "learning_rate": 4.6489033077161907e-07,
      "loss": 0.1332,
      "step": 29678
    },
    {
      "epoch": 0.8658323122702608,
      "grad_norm": 0.8124624341357914,
      "learning_rate": 4.6469141684545473e-07,
      "loss": 0.1153,
      "step": 29679
    },
    {
      "epoch": 0.8658614855009044,
      "grad_norm": 0.7495792132361767,
      "learning_rate": 4.644925434094433e-07,
      "loss": 0.1164,
      "step": 29680
    },
    {
      "epoch": 0.8658906587315479,
      "grad_norm": 0.8907488826080208,
      "learning_rate": 4.6429371046536e-07,
      "loss": 0.1062,
      "step": 29681
    },
    {
      "epoch": 0.8659198319621915,
      "grad_norm": 0.7172330951828068,
      "learning_rate": 4.6409491801498006e-07,
      "loss": 0.1015,
      "step": 29682
    },
    {
      "epoch": 0.865949005192835,
      "grad_norm": 0.766924963875774,
      "learning_rate": 4.6389616606007717e-07,
      "loss": 0.0988,
      "step": 29683
    },
    {
      "epoch": 0.8659781784234786,
      "grad_norm": 0.6698874112215939,
      "learning_rate": 4.6369745460242755e-07,
      "loss": 0.1194,
      "step": 29684
    },
    {
      "epoch": 0.8660073516541221,
      "grad_norm": 0.878505291382565,
      "learning_rate": 4.634987836438026e-07,
      "loss": 0.1064,
      "step": 29685
    },
    {
      "epoch": 0.8660365248847658,
      "grad_norm": 0.8542466741397426,
      "learning_rate": 4.633001531859777e-07,
      "loss": 0.1042,
      "step": 29686
    },
    {
      "epoch": 0.8660656981154093,
      "grad_norm": 0.7377300469235439,
      "learning_rate": 4.631015632307273e-07,
      "loss": 0.1288,
      "step": 29687
    },
    {
      "epoch": 0.8660948713460529,
      "grad_norm": 0.832876429750545,
      "learning_rate": 4.629030137798229e-07,
      "loss": 0.1123,
      "step": 29688
    },
    {
      "epoch": 0.8661240445766965,
      "grad_norm": 0.8588785022383585,
      "learning_rate": 4.627045048350365e-07,
      "loss": 0.093,
      "step": 29689
    },
    {
      "epoch": 0.86615321780734,
      "grad_norm": 0.7639942154531778,
      "learning_rate": 4.6250603639814153e-07,
      "loss": 0.1027,
      "step": 29690
    },
    {
      "epoch": 0.8661823910379836,
      "grad_norm": 0.746171628499873,
      "learning_rate": 4.6230760847090936e-07,
      "loss": 0.1068,
      "step": 29691
    },
    {
      "epoch": 0.8662115642686271,
      "grad_norm": 0.812632010411005,
      "learning_rate": 4.62109221055112e-07,
      "loss": 0.0966,
      "step": 29692
    },
    {
      "epoch": 0.8662407374992707,
      "grad_norm": 1.05417175822687,
      "learning_rate": 4.619108741525197e-07,
      "loss": 0.1112,
      "step": 29693
    },
    {
      "epoch": 0.8662699107299142,
      "grad_norm": 0.829800720148143,
      "learning_rate": 4.6171256776490423e-07,
      "loss": 0.1126,
      "step": 29694
    },
    {
      "epoch": 0.8662990839605578,
      "grad_norm": 0.9135399325225867,
      "learning_rate": 4.61514301894036e-07,
      "loss": 0.1155,
      "step": 29695
    },
    {
      "epoch": 0.8663282571912013,
      "grad_norm": 0.8423022840432328,
      "learning_rate": 4.613160765416835e-07,
      "loss": 0.1176,
      "step": 29696
    },
    {
      "epoch": 0.8663574304218449,
      "grad_norm": 0.9440167894563377,
      "learning_rate": 4.6111789170961764e-07,
      "loss": 0.1227,
      "step": 29697
    },
    {
      "epoch": 0.8663866036524884,
      "grad_norm": 0.842576393518547,
      "learning_rate": 4.6091974739960855e-07,
      "loss": 0.1258,
      "step": 29698
    },
    {
      "epoch": 0.866415776883132,
      "grad_norm": 0.8789314064777867,
      "learning_rate": 4.607216436134243e-07,
      "loss": 0.1087,
      "step": 29699
    },
    {
      "epoch": 0.8664449501137756,
      "grad_norm": 0.7346332916299733,
      "learning_rate": 4.6052358035283296e-07,
      "loss": 0.0907,
      "step": 29700
    },
    {
      "epoch": 0.8664741233444192,
      "grad_norm": 0.6216925139506582,
      "learning_rate": 4.603255576196042e-07,
      "loss": 0.0919,
      "step": 29701
    },
    {
      "epoch": 0.8665032965750628,
      "grad_norm": 0.9711471230620363,
      "learning_rate": 4.6012757541550547e-07,
      "loss": 0.1112,
      "step": 29702
    },
    {
      "epoch": 0.8665324698057063,
      "grad_norm": 0.9344305312678193,
      "learning_rate": 4.5992963374230204e-07,
      "loss": 0.1119,
      "step": 29703
    },
    {
      "epoch": 0.8665616430363499,
      "grad_norm": 0.7686073239935602,
      "learning_rate": 4.5973173260176475e-07,
      "loss": 0.0924,
      "step": 29704
    },
    {
      "epoch": 0.8665908162669934,
      "grad_norm": 0.798213112409071,
      "learning_rate": 4.595338719956582e-07,
      "loss": 0.1017,
      "step": 29705
    },
    {
      "epoch": 0.866619989497637,
      "grad_norm": 0.8807198372508848,
      "learning_rate": 4.5933605192574894e-07,
      "loss": 0.1067,
      "step": 29706
    },
    {
      "epoch": 0.8666491627282805,
      "grad_norm": 0.8785217562478561,
      "learning_rate": 4.5913827239380483e-07,
      "loss": 0.0941,
      "step": 29707
    },
    {
      "epoch": 0.8666783359589241,
      "grad_norm": 1.0508830325605063,
      "learning_rate": 4.589405334015895e-07,
      "loss": 0.1189,
      "step": 29708
    },
    {
      "epoch": 0.8667075091895676,
      "grad_norm": 0.8974488364052352,
      "learning_rate": 4.5874283495086823e-07,
      "loss": 0.1269,
      "step": 29709
    },
    {
      "epoch": 0.8667366824202112,
      "grad_norm": 0.9093572309706573,
      "learning_rate": 4.585451770434074e-07,
      "loss": 0.1193,
      "step": 29710
    },
    {
      "epoch": 0.8667658556508547,
      "grad_norm": 1.1047782347058013,
      "learning_rate": 4.5834755968097167e-07,
      "loss": 0.1024,
      "step": 29711
    },
    {
      "epoch": 0.8667950288814983,
      "grad_norm": 0.7875122541939666,
      "learning_rate": 4.581499828653246e-07,
      "loss": 0.13,
      "step": 29712
    },
    {
      "epoch": 0.866824202112142,
      "grad_norm": 1.0699875763352769,
      "learning_rate": 4.5795244659822933e-07,
      "loss": 0.1018,
      "step": 29713
    },
    {
      "epoch": 0.8668533753427855,
      "grad_norm": 1.0058920404274683,
      "learning_rate": 4.577549508814516e-07,
      "loss": 0.1121,
      "step": 29714
    },
    {
      "epoch": 0.8668825485734291,
      "grad_norm": 1.5685121632036332,
      "learning_rate": 4.5755749571675223e-07,
      "loss": 0.1162,
      "step": 29715
    },
    {
      "epoch": 0.8669117218040726,
      "grad_norm": 0.7863468657298148,
      "learning_rate": 4.573600811058948e-07,
      "loss": 0.1151,
      "step": 29716
    },
    {
      "epoch": 0.8669408950347162,
      "grad_norm": 0.9508696183350926,
      "learning_rate": 4.571627070506435e-07,
      "loss": 0.1,
      "step": 29717
    },
    {
      "epoch": 0.8669700682653597,
      "grad_norm": 0.8904285033234068,
      "learning_rate": 4.5696537355275903e-07,
      "loss": 0.0987,
      "step": 29718
    },
    {
      "epoch": 0.8669992414960033,
      "grad_norm": 0.7338508216069743,
      "learning_rate": 4.5676808061400233e-07,
      "loss": 0.1011,
      "step": 29719
    },
    {
      "epoch": 0.8670284147266468,
      "grad_norm": 0.7718343882417587,
      "learning_rate": 4.5657082823613643e-07,
      "loss": 0.1052,
      "step": 29720
    },
    {
      "epoch": 0.8670575879572904,
      "grad_norm": 0.8998635641660996,
      "learning_rate": 4.5637361642092036e-07,
      "loss": 0.11,
      "step": 29721
    },
    {
      "epoch": 0.8670867611879339,
      "grad_norm": 0.9120540171063068,
      "learning_rate": 4.5617644517011727e-07,
      "loss": 0.1068,
      "step": 29722
    },
    {
      "epoch": 0.8671159344185775,
      "grad_norm": 0.904704683530814,
      "learning_rate": 4.559793144854857e-07,
      "loss": 0.1251,
      "step": 29723
    },
    {
      "epoch": 0.867145107649221,
      "grad_norm": 0.7281993287272448,
      "learning_rate": 4.557822243687865e-07,
      "loss": 0.1058,
      "step": 29724
    },
    {
      "epoch": 0.8671742808798646,
      "grad_norm": 0.7850270269802017,
      "learning_rate": 4.555851748217788e-07,
      "loss": 0.1136,
      "step": 29725
    },
    {
      "epoch": 0.8672034541105081,
      "grad_norm": 1.4475167809231915,
      "learning_rate": 4.553881658462206e-07,
      "loss": 0.1242,
      "step": 29726
    },
    {
      "epoch": 0.8672326273411518,
      "grad_norm": 0.9211778454821116,
      "learning_rate": 4.5519119744387273e-07,
      "loss": 0.1168,
      "step": 29727
    },
    {
      "epoch": 0.8672618005717954,
      "grad_norm": 0.9632290231290926,
      "learning_rate": 4.549942696164933e-07,
      "loss": 0.1036,
      "step": 29728
    },
    {
      "epoch": 0.8672909738024389,
      "grad_norm": 0.729019807957965,
      "learning_rate": 4.5479738236584026e-07,
      "loss": 0.1083,
      "step": 29729
    },
    {
      "epoch": 0.8673201470330825,
      "grad_norm": 0.7198276258285211,
      "learning_rate": 4.5460053569367e-07,
      "loss": 0.1025,
      "step": 29730
    },
    {
      "epoch": 0.867349320263726,
      "grad_norm": 0.9094212307826949,
      "learning_rate": 4.544037296017423e-07,
      "loss": 0.1269,
      "step": 29731
    },
    {
      "epoch": 0.8673784934943696,
      "grad_norm": 1.0392531432521597,
      "learning_rate": 4.5420696409181285e-07,
      "loss": 0.1173,
      "step": 29732
    },
    {
      "epoch": 0.8674076667250131,
      "grad_norm": 0.8547771219552441,
      "learning_rate": 4.540102391656365e-07,
      "loss": 0.1142,
      "step": 29733
    },
    {
      "epoch": 0.8674368399556567,
      "grad_norm": 0.9342302833273862,
      "learning_rate": 4.5381355482497334e-07,
      "loss": 0.102,
      "step": 29734
    },
    {
      "epoch": 0.8674660131863002,
      "grad_norm": 0.8647156138343178,
      "learning_rate": 4.536169110715777e-07,
      "loss": 0.1192,
      "step": 29735
    },
    {
      "epoch": 0.8674951864169438,
      "grad_norm": 0.8518650128110973,
      "learning_rate": 4.5342030790720415e-07,
      "loss": 0.0861,
      "step": 29736
    },
    {
      "epoch": 0.8675243596475873,
      "grad_norm": 0.7755918713379286,
      "learning_rate": 4.532237453336091e-07,
      "loss": 0.132,
      "step": 29737
    },
    {
      "epoch": 0.8675535328782309,
      "grad_norm": 1.0685931566112636,
      "learning_rate": 4.5302722335254735e-07,
      "loss": 0.1186,
      "step": 29738
    },
    {
      "epoch": 0.8675827061088744,
      "grad_norm": 1.0566728184063763,
      "learning_rate": 4.5283074196577236e-07,
      "loss": 0.1157,
      "step": 29739
    },
    {
      "epoch": 0.8676118793395181,
      "grad_norm": 0.7225468710013514,
      "learning_rate": 4.526343011750389e-07,
      "loss": 0.0999,
      "step": 29740
    },
    {
      "epoch": 0.8676410525701617,
      "grad_norm": 0.8328686994694089,
      "learning_rate": 4.524379009821017e-07,
      "loss": 0.1254,
      "step": 29741
    },
    {
      "epoch": 0.8676702258008052,
      "grad_norm": 0.7084810337350889,
      "learning_rate": 4.522415413887138e-07,
      "loss": 0.1049,
      "step": 29742
    },
    {
      "epoch": 0.8676993990314488,
      "grad_norm": 0.8528611464567548,
      "learning_rate": 4.520452223966265e-07,
      "loss": 0.1101,
      "step": 29743
    },
    {
      "epoch": 0.8677285722620923,
      "grad_norm": 0.9303856966190855,
      "learning_rate": 4.518489440075946e-07,
      "loss": 0.1222,
      "step": 29744
    },
    {
      "epoch": 0.8677577454927359,
      "grad_norm": 1.0222688221812284,
      "learning_rate": 4.516527062233683e-07,
      "loss": 0.1451,
      "step": 29745
    },
    {
      "epoch": 0.8677869187233794,
      "grad_norm": 0.9208701209918345,
      "learning_rate": 4.514565090457018e-07,
      "loss": 0.0919,
      "step": 29746
    },
    {
      "epoch": 0.867816091954023,
      "grad_norm": 0.8087070543058863,
      "learning_rate": 4.512603524763459e-07,
      "loss": 0.1012,
      "step": 29747
    },
    {
      "epoch": 0.8678452651846665,
      "grad_norm": 0.7322560302675284,
      "learning_rate": 4.51064236517052e-07,
      "loss": 0.119,
      "step": 29748
    },
    {
      "epoch": 0.8678744384153101,
      "grad_norm": 0.8583460991070108,
      "learning_rate": 4.5086816116956976e-07,
      "loss": 0.1025,
      "step": 29749
    },
    {
      "epoch": 0.8679036116459536,
      "grad_norm": 0.8543875345341135,
      "learning_rate": 4.5067212643565174e-07,
      "loss": 0.1139,
      "step": 29750
    },
    {
      "epoch": 0.8679327848765972,
      "grad_norm": 0.9019525138139194,
      "learning_rate": 4.504761323170453e-07,
      "loss": 0.1149,
      "step": 29751
    },
    {
      "epoch": 0.8679619581072407,
      "grad_norm": 0.8608702128939949,
      "learning_rate": 4.5028017881550367e-07,
      "loss": 0.1356,
      "step": 29752
    },
    {
      "epoch": 0.8679911313378843,
      "grad_norm": 0.895518179646985,
      "learning_rate": 4.500842659327731e-07,
      "loss": 0.1214,
      "step": 29753
    },
    {
      "epoch": 0.868020304568528,
      "grad_norm": 1.1530854543177367,
      "learning_rate": 4.49888393670605e-07,
      "loss": 0.1097,
      "step": 29754
    },
    {
      "epoch": 0.8680494777991715,
      "grad_norm": 0.9497906050404548,
      "learning_rate": 4.4969256203074743e-07,
      "loss": 0.1306,
      "step": 29755
    },
    {
      "epoch": 0.8680786510298151,
      "grad_norm": 1.0179029781359794,
      "learning_rate": 4.4949677101494725e-07,
      "loss": 0.1065,
      "step": 29756
    },
    {
      "epoch": 0.8681078242604586,
      "grad_norm": 0.7478767179124656,
      "learning_rate": 4.4930102062495375e-07,
      "loss": 0.0932,
      "step": 29757
    },
    {
      "epoch": 0.8681369974911022,
      "grad_norm": 0.8353406196041437,
      "learning_rate": 4.4910531086251487e-07,
      "loss": 0.1331,
      "step": 29758
    },
    {
      "epoch": 0.8681661707217457,
      "grad_norm": 0.7320057912699145,
      "learning_rate": 4.489096417293781e-07,
      "loss": 0.1118,
      "step": 29759
    },
    {
      "epoch": 0.8681953439523893,
      "grad_norm": 0.7836198279260504,
      "learning_rate": 4.4871401322728827e-07,
      "loss": 0.0998,
      "step": 29760
    },
    {
      "epoch": 0.8682245171830328,
      "grad_norm": 0.6365631225709393,
      "learning_rate": 4.485184253579944e-07,
      "loss": 0.0916,
      "step": 29761
    },
    {
      "epoch": 0.8682536904136764,
      "grad_norm": 0.9872720899301752,
      "learning_rate": 4.4832287812324127e-07,
      "loss": 0.1078,
      "step": 29762
    },
    {
      "epoch": 0.86828286364432,
      "grad_norm": 1.0003743596686172,
      "learning_rate": 4.4812737152477304e-07,
      "loss": 0.1116,
      "step": 29763
    },
    {
      "epoch": 0.8683120368749635,
      "grad_norm": 1.1022894300171184,
      "learning_rate": 4.4793190556433887e-07,
      "loss": 0.1167,
      "step": 29764
    },
    {
      "epoch": 0.868341210105607,
      "grad_norm": 0.9228480213490751,
      "learning_rate": 4.4773648024368174e-07,
      "loss": 0.1157,
      "step": 29765
    },
    {
      "epoch": 0.8683703833362506,
      "grad_norm": 0.8021311905958423,
      "learning_rate": 4.475410955645465e-07,
      "loss": 0.1034,
      "step": 29766
    },
    {
      "epoch": 0.8683995565668943,
      "grad_norm": 0.879069332619487,
      "learning_rate": 4.4734575152867777e-07,
      "loss": 0.1108,
      "step": 29767
    },
    {
      "epoch": 0.8684287297975378,
      "grad_norm": 0.8309891909386835,
      "learning_rate": 4.4715044813781974e-07,
      "loss": 0.1014,
      "step": 29768
    },
    {
      "epoch": 0.8684579030281814,
      "grad_norm": 0.9584210287436049,
      "learning_rate": 4.469551853937143e-07,
      "loss": 0.1141,
      "step": 29769
    },
    {
      "epoch": 0.8684870762588249,
      "grad_norm": 1.0390781683334436,
      "learning_rate": 4.4675996329810677e-07,
      "loss": 0.1122,
      "step": 29770
    },
    {
      "epoch": 0.8685162494894685,
      "grad_norm": 0.8384784217556217,
      "learning_rate": 4.4656478185273965e-07,
      "loss": 0.0945,
      "step": 29771
    },
    {
      "epoch": 0.868545422720112,
      "grad_norm": 0.934736468470086,
      "learning_rate": 4.463696410593554e-07,
      "loss": 0.1183,
      "step": 29772
    },
    {
      "epoch": 0.8685745959507556,
      "grad_norm": 1.025228067281019,
      "learning_rate": 4.461745409196949e-07,
      "loss": 0.0994,
      "step": 29773
    },
    {
      "epoch": 0.8686037691813991,
      "grad_norm": 0.8652336674970745,
      "learning_rate": 4.459794814355023e-07,
      "loss": 0.1118,
      "step": 29774
    },
    {
      "epoch": 0.8686329424120427,
      "grad_norm": 1.0542096956907,
      "learning_rate": 4.457844626085167e-07,
      "loss": 0.0994,
      "step": 29775
    },
    {
      "epoch": 0.8686621156426863,
      "grad_norm": 0.9462186891090342,
      "learning_rate": 4.455894844404801e-07,
      "loss": 0.1015,
      "step": 29776
    },
    {
      "epoch": 0.8686912888733298,
      "grad_norm": 1.1768899062578044,
      "learning_rate": 4.4539454693313445e-07,
      "loss": 0.1054,
      "step": 29777
    },
    {
      "epoch": 0.8687204621039734,
      "grad_norm": 0.9259754020643249,
      "learning_rate": 4.4519965008821884e-07,
      "loss": 0.1238,
      "step": 29778
    },
    {
      "epoch": 0.8687496353346169,
      "grad_norm": 0.9200452501299549,
      "learning_rate": 4.4500479390747256e-07,
      "loss": 0.0988,
      "step": 29779
    },
    {
      "epoch": 0.8687788085652605,
      "grad_norm": 0.6860223353769073,
      "learning_rate": 4.448099783926368e-07,
      "loss": 0.1065,
      "step": 29780
    },
    {
      "epoch": 0.8688079817959041,
      "grad_norm": 1.1867613252608016,
      "learning_rate": 4.446152035454493e-07,
      "loss": 0.1165,
      "step": 29781
    },
    {
      "epoch": 0.8688371550265477,
      "grad_norm": 0.9965666379750093,
      "learning_rate": 4.444204693676507e-07,
      "loss": 0.1074,
      "step": 29782
    },
    {
      "epoch": 0.8688663282571912,
      "grad_norm": 1.0111669054258339,
      "learning_rate": 4.4422577586097805e-07,
      "loss": 0.1385,
      "step": 29783
    },
    {
      "epoch": 0.8688955014878348,
      "grad_norm": 0.8950318607751709,
      "learning_rate": 4.44031123027171e-07,
      "loss": 0.1264,
      "step": 29784
    },
    {
      "epoch": 0.8689246747184783,
      "grad_norm": 1.0518872645517119,
      "learning_rate": 4.4383651086796655e-07,
      "loss": 0.1238,
      "step": 29785
    },
    {
      "epoch": 0.8689538479491219,
      "grad_norm": 0.9826575438077829,
      "learning_rate": 4.43641939385101e-07,
      "loss": 0.109,
      "step": 29786
    },
    {
      "epoch": 0.8689830211797654,
      "grad_norm": 0.8794138827483294,
      "learning_rate": 4.4344740858031253e-07,
      "loss": 0.1318,
      "step": 29787
    },
    {
      "epoch": 0.869012194410409,
      "grad_norm": 1.1362465386233664,
      "learning_rate": 4.432529184553386e-07,
      "loss": 0.1075,
      "step": 29788
    },
    {
      "epoch": 0.8690413676410526,
      "grad_norm": 0.8820666260009485,
      "learning_rate": 4.4305846901191495e-07,
      "loss": 0.0917,
      "step": 29789
    },
    {
      "epoch": 0.8690705408716961,
      "grad_norm": 0.712915621453368,
      "learning_rate": 4.428640602517764e-07,
      "loss": 0.1024,
      "step": 29790
    },
    {
      "epoch": 0.8690997141023397,
      "grad_norm": 0.8040965846348229,
      "learning_rate": 4.42669692176661e-07,
      "loss": 0.1011,
      "step": 29791
    },
    {
      "epoch": 0.8691288873329832,
      "grad_norm": 1.0730527558974972,
      "learning_rate": 4.424753647883023e-07,
      "loss": 0.1116,
      "step": 29792
    },
    {
      "epoch": 0.8691580605636268,
      "grad_norm": 1.048869244710362,
      "learning_rate": 4.42281078088434e-07,
      "loss": 0.1115,
      "step": 29793
    },
    {
      "epoch": 0.8691872337942704,
      "grad_norm": 0.7481494214929864,
      "learning_rate": 4.4208683207879355e-07,
      "loss": 0.1051,
      "step": 29794
    },
    {
      "epoch": 0.869216407024914,
      "grad_norm": 1.0399294612372059,
      "learning_rate": 4.418926267611146e-07,
      "loss": 0.1264,
      "step": 29795
    },
    {
      "epoch": 0.8692455802555575,
      "grad_norm": 0.7680603792499419,
      "learning_rate": 4.416984621371284e-07,
      "loss": 0.1065,
      "step": 29796
    },
    {
      "epoch": 0.8692747534862011,
      "grad_norm": 0.6179645275901688,
      "learning_rate": 4.4150433820857153e-07,
      "loss": 0.0989,
      "step": 29797
    },
    {
      "epoch": 0.8693039267168446,
      "grad_norm": 1.2297313226543285,
      "learning_rate": 4.4131025497717585e-07,
      "loss": 0.1101,
      "step": 29798
    },
    {
      "epoch": 0.8693330999474882,
      "grad_norm": 1.0925233920483266,
      "learning_rate": 4.4111621244467275e-07,
      "loss": 0.124,
      "step": 29799
    },
    {
      "epoch": 0.8693622731781318,
      "grad_norm": 0.906662609980025,
      "learning_rate": 4.409222106127958e-07,
      "loss": 0.1139,
      "step": 29800
    },
    {
      "epoch": 0.8693914464087753,
      "grad_norm": 0.887670852975099,
      "learning_rate": 4.407282494832782e-07,
      "loss": 0.127,
      "step": 29801
    },
    {
      "epoch": 0.8694206196394189,
      "grad_norm": 0.8546909175050807,
      "learning_rate": 4.405343290578507e-07,
      "loss": 0.1057,
      "step": 29802
    },
    {
      "epoch": 0.8694497928700624,
      "grad_norm": 0.8254342158400528,
      "learning_rate": 4.4034044933824294e-07,
      "loss": 0.1253,
      "step": 29803
    },
    {
      "epoch": 0.869478966100706,
      "grad_norm": 0.8674148604627151,
      "learning_rate": 4.401466103261881e-07,
      "loss": 0.14,
      "step": 29804
    },
    {
      "epoch": 0.8695081393313495,
      "grad_norm": 0.8069543818570193,
      "learning_rate": 4.399528120234148e-07,
      "loss": 0.1118,
      "step": 29805
    },
    {
      "epoch": 0.8695373125619931,
      "grad_norm": 0.8461359594532442,
      "learning_rate": 4.3975905443165437e-07,
      "loss": 0.0957,
      "step": 29806
    },
    {
      "epoch": 0.8695664857926366,
      "grad_norm": 0.9271947325694814,
      "learning_rate": 4.395653375526371e-07,
      "loss": 0.1032,
      "step": 29807
    },
    {
      "epoch": 0.8695956590232803,
      "grad_norm": 1.0509761241612552,
      "learning_rate": 4.3937166138809217e-07,
      "loss": 0.1136,
      "step": 29808
    },
    {
      "epoch": 0.8696248322539238,
      "grad_norm": 0.9624390305621473,
      "learning_rate": 4.3917802593974714e-07,
      "loss": 0.1093,
      "step": 29809
    },
    {
      "epoch": 0.8696540054845674,
      "grad_norm": 0.9669244218025086,
      "learning_rate": 4.389844312093322e-07,
      "loss": 0.1071,
      "step": 29810
    },
    {
      "epoch": 0.869683178715211,
      "grad_norm": 0.8908038698081963,
      "learning_rate": 4.387908771985766e-07,
      "loss": 0.1253,
      "step": 29811
    },
    {
      "epoch": 0.8697123519458545,
      "grad_norm": 0.9371307776667135,
      "learning_rate": 4.385973639092067e-07,
      "loss": 0.1183,
      "step": 29812
    },
    {
      "epoch": 0.869741525176498,
      "grad_norm": 0.7877828857168898,
      "learning_rate": 4.3840389134295e-07,
      "loss": 0.1128,
      "step": 29813
    },
    {
      "epoch": 0.8697706984071416,
      "grad_norm": 0.8044260548349564,
      "learning_rate": 4.3821045950153517e-07,
      "loss": 0.1077,
      "step": 29814
    },
    {
      "epoch": 0.8697998716377852,
      "grad_norm": 0.8445328060565375,
      "learning_rate": 4.3801706838668855e-07,
      "loss": 0.1203,
      "step": 29815
    },
    {
      "epoch": 0.8698290448684287,
      "grad_norm": 0.7289914820312914,
      "learning_rate": 4.3782371800013545e-07,
      "loss": 0.1149,
      "step": 29816
    },
    {
      "epoch": 0.8698582180990723,
      "grad_norm": 0.6795465123690801,
      "learning_rate": 4.376304083436028e-07,
      "loss": 0.1018,
      "step": 29817
    },
    {
      "epoch": 0.8698873913297158,
      "grad_norm": 0.7402984432363503,
      "learning_rate": 4.3743713941881817e-07,
      "loss": 0.11,
      "step": 29818
    },
    {
      "epoch": 0.8699165645603594,
      "grad_norm": 0.8466375251813191,
      "learning_rate": 4.3724391122750565e-07,
      "loss": 0.119,
      "step": 29819
    },
    {
      "epoch": 0.8699457377910029,
      "grad_norm": 0.7558040130905236,
      "learning_rate": 4.370507237713889e-07,
      "loss": 0.1048,
      "step": 29820
    },
    {
      "epoch": 0.8699749110216466,
      "grad_norm": 0.9283048767907476,
      "learning_rate": 4.3685757705219545e-07,
      "loss": 0.1111,
      "step": 29821
    },
    {
      "epoch": 0.8700040842522901,
      "grad_norm": 0.8306414883025738,
      "learning_rate": 4.3666447107164667e-07,
      "loss": 0.1019,
      "step": 29822
    },
    {
      "epoch": 0.8700332574829337,
      "grad_norm": 1.0089986757984788,
      "learning_rate": 4.36471405831469e-07,
      "loss": 0.1124,
      "step": 29823
    },
    {
      "epoch": 0.8700624307135773,
      "grad_norm": 0.960068224297036,
      "learning_rate": 4.362783813333854e-07,
      "loss": 0.1316,
      "step": 29824
    },
    {
      "epoch": 0.8700916039442208,
      "grad_norm": 0.94203315437371,
      "learning_rate": 4.3608539757911903e-07,
      "loss": 0.0981,
      "step": 29825
    },
    {
      "epoch": 0.8701207771748644,
      "grad_norm": 0.9243236379327263,
      "learning_rate": 4.3589245457039244e-07,
      "loss": 0.114,
      "step": 29826
    },
    {
      "epoch": 0.8701499504055079,
      "grad_norm": 1.14120755316505,
      "learning_rate": 4.3569955230892857e-07,
      "loss": 0.1236,
      "step": 29827
    },
    {
      "epoch": 0.8701791236361515,
      "grad_norm": 0.8580259854946702,
      "learning_rate": 4.355066907964489e-07,
      "loss": 0.1228,
      "step": 29828
    },
    {
      "epoch": 0.870208296866795,
      "grad_norm": 0.7866802087229846,
      "learning_rate": 4.3531387003467706e-07,
      "loss": 0.1305,
      "step": 29829
    },
    {
      "epoch": 0.8702374700974386,
      "grad_norm": 0.800562842323476,
      "learning_rate": 4.351210900253322e-07,
      "loss": 0.1166,
      "step": 29830
    },
    {
      "epoch": 0.8702666433280821,
      "grad_norm": 0.7850213567763958,
      "learning_rate": 4.349283507701374e-07,
      "loss": 0.133,
      "step": 29831
    },
    {
      "epoch": 0.8702958165587257,
      "grad_norm": 0.8698308370262087,
      "learning_rate": 4.3473565227081236e-07,
      "loss": 0.126,
      "step": 29832
    },
    {
      "epoch": 0.8703249897893692,
      "grad_norm": 0.6526075225422932,
      "learning_rate": 4.345429945290769e-07,
      "loss": 0.1,
      "step": 29833
    },
    {
      "epoch": 0.8703541630200128,
      "grad_norm": 0.841088174324627,
      "learning_rate": 4.343503775466518e-07,
      "loss": 0.1111,
      "step": 29834
    },
    {
      "epoch": 0.8703833362506564,
      "grad_norm": 0.8447937630109417,
      "learning_rate": 4.341578013252573e-07,
      "loss": 0.1072,
      "step": 29835
    },
    {
      "epoch": 0.8704125094813,
      "grad_norm": 0.924956378450774,
      "learning_rate": 4.339652658666116e-07,
      "loss": 0.1005,
      "step": 29836
    },
    {
      "epoch": 0.8704416827119436,
      "grad_norm": 0.8227379584896524,
      "learning_rate": 4.337727711724343e-07,
      "loss": 0.1193,
      "step": 29837
    },
    {
      "epoch": 0.8704708559425871,
      "grad_norm": 0.7631561645780006,
      "learning_rate": 4.3358031724444416e-07,
      "loss": 0.0987,
      "step": 29838
    },
    {
      "epoch": 0.8705000291732307,
      "grad_norm": 0.8128963542843648,
      "learning_rate": 4.333879040843575e-07,
      "loss": 0.1154,
      "step": 29839
    },
    {
      "epoch": 0.8705292024038742,
      "grad_norm": 0.7893618652077217,
      "learning_rate": 4.331955316938935e-07,
      "loss": 0.1227,
      "step": 29840
    },
    {
      "epoch": 0.8705583756345178,
      "grad_norm": 0.9053852248961141,
      "learning_rate": 4.330032000747708e-07,
      "loss": 0.1216,
      "step": 29841
    },
    {
      "epoch": 0.8705875488651613,
      "grad_norm": 0.8752026556647005,
      "learning_rate": 4.328109092287053e-07,
      "loss": 0.1205,
      "step": 29842
    },
    {
      "epoch": 0.8706167220958049,
      "grad_norm": 1.1452348967503392,
      "learning_rate": 4.3261865915741273e-07,
      "loss": 0.105,
      "step": 29843
    },
    {
      "epoch": 0.8706458953264484,
      "grad_norm": 0.7792971177379204,
      "learning_rate": 4.324264498626113e-07,
      "loss": 0.1105,
      "step": 29844
    },
    {
      "epoch": 0.870675068557092,
      "grad_norm": 0.7918411056047596,
      "learning_rate": 4.322342813460162e-07,
      "loss": 0.1183,
      "step": 29845
    },
    {
      "epoch": 0.8707042417877355,
      "grad_norm": 0.7757046761783517,
      "learning_rate": 4.320421536093422e-07,
      "loss": 0.0924,
      "step": 29846
    },
    {
      "epoch": 0.8707334150183791,
      "grad_norm": 0.6833492941206484,
      "learning_rate": 4.318500666543052e-07,
      "loss": 0.1033,
      "step": 29847
    },
    {
      "epoch": 0.8707625882490228,
      "grad_norm": 0.9957608773109329,
      "learning_rate": 4.3165802048262096e-07,
      "loss": 0.1247,
      "step": 29848
    },
    {
      "epoch": 0.8707917614796663,
      "grad_norm": 0.8078436126699239,
      "learning_rate": 4.314660150960037e-07,
      "loss": 0.0876,
      "step": 29849
    },
    {
      "epoch": 0.8708209347103099,
      "grad_norm": 0.8166737009025434,
      "learning_rate": 4.3127405049616654e-07,
      "loss": 0.0973,
      "step": 29850
    },
    {
      "epoch": 0.8708501079409534,
      "grad_norm": 0.9200094109189592,
      "learning_rate": 4.3108212668482476e-07,
      "loss": 0.1144,
      "step": 29851
    },
    {
      "epoch": 0.870879281171597,
      "grad_norm": 1.1565132894069288,
      "learning_rate": 4.308902436636903e-07,
      "loss": 0.1081,
      "step": 29852
    },
    {
      "epoch": 0.8709084544022405,
      "grad_norm": 1.0310305442230399,
      "learning_rate": 4.3069840143447674e-07,
      "loss": 0.1061,
      "step": 29853
    },
    {
      "epoch": 0.8709376276328841,
      "grad_norm": 0.8234617934363656,
      "learning_rate": 4.3050659999889776e-07,
      "loss": 0.1078,
      "step": 29854
    },
    {
      "epoch": 0.8709668008635276,
      "grad_norm": 0.9566713458942278,
      "learning_rate": 4.303148393586654e-07,
      "loss": 0.1094,
      "step": 29855
    },
    {
      "epoch": 0.8709959740941712,
      "grad_norm": 1.1751645532482526,
      "learning_rate": 4.3012311951549036e-07,
      "loss": 0.1275,
      "step": 29856
    },
    {
      "epoch": 0.8710251473248147,
      "grad_norm": 0.9431671484471107,
      "learning_rate": 4.299314404710864e-07,
      "loss": 0.0854,
      "step": 29857
    },
    {
      "epoch": 0.8710543205554583,
      "grad_norm": 1.068907517348355,
      "learning_rate": 4.2973980222716207e-07,
      "loss": 0.1144,
      "step": 29858
    },
    {
      "epoch": 0.8710834937861018,
      "grad_norm": 0.7769048005080315,
      "learning_rate": 4.29548204785431e-07,
      "loss": 0.1149,
      "step": 29859
    },
    {
      "epoch": 0.8711126670167454,
      "grad_norm": 0.9716605913536874,
      "learning_rate": 4.2935664814760136e-07,
      "loss": 0.1113,
      "step": 29860
    },
    {
      "epoch": 0.8711418402473889,
      "grad_norm": 0.8453873145446957,
      "learning_rate": 4.2916513231538557e-07,
      "loss": 0.1241,
      "step": 29861
    },
    {
      "epoch": 0.8711710134780326,
      "grad_norm": 0.8464955427424956,
      "learning_rate": 4.289736572904923e-07,
      "loss": 0.1215,
      "step": 29862
    },
    {
      "epoch": 0.8712001867086762,
      "grad_norm": 0.9637879572247396,
      "learning_rate": 4.2878222307463024e-07,
      "loss": 0.1416,
      "step": 29863
    },
    {
      "epoch": 0.8712293599393197,
      "grad_norm": 1.1063074579540713,
      "learning_rate": 4.28590829669509e-07,
      "loss": 0.1155,
      "step": 29864
    },
    {
      "epoch": 0.8712585331699633,
      "grad_norm": 0.9476633249530068,
      "learning_rate": 4.28399477076839e-07,
      "loss": 0.1174,
      "step": 29865
    },
    {
      "epoch": 0.8712877064006068,
      "grad_norm": 1.0494547089309854,
      "learning_rate": 4.2820816529832554e-07,
      "loss": 0.1017,
      "step": 29866
    },
    {
      "epoch": 0.8713168796312504,
      "grad_norm": 0.740568707230243,
      "learning_rate": 4.2801689433567937e-07,
      "loss": 0.0867,
      "step": 29867
    },
    {
      "epoch": 0.8713460528618939,
      "grad_norm": 0.7175939290898041,
      "learning_rate": 4.27825664190607e-07,
      "loss": 0.095,
      "step": 29868
    },
    {
      "epoch": 0.8713752260925375,
      "grad_norm": 0.9289451850827819,
      "learning_rate": 4.276344748648148e-07,
      "loss": 0.1285,
      "step": 29869
    },
    {
      "epoch": 0.871404399323181,
      "grad_norm": 0.6876481776515677,
      "learning_rate": 4.274433263600103e-07,
      "loss": 0.1117,
      "step": 29870
    },
    {
      "epoch": 0.8714335725538246,
      "grad_norm": 0.90676459476418,
      "learning_rate": 4.2725221867790155e-07,
      "loss": 0.1072,
      "step": 29871
    },
    {
      "epoch": 0.8714627457844681,
      "grad_norm": 0.7552836163927147,
      "learning_rate": 4.270611518201928e-07,
      "loss": 0.1008,
      "step": 29872
    },
    {
      "epoch": 0.8714919190151117,
      "grad_norm": 0.7758009585920969,
      "learning_rate": 4.268701257885899e-07,
      "loss": 0.1108,
      "step": 29873
    },
    {
      "epoch": 0.8715210922457552,
      "grad_norm": 0.8339668780951237,
      "learning_rate": 4.2667914058479976e-07,
      "loss": 0.1072,
      "step": 29874
    },
    {
      "epoch": 0.8715502654763989,
      "grad_norm": 0.7467170059467244,
      "learning_rate": 4.264881962105266e-07,
      "loss": 0.128,
      "step": 29875
    },
    {
      "epoch": 0.8715794387070425,
      "grad_norm": 1.0153642190803998,
      "learning_rate": 4.262972926674735e-07,
      "loss": 0.1076,
      "step": 29876
    },
    {
      "epoch": 0.871608611937686,
      "grad_norm": 0.7803989734333402,
      "learning_rate": 4.26106429957347e-07,
      "loss": 0.1203,
      "step": 29877
    },
    {
      "epoch": 0.8716377851683296,
      "grad_norm": 0.9265790676639721,
      "learning_rate": 4.2591560808185106e-07,
      "loss": 0.108,
      "step": 29878
    },
    {
      "epoch": 0.8716669583989731,
      "grad_norm": 0.8310663452206771,
      "learning_rate": 4.257248270426889e-07,
      "loss": 0.1056,
      "step": 29879
    },
    {
      "epoch": 0.8716961316296167,
      "grad_norm": 0.8347754515050189,
      "learning_rate": 4.255340868415625e-07,
      "loss": 0.1439,
      "step": 29880
    },
    {
      "epoch": 0.8717253048602602,
      "grad_norm": 0.7486925850183045,
      "learning_rate": 4.2534338748017655e-07,
      "loss": 0.1305,
      "step": 29881
    },
    {
      "epoch": 0.8717544780909038,
      "grad_norm": 0.9735315168196614,
      "learning_rate": 4.251527289602314e-07,
      "loss": 0.1217,
      "step": 29882
    },
    {
      "epoch": 0.8717836513215473,
      "grad_norm": 0.7607551056091284,
      "learning_rate": 4.2496211128343125e-07,
      "loss": 0.1111,
      "step": 29883
    },
    {
      "epoch": 0.8718128245521909,
      "grad_norm": 0.9457045281394639,
      "learning_rate": 4.24771534451478e-07,
      "loss": 0.1121,
      "step": 29884
    },
    {
      "epoch": 0.8718419977828344,
      "grad_norm": 0.8157892787241623,
      "learning_rate": 4.24580998466072e-07,
      "loss": 0.1103,
      "step": 29885
    },
    {
      "epoch": 0.871871171013478,
      "grad_norm": 0.8344157962970016,
      "learning_rate": 4.243905033289142e-07,
      "loss": 0.1019,
      "step": 29886
    },
    {
      "epoch": 0.8719003442441216,
      "grad_norm": 0.6242416127974619,
      "learning_rate": 4.2420004904170644e-07,
      "loss": 0.1195,
      "step": 29887
    },
    {
      "epoch": 0.8719295174747651,
      "grad_norm": 0.72553159326258,
      "learning_rate": 4.2400963560614736e-07,
      "loss": 0.11,
      "step": 29888
    },
    {
      "epoch": 0.8719586907054088,
      "grad_norm": 0.9495918137817224,
      "learning_rate": 4.238192630239385e-07,
      "loss": 0.1041,
      "step": 29889
    },
    {
      "epoch": 0.8719878639360523,
      "grad_norm": 0.8443970710203967,
      "learning_rate": 4.236289312967784e-07,
      "loss": 0.1021,
      "step": 29890
    },
    {
      "epoch": 0.8720170371666959,
      "grad_norm": 0.8687350574882492,
      "learning_rate": 4.234386404263674e-07,
      "loss": 0.1141,
      "step": 29891
    },
    {
      "epoch": 0.8720462103973394,
      "grad_norm": 1.1711484215960941,
      "learning_rate": 4.232483904144036e-07,
      "loss": 0.1398,
      "step": 29892
    },
    {
      "epoch": 0.872075383627983,
      "grad_norm": 0.8544625514073956,
      "learning_rate": 4.2305818126258445e-07,
      "loss": 0.1305,
      "step": 29893
    },
    {
      "epoch": 0.8721045568586265,
      "grad_norm": 0.8036563494629677,
      "learning_rate": 4.2286801297260983e-07,
      "loss": 0.1024,
      "step": 29894
    },
    {
      "epoch": 0.8721337300892701,
      "grad_norm": 0.7717621317173138,
      "learning_rate": 4.226778855461777e-07,
      "loss": 0.105,
      "step": 29895
    },
    {
      "epoch": 0.8721629033199136,
      "grad_norm": 0.8559787744048357,
      "learning_rate": 4.224877989849835e-07,
      "loss": 0.1056,
      "step": 29896
    },
    {
      "epoch": 0.8721920765505572,
      "grad_norm": 0.7974922375493215,
      "learning_rate": 4.2229775329072687e-07,
      "loss": 0.1164,
      "step": 29897
    },
    {
      "epoch": 0.8722212497812007,
      "grad_norm": 0.6807687891692692,
      "learning_rate": 4.221077484651026e-07,
      "loss": 0.1071,
      "step": 29898
    },
    {
      "epoch": 0.8722504230118443,
      "grad_norm": 0.9698070852506537,
      "learning_rate": 4.219177845098071e-07,
      "loss": 0.1102,
      "step": 29899
    },
    {
      "epoch": 0.8722795962424879,
      "grad_norm": 1.0138886604346813,
      "learning_rate": 4.2172786142653633e-07,
      "loss": 0.0935,
      "step": 29900
    },
    {
      "epoch": 0.8723087694731314,
      "grad_norm": 0.9475220467153204,
      "learning_rate": 4.215379792169877e-07,
      "loss": 0.1128,
      "step": 29901
    },
    {
      "epoch": 0.8723379427037751,
      "grad_norm": 0.8521653135230126,
      "learning_rate": 4.2134813788285436e-07,
      "loss": 0.129,
      "step": 29902
    },
    {
      "epoch": 0.8723671159344186,
      "grad_norm": 0.8184166906469769,
      "learning_rate": 4.2115833742583157e-07,
      "loss": 0.1151,
      "step": 29903
    },
    {
      "epoch": 0.8723962891650622,
      "grad_norm": 0.8239441281175344,
      "learning_rate": 4.2096857784761466e-07,
      "loss": 0.1221,
      "step": 29904
    },
    {
      "epoch": 0.8724254623957057,
      "grad_norm": 0.778795356122018,
      "learning_rate": 4.2077885914989733e-07,
      "loss": 0.1057,
      "step": 29905
    },
    {
      "epoch": 0.8724546356263493,
      "grad_norm": 0.8442644199060447,
      "learning_rate": 4.205891813343721e-07,
      "loss": 0.1132,
      "step": 29906
    },
    {
      "epoch": 0.8724838088569928,
      "grad_norm": 0.7711401672933712,
      "learning_rate": 4.203995444027337e-07,
      "loss": 0.1139,
      "step": 29907
    },
    {
      "epoch": 0.8725129820876364,
      "grad_norm": 0.7549938529351892,
      "learning_rate": 4.202099483566757e-07,
      "loss": 0.1347,
      "step": 29908
    },
    {
      "epoch": 0.8725421553182799,
      "grad_norm": 0.8560959875783646,
      "learning_rate": 4.200203931978897e-07,
      "loss": 0.1139,
      "step": 29909
    },
    {
      "epoch": 0.8725713285489235,
      "grad_norm": 0.8226736831077333,
      "learning_rate": 4.198308789280681e-07,
      "loss": 0.1178,
      "step": 29910
    },
    {
      "epoch": 0.872600501779567,
      "grad_norm": 0.8858024223853243,
      "learning_rate": 4.1964140554890343e-07,
      "loss": 0.1126,
      "step": 29911
    },
    {
      "epoch": 0.8726296750102106,
      "grad_norm": 0.7147194341951294,
      "learning_rate": 4.1945197306208606e-07,
      "loss": 0.1069,
      "step": 29912
    },
    {
      "epoch": 0.8726588482408542,
      "grad_norm": 4.827301150089571,
      "learning_rate": 4.19262581469308e-07,
      "loss": 0.1068,
      "step": 29913
    },
    {
      "epoch": 0.8726880214714977,
      "grad_norm": 0.9969858528344643,
      "learning_rate": 4.1907323077226114e-07,
      "loss": 0.1077,
      "step": 29914
    },
    {
      "epoch": 0.8727171947021413,
      "grad_norm": 0.9808522789212067,
      "learning_rate": 4.1888392097263473e-07,
      "loss": 0.1035,
      "step": 29915
    },
    {
      "epoch": 0.8727463679327849,
      "grad_norm": 0.8994254612147985,
      "learning_rate": 4.18694652072118e-07,
      "loss": 0.1273,
      "step": 29916
    },
    {
      "epoch": 0.8727755411634285,
      "grad_norm": 0.8959273599287071,
      "learning_rate": 4.185054240724029e-07,
      "loss": 0.1051,
      "step": 29917
    },
    {
      "epoch": 0.872804714394072,
      "grad_norm": 0.832198318156632,
      "learning_rate": 4.1831623697517697e-07,
      "loss": 0.1083,
      "step": 29918
    },
    {
      "epoch": 0.8728338876247156,
      "grad_norm": 0.7491026800389943,
      "learning_rate": 4.1812709078213056e-07,
      "loss": 0.1021,
      "step": 29919
    },
    {
      "epoch": 0.8728630608553591,
      "grad_norm": 0.7795319850787747,
      "learning_rate": 4.1793798549495115e-07,
      "loss": 0.1048,
      "step": 29920
    },
    {
      "epoch": 0.8728922340860027,
      "grad_norm": 0.8050425427365901,
      "learning_rate": 4.177489211153279e-07,
      "loss": 0.0944,
      "step": 29921
    },
    {
      "epoch": 0.8729214073166462,
      "grad_norm": 1.023494835948346,
      "learning_rate": 4.175598976449491e-07,
      "loss": 0.1113,
      "step": 29922
    },
    {
      "epoch": 0.8729505805472898,
      "grad_norm": 0.8657322492817636,
      "learning_rate": 4.1737091508550043e-07,
      "loss": 0.1197,
      "step": 29923
    },
    {
      "epoch": 0.8729797537779334,
      "grad_norm": 0.9398422308099172,
      "learning_rate": 4.1718197343867004e-07,
      "loss": 0.119,
      "step": 29924
    },
    {
      "epoch": 0.8730089270085769,
      "grad_norm": 0.9654851883590972,
      "learning_rate": 4.1699307270614607e-07,
      "loss": 0.099,
      "step": 29925
    },
    {
      "epoch": 0.8730381002392205,
      "grad_norm": 0.9190948702960542,
      "learning_rate": 4.168042128896127e-07,
      "loss": 0.1241,
      "step": 29926
    },
    {
      "epoch": 0.873067273469864,
      "grad_norm": 0.9845219121354531,
      "learning_rate": 4.1661539399075855e-07,
      "loss": 0.1198,
      "step": 29927
    },
    {
      "epoch": 0.8730964467005076,
      "grad_norm": 0.8927423186777527,
      "learning_rate": 4.164266160112679e-07,
      "loss": 0.1357,
      "step": 29928
    },
    {
      "epoch": 0.8731256199311512,
      "grad_norm": 0.6886350312245313,
      "learning_rate": 4.162378789528254e-07,
      "loss": 0.1073,
      "step": 29929
    },
    {
      "epoch": 0.8731547931617948,
      "grad_norm": 1.350166590262735,
      "learning_rate": 4.160491828171165e-07,
      "loss": 0.1191,
      "step": 29930
    },
    {
      "epoch": 0.8731839663924383,
      "grad_norm": 0.8723272090238917,
      "learning_rate": 4.1586052760582753e-07,
      "loss": 0.1056,
      "step": 29931
    },
    {
      "epoch": 0.8732131396230819,
      "grad_norm": 0.8579215096389476,
      "learning_rate": 4.156719133206416e-07,
      "loss": 0.1123,
      "step": 29932
    },
    {
      "epoch": 0.8732423128537254,
      "grad_norm": 0.9218092007763244,
      "learning_rate": 4.154833399632413e-07,
      "loss": 0.1153,
      "step": 29933
    },
    {
      "epoch": 0.873271486084369,
      "grad_norm": 0.7899232977371622,
      "learning_rate": 4.1529480753531193e-07,
      "loss": 0.1077,
      "step": 29934
    },
    {
      "epoch": 0.8733006593150126,
      "grad_norm": 0.8275254846762283,
      "learning_rate": 4.1510631603853655e-07,
      "loss": 0.1211,
      "step": 29935
    },
    {
      "epoch": 0.8733298325456561,
      "grad_norm": 1.005011908344243,
      "learning_rate": 4.149178654745961e-07,
      "loss": 0.1223,
      "step": 29936
    },
    {
      "epoch": 0.8733590057762997,
      "grad_norm": 0.7056828889262484,
      "learning_rate": 4.1472945584517476e-07,
      "loss": 0.1276,
      "step": 29937
    },
    {
      "epoch": 0.8733881790069432,
      "grad_norm": 1.3001967449194474,
      "learning_rate": 4.145410871519551e-07,
      "loss": 0.1158,
      "step": 29938
    },
    {
      "epoch": 0.8734173522375868,
      "grad_norm": 1.0241636410606516,
      "learning_rate": 4.143527593966179e-07,
      "loss": 0.0849,
      "step": 29939
    },
    {
      "epoch": 0.8734465254682303,
      "grad_norm": 0.9323677915350644,
      "learning_rate": 4.141644725808436e-07,
      "loss": 0.0996,
      "step": 29940
    },
    {
      "epoch": 0.8734756986988739,
      "grad_norm": 0.7319661576150167,
      "learning_rate": 4.1397622670631523e-07,
      "loss": 0.101,
      "step": 29941
    },
    {
      "epoch": 0.8735048719295174,
      "grad_norm": 0.7916314828259348,
      "learning_rate": 4.1378802177471144e-07,
      "loss": 0.107,
      "step": 29942
    },
    {
      "epoch": 0.8735340451601611,
      "grad_norm": 0.9512679043925271,
      "learning_rate": 4.135998577877132e-07,
      "loss": 0.1079,
      "step": 29943
    },
    {
      "epoch": 0.8735632183908046,
      "grad_norm": 1.0788826656880606,
      "learning_rate": 4.134117347470018e-07,
      "loss": 0.1201,
      "step": 29944
    },
    {
      "epoch": 0.8735923916214482,
      "grad_norm": 1.027673197157146,
      "learning_rate": 4.1322365265425545e-07,
      "loss": 0.1201,
      "step": 29945
    },
    {
      "epoch": 0.8736215648520917,
      "grad_norm": 1.0343293405049407,
      "learning_rate": 4.130356115111522e-07,
      "loss": 0.0916,
      "step": 29946
    },
    {
      "epoch": 0.8736507380827353,
      "grad_norm": 0.7508662886368539,
      "learning_rate": 4.12847611319373e-07,
      "loss": 0.1096,
      "step": 29947
    },
    {
      "epoch": 0.8736799113133789,
      "grad_norm": 0.8008703546242361,
      "learning_rate": 4.1265965208059423e-07,
      "loss": 0.1011,
      "step": 29948
    },
    {
      "epoch": 0.8737090845440224,
      "grad_norm": 0.749532749373626,
      "learning_rate": 4.124717337964962e-07,
      "loss": 0.1193,
      "step": 29949
    },
    {
      "epoch": 0.873738257774666,
      "grad_norm": 0.7525171418922129,
      "learning_rate": 4.122838564687542e-07,
      "loss": 0.11,
      "step": 29950
    },
    {
      "epoch": 0.8737674310053095,
      "grad_norm": 0.8707908207771639,
      "learning_rate": 4.120960200990481e-07,
      "loss": 0.1171,
      "step": 29951
    },
    {
      "epoch": 0.8737966042359531,
      "grad_norm": 0.7708651216906697,
      "learning_rate": 4.119082246890532e-07,
      "loss": 0.1204,
      "step": 29952
    },
    {
      "epoch": 0.8738257774665966,
      "grad_norm": 0.9071471813604898,
      "learning_rate": 4.117204702404459e-07,
      "loss": 0.0917,
      "step": 29953
    },
    {
      "epoch": 0.8738549506972402,
      "grad_norm": 1.0675546952025274,
      "learning_rate": 4.115327567549021e-07,
      "loss": 0.1175,
      "step": 29954
    },
    {
      "epoch": 0.8738841239278837,
      "grad_norm": 0.887214975252043,
      "learning_rate": 4.113450842340999e-07,
      "loss": 0.1088,
      "step": 29955
    },
    {
      "epoch": 0.8739132971585273,
      "grad_norm": 0.9111866400366525,
      "learning_rate": 4.11157452679713e-07,
      "loss": 0.096,
      "step": 29956
    },
    {
      "epoch": 0.873942470389171,
      "grad_norm": 0.697758272359337,
      "learning_rate": 4.1096986209341716e-07,
      "loss": 0.1141,
      "step": 29957
    },
    {
      "epoch": 0.8739716436198145,
      "grad_norm": 0.8424568829645216,
      "learning_rate": 4.107823124768867e-07,
      "loss": 0.105,
      "step": 29958
    },
    {
      "epoch": 0.874000816850458,
      "grad_norm": 0.7569289409986389,
      "learning_rate": 4.1059480383179586e-07,
      "loss": 0.1084,
      "step": 29959
    },
    {
      "epoch": 0.8740299900811016,
      "grad_norm": 1.0302477770546048,
      "learning_rate": 4.1040733615981876e-07,
      "loss": 0.1246,
      "step": 29960
    },
    {
      "epoch": 0.8740591633117452,
      "grad_norm": 1.1180763991332148,
      "learning_rate": 4.1021990946263025e-07,
      "loss": 0.1065,
      "step": 29961
    },
    {
      "epoch": 0.8740883365423887,
      "grad_norm": 0.7034423738035324,
      "learning_rate": 4.1003252374190284e-07,
      "loss": 0.0891,
      "step": 29962
    },
    {
      "epoch": 0.8741175097730323,
      "grad_norm": 0.8282797111144902,
      "learning_rate": 4.098451789993085e-07,
      "loss": 0.1065,
      "step": 29963
    },
    {
      "epoch": 0.8741466830036758,
      "grad_norm": 0.8533735373287727,
      "learning_rate": 4.0965787523652156e-07,
      "loss": 0.1067,
      "step": 29964
    },
    {
      "epoch": 0.8741758562343194,
      "grad_norm": 0.9289842007959089,
      "learning_rate": 4.094706124552128e-07,
      "loss": 0.1331,
      "step": 29965
    },
    {
      "epoch": 0.8742050294649629,
      "grad_norm": 0.7402285195808873,
      "learning_rate": 4.0928339065705424e-07,
      "loss": 0.1074,
      "step": 29966
    },
    {
      "epoch": 0.8742342026956065,
      "grad_norm": 0.898508439569312,
      "learning_rate": 4.0909620984371733e-07,
      "loss": 0.0995,
      "step": 29967
    },
    {
      "epoch": 0.87426337592625,
      "grad_norm": 0.7110177183257381,
      "learning_rate": 4.0890907001687463e-07,
      "loss": 0.0994,
      "step": 29968
    },
    {
      "epoch": 0.8742925491568936,
      "grad_norm": 0.9737372746845955,
      "learning_rate": 4.087219711781959e-07,
      "loss": 0.1059,
      "step": 29969
    },
    {
      "epoch": 0.8743217223875372,
      "grad_norm": 0.793370043176925,
      "learning_rate": 4.0853491332935034e-07,
      "loss": 0.1002,
      "step": 29970
    },
    {
      "epoch": 0.8743508956181808,
      "grad_norm": 0.9129891616227633,
      "learning_rate": 4.0834789647201003e-07,
      "loss": 0.1241,
      "step": 29971
    },
    {
      "epoch": 0.8743800688488244,
      "grad_norm": 0.9607523923505505,
      "learning_rate": 4.081609206078424e-07,
      "loss": 0.1325,
      "step": 29972
    },
    {
      "epoch": 0.8744092420794679,
      "grad_norm": 0.8913248019903911,
      "learning_rate": 4.079739857385179e-07,
      "loss": 0.1118,
      "step": 29973
    },
    {
      "epoch": 0.8744384153101115,
      "grad_norm": 0.8102677818546921,
      "learning_rate": 4.077870918657062e-07,
      "loss": 0.1165,
      "step": 29974
    },
    {
      "epoch": 0.874467588540755,
      "grad_norm": 0.9559230906975134,
      "learning_rate": 4.076002389910755e-07,
      "loss": 0.1191,
      "step": 29975
    },
    {
      "epoch": 0.8744967617713986,
      "grad_norm": 1.04818129475544,
      "learning_rate": 4.074134271162927e-07,
      "loss": 0.111,
      "step": 29976
    },
    {
      "epoch": 0.8745259350020421,
      "grad_norm": 0.8321931156481468,
      "learning_rate": 4.0722665624302717e-07,
      "loss": 0.1135,
      "step": 29977
    },
    {
      "epoch": 0.8745551082326857,
      "grad_norm": 0.9789598585791341,
      "learning_rate": 4.0703992637294466e-07,
      "loss": 0.1107,
      "step": 29978
    },
    {
      "epoch": 0.8745842814633292,
      "grad_norm": 0.8875249128288172,
      "learning_rate": 4.068532375077144e-07,
      "loss": 0.1119,
      "step": 29979
    },
    {
      "epoch": 0.8746134546939728,
      "grad_norm": 3.800407419199114,
      "learning_rate": 4.066665896490013e-07,
      "loss": 0.1162,
      "step": 29980
    },
    {
      "epoch": 0.8746426279246163,
      "grad_norm": 0.7398637198877579,
      "learning_rate": 4.0647998279847277e-07,
      "loss": 0.111,
      "step": 29981
    },
    {
      "epoch": 0.8746718011552599,
      "grad_norm": 0.9066124867651206,
      "learning_rate": 4.0629341695779423e-07,
      "loss": 0.092,
      "step": 29982
    },
    {
      "epoch": 0.8747009743859034,
      "grad_norm": 0.7808211060457666,
      "learning_rate": 4.06106892128631e-07,
      "loss": 0.1071,
      "step": 29983
    },
    {
      "epoch": 0.8747301476165471,
      "grad_norm": 0.8295891986742827,
      "learning_rate": 4.059204083126489e-07,
      "loss": 0.1066,
      "step": 29984
    },
    {
      "epoch": 0.8747593208471907,
      "grad_norm": 0.980293509020823,
      "learning_rate": 4.0573396551151335e-07,
      "loss": 0.094,
      "step": 29985
    },
    {
      "epoch": 0.8747884940778342,
      "grad_norm": 0.7797549997944827,
      "learning_rate": 4.0554756372688744e-07,
      "loss": 0.1293,
      "step": 29986
    },
    {
      "epoch": 0.8748176673084778,
      "grad_norm": 0.6828497120825575,
      "learning_rate": 4.05361202960437e-07,
      "loss": 0.119,
      "step": 29987
    },
    {
      "epoch": 0.8748468405391213,
      "grad_norm": 0.7780385789923829,
      "learning_rate": 4.051748832138247e-07,
      "loss": 0.1278,
      "step": 29988
    },
    {
      "epoch": 0.8748760137697649,
      "grad_norm": 0.8115155380145198,
      "learning_rate": 4.049886044887136e-07,
      "loss": 0.1093,
      "step": 29989
    },
    {
      "epoch": 0.8749051870004084,
      "grad_norm": 0.7910249846198542,
      "learning_rate": 4.0480236678676674e-07,
      "loss": 0.1009,
      "step": 29990
    },
    {
      "epoch": 0.874934360231052,
      "grad_norm": 0.9517769185784213,
      "learning_rate": 4.0461617010964906e-07,
      "loss": 0.1178,
      "step": 29991
    },
    {
      "epoch": 0.8749635334616955,
      "grad_norm": 1.1443227130278366,
      "learning_rate": 4.0443001445902073e-07,
      "loss": 0.1106,
      "step": 29992
    },
    {
      "epoch": 0.8749927066923391,
      "grad_norm": 0.9945797268553265,
      "learning_rate": 4.042438998365433e-07,
      "loss": 0.1464,
      "step": 29993
    },
    {
      "epoch": 0.8750218799229826,
      "grad_norm": 0.7847706764904184,
      "learning_rate": 4.040578262438799e-07,
      "loss": 0.1146,
      "step": 29994
    },
    {
      "epoch": 0.8750510531536262,
      "grad_norm": 0.8295375998411549,
      "learning_rate": 4.0387179368269137e-07,
      "loss": 0.1081,
      "step": 29995
    },
    {
      "epoch": 0.8750802263842697,
      "grad_norm": 0.8514728598688256,
      "learning_rate": 4.0368580215463746e-07,
      "loss": 0.1125,
      "step": 29996
    },
    {
      "epoch": 0.8751093996149134,
      "grad_norm": 0.857317926644112,
      "learning_rate": 4.034998516613797e-07,
      "loss": 0.1265,
      "step": 29997
    },
    {
      "epoch": 0.875138572845557,
      "grad_norm": 1.005063040112087,
      "learning_rate": 4.033139422045784e-07,
      "loss": 0.1122,
      "step": 29998
    },
    {
      "epoch": 0.8751677460762005,
      "grad_norm": 0.783048773544439,
      "learning_rate": 4.0312807378589335e-07,
      "loss": 0.0949,
      "step": 29999
    },
    {
      "epoch": 0.8751969193068441,
      "grad_norm": 0.7896354250843888,
      "learning_rate": 4.029422464069821e-07,
      "loss": 0.0973,
      "step": 30000
    },
    {
      "epoch": 0.8752260925374876,
      "grad_norm": 0.6961728329544881,
      "learning_rate": 4.027564600695055e-07,
      "loss": 0.115,
      "step": 30001
    },
    {
      "epoch": 0.8752552657681312,
      "grad_norm": 0.8803479317906855,
      "learning_rate": 4.025707147751223e-07,
      "loss": 0.1123,
      "step": 30002
    },
    {
      "epoch": 0.8752844389987747,
      "grad_norm": 0.8960196147789281,
      "learning_rate": 4.023850105254895e-07,
      "loss": 0.1079,
      "step": 30003
    },
    {
      "epoch": 0.8753136122294183,
      "grad_norm": 0.7061949648126875,
      "learning_rate": 4.021993473222668e-07,
      "loss": 0.0946,
      "step": 30004
    },
    {
      "epoch": 0.8753427854600618,
      "grad_norm": 0.7050193902636842,
      "learning_rate": 4.020137251671108e-07,
      "loss": 0.0999,
      "step": 30005
    },
    {
      "epoch": 0.8753719586907054,
      "grad_norm": 1.057616937879093,
      "learning_rate": 4.018281440616778e-07,
      "loss": 0.1046,
      "step": 30006
    },
    {
      "epoch": 0.8754011319213489,
      "grad_norm": 1.6158759986205025,
      "learning_rate": 4.016426040076249e-07,
      "loss": 0.0961,
      "step": 30007
    },
    {
      "epoch": 0.8754303051519925,
      "grad_norm": 0.6381332574210345,
      "learning_rate": 4.0145710500661075e-07,
      "loss": 0.1005,
      "step": 30008
    },
    {
      "epoch": 0.875459478382636,
      "grad_norm": 0.7287050047502998,
      "learning_rate": 4.012716470602895e-07,
      "loss": 0.1222,
      "step": 30009
    },
    {
      "epoch": 0.8754886516132796,
      "grad_norm": 0.7602515936667066,
      "learning_rate": 4.0108623017031613e-07,
      "loss": 0.1154,
      "step": 30010
    },
    {
      "epoch": 0.8755178248439233,
      "grad_norm": 0.7219171099542323,
      "learning_rate": 4.00900854338348e-07,
      "loss": 0.0995,
      "step": 30011
    },
    {
      "epoch": 0.8755469980745668,
      "grad_norm": 0.7480555337142266,
      "learning_rate": 4.0071551956603893e-07,
      "loss": 0.103,
      "step": 30012
    },
    {
      "epoch": 0.8755761713052104,
      "grad_norm": 0.7282980125470874,
      "learning_rate": 4.005302258550425e-07,
      "loss": 0.1128,
      "step": 30013
    },
    {
      "epoch": 0.8756053445358539,
      "grad_norm": 0.8296426808167071,
      "learning_rate": 4.0034497320701584e-07,
      "loss": 0.1136,
      "step": 30014
    },
    {
      "epoch": 0.8756345177664975,
      "grad_norm": 0.7558778919722866,
      "learning_rate": 4.001597616236108e-07,
      "loss": 0.1005,
      "step": 30015
    },
    {
      "epoch": 0.875663690997141,
      "grad_norm": 1.0726349994338809,
      "learning_rate": 3.999745911064812e-07,
      "loss": 0.1275,
      "step": 30016
    },
    {
      "epoch": 0.8756928642277846,
      "grad_norm": 0.8753592511955679,
      "learning_rate": 3.997894616572806e-07,
      "loss": 0.1007,
      "step": 30017
    },
    {
      "epoch": 0.8757220374584281,
      "grad_norm": 0.8331388970453892,
      "learning_rate": 3.996043732776617e-07,
      "loss": 0.0973,
      "step": 30018
    },
    {
      "epoch": 0.8757512106890717,
      "grad_norm": 0.7109433945251317,
      "learning_rate": 3.994193259692758e-07,
      "loss": 0.1228,
      "step": 30019
    },
    {
      "epoch": 0.8757803839197152,
      "grad_norm": 0.8393881516507595,
      "learning_rate": 3.992343197337761e-07,
      "loss": 0.1025,
      "step": 30020
    },
    {
      "epoch": 0.8758095571503588,
      "grad_norm": 0.830369322465344,
      "learning_rate": 3.9904935457281524e-07,
      "loss": 0.1054,
      "step": 30021
    },
    {
      "epoch": 0.8758387303810024,
      "grad_norm": 0.7929558092667852,
      "learning_rate": 3.988644304880429e-07,
      "loss": 0.1091,
      "step": 30022
    },
    {
      "epoch": 0.8758679036116459,
      "grad_norm": 0.8207451692628104,
      "learning_rate": 3.9867954748111e-07,
      "loss": 0.0978,
      "step": 30023
    },
    {
      "epoch": 0.8758970768422896,
      "grad_norm": 0.8455388434418435,
      "learning_rate": 3.984947055536681e-07,
      "loss": 0.1114,
      "step": 30024
    },
    {
      "epoch": 0.8759262500729331,
      "grad_norm": 1.1177513257661293,
      "learning_rate": 3.9830990470736684e-07,
      "loss": 0.0862,
      "step": 30025
    },
    {
      "epoch": 0.8759554233035767,
      "grad_norm": 0.9104098906899314,
      "learning_rate": 3.981251449438567e-07,
      "loss": 0.1016,
      "step": 30026
    },
    {
      "epoch": 0.8759845965342202,
      "grad_norm": 0.766933255601191,
      "learning_rate": 3.9794042626478566e-07,
      "loss": 0.0957,
      "step": 30027
    },
    {
      "epoch": 0.8760137697648638,
      "grad_norm": 0.8170312088935947,
      "learning_rate": 3.9775574867180477e-07,
      "loss": 0.1149,
      "step": 30028
    },
    {
      "epoch": 0.8760429429955073,
      "grad_norm": 0.73466149004003,
      "learning_rate": 3.975711121665621e-07,
      "loss": 0.1059,
      "step": 30029
    },
    {
      "epoch": 0.8760721162261509,
      "grad_norm": 0.8350009194852385,
      "learning_rate": 3.973865167507052e-07,
      "loss": 0.1139,
      "step": 30030
    },
    {
      "epoch": 0.8761012894567944,
      "grad_norm": 0.8192370000669458,
      "learning_rate": 3.9720196242588214e-07,
      "loss": 0.104,
      "step": 30031
    },
    {
      "epoch": 0.876130462687438,
      "grad_norm": 0.9749452790959781,
      "learning_rate": 3.9701744919374285e-07,
      "loss": 0.108,
      "step": 30032
    },
    {
      "epoch": 0.8761596359180815,
      "grad_norm": 0.8738741975347576,
      "learning_rate": 3.968329770559315e-07,
      "loss": 0.1195,
      "step": 30033
    },
    {
      "epoch": 0.8761888091487251,
      "grad_norm": 0.8300108519181769,
      "learning_rate": 3.966485460140973e-07,
      "loss": 0.102,
      "step": 30034
    },
    {
      "epoch": 0.8762179823793687,
      "grad_norm": 1.1384501295776965,
      "learning_rate": 3.964641560698862e-07,
      "loss": 0.1216,
      "step": 30035
    },
    {
      "epoch": 0.8762471556100122,
      "grad_norm": 0.8709744781499231,
      "learning_rate": 3.962798072249435e-07,
      "loss": 0.134,
      "step": 30036
    },
    {
      "epoch": 0.8762763288406558,
      "grad_norm": 0.9695599752029358,
      "learning_rate": 3.9609549948091517e-07,
      "loss": 0.0907,
      "step": 30037
    },
    {
      "epoch": 0.8763055020712994,
      "grad_norm": 0.7941932202922776,
      "learning_rate": 3.9591123283944875e-07,
      "loss": 0.1069,
      "step": 30038
    },
    {
      "epoch": 0.876334675301943,
      "grad_norm": 0.8073176101841258,
      "learning_rate": 3.9572700730218685e-07,
      "loss": 0.105,
      "step": 30039
    },
    {
      "epoch": 0.8763638485325865,
      "grad_norm": 0.7648654751563687,
      "learning_rate": 3.955428228707747e-07,
      "loss": 0.0891,
      "step": 30040
    },
    {
      "epoch": 0.8763930217632301,
      "grad_norm": 0.9619900489099411,
      "learning_rate": 3.953586795468584e-07,
      "loss": 0.1166,
      "step": 30041
    },
    {
      "epoch": 0.8764221949938736,
      "grad_norm": 0.9208818118250522,
      "learning_rate": 3.9517457733207973e-07,
      "loss": 0.1073,
      "step": 30042
    },
    {
      "epoch": 0.8764513682245172,
      "grad_norm": 1.0076492847406802,
      "learning_rate": 3.9499051622808203e-07,
      "loss": 0.106,
      "step": 30043
    },
    {
      "epoch": 0.8764805414551607,
      "grad_norm": 0.6455984377638654,
      "learning_rate": 3.948064962365111e-07,
      "loss": 0.1056,
      "step": 30044
    },
    {
      "epoch": 0.8765097146858043,
      "grad_norm": 0.7674607736520921,
      "learning_rate": 3.9462251735900845e-07,
      "loss": 0.1208,
      "step": 30045
    },
    {
      "epoch": 0.8765388879164479,
      "grad_norm": 0.9589067616641717,
      "learning_rate": 3.944385795972161e-07,
      "loss": 0.1063,
      "step": 30046
    },
    {
      "epoch": 0.8765680611470914,
      "grad_norm": 1.0815651788480012,
      "learning_rate": 3.9425468295277714e-07,
      "loss": 0.1416,
      "step": 30047
    },
    {
      "epoch": 0.876597234377735,
      "grad_norm": 0.9132409396108546,
      "learning_rate": 3.9407082742733306e-07,
      "loss": 0.1517,
      "step": 30048
    },
    {
      "epoch": 0.8766264076083785,
      "grad_norm": 0.9288953014955593,
      "learning_rate": 3.938870130225242e-07,
      "loss": 0.1227,
      "step": 30049
    },
    {
      "epoch": 0.8766555808390221,
      "grad_norm": 0.8664721978661181,
      "learning_rate": 3.937032397399926e-07,
      "loss": 0.0873,
      "step": 30050
    },
    {
      "epoch": 0.8766847540696657,
      "grad_norm": 0.849766620273122,
      "learning_rate": 3.935195075813797e-07,
      "loss": 0.1311,
      "step": 30051
    },
    {
      "epoch": 0.8767139273003093,
      "grad_norm": 1.6430253113803783,
      "learning_rate": 3.9333581654832473e-07,
      "loss": 0.1059,
      "step": 30052
    },
    {
      "epoch": 0.8767431005309528,
      "grad_norm": 0.927702926764876,
      "learning_rate": 3.931521666424676e-07,
      "loss": 0.1006,
      "step": 30053
    },
    {
      "epoch": 0.8767722737615964,
      "grad_norm": 0.8112553638058957,
      "learning_rate": 3.929685578654485e-07,
      "loss": 0.1072,
      "step": 30054
    },
    {
      "epoch": 0.8768014469922399,
      "grad_norm": 0.8092630212378464,
      "learning_rate": 3.927849902189057e-07,
      "loss": 0.1063,
      "step": 30055
    },
    {
      "epoch": 0.8768306202228835,
      "grad_norm": 1.0379929437241544,
      "learning_rate": 3.926014637044795e-07,
      "loss": 0.125,
      "step": 30056
    },
    {
      "epoch": 0.876859793453527,
      "grad_norm": 0.8573863127018945,
      "learning_rate": 3.9241797832380634e-07,
      "loss": 0.103,
      "step": 30057
    },
    {
      "epoch": 0.8768889666841706,
      "grad_norm": 0.7165389145789426,
      "learning_rate": 3.922345340785266e-07,
      "loss": 0.1025,
      "step": 30058
    },
    {
      "epoch": 0.8769181399148142,
      "grad_norm": 0.9053114762016254,
      "learning_rate": 3.9205113097027734e-07,
      "loss": 0.1301,
      "step": 30059
    },
    {
      "epoch": 0.8769473131454577,
      "grad_norm": 1.5393050782075324,
      "learning_rate": 3.9186776900069444e-07,
      "loss": 0.0985,
      "step": 30060
    },
    {
      "epoch": 0.8769764863761013,
      "grad_norm": 0.7210706822800717,
      "learning_rate": 3.9168444817141603e-07,
      "loss": 0.1217,
      "step": 30061
    },
    {
      "epoch": 0.8770056596067448,
      "grad_norm": 0.8042049388030086,
      "learning_rate": 3.9150116848407973e-07,
      "loss": 0.1152,
      "step": 30062
    },
    {
      "epoch": 0.8770348328373884,
      "grad_norm": 0.9580766186000356,
      "learning_rate": 3.913179299403203e-07,
      "loss": 0.1204,
      "step": 30063
    },
    {
      "epoch": 0.8770640060680319,
      "grad_norm": 0.8548960706534561,
      "learning_rate": 3.911347325417747e-07,
      "loss": 0.1178,
      "step": 30064
    },
    {
      "epoch": 0.8770931792986756,
      "grad_norm": 0.7249366520179904,
      "learning_rate": 3.9095157629007786e-07,
      "loss": 0.1217,
      "step": 30065
    },
    {
      "epoch": 0.8771223525293191,
      "grad_norm": 0.7628181865832407,
      "learning_rate": 3.907684611868645e-07,
      "loss": 0.1001,
      "step": 30066
    },
    {
      "epoch": 0.8771515257599627,
      "grad_norm": 0.9237902685551095,
      "learning_rate": 3.905853872337695e-07,
      "loss": 0.1089,
      "step": 30067
    },
    {
      "epoch": 0.8771806989906062,
      "grad_norm": 0.7328612675413081,
      "learning_rate": 3.9040235443242924e-07,
      "loss": 0.1103,
      "step": 30068
    },
    {
      "epoch": 0.8772098722212498,
      "grad_norm": 0.7110917749059704,
      "learning_rate": 3.9021936278447636e-07,
      "loss": 0.1169,
      "step": 30069
    },
    {
      "epoch": 0.8772390454518934,
      "grad_norm": 0.9421898718187118,
      "learning_rate": 3.900364122915434e-07,
      "loss": 0.0999,
      "step": 30070
    },
    {
      "epoch": 0.8772682186825369,
      "grad_norm": 0.7690975519919347,
      "learning_rate": 3.898535029552658e-07,
      "loss": 0.135,
      "step": 30071
    },
    {
      "epoch": 0.8772973919131805,
      "grad_norm": 0.999268463003431,
      "learning_rate": 3.896706347772755e-07,
      "loss": 0.1127,
      "step": 30072
    },
    {
      "epoch": 0.877326565143824,
      "grad_norm": 1.0454217622245405,
      "learning_rate": 3.89487807759204e-07,
      "loss": 0.124,
      "step": 30073
    },
    {
      "epoch": 0.8773557383744676,
      "grad_norm": 1.1237790440347422,
      "learning_rate": 3.8930502190268616e-07,
      "loss": 0.1349,
      "step": 30074
    },
    {
      "epoch": 0.8773849116051111,
      "grad_norm": 0.8953995022199389,
      "learning_rate": 3.891222772093523e-07,
      "loss": 0.1143,
      "step": 30075
    },
    {
      "epoch": 0.8774140848357547,
      "grad_norm": 0.638862969028371,
      "learning_rate": 3.8893957368083325e-07,
      "loss": 0.1054,
      "step": 30076
    },
    {
      "epoch": 0.8774432580663982,
      "grad_norm": 0.6438116980388293,
      "learning_rate": 3.887569113187617e-07,
      "loss": 0.1073,
      "step": 30077
    },
    {
      "epoch": 0.8774724312970419,
      "grad_norm": 0.6530132009425201,
      "learning_rate": 3.885742901247674e-07,
      "loss": 0.102,
      "step": 30078
    },
    {
      "epoch": 0.8775016045276854,
      "grad_norm": 0.7356138233567865,
      "learning_rate": 3.8839171010048083e-07,
      "loss": 0.1374,
      "step": 30079
    },
    {
      "epoch": 0.877530777758329,
      "grad_norm": 0.8455874163711665,
      "learning_rate": 3.8820917124753163e-07,
      "loss": 0.1328,
      "step": 30080
    },
    {
      "epoch": 0.8775599509889725,
      "grad_norm": 0.9487013068431305,
      "learning_rate": 3.8802667356755084e-07,
      "loss": 0.0962,
      "step": 30081
    },
    {
      "epoch": 0.8775891242196161,
      "grad_norm": 0.7003432501212324,
      "learning_rate": 3.8784421706216714e-07,
      "loss": 0.0828,
      "step": 30082
    },
    {
      "epoch": 0.8776182974502597,
      "grad_norm": 0.6712415974749824,
      "learning_rate": 3.876618017330086e-07,
      "loss": 0.1132,
      "step": 30083
    },
    {
      "epoch": 0.8776474706809032,
      "grad_norm": 0.8493576882662304,
      "learning_rate": 3.874794275817051e-07,
      "loss": 0.0873,
      "step": 30084
    },
    {
      "epoch": 0.8776766439115468,
      "grad_norm": 0.8328528734137715,
      "learning_rate": 3.8729709460988365e-07,
      "loss": 0.1332,
      "step": 30085
    },
    {
      "epoch": 0.8777058171421903,
      "grad_norm": 0.8530582869124106,
      "learning_rate": 3.871148028191729e-07,
      "loss": 0.118,
      "step": 30086
    },
    {
      "epoch": 0.8777349903728339,
      "grad_norm": 0.9669593474332003,
      "learning_rate": 3.869325522111994e-07,
      "loss": 0.119,
      "step": 30087
    },
    {
      "epoch": 0.8777641636034774,
      "grad_norm": 0.9538297900146231,
      "learning_rate": 3.8675034278759184e-07,
      "loss": 0.1051,
      "step": 30088
    },
    {
      "epoch": 0.877793336834121,
      "grad_norm": 0.7721806789673515,
      "learning_rate": 3.865681745499761e-07,
      "loss": 0.1056,
      "step": 30089
    },
    {
      "epoch": 0.8778225100647645,
      "grad_norm": 0.5860090225534232,
      "learning_rate": 3.86386047499977e-07,
      "loss": 0.1165,
      "step": 30090
    },
    {
      "epoch": 0.8778516832954081,
      "grad_norm": 0.768090832424776,
      "learning_rate": 3.862039616392221e-07,
      "loss": 0.1027,
      "step": 30091
    },
    {
      "epoch": 0.8778808565260517,
      "grad_norm": 0.9645450329038163,
      "learning_rate": 3.86021916969338e-07,
      "loss": 0.1128,
      "step": 30092
    },
    {
      "epoch": 0.8779100297566953,
      "grad_norm": 1.041949592417441,
      "learning_rate": 3.858399134919472e-07,
      "loss": 0.1298,
      "step": 30093
    },
    {
      "epoch": 0.8779392029873389,
      "grad_norm": 0.8590835881084311,
      "learning_rate": 3.856579512086778e-07,
      "loss": 0.1187,
      "step": 30094
    },
    {
      "epoch": 0.8779683762179824,
      "grad_norm": 0.8951618817724409,
      "learning_rate": 3.854760301211524e-07,
      "loss": 0.1185,
      "step": 30095
    },
    {
      "epoch": 0.877997549448626,
      "grad_norm": 0.7344761342477992,
      "learning_rate": 3.8529415023099425e-07,
      "loss": 0.1026,
      "step": 30096
    },
    {
      "epoch": 0.8780267226792695,
      "grad_norm": 0.7742693369014273,
      "learning_rate": 3.8511231153982866e-07,
      "loss": 0.1022,
      "step": 30097
    },
    {
      "epoch": 0.8780558959099131,
      "grad_norm": 1.4251768437248684,
      "learning_rate": 3.8493051404927985e-07,
      "loss": 0.1222,
      "step": 30098
    },
    {
      "epoch": 0.8780850691405566,
      "grad_norm": 0.85558468117075,
      "learning_rate": 3.847487577609693e-07,
      "loss": 0.1207,
      "step": 30099
    },
    {
      "epoch": 0.8781142423712002,
      "grad_norm": 0.7702977387294887,
      "learning_rate": 3.845670426765191e-07,
      "loss": 0.1001,
      "step": 30100
    },
    {
      "epoch": 0.8781434156018437,
      "grad_norm": 0.6038297114219298,
      "learning_rate": 3.843853687975535e-07,
      "loss": 0.1134,
      "step": 30101
    },
    {
      "epoch": 0.8781725888324873,
      "grad_norm": 0.7674689518157997,
      "learning_rate": 3.842037361256934e-07,
      "loss": 0.1063,
      "step": 30102
    },
    {
      "epoch": 0.8782017620631308,
      "grad_norm": 0.8539562901700051,
      "learning_rate": 3.8402214466255914e-07,
      "loss": 0.1057,
      "step": 30103
    },
    {
      "epoch": 0.8782309352937744,
      "grad_norm": 0.7256561522628749,
      "learning_rate": 3.838405944097745e-07,
      "loss": 0.1229,
      "step": 30104
    },
    {
      "epoch": 0.878260108524418,
      "grad_norm": 0.7276274960672796,
      "learning_rate": 3.8365908536895924e-07,
      "loss": 0.1316,
      "step": 30105
    },
    {
      "epoch": 0.8782892817550616,
      "grad_norm": 0.9949774348404163,
      "learning_rate": 3.834776175417332e-07,
      "loss": 0.1199,
      "step": 30106
    },
    {
      "epoch": 0.8783184549857052,
      "grad_norm": 0.8770199532814935,
      "learning_rate": 3.832961909297173e-07,
      "loss": 0.1152,
      "step": 30107
    },
    {
      "epoch": 0.8783476282163487,
      "grad_norm": 0.8766923169155485,
      "learning_rate": 3.8311480553453127e-07,
      "loss": 0.0978,
      "step": 30108
    },
    {
      "epoch": 0.8783768014469923,
      "grad_norm": 0.8452586878530937,
      "learning_rate": 3.8293346135779287e-07,
      "loss": 0.1069,
      "step": 30109
    },
    {
      "epoch": 0.8784059746776358,
      "grad_norm": 0.7680703348608079,
      "learning_rate": 3.827521584011229e-07,
      "loss": 0.1005,
      "step": 30110
    },
    {
      "epoch": 0.8784351479082794,
      "grad_norm": 0.9003956041630796,
      "learning_rate": 3.8257089666613957e-07,
      "loss": 0.0992,
      "step": 30111
    },
    {
      "epoch": 0.8784643211389229,
      "grad_norm": 0.9955260840028338,
      "learning_rate": 3.8238967615446155e-07,
      "loss": 0.1114,
      "step": 30112
    },
    {
      "epoch": 0.8784934943695665,
      "grad_norm": 0.8329995820013536,
      "learning_rate": 3.8220849686770535e-07,
      "loss": 0.1021,
      "step": 30113
    },
    {
      "epoch": 0.87852266760021,
      "grad_norm": 0.887962467161193,
      "learning_rate": 3.820273588074896e-07,
      "loss": 0.1167,
      "step": 30114
    },
    {
      "epoch": 0.8785518408308536,
      "grad_norm": 0.8019271561048367,
      "learning_rate": 3.8184626197543095e-07,
      "loss": 0.114,
      "step": 30115
    },
    {
      "epoch": 0.8785810140614971,
      "grad_norm": 0.8154637267247528,
      "learning_rate": 3.8166520637314684e-07,
      "loss": 0.1241,
      "step": 30116
    },
    {
      "epoch": 0.8786101872921407,
      "grad_norm": 1.0624333597161308,
      "learning_rate": 3.8148419200225275e-07,
      "loss": 0.1192,
      "step": 30117
    },
    {
      "epoch": 0.8786393605227842,
      "grad_norm": 0.7806937411066809,
      "learning_rate": 3.813032188643662e-07,
      "loss": 0.1105,
      "step": 30118
    },
    {
      "epoch": 0.8786685337534279,
      "grad_norm": 0.7367626320148644,
      "learning_rate": 3.8112228696110144e-07,
      "loss": 0.1245,
      "step": 30119
    },
    {
      "epoch": 0.8786977069840715,
      "grad_norm": 0.8753164490793615,
      "learning_rate": 3.809413962940739e-07,
      "loss": 0.1381,
      "step": 30120
    },
    {
      "epoch": 0.878726880214715,
      "grad_norm": 0.7303670589121473,
      "learning_rate": 3.8076054686489893e-07,
      "loss": 0.0997,
      "step": 30121
    },
    {
      "epoch": 0.8787560534453586,
      "grad_norm": 0.7192568392563339,
      "learning_rate": 3.805797386751914e-07,
      "loss": 0.1191,
      "step": 30122
    },
    {
      "epoch": 0.8787852266760021,
      "grad_norm": 0.8562402704481382,
      "learning_rate": 3.803989717265649e-07,
      "loss": 0.1306,
      "step": 30123
    },
    {
      "epoch": 0.8788143999066457,
      "grad_norm": 0.9609287710374402,
      "learning_rate": 3.802182460206344e-07,
      "loss": 0.1022,
      "step": 30124
    },
    {
      "epoch": 0.8788435731372892,
      "grad_norm": 0.9550523474148769,
      "learning_rate": 3.800375615590124e-07,
      "loss": 0.1078,
      "step": 30125
    },
    {
      "epoch": 0.8788727463679328,
      "grad_norm": 0.8383321912229541,
      "learning_rate": 3.798569183433115e-07,
      "loss": 0.1209,
      "step": 30126
    },
    {
      "epoch": 0.8789019195985763,
      "grad_norm": 0.7164831033059212,
      "learning_rate": 3.796763163751449e-07,
      "loss": 0.0934,
      "step": 30127
    },
    {
      "epoch": 0.8789310928292199,
      "grad_norm": 0.9388970028053739,
      "learning_rate": 3.7949575565612626e-07,
      "loss": 0.0829,
      "step": 30128
    },
    {
      "epoch": 0.8789602660598634,
      "grad_norm": 0.8746108903663825,
      "learning_rate": 3.7931523618786605e-07,
      "loss": 0.1208,
      "step": 30129
    },
    {
      "epoch": 0.878989439290507,
      "grad_norm": 0.9518464912522664,
      "learning_rate": 3.7913475797197616e-07,
      "loss": 0.1028,
      "step": 30130
    },
    {
      "epoch": 0.8790186125211505,
      "grad_norm": 0.8934136366208427,
      "learning_rate": 3.789543210100688e-07,
      "loss": 0.1041,
      "step": 30131
    },
    {
      "epoch": 0.8790477857517942,
      "grad_norm": 1.497982141631577,
      "learning_rate": 3.787739253037537e-07,
      "loss": 0.098,
      "step": 30132
    },
    {
      "epoch": 0.8790769589824378,
      "grad_norm": 0.8273318173618878,
      "learning_rate": 3.785935708546401e-07,
      "loss": 0.1017,
      "step": 30133
    },
    {
      "epoch": 0.8791061322130813,
      "grad_norm": 0.7497054741907678,
      "learning_rate": 3.7841325766434236e-07,
      "loss": 0.095,
      "step": 30134
    },
    {
      "epoch": 0.8791353054437249,
      "grad_norm": 1.0387847957457483,
      "learning_rate": 3.7823298573446687e-07,
      "loss": 0.0966,
      "step": 30135
    },
    {
      "epoch": 0.8791644786743684,
      "grad_norm": 0.8097839007625008,
      "learning_rate": 3.7805275506662355e-07,
      "loss": 0.1179,
      "step": 30136
    },
    {
      "epoch": 0.879193651905012,
      "grad_norm": 0.8732204206518616,
      "learning_rate": 3.778725656624227e-07,
      "loss": 0.1078,
      "step": 30137
    },
    {
      "epoch": 0.8792228251356555,
      "grad_norm": 0.8567947949250957,
      "learning_rate": 3.776924175234725e-07,
      "loss": 0.121,
      "step": 30138
    },
    {
      "epoch": 0.8792519983662991,
      "grad_norm": 1.1688406674700176,
      "learning_rate": 3.775123106513795e-07,
      "loss": 0.0971,
      "step": 30139
    },
    {
      "epoch": 0.8792811715969426,
      "grad_norm": 0.8019344356493667,
      "learning_rate": 3.7733224504775344e-07,
      "loss": 0.1092,
      "step": 30140
    },
    {
      "epoch": 0.8793103448275862,
      "grad_norm": 0.9010931894003198,
      "learning_rate": 3.7715222071420253e-07,
      "loss": 0.1175,
      "step": 30141
    },
    {
      "epoch": 0.8793395180582297,
      "grad_norm": 0.9679309741482341,
      "learning_rate": 3.769722376523327e-07,
      "loss": 0.1171,
      "step": 30142
    },
    {
      "epoch": 0.8793686912888733,
      "grad_norm": 0.9531648527760062,
      "learning_rate": 3.76792295863751e-07,
      "loss": 0.1175,
      "step": 30143
    },
    {
      "epoch": 0.8793978645195168,
      "grad_norm": 0.9525118817732205,
      "learning_rate": 3.766123953500639e-07,
      "loss": 0.1195,
      "step": 30144
    },
    {
      "epoch": 0.8794270377501604,
      "grad_norm": 0.8026345896916173,
      "learning_rate": 3.7643253611287734e-07,
      "loss": 0.0972,
      "step": 30145
    },
    {
      "epoch": 0.8794562109808041,
      "grad_norm": 1.0289423310726562,
      "learning_rate": 3.762527181537984e-07,
      "loss": 0.0985,
      "step": 30146
    },
    {
      "epoch": 0.8794853842114476,
      "grad_norm": 0.8479740417898695,
      "learning_rate": 3.760729414744302e-07,
      "loss": 0.1142,
      "step": 30147
    },
    {
      "epoch": 0.8795145574420912,
      "grad_norm": 0.8876567653180892,
      "learning_rate": 3.758932060763798e-07,
      "loss": 0.1036,
      "step": 30148
    },
    {
      "epoch": 0.8795437306727347,
      "grad_norm": 0.9156554228765477,
      "learning_rate": 3.757135119612515e-07,
      "loss": 0.1044,
      "step": 30149
    },
    {
      "epoch": 0.8795729039033783,
      "grad_norm": 0.786024507820376,
      "learning_rate": 3.755338591306473e-07,
      "loss": 0.1046,
      "step": 30150
    },
    {
      "epoch": 0.8796020771340218,
      "grad_norm": 0.7131721520840634,
      "learning_rate": 3.753542475861738e-07,
      "loss": 0.136,
      "step": 30151
    },
    {
      "epoch": 0.8796312503646654,
      "grad_norm": 0.6966960885413169,
      "learning_rate": 3.75174677329434e-07,
      "loss": 0.0937,
      "step": 30152
    },
    {
      "epoch": 0.8796604235953089,
      "grad_norm": 1.554708338985056,
      "learning_rate": 3.7499514836202954e-07,
      "loss": 0.1018,
      "step": 30153
    },
    {
      "epoch": 0.8796895968259525,
      "grad_norm": 0.884565350692909,
      "learning_rate": 3.7481566068556575e-07,
      "loss": 0.0866,
      "step": 30154
    },
    {
      "epoch": 0.879718770056596,
      "grad_norm": 0.8087524084328968,
      "learning_rate": 3.74636214301643e-07,
      "loss": 0.1291,
      "step": 30155
    },
    {
      "epoch": 0.8797479432872396,
      "grad_norm": 0.8313123893698217,
      "learning_rate": 3.744568092118633e-07,
      "loss": 0.094,
      "step": 30156
    },
    {
      "epoch": 0.8797771165178832,
      "grad_norm": 0.8654617412353212,
      "learning_rate": 3.742774454178294e-07,
      "loss": 0.1,
      "step": 30157
    },
    {
      "epoch": 0.8798062897485267,
      "grad_norm": 0.7752968563793792,
      "learning_rate": 3.740981229211427e-07,
      "loss": 0.1252,
      "step": 30158
    },
    {
      "epoch": 0.8798354629791704,
      "grad_norm": 0.8847875580572274,
      "learning_rate": 3.739188417234041e-07,
      "loss": 0.1138,
      "step": 30159
    },
    {
      "epoch": 0.8798646362098139,
      "grad_norm": 0.9552669953758516,
      "learning_rate": 3.737396018262124e-07,
      "loss": 0.1187,
      "step": 30160
    },
    {
      "epoch": 0.8798938094404575,
      "grad_norm": 0.8074904802698792,
      "learning_rate": 3.7356040323117016e-07,
      "loss": 0.1026,
      "step": 30161
    },
    {
      "epoch": 0.879922982671101,
      "grad_norm": 0.8214451204027293,
      "learning_rate": 3.733812459398761e-07,
      "loss": 0.1191,
      "step": 30162
    },
    {
      "epoch": 0.8799521559017446,
      "grad_norm": 0.9769523954735072,
      "learning_rate": 3.73202129953929e-07,
      "loss": 0.1034,
      "step": 30163
    },
    {
      "epoch": 0.8799813291323881,
      "grad_norm": 0.7063403750544465,
      "learning_rate": 3.730230552749292e-07,
      "loss": 0.0912,
      "step": 30164
    },
    {
      "epoch": 0.8800105023630317,
      "grad_norm": 0.6989168611891687,
      "learning_rate": 3.7284402190447546e-07,
      "loss": 0.135,
      "step": 30165
    },
    {
      "epoch": 0.8800396755936752,
      "grad_norm": 0.8157231699521588,
      "learning_rate": 3.7266502984416477e-07,
      "loss": 0.1035,
      "step": 30166
    },
    {
      "epoch": 0.8800688488243188,
      "grad_norm": 1.1671000651664127,
      "learning_rate": 3.7248607909559697e-07,
      "loss": 0.1036,
      "step": 30167
    },
    {
      "epoch": 0.8800980220549623,
      "grad_norm": 0.7715153986115448,
      "learning_rate": 3.7230716966036915e-07,
      "loss": 0.0905,
      "step": 30168
    },
    {
      "epoch": 0.8801271952856059,
      "grad_norm": 0.876595675384357,
      "learning_rate": 3.7212830154007675e-07,
      "loss": 0.1093,
      "step": 30169
    },
    {
      "epoch": 0.8801563685162495,
      "grad_norm": 0.953601495349331,
      "learning_rate": 3.7194947473631837e-07,
      "loss": 0.1222,
      "step": 30170
    },
    {
      "epoch": 0.880185541746893,
      "grad_norm": 0.862498674999839,
      "learning_rate": 3.7177068925069116e-07,
      "loss": 0.1162,
      "step": 30171
    },
    {
      "epoch": 0.8802147149775366,
      "grad_norm": 0.8836441089597472,
      "learning_rate": 3.7159194508479046e-07,
      "loss": 0.1326,
      "step": 30172
    },
    {
      "epoch": 0.8802438882081802,
      "grad_norm": 0.8572154163014452,
      "learning_rate": 3.7141324224021116e-07,
      "loss": 0.1065,
      "step": 30173
    },
    {
      "epoch": 0.8802730614388238,
      "grad_norm": 1.014929219004118,
      "learning_rate": 3.7123458071855024e-07,
      "loss": 0.1051,
      "step": 30174
    },
    {
      "epoch": 0.8803022346694673,
      "grad_norm": 0.8122855596044031,
      "learning_rate": 3.7105596052140145e-07,
      "loss": 0.1376,
      "step": 30175
    },
    {
      "epoch": 0.8803314079001109,
      "grad_norm": 0.7589586008961208,
      "learning_rate": 3.708773816503608e-07,
      "loss": 0.1035,
      "step": 30176
    },
    {
      "epoch": 0.8803605811307544,
      "grad_norm": 0.7209743075329469,
      "learning_rate": 3.706988441070203e-07,
      "loss": 0.1359,
      "step": 30177
    },
    {
      "epoch": 0.880389754361398,
      "grad_norm": 0.7143563623297943,
      "learning_rate": 3.7052034789297697e-07,
      "loss": 0.0876,
      "step": 30178
    },
    {
      "epoch": 0.8804189275920415,
      "grad_norm": 1.0480815545668183,
      "learning_rate": 3.7034189300982294e-07,
      "loss": 0.0997,
      "step": 30179
    },
    {
      "epoch": 0.8804481008226851,
      "grad_norm": 0.7673726984716717,
      "learning_rate": 3.7016347945914966e-07,
      "loss": 0.117,
      "step": 30180
    },
    {
      "epoch": 0.8804772740533287,
      "grad_norm": 0.6952595807552829,
      "learning_rate": 3.699851072425525e-07,
      "loss": 0.1018,
      "step": 30181
    },
    {
      "epoch": 0.8805064472839722,
      "grad_norm": 0.8821413448512636,
      "learning_rate": 3.698067763616231e-07,
      "loss": 0.1159,
      "step": 30182
    },
    {
      "epoch": 0.8805356205146158,
      "grad_norm": 0.9795421892790883,
      "learning_rate": 3.696284868179534e-07,
      "loss": 0.1087,
      "step": 30183
    },
    {
      "epoch": 0.8805647937452593,
      "grad_norm": 0.6566982558593002,
      "learning_rate": 3.6945023861313547e-07,
      "loss": 0.1128,
      "step": 30184
    },
    {
      "epoch": 0.8805939669759029,
      "grad_norm": 0.8421261691287741,
      "learning_rate": 3.6927203174876027e-07,
      "loss": 0.1046,
      "step": 30185
    },
    {
      "epoch": 0.8806231402065464,
      "grad_norm": 0.8631026093023745,
      "learning_rate": 3.6909386622641876e-07,
      "loss": 0.1012,
      "step": 30186
    },
    {
      "epoch": 0.8806523134371901,
      "grad_norm": 0.8232195206923908,
      "learning_rate": 3.689157420477013e-07,
      "loss": 0.0996,
      "step": 30187
    },
    {
      "epoch": 0.8806814866678336,
      "grad_norm": 0.9174507808088106,
      "learning_rate": 3.687376592141995e-07,
      "loss": 0.1314,
      "step": 30188
    },
    {
      "epoch": 0.8807106598984772,
      "grad_norm": 0.8254176508882628,
      "learning_rate": 3.6855961772750193e-07,
      "loss": 0.0857,
      "step": 30189
    },
    {
      "epoch": 0.8807398331291207,
      "grad_norm": 0.8672602160728197,
      "learning_rate": 3.68381617589198e-07,
      "loss": 0.0941,
      "step": 30190
    },
    {
      "epoch": 0.8807690063597643,
      "grad_norm": 0.8352647499556992,
      "learning_rate": 3.682036588008786e-07,
      "loss": 0.1155,
      "step": 30191
    },
    {
      "epoch": 0.8807981795904078,
      "grad_norm": 0.9017842170348959,
      "learning_rate": 3.6802574136413084e-07,
      "loss": 0.1294,
      "step": 30192
    },
    {
      "epoch": 0.8808273528210514,
      "grad_norm": 0.7935272301615023,
      "learning_rate": 3.678478652805423e-07,
      "loss": 0.1041,
      "step": 30193
    },
    {
      "epoch": 0.880856526051695,
      "grad_norm": 0.8954750138041571,
      "learning_rate": 3.676700305517028e-07,
      "loss": 0.1378,
      "step": 30194
    },
    {
      "epoch": 0.8808856992823385,
      "grad_norm": 0.9790343919425698,
      "learning_rate": 3.674922371792e-07,
      "loss": 0.1232,
      "step": 30195
    },
    {
      "epoch": 0.8809148725129821,
      "grad_norm": 1.0833191201543046,
      "learning_rate": 3.673144851646199e-07,
      "loss": 0.1253,
      "step": 30196
    },
    {
      "epoch": 0.8809440457436256,
      "grad_norm": 0.7122042425090758,
      "learning_rate": 3.671367745095511e-07,
      "loss": 0.1127,
      "step": 30197
    },
    {
      "epoch": 0.8809732189742692,
      "grad_norm": 0.7927459358635081,
      "learning_rate": 3.6695910521557797e-07,
      "loss": 0.1189,
      "step": 30198
    },
    {
      "epoch": 0.8810023922049127,
      "grad_norm": 0.7376181023768205,
      "learning_rate": 3.6678147728428926e-07,
      "loss": 0.1042,
      "step": 30199
    },
    {
      "epoch": 0.8810315654355564,
      "grad_norm": 0.8077804932474733,
      "learning_rate": 3.6660389071726807e-07,
      "loss": 0.0969,
      "step": 30200
    },
    {
      "epoch": 0.8810607386661999,
      "grad_norm": 0.7568147108828605,
      "learning_rate": 3.664263455161027e-07,
      "loss": 0.1205,
      "step": 30201
    },
    {
      "epoch": 0.8810899118968435,
      "grad_norm": 0.9443235460663865,
      "learning_rate": 3.6624884168237675e-07,
      "loss": 0.1218,
      "step": 30202
    },
    {
      "epoch": 0.881119085127487,
      "grad_norm": 0.9146561949814475,
      "learning_rate": 3.66071379217674e-07,
      "loss": 0.1306,
      "step": 30203
    },
    {
      "epoch": 0.8811482583581306,
      "grad_norm": 0.7435052455702427,
      "learning_rate": 3.658939581235793e-07,
      "loss": 0.126,
      "step": 30204
    },
    {
      "epoch": 0.8811774315887742,
      "grad_norm": 0.8530381042113752,
      "learning_rate": 3.6571657840167864e-07,
      "loss": 0.1057,
      "step": 30205
    },
    {
      "epoch": 0.8812066048194177,
      "grad_norm": 0.8355369961185656,
      "learning_rate": 3.6553924005355347e-07,
      "loss": 0.0845,
      "step": 30206
    },
    {
      "epoch": 0.8812357780500613,
      "grad_norm": 0.8559363073796667,
      "learning_rate": 3.6536194308078756e-07,
      "loss": 0.1119,
      "step": 30207
    },
    {
      "epoch": 0.8812649512807048,
      "grad_norm": 1.2055206965322574,
      "learning_rate": 3.6518468748496406e-07,
      "loss": 0.1162,
      "step": 30208
    },
    {
      "epoch": 0.8812941245113484,
      "grad_norm": 0.8828364406691396,
      "learning_rate": 3.650074732676656e-07,
      "loss": 0.1211,
      "step": 30209
    },
    {
      "epoch": 0.8813232977419919,
      "grad_norm": 0.8566573650686539,
      "learning_rate": 3.648303004304721e-07,
      "loss": 0.0958,
      "step": 30210
    },
    {
      "epoch": 0.8813524709726355,
      "grad_norm": 0.7686853905378299,
      "learning_rate": 3.6465316897496883e-07,
      "loss": 0.0915,
      "step": 30211
    },
    {
      "epoch": 0.881381644203279,
      "grad_norm": 0.7660954448389473,
      "learning_rate": 3.6447607890273516e-07,
      "loss": 0.118,
      "step": 30212
    },
    {
      "epoch": 0.8814108174339226,
      "grad_norm": 1.2803163696680397,
      "learning_rate": 3.6429903021535207e-07,
      "loss": 0.1309,
      "step": 30213
    },
    {
      "epoch": 0.8814399906645662,
      "grad_norm": 0.8122822849721811,
      "learning_rate": 3.641220229144016e-07,
      "loss": 0.0962,
      "step": 30214
    },
    {
      "epoch": 0.8814691638952098,
      "grad_norm": 0.9291086541392866,
      "learning_rate": 3.63945057001463e-07,
      "loss": 0.1174,
      "step": 30215
    },
    {
      "epoch": 0.8814983371258533,
      "grad_norm": 0.7421057537233868,
      "learning_rate": 3.6376813247811503e-07,
      "loss": 0.1095,
      "step": 30216
    },
    {
      "epoch": 0.8815275103564969,
      "grad_norm": 0.8081893349893342,
      "learning_rate": 3.635912493459387e-07,
      "loss": 0.1213,
      "step": 30217
    },
    {
      "epoch": 0.8815566835871405,
      "grad_norm": 0.7958339955453644,
      "learning_rate": 3.634144076065133e-07,
      "loss": 0.1276,
      "step": 30218
    },
    {
      "epoch": 0.881585856817784,
      "grad_norm": 0.773084285292648,
      "learning_rate": 3.63237607261418e-07,
      "loss": 0.1135,
      "step": 30219
    },
    {
      "epoch": 0.8816150300484276,
      "grad_norm": 0.8580062597910707,
      "learning_rate": 3.6306084831222887e-07,
      "loss": 0.1146,
      "step": 30220
    },
    {
      "epoch": 0.8816442032790711,
      "grad_norm": 1.190768283142839,
      "learning_rate": 3.628841307605269e-07,
      "loss": 0.1097,
      "step": 30221
    },
    {
      "epoch": 0.8816733765097147,
      "grad_norm": 0.7718992014176562,
      "learning_rate": 3.6270745460788736e-07,
      "loss": 0.1004,
      "step": 30222
    },
    {
      "epoch": 0.8817025497403582,
      "grad_norm": 0.757772427516553,
      "learning_rate": 3.625308198558897e-07,
      "loss": 0.1068,
      "step": 30223
    },
    {
      "epoch": 0.8817317229710018,
      "grad_norm": 0.8011985273552965,
      "learning_rate": 3.6235422650610863e-07,
      "loss": 0.109,
      "step": 30224
    },
    {
      "epoch": 0.8817608962016453,
      "grad_norm": 0.8914904698648717,
      "learning_rate": 3.62177674560123e-07,
      "loss": 0.1159,
      "step": 30225
    },
    {
      "epoch": 0.8817900694322889,
      "grad_norm": 0.6703580860724604,
      "learning_rate": 3.6200116401950704e-07,
      "loss": 0.1239,
      "step": 30226
    },
    {
      "epoch": 0.8818192426629325,
      "grad_norm": 0.9313783388406465,
      "learning_rate": 3.6182469488583836e-07,
      "loss": 0.0931,
      "step": 30227
    },
    {
      "epoch": 0.8818484158935761,
      "grad_norm": 1.0304906920137527,
      "learning_rate": 3.616482671606908e-07,
      "loss": 0.1236,
      "step": 30228
    },
    {
      "epoch": 0.8818775891242197,
      "grad_norm": 0.7672447611229704,
      "learning_rate": 3.6147188084564075e-07,
      "loss": 0.1131,
      "step": 30229
    },
    {
      "epoch": 0.8819067623548632,
      "grad_norm": 0.8632346594672483,
      "learning_rate": 3.612955359422621e-07,
      "loss": 0.1032,
      "step": 30230
    },
    {
      "epoch": 0.8819359355855068,
      "grad_norm": 0.8417856631660557,
      "learning_rate": 3.611192324521301e-07,
      "loss": 0.1315,
      "step": 30231
    },
    {
      "epoch": 0.8819651088161503,
      "grad_norm": 0.7716576250477684,
      "learning_rate": 3.6094297037681857e-07,
      "loss": 0.0889,
      "step": 30232
    },
    {
      "epoch": 0.8819942820467939,
      "grad_norm": 0.5673189806323955,
      "learning_rate": 3.607667497178996e-07,
      "loss": 0.1137,
      "step": 30233
    },
    {
      "epoch": 0.8820234552774374,
      "grad_norm": 1.2397629243526644,
      "learning_rate": 3.6059057047694745e-07,
      "loss": 0.1183,
      "step": 30234
    },
    {
      "epoch": 0.882052628508081,
      "grad_norm": 0.7958689435050259,
      "learning_rate": 3.6041443265553645e-07,
      "loss": 0.0991,
      "step": 30235
    },
    {
      "epoch": 0.8820818017387245,
      "grad_norm": 0.7340789986518017,
      "learning_rate": 3.602383362552375e-07,
      "loss": 0.0997,
      "step": 30236
    },
    {
      "epoch": 0.8821109749693681,
      "grad_norm": 1.2508656996866652,
      "learning_rate": 3.6006228127762275e-07,
      "loss": 0.1301,
      "step": 30237
    },
    {
      "epoch": 0.8821401482000116,
      "grad_norm": 0.9875033771728545,
      "learning_rate": 3.598862677242643e-07,
      "loss": 0.1124,
      "step": 30238
    },
    {
      "epoch": 0.8821693214306552,
      "grad_norm": 0.8114853958236029,
      "learning_rate": 3.5971029559673407e-07,
      "loss": 0.0895,
      "step": 30239
    },
    {
      "epoch": 0.8821984946612987,
      "grad_norm": 1.0385342276752447,
      "learning_rate": 3.59534364896601e-07,
      "loss": 0.1121,
      "step": 30240
    },
    {
      "epoch": 0.8822276678919424,
      "grad_norm": 0.7225282116525515,
      "learning_rate": 3.5935847562543927e-07,
      "loss": 0.1008,
      "step": 30241
    },
    {
      "epoch": 0.882256841122586,
      "grad_norm": 0.8569040528906543,
      "learning_rate": 3.591826277848165e-07,
      "loss": 0.1013,
      "step": 30242
    },
    {
      "epoch": 0.8822860143532295,
      "grad_norm": 0.7433117161667993,
      "learning_rate": 3.5900682137630317e-07,
      "loss": 0.097,
      "step": 30243
    },
    {
      "epoch": 0.8823151875838731,
      "grad_norm": 0.8608631443274006,
      "learning_rate": 3.5883105640146965e-07,
      "loss": 0.1158,
      "step": 30244
    },
    {
      "epoch": 0.8823443608145166,
      "grad_norm": 1.0101357825938861,
      "learning_rate": 3.5865533286188415e-07,
      "loss": 0.1232,
      "step": 30245
    },
    {
      "epoch": 0.8823735340451602,
      "grad_norm": 0.8340525484015279,
      "learning_rate": 3.58479650759116e-07,
      "loss": 0.1139,
      "step": 30246
    },
    {
      "epoch": 0.8824027072758037,
      "grad_norm": 0.7593287722072896,
      "learning_rate": 3.583040100947327e-07,
      "loss": 0.1226,
      "step": 30247
    },
    {
      "epoch": 0.8824318805064473,
      "grad_norm": 0.9341213761325549,
      "learning_rate": 3.5812841087030427e-07,
      "loss": 0.1289,
      "step": 30248
    },
    {
      "epoch": 0.8824610537370908,
      "grad_norm": 0.7890900124503014,
      "learning_rate": 3.5795285308739715e-07,
      "loss": 0.0969,
      "step": 30249
    },
    {
      "epoch": 0.8824902269677344,
      "grad_norm": 0.7419355140165514,
      "learning_rate": 3.577773367475779e-07,
      "loss": 0.1042,
      "step": 30250
    },
    {
      "epoch": 0.8825194001983779,
      "grad_norm": 0.7754554477757386,
      "learning_rate": 3.576018618524152e-07,
      "loss": 0.1147,
      "step": 30251
    },
    {
      "epoch": 0.8825485734290215,
      "grad_norm": 0.8652486385826137,
      "learning_rate": 3.574264284034745e-07,
      "loss": 0.1177,
      "step": 30252
    },
    {
      "epoch": 0.882577746659665,
      "grad_norm": 0.8863230988863542,
      "learning_rate": 3.572510364023224e-07,
      "loss": 0.1053,
      "step": 30253
    },
    {
      "epoch": 0.8826069198903087,
      "grad_norm": 0.7918883451187265,
      "learning_rate": 3.5707568585052477e-07,
      "loss": 0.098,
      "step": 30254
    },
    {
      "epoch": 0.8826360931209523,
      "grad_norm": 0.7949978564938222,
      "learning_rate": 3.569003767496476e-07,
      "loss": 0.1235,
      "step": 30255
    },
    {
      "epoch": 0.8826652663515958,
      "grad_norm": 1.299755252294959,
      "learning_rate": 3.5672510910125526e-07,
      "loss": 0.1428,
      "step": 30256
    },
    {
      "epoch": 0.8826944395822394,
      "grad_norm": 0.7907743204613458,
      "learning_rate": 3.565498829069119e-07,
      "loss": 0.103,
      "step": 30257
    },
    {
      "epoch": 0.8827236128128829,
      "grad_norm": 0.8460585996047849,
      "learning_rate": 3.563746981681826e-07,
      "loss": 0.1137,
      "step": 30258
    },
    {
      "epoch": 0.8827527860435265,
      "grad_norm": 1.0933770669696037,
      "learning_rate": 3.561995548866326e-07,
      "loss": 0.1239,
      "step": 30259
    },
    {
      "epoch": 0.88278195927417,
      "grad_norm": 0.7781846495940566,
      "learning_rate": 3.560244530638235e-07,
      "loss": 0.0979,
      "step": 30260
    },
    {
      "epoch": 0.8828111325048136,
      "grad_norm": 0.8418009683284758,
      "learning_rate": 3.558493927013201e-07,
      "loss": 0.1074,
      "step": 30261
    },
    {
      "epoch": 0.8828403057354571,
      "grad_norm": 0.7863138275600805,
      "learning_rate": 3.5567437380068515e-07,
      "loss": 0.1113,
      "step": 30262
    },
    {
      "epoch": 0.8828694789661007,
      "grad_norm": 1.299581592141793,
      "learning_rate": 3.554993963634795e-07,
      "loss": 0.1033,
      "step": 30263
    },
    {
      "epoch": 0.8828986521967442,
      "grad_norm": 0.9671923105206143,
      "learning_rate": 3.5532446039126645e-07,
      "loss": 0.1229,
      "step": 30264
    },
    {
      "epoch": 0.8829278254273878,
      "grad_norm": 0.7420217463153288,
      "learning_rate": 3.551495658856091e-07,
      "loss": 0.112,
      "step": 30265
    },
    {
      "epoch": 0.8829569986580313,
      "grad_norm": 0.8974301122852568,
      "learning_rate": 3.5497471284806686e-07,
      "loss": 0.1269,
      "step": 30266
    },
    {
      "epoch": 0.8829861718886749,
      "grad_norm": 1.0018350939994582,
      "learning_rate": 3.5479990128020113e-07,
      "loss": 0.0921,
      "step": 30267
    },
    {
      "epoch": 0.8830153451193186,
      "grad_norm": 0.9819348515423417,
      "learning_rate": 3.5462513118357413e-07,
      "loss": 0.1246,
      "step": 30268
    },
    {
      "epoch": 0.8830445183499621,
      "grad_norm": 0.8769700484705546,
      "learning_rate": 3.544504025597445e-07,
      "loss": 0.1112,
      "step": 30269
    },
    {
      "epoch": 0.8830736915806057,
      "grad_norm": 0.8802326679739839,
      "learning_rate": 3.542757154102716e-07,
      "loss": 0.1104,
      "step": 30270
    },
    {
      "epoch": 0.8831028648112492,
      "grad_norm": 0.8391767456555339,
      "learning_rate": 3.54101069736717e-07,
      "loss": 0.1093,
      "step": 30271
    },
    {
      "epoch": 0.8831320380418928,
      "grad_norm": 0.6753819973574647,
      "learning_rate": 3.5392646554063935e-07,
      "loss": 0.1079,
      "step": 30272
    },
    {
      "epoch": 0.8831612112725363,
      "grad_norm": 0.7957394720387498,
      "learning_rate": 3.537519028235964e-07,
      "loss": 0.1181,
      "step": 30273
    },
    {
      "epoch": 0.8831903845031799,
      "grad_norm": 0.9294920446919498,
      "learning_rate": 3.535773815871485e-07,
      "loss": 0.1081,
      "step": 30274
    },
    {
      "epoch": 0.8832195577338234,
      "grad_norm": 1.0407690985588065,
      "learning_rate": 3.534029018328516e-07,
      "loss": 0.1038,
      "step": 30275
    },
    {
      "epoch": 0.883248730964467,
      "grad_norm": 0.9803304389381334,
      "learning_rate": 3.5322846356226405e-07,
      "loss": 0.1218,
      "step": 30276
    },
    {
      "epoch": 0.8832779041951105,
      "grad_norm": 1.0250165615672384,
      "learning_rate": 3.5305406677694386e-07,
      "loss": 0.1137,
      "step": 30277
    },
    {
      "epoch": 0.8833070774257541,
      "grad_norm": 1.0597643994693118,
      "learning_rate": 3.5287971147844823e-07,
      "loss": 0.1113,
      "step": 30278
    },
    {
      "epoch": 0.8833362506563976,
      "grad_norm": 0.6969120680101855,
      "learning_rate": 3.5270539766833257e-07,
      "loss": 0.107,
      "step": 30279
    },
    {
      "epoch": 0.8833654238870412,
      "grad_norm": 1.0829992251926237,
      "learning_rate": 3.5253112534815336e-07,
      "loss": 0.1206,
      "step": 30280
    },
    {
      "epoch": 0.8833945971176849,
      "grad_norm": 1.1661278745786465,
      "learning_rate": 3.5235689451946775e-07,
      "loss": 0.1067,
      "step": 30281
    },
    {
      "epoch": 0.8834237703483284,
      "grad_norm": 1.0443561061873734,
      "learning_rate": 3.521827051838295e-07,
      "loss": 0.0962,
      "step": 30282
    },
    {
      "epoch": 0.883452943578972,
      "grad_norm": 0.9537307633965,
      "learning_rate": 3.52008557342795e-07,
      "loss": 0.1177,
      "step": 30283
    },
    {
      "epoch": 0.8834821168096155,
      "grad_norm": 0.7337475191683551,
      "learning_rate": 3.5183445099791825e-07,
      "loss": 0.0946,
      "step": 30284
    },
    {
      "epoch": 0.8835112900402591,
      "grad_norm": 1.1754368881679809,
      "learning_rate": 3.516603861507545e-07,
      "loss": 0.1096,
      "step": 30285
    },
    {
      "epoch": 0.8835404632709026,
      "grad_norm": 0.7435421860743248,
      "learning_rate": 3.5148636280285697e-07,
      "loss": 0.1121,
      "step": 30286
    },
    {
      "epoch": 0.8835696365015462,
      "grad_norm": 1.0549735837093794,
      "learning_rate": 3.513123809557789e-07,
      "loss": 0.1186,
      "step": 30287
    },
    {
      "epoch": 0.8835988097321897,
      "grad_norm": 0.8718229764373705,
      "learning_rate": 3.5113844061107404e-07,
      "loss": 0.1243,
      "step": 30288
    },
    {
      "epoch": 0.8836279829628333,
      "grad_norm": 2.081912003183867,
      "learning_rate": 3.509645417702967e-07,
      "loss": 0.1217,
      "step": 30289
    },
    {
      "epoch": 0.8836571561934768,
      "grad_norm": 1.1329910540016377,
      "learning_rate": 3.5079068443499676e-07,
      "loss": 0.1147,
      "step": 30290
    },
    {
      "epoch": 0.8836863294241204,
      "grad_norm": 0.8690301360376276,
      "learning_rate": 3.506168686067285e-07,
      "loss": 0.0998,
      "step": 30291
    },
    {
      "epoch": 0.883715502654764,
      "grad_norm": 1.1275935446741046,
      "learning_rate": 3.504430942870429e-07,
      "loss": 0.09,
      "step": 30292
    },
    {
      "epoch": 0.8837446758854075,
      "grad_norm": 0.7895830958470886,
      "learning_rate": 3.5026936147749104e-07,
      "loss": 0.1115,
      "step": 30293
    },
    {
      "epoch": 0.8837738491160511,
      "grad_norm": 0.8889682049730542,
      "learning_rate": 3.500956701796243e-07,
      "loss": 0.1199,
      "step": 30294
    },
    {
      "epoch": 0.8838030223466947,
      "grad_norm": 0.7569732883636934,
      "learning_rate": 3.4992202039499377e-07,
      "loss": 0.1126,
      "step": 30295
    },
    {
      "epoch": 0.8838321955773383,
      "grad_norm": 0.7844529605231352,
      "learning_rate": 3.497484121251499e-07,
      "loss": 0.0896,
      "step": 30296
    },
    {
      "epoch": 0.8838613688079818,
      "grad_norm": 0.6706005533343086,
      "learning_rate": 3.4957484537164076e-07,
      "loss": 0.1146,
      "step": 30297
    },
    {
      "epoch": 0.8838905420386254,
      "grad_norm": 0.7684084409947594,
      "learning_rate": 3.494013201360186e-07,
      "loss": 0.1139,
      "step": 30298
    },
    {
      "epoch": 0.8839197152692689,
      "grad_norm": 1.0253168927714207,
      "learning_rate": 3.492278364198309e-07,
      "loss": 0.1272,
      "step": 30299
    },
    {
      "epoch": 0.8839488884999125,
      "grad_norm": 0.9081985058797085,
      "learning_rate": 3.490543942246255e-07,
      "loss": 0.1214,
      "step": 30300
    },
    {
      "epoch": 0.883978061730556,
      "grad_norm": 0.8650806966578726,
      "learning_rate": 3.488809935519533e-07,
      "loss": 0.1174,
      "step": 30301
    },
    {
      "epoch": 0.8840072349611996,
      "grad_norm": 0.952124751383058,
      "learning_rate": 3.4870763440336185e-07,
      "loss": 0.1157,
      "step": 30302
    },
    {
      "epoch": 0.8840364081918431,
      "grad_norm": 0.8657833867463129,
      "learning_rate": 3.485343167803973e-07,
      "loss": 0.1027,
      "step": 30303
    },
    {
      "epoch": 0.8840655814224867,
      "grad_norm": 0.8805401006232512,
      "learning_rate": 3.4836104068460887e-07,
      "loss": 0.1072,
      "step": 30304
    },
    {
      "epoch": 0.8840947546531303,
      "grad_norm": 0.994418975411668,
      "learning_rate": 3.48187806117542e-07,
      "loss": 0.11,
      "step": 30305
    },
    {
      "epoch": 0.8841239278837738,
      "grad_norm": 0.7017094904926031,
      "learning_rate": 3.480146130807438e-07,
      "loss": 0.1046,
      "step": 30306
    },
    {
      "epoch": 0.8841531011144174,
      "grad_norm": 0.7848220442586089,
      "learning_rate": 3.4784146157576025e-07,
      "loss": 0.1075,
      "step": 30307
    },
    {
      "epoch": 0.884182274345061,
      "grad_norm": 0.9510315721539675,
      "learning_rate": 3.4766835160413846e-07,
      "loss": 0.1239,
      "step": 30308
    },
    {
      "epoch": 0.8842114475757046,
      "grad_norm": 1.1385160742814275,
      "learning_rate": 3.474952831674233e-07,
      "loss": 0.133,
      "step": 30309
    },
    {
      "epoch": 0.8842406208063481,
      "grad_norm": 0.8747383799876995,
      "learning_rate": 3.473222562671585e-07,
      "loss": 0.1212,
      "step": 30310
    },
    {
      "epoch": 0.8842697940369917,
      "grad_norm": 0.8685490919580368,
      "learning_rate": 3.4714927090489126e-07,
      "loss": 0.1087,
      "step": 30311
    },
    {
      "epoch": 0.8842989672676352,
      "grad_norm": 0.6771623979097176,
      "learning_rate": 3.46976327082163e-07,
      "loss": 0.1199,
      "step": 30312
    },
    {
      "epoch": 0.8843281404982788,
      "grad_norm": 0.794962219029144,
      "learning_rate": 3.468034248005209e-07,
      "loss": 0.1196,
      "step": 30313
    },
    {
      "epoch": 0.8843573137289223,
      "grad_norm": 0.8725883111671113,
      "learning_rate": 3.466305640615059e-07,
      "loss": 0.1123,
      "step": 30314
    },
    {
      "epoch": 0.8843864869595659,
      "grad_norm": 0.7223817005236681,
      "learning_rate": 3.4645774486666285e-07,
      "loss": 0.118,
      "step": 30315
    },
    {
      "epoch": 0.8844156601902095,
      "grad_norm": 0.749383241880221,
      "learning_rate": 3.4628496721753444e-07,
      "loss": 0.1165,
      "step": 30316
    },
    {
      "epoch": 0.884444833420853,
      "grad_norm": 0.9503710213224816,
      "learning_rate": 3.4611223111566226e-07,
      "loss": 0.0974,
      "step": 30317
    },
    {
      "epoch": 0.8844740066514966,
      "grad_norm": 0.8096765693535113,
      "learning_rate": 3.4593953656258896e-07,
      "loss": 0.105,
      "step": 30318
    },
    {
      "epoch": 0.8845031798821401,
      "grad_norm": 0.7867137257183453,
      "learning_rate": 3.457668835598571e-07,
      "loss": 0.1145,
      "step": 30319
    },
    {
      "epoch": 0.8845323531127837,
      "grad_norm": 1.102137486792454,
      "learning_rate": 3.4559427210900663e-07,
      "loss": 0.1027,
      "step": 30320
    },
    {
      "epoch": 0.8845615263434272,
      "grad_norm": 0.8603445635648463,
      "learning_rate": 3.454217022115802e-07,
      "loss": 0.122,
      "step": 30321
    },
    {
      "epoch": 0.8845906995740709,
      "grad_norm": 0.815858513164158,
      "learning_rate": 3.452491738691183e-07,
      "loss": 0.1111,
      "step": 30322
    },
    {
      "epoch": 0.8846198728047144,
      "grad_norm": 0.6665759354368596,
      "learning_rate": 3.45076687083159e-07,
      "loss": 0.102,
      "step": 30323
    },
    {
      "epoch": 0.884649046035358,
      "grad_norm": 1.4909042422648486,
      "learning_rate": 3.44904241855244e-07,
      "loss": 0.0927,
      "step": 30324
    },
    {
      "epoch": 0.8846782192660015,
      "grad_norm": 0.8096901343232552,
      "learning_rate": 3.447318381869136e-07,
      "loss": 0.1112,
      "step": 30325
    },
    {
      "epoch": 0.8847073924966451,
      "grad_norm": 0.9453686884324168,
      "learning_rate": 3.445594760797061e-07,
      "loss": 0.1079,
      "step": 30326
    },
    {
      "epoch": 0.8847365657272886,
      "grad_norm": 0.7234627055790871,
      "learning_rate": 3.443871555351597e-07,
      "loss": 0.0901,
      "step": 30327
    },
    {
      "epoch": 0.8847657389579322,
      "grad_norm": 0.8254188611841817,
      "learning_rate": 3.4421487655481366e-07,
      "loss": 0.0975,
      "step": 30328
    },
    {
      "epoch": 0.8847949121885758,
      "grad_norm": 0.7246687590352812,
      "learning_rate": 3.440426391402063e-07,
      "loss": 0.0927,
      "step": 30329
    },
    {
      "epoch": 0.8848240854192193,
      "grad_norm": 0.8764899631729903,
      "learning_rate": 3.43870443292873e-07,
      "loss": 0.1043,
      "step": 30330
    },
    {
      "epoch": 0.8848532586498629,
      "grad_norm": 0.8373716420881642,
      "learning_rate": 3.436982890143542e-07,
      "loss": 0.1229,
      "step": 30331
    },
    {
      "epoch": 0.8848824318805064,
      "grad_norm": 0.7997293485721308,
      "learning_rate": 3.435261763061859e-07,
      "loss": 0.1116,
      "step": 30332
    },
    {
      "epoch": 0.88491160511115,
      "grad_norm": 0.9667778708676278,
      "learning_rate": 3.43354105169903e-07,
      "loss": 0.135,
      "step": 30333
    },
    {
      "epoch": 0.8849407783417935,
      "grad_norm": 0.7458684991803146,
      "learning_rate": 3.431820756070442e-07,
      "loss": 0.1301,
      "step": 30334
    },
    {
      "epoch": 0.8849699515724372,
      "grad_norm": 0.8914313326095475,
      "learning_rate": 3.430100876191439e-07,
      "loss": 0.1234,
      "step": 30335
    },
    {
      "epoch": 0.8849991248030807,
      "grad_norm": 0.8700656007969616,
      "learning_rate": 3.4283814120773753e-07,
      "loss": 0.1036,
      "step": 30336
    },
    {
      "epoch": 0.8850282980337243,
      "grad_norm": 1.0175748851464634,
      "learning_rate": 3.426662363743599e-07,
      "loss": 0.1164,
      "step": 30337
    },
    {
      "epoch": 0.8850574712643678,
      "grad_norm": 0.9406545152020644,
      "learning_rate": 3.424943731205477e-07,
      "loss": 0.1189,
      "step": 30338
    },
    {
      "epoch": 0.8850866444950114,
      "grad_norm": 0.8426086877704665,
      "learning_rate": 3.4232255144783347e-07,
      "loss": 0.0977,
      "step": 30339
    },
    {
      "epoch": 0.885115817725655,
      "grad_norm": 0.9989210816802246,
      "learning_rate": 3.42150771357751e-07,
      "loss": 0.1201,
      "step": 30340
    },
    {
      "epoch": 0.8851449909562985,
      "grad_norm": 0.7275818106060049,
      "learning_rate": 3.419790328518352e-07,
      "loss": 0.1052,
      "step": 30341
    },
    {
      "epoch": 0.8851741641869421,
      "grad_norm": 0.7937811983282704,
      "learning_rate": 3.4180733593161764e-07,
      "loss": 0.0996,
      "step": 30342
    },
    {
      "epoch": 0.8852033374175856,
      "grad_norm": 0.6934409654506852,
      "learning_rate": 3.4163568059863374e-07,
      "loss": 0.0883,
      "step": 30343
    },
    {
      "epoch": 0.8852325106482292,
      "grad_norm": 1.0038186410180796,
      "learning_rate": 3.414640668544128e-07,
      "loss": 0.1137,
      "step": 30344
    },
    {
      "epoch": 0.8852616838788727,
      "grad_norm": 0.7151777752932028,
      "learning_rate": 3.4129249470048974e-07,
      "loss": 0.1123,
      "step": 30345
    },
    {
      "epoch": 0.8852908571095163,
      "grad_norm": 0.8763191749524867,
      "learning_rate": 3.41120964138395e-07,
      "loss": 0.1089,
      "step": 30346
    },
    {
      "epoch": 0.8853200303401598,
      "grad_norm": 0.8118309774776161,
      "learning_rate": 3.409494751696596e-07,
      "loss": 0.1074,
      "step": 30347
    },
    {
      "epoch": 0.8853492035708034,
      "grad_norm": 0.9261107718805629,
      "learning_rate": 3.4077802779581504e-07,
      "loss": 0.0919,
      "step": 30348
    },
    {
      "epoch": 0.885378376801447,
      "grad_norm": 0.864050094739019,
      "learning_rate": 3.406066220183929e-07,
      "loss": 0.1105,
      "step": 30349
    },
    {
      "epoch": 0.8854075500320906,
      "grad_norm": 0.8010293586694941,
      "learning_rate": 3.404352578389214e-07,
      "loss": 0.1065,
      "step": 30350
    },
    {
      "epoch": 0.8854367232627341,
      "grad_norm": 0.715757383092985,
      "learning_rate": 3.4026393525893266e-07,
      "loss": 0.1173,
      "step": 30351
    },
    {
      "epoch": 0.8854658964933777,
      "grad_norm": 0.7882231160982873,
      "learning_rate": 3.4009265427995483e-07,
      "loss": 0.1016,
      "step": 30352
    },
    {
      "epoch": 0.8854950697240213,
      "grad_norm": 0.7943256913202561,
      "learning_rate": 3.3992141490351685e-07,
      "loss": 0.0949,
      "step": 30353
    },
    {
      "epoch": 0.8855242429546648,
      "grad_norm": 0.792414663988053,
      "learning_rate": 3.3975021713114844e-07,
      "loss": 0.1012,
      "step": 30354
    },
    {
      "epoch": 0.8855534161853084,
      "grad_norm": 0.9474072103656929,
      "learning_rate": 3.395790609643779e-07,
      "loss": 0.1585,
      "step": 30355
    },
    {
      "epoch": 0.8855825894159519,
      "grad_norm": 1.0532927138993091,
      "learning_rate": 3.3940794640473284e-07,
      "loss": 0.1174,
      "step": 30356
    },
    {
      "epoch": 0.8856117626465955,
      "grad_norm": 0.6979188327652479,
      "learning_rate": 3.3923687345374046e-07,
      "loss": 0.1171,
      "step": 30357
    },
    {
      "epoch": 0.885640935877239,
      "grad_norm": 1.392831635861828,
      "learning_rate": 3.390658421129295e-07,
      "loss": 0.1041,
      "step": 30358
    },
    {
      "epoch": 0.8856701091078826,
      "grad_norm": 0.756555154112626,
      "learning_rate": 3.388948523838259e-07,
      "loss": 0.1172,
      "step": 30359
    },
    {
      "epoch": 0.8856992823385261,
      "grad_norm": 1.1926946760902775,
      "learning_rate": 3.3872390426795467e-07,
      "loss": 0.1008,
      "step": 30360
    },
    {
      "epoch": 0.8857284555691697,
      "grad_norm": 0.7766733283219366,
      "learning_rate": 3.385529977668456e-07,
      "loss": 0.1133,
      "step": 30361
    },
    {
      "epoch": 0.8857576287998133,
      "grad_norm": 0.7369969660292518,
      "learning_rate": 3.383821328820225e-07,
      "loss": 0.108,
      "step": 30362
    },
    {
      "epoch": 0.8857868020304569,
      "grad_norm": 1.0297691496982326,
      "learning_rate": 3.382113096150097e-07,
      "loss": 0.1336,
      "step": 30363
    },
    {
      "epoch": 0.8858159752611005,
      "grad_norm": 0.813095504044418,
      "learning_rate": 3.380405279673349e-07,
      "loss": 0.1031,
      "step": 30364
    },
    {
      "epoch": 0.885845148491744,
      "grad_norm": 0.9703168646153482,
      "learning_rate": 3.3786978794052126e-07,
      "loss": 0.1379,
      "step": 30365
    },
    {
      "epoch": 0.8858743217223876,
      "grad_norm": 0.9349777106145506,
      "learning_rate": 3.376990895360921e-07,
      "loss": 0.1289,
      "step": 30366
    },
    {
      "epoch": 0.8859034949530311,
      "grad_norm": 0.8325957039835654,
      "learning_rate": 3.3752843275557224e-07,
      "loss": 0.0895,
      "step": 30367
    },
    {
      "epoch": 0.8859326681836747,
      "grad_norm": 0.8702869652325833,
      "learning_rate": 3.3735781760048714e-07,
      "loss": 0.1073,
      "step": 30368
    },
    {
      "epoch": 0.8859618414143182,
      "grad_norm": 0.7044139624804796,
      "learning_rate": 3.371872440723578e-07,
      "loss": 0.1108,
      "step": 30369
    },
    {
      "epoch": 0.8859910146449618,
      "grad_norm": 0.8736508089684976,
      "learning_rate": 3.370167121727069e-07,
      "loss": 0.1027,
      "step": 30370
    },
    {
      "epoch": 0.8860201878756053,
      "grad_norm": 0.7745660315239808,
      "learning_rate": 3.3684622190305825e-07,
      "loss": 0.0869,
      "step": 30371
    },
    {
      "epoch": 0.8860493611062489,
      "grad_norm": 0.6383409050373748,
      "learning_rate": 3.3667577326493283e-07,
      "loss": 0.1149,
      "step": 30372
    },
    {
      "epoch": 0.8860785343368924,
      "grad_norm": 0.7618712373043488,
      "learning_rate": 3.3650536625985384e-07,
      "loss": 0.1091,
      "step": 30373
    },
    {
      "epoch": 0.886107707567536,
      "grad_norm": 1.0625097758772686,
      "learning_rate": 3.363350008893407e-07,
      "loss": 0.1144,
      "step": 30374
    },
    {
      "epoch": 0.8861368807981795,
      "grad_norm": 0.7828996684426,
      "learning_rate": 3.3616467715491654e-07,
      "loss": 0.1043,
      "step": 30375
    },
    {
      "epoch": 0.8861660540288232,
      "grad_norm": 0.70035622400141,
      "learning_rate": 3.3599439505810015e-07,
      "loss": 0.1211,
      "step": 30376
    },
    {
      "epoch": 0.8861952272594668,
      "grad_norm": 0.8927629012197409,
      "learning_rate": 3.358241546004121e-07,
      "loss": 0.1268,
      "step": 30377
    },
    {
      "epoch": 0.8862244004901103,
      "grad_norm": 0.9462367269295212,
      "learning_rate": 3.3565395578337214e-07,
      "loss": 0.1323,
      "step": 30378
    },
    {
      "epoch": 0.8862535737207539,
      "grad_norm": 0.974531682763787,
      "learning_rate": 3.354837986085013e-07,
      "loss": 0.1314,
      "step": 30379
    },
    {
      "epoch": 0.8862827469513974,
      "grad_norm": 0.8371538846374478,
      "learning_rate": 3.353136830773168e-07,
      "loss": 0.1071,
      "step": 30380
    },
    {
      "epoch": 0.886311920182041,
      "grad_norm": 0.8964652179080821,
      "learning_rate": 3.351436091913385e-07,
      "loss": 0.122,
      "step": 30381
    },
    {
      "epoch": 0.8863410934126845,
      "grad_norm": 0.772242333638482,
      "learning_rate": 3.349735769520851e-07,
      "loss": 0.0863,
      "step": 30382
    },
    {
      "epoch": 0.8863702666433281,
      "grad_norm": 0.7731169190457556,
      "learning_rate": 3.3480358636107267e-07,
      "loss": 0.1095,
      "step": 30383
    },
    {
      "epoch": 0.8863994398739716,
      "grad_norm": 0.7525963182497357,
      "learning_rate": 3.346336374198206e-07,
      "loss": 0.112,
      "step": 30384
    },
    {
      "epoch": 0.8864286131046152,
      "grad_norm": 0.7043585510672596,
      "learning_rate": 3.3446373012984647e-07,
      "loss": 0.1016,
      "step": 30385
    },
    {
      "epoch": 0.8864577863352587,
      "grad_norm": 0.6955703191132063,
      "learning_rate": 3.3429386449266634e-07,
      "loss": 0.1052,
      "step": 30386
    },
    {
      "epoch": 0.8864869595659023,
      "grad_norm": 0.9402183031429808,
      "learning_rate": 3.3412404050979564e-07,
      "loss": 0.1084,
      "step": 30387
    },
    {
      "epoch": 0.8865161327965458,
      "grad_norm": 0.9400248364293934,
      "learning_rate": 3.3395425818275264e-07,
      "loss": 0.0924,
      "step": 30388
    },
    {
      "epoch": 0.8865453060271895,
      "grad_norm": 0.893022260749343,
      "learning_rate": 3.337845175130522e-07,
      "loss": 0.1098,
      "step": 30389
    },
    {
      "epoch": 0.8865744792578331,
      "grad_norm": 0.9950712262562712,
      "learning_rate": 3.336148185022081e-07,
      "loss": 0.1274,
      "step": 30390
    },
    {
      "epoch": 0.8866036524884766,
      "grad_norm": 0.8041470734874085,
      "learning_rate": 3.3344516115173863e-07,
      "loss": 0.0984,
      "step": 30391
    },
    {
      "epoch": 0.8866328257191202,
      "grad_norm": 0.832538593492917,
      "learning_rate": 3.33275545463157e-07,
      "loss": 0.1172,
      "step": 30392
    },
    {
      "epoch": 0.8866619989497637,
      "grad_norm": 0.9968711353932206,
      "learning_rate": 3.3310597143797585e-07,
      "loss": 0.1215,
      "step": 30393
    },
    {
      "epoch": 0.8866911721804073,
      "grad_norm": 0.7186352544941272,
      "learning_rate": 3.329364390777118e-07,
      "loss": 0.1069,
      "step": 30394
    },
    {
      "epoch": 0.8867203454110508,
      "grad_norm": 1.319439937713482,
      "learning_rate": 3.327669483838758e-07,
      "loss": 0.1224,
      "step": 30395
    },
    {
      "epoch": 0.8867495186416944,
      "grad_norm": 0.8814657572629593,
      "learning_rate": 3.325974993579839e-07,
      "loss": 0.0908,
      "step": 30396
    },
    {
      "epoch": 0.8867786918723379,
      "grad_norm": 0.8183405807279585,
      "learning_rate": 3.3242809200154603e-07,
      "loss": 0.107,
      "step": 30397
    },
    {
      "epoch": 0.8868078651029815,
      "grad_norm": 0.9222259577856428,
      "learning_rate": 3.3225872631607646e-07,
      "loss": 0.1071,
      "step": 30398
    },
    {
      "epoch": 0.886837038333625,
      "grad_norm": 0.8522762644758137,
      "learning_rate": 3.320894023030868e-07,
      "loss": 0.1116,
      "step": 30399
    },
    {
      "epoch": 0.8868662115642686,
      "grad_norm": 0.7650674869070023,
      "learning_rate": 3.319201199640881e-07,
      "loss": 0.0998,
      "step": 30400
    },
    {
      "epoch": 0.8868953847949121,
      "grad_norm": 0.7338584267253627,
      "learning_rate": 3.3175087930059246e-07,
      "loss": 0.103,
      "step": 30401
    },
    {
      "epoch": 0.8869245580255557,
      "grad_norm": 1.0480343563621168,
      "learning_rate": 3.3158168031411085e-07,
      "loss": 0.1184,
      "step": 30402
    },
    {
      "epoch": 0.8869537312561994,
      "grad_norm": 0.9729019843719371,
      "learning_rate": 3.3141252300615377e-07,
      "loss": 0.1189,
      "step": 30403
    },
    {
      "epoch": 0.8869829044868429,
      "grad_norm": 0.7023401695125001,
      "learning_rate": 3.3124340737823056e-07,
      "loss": 0.1196,
      "step": 30404
    },
    {
      "epoch": 0.8870120777174865,
      "grad_norm": 0.7756368928637107,
      "learning_rate": 3.3107433343185224e-07,
      "loss": 0.1112,
      "step": 30405
    },
    {
      "epoch": 0.88704125094813,
      "grad_norm": 0.8369702667736236,
      "learning_rate": 3.3090530116852757e-07,
      "loss": 0.1031,
      "step": 30406
    },
    {
      "epoch": 0.8870704241787736,
      "grad_norm": 1.2126969738676157,
      "learning_rate": 3.3073631058976486e-07,
      "loss": 0.0888,
      "step": 30407
    },
    {
      "epoch": 0.8870995974094171,
      "grad_norm": 0.8124887938795178,
      "learning_rate": 3.305673616970745e-07,
      "loss": 0.1267,
      "step": 30408
    },
    {
      "epoch": 0.8871287706400607,
      "grad_norm": 0.8935019345644799,
      "learning_rate": 3.3039845449196473e-07,
      "loss": 0.1546,
      "step": 30409
    },
    {
      "epoch": 0.8871579438707042,
      "grad_norm": 0.820766387428527,
      "learning_rate": 3.3022958897594157e-07,
      "loss": 0.1252,
      "step": 30410
    },
    {
      "epoch": 0.8871871171013478,
      "grad_norm": 1.0688200141873976,
      "learning_rate": 3.30060765150515e-07,
      "loss": 0.1052,
      "step": 30411
    },
    {
      "epoch": 0.8872162903319913,
      "grad_norm": 0.9664418098268349,
      "learning_rate": 3.2989198301719095e-07,
      "loss": 0.1005,
      "step": 30412
    },
    {
      "epoch": 0.8872454635626349,
      "grad_norm": 1.0969730287842412,
      "learning_rate": 3.2972324257747543e-07,
      "loss": 0.1285,
      "step": 30413
    },
    {
      "epoch": 0.8872746367932784,
      "grad_norm": 0.9190987287618725,
      "learning_rate": 3.295545438328762e-07,
      "loss": 0.1015,
      "step": 30414
    },
    {
      "epoch": 0.887303810023922,
      "grad_norm": 1.037815457992433,
      "learning_rate": 3.293858867848998e-07,
      "loss": 0.1077,
      "step": 30415
    },
    {
      "epoch": 0.8873329832545657,
      "grad_norm": 0.749096335415323,
      "learning_rate": 3.2921727143505114e-07,
      "loss": 0.1158,
      "step": 30416
    },
    {
      "epoch": 0.8873621564852092,
      "grad_norm": 0.805998932613953,
      "learning_rate": 3.290486977848345e-07,
      "loss": 0.1025,
      "step": 30417
    },
    {
      "epoch": 0.8873913297158528,
      "grad_norm": 0.6761166927583815,
      "learning_rate": 3.2888016583575765e-07,
      "loss": 0.1073,
      "step": 30418
    },
    {
      "epoch": 0.8874205029464963,
      "grad_norm": 0.8600428273131147,
      "learning_rate": 3.2871167558932214e-07,
      "loss": 0.0964,
      "step": 30419
    },
    {
      "epoch": 0.8874496761771399,
      "grad_norm": 0.8516478008239923,
      "learning_rate": 3.28543227047034e-07,
      "loss": 0.1233,
      "step": 30420
    },
    {
      "epoch": 0.8874788494077834,
      "grad_norm": 0.9747511822210017,
      "learning_rate": 3.2837482021039757e-07,
      "loss": 0.108,
      "step": 30421
    },
    {
      "epoch": 0.887508022638427,
      "grad_norm": 0.673979853826961,
      "learning_rate": 3.282064550809155e-07,
      "loss": 0.1074,
      "step": 30422
    },
    {
      "epoch": 0.8875371958690705,
      "grad_norm": 1.093949927830266,
      "learning_rate": 3.2803813166009004e-07,
      "loss": 0.137,
      "step": 30423
    },
    {
      "epoch": 0.8875663690997141,
      "grad_norm": 0.9164141893291394,
      "learning_rate": 3.2786984994942596e-07,
      "loss": 0.1227,
      "step": 30424
    },
    {
      "epoch": 0.8875955423303576,
      "grad_norm": 0.760827304849373,
      "learning_rate": 3.2770160995042323e-07,
      "loss": 0.0866,
      "step": 30425
    },
    {
      "epoch": 0.8876247155610012,
      "grad_norm": 1.0346784719106643,
      "learning_rate": 3.275334116645867e-07,
      "loss": 0.117,
      "step": 30426
    },
    {
      "epoch": 0.8876538887916448,
      "grad_norm": 0.7707234390139983,
      "learning_rate": 3.2736525509341476e-07,
      "loss": 0.1127,
      "step": 30427
    },
    {
      "epoch": 0.8876830620222883,
      "grad_norm": 0.8335679426506698,
      "learning_rate": 3.2719714023841163e-07,
      "loss": 0.1262,
      "step": 30428
    },
    {
      "epoch": 0.8877122352529319,
      "grad_norm": 0.7506067864274482,
      "learning_rate": 3.270290671010773e-07,
      "loss": 0.0833,
      "step": 30429
    },
    {
      "epoch": 0.8877414084835755,
      "grad_norm": 0.8514175594375909,
      "learning_rate": 3.268610356829105e-07,
      "loss": 0.1138,
      "step": 30430
    },
    {
      "epoch": 0.8877705817142191,
      "grad_norm": 0.8487384069260846,
      "learning_rate": 3.266930459854134e-07,
      "loss": 0.1412,
      "step": 30431
    },
    {
      "epoch": 0.8877997549448626,
      "grad_norm": 0.7939705584414907,
      "learning_rate": 3.2652509801008536e-07,
      "loss": 0.1074,
      "step": 30432
    },
    {
      "epoch": 0.8878289281755062,
      "grad_norm": 0.8212437316467182,
      "learning_rate": 3.263571917584257e-07,
      "loss": 0.1118,
      "step": 30433
    },
    {
      "epoch": 0.8878581014061497,
      "grad_norm": 0.844297758483977,
      "learning_rate": 3.2618932723193274e-07,
      "loss": 0.103,
      "step": 30434
    },
    {
      "epoch": 0.8878872746367933,
      "grad_norm": 0.8544561466150221,
      "learning_rate": 3.260215044321069e-07,
      "loss": 0.1095,
      "step": 30435
    },
    {
      "epoch": 0.8879164478674368,
      "grad_norm": 0.7951195004484574,
      "learning_rate": 3.2585372336044473e-07,
      "loss": 0.1035,
      "step": 30436
    },
    {
      "epoch": 0.8879456210980804,
      "grad_norm": 0.865101965359929,
      "learning_rate": 3.2568598401844344e-07,
      "loss": 0.1073,
      "step": 30437
    },
    {
      "epoch": 0.887974794328724,
      "grad_norm": 0.9363742201354716,
      "learning_rate": 3.255182864076034e-07,
      "loss": 0.119,
      "step": 30438
    },
    {
      "epoch": 0.8880039675593675,
      "grad_norm": 0.8501357082705552,
      "learning_rate": 3.2535063052942015e-07,
      "loss": 0.1263,
      "step": 30439
    },
    {
      "epoch": 0.888033140790011,
      "grad_norm": 0.8730648155104872,
      "learning_rate": 3.2518301638538976e-07,
      "loss": 0.1014,
      "step": 30440
    },
    {
      "epoch": 0.8880623140206546,
      "grad_norm": 0.908414300042689,
      "learning_rate": 3.250154439770098e-07,
      "loss": 0.1016,
      "step": 30441
    },
    {
      "epoch": 0.8880914872512982,
      "grad_norm": 0.8683485720759831,
      "learning_rate": 3.2484791330577635e-07,
      "loss": 0.1504,
      "step": 30442
    },
    {
      "epoch": 0.8881206604819417,
      "grad_norm": 1.0706309103476457,
      "learning_rate": 3.246804243731838e-07,
      "loss": 0.1075,
      "step": 30443
    },
    {
      "epoch": 0.8881498337125854,
      "grad_norm": 0.6936667445533639,
      "learning_rate": 3.245129771807287e-07,
      "loss": 0.106,
      "step": 30444
    },
    {
      "epoch": 0.8881790069432289,
      "grad_norm": 0.8525681655626866,
      "learning_rate": 3.24345571729906e-07,
      "loss": 0.1237,
      "step": 30445
    },
    {
      "epoch": 0.8882081801738725,
      "grad_norm": 0.9698863640439978,
      "learning_rate": 3.2417820802221e-07,
      "loss": 0.1253,
      "step": 30446
    },
    {
      "epoch": 0.888237353404516,
      "grad_norm": 1.1217498817935327,
      "learning_rate": 3.24010886059134e-07,
      "loss": 0.0998,
      "step": 30447
    },
    {
      "epoch": 0.8882665266351596,
      "grad_norm": 0.8988503477875328,
      "learning_rate": 3.238436058421729e-07,
      "loss": 0.1288,
      "step": 30448
    },
    {
      "epoch": 0.8882956998658031,
      "grad_norm": 0.7698741762173608,
      "learning_rate": 3.236763673728194e-07,
      "loss": 0.093,
      "step": 30449
    },
    {
      "epoch": 0.8883248730964467,
      "grad_norm": 1.034610237159529,
      "learning_rate": 3.235091706525673e-07,
      "loss": 0.0992,
      "step": 30450
    },
    {
      "epoch": 0.8883540463270903,
      "grad_norm": 0.7852459626938117,
      "learning_rate": 3.2334201568290924e-07,
      "loss": 0.1391,
      "step": 30451
    },
    {
      "epoch": 0.8883832195577338,
      "grad_norm": 0.8902457560368345,
      "learning_rate": 3.2317490246533745e-07,
      "loss": 0.1137,
      "step": 30452
    },
    {
      "epoch": 0.8884123927883774,
      "grad_norm": 1.07622350155105,
      "learning_rate": 3.230078310013429e-07,
      "loss": 0.1048,
      "step": 30453
    },
    {
      "epoch": 0.8884415660190209,
      "grad_norm": 0.8138661621072937,
      "learning_rate": 3.2284080129241837e-07,
      "loss": 0.1096,
      "step": 30454
    },
    {
      "epoch": 0.8884707392496645,
      "grad_norm": 0.7863486309532878,
      "learning_rate": 3.226738133400542e-07,
      "loss": 0.1056,
      "step": 30455
    },
    {
      "epoch": 0.888499912480308,
      "grad_norm": 0.8666539740372097,
      "learning_rate": 3.225068671457426e-07,
      "loss": 0.1237,
      "step": 30456
    },
    {
      "epoch": 0.8885290857109517,
      "grad_norm": 0.8918594009942206,
      "learning_rate": 3.223399627109719e-07,
      "loss": 0.1275,
      "step": 30457
    },
    {
      "epoch": 0.8885582589415952,
      "grad_norm": 0.6043041679174953,
      "learning_rate": 3.2217310003723467e-07,
      "loss": 0.1047,
      "step": 30458
    },
    {
      "epoch": 0.8885874321722388,
      "grad_norm": 0.8028948186370006,
      "learning_rate": 3.2200627912601866e-07,
      "loss": 0.1278,
      "step": 30459
    },
    {
      "epoch": 0.8886166054028823,
      "grad_norm": 1.0191922252066992,
      "learning_rate": 3.218394999788138e-07,
      "loss": 0.124,
      "step": 30460
    },
    {
      "epoch": 0.8886457786335259,
      "grad_norm": 1.0013908355360317,
      "learning_rate": 3.216727625971083e-07,
      "loss": 0.1314,
      "step": 30461
    },
    {
      "epoch": 0.8886749518641694,
      "grad_norm": 0.6720942638188737,
      "learning_rate": 3.215060669823933e-07,
      "loss": 0.1286,
      "step": 30462
    },
    {
      "epoch": 0.888704125094813,
      "grad_norm": 0.785874262867982,
      "learning_rate": 3.213394131361547e-07,
      "loss": 0.1251,
      "step": 30463
    },
    {
      "epoch": 0.8887332983254566,
      "grad_norm": 0.7468232802908306,
      "learning_rate": 3.2117280105988026e-07,
      "loss": 0.0906,
      "step": 30464
    },
    {
      "epoch": 0.8887624715561001,
      "grad_norm": 0.7520984517669977,
      "learning_rate": 3.2100623075505874e-07,
      "loss": 0.0913,
      "step": 30465
    },
    {
      "epoch": 0.8887916447867437,
      "grad_norm": 1.0473963135644402,
      "learning_rate": 3.2083970222317686e-07,
      "loss": 0.1219,
      "step": 30466
    },
    {
      "epoch": 0.8888208180173872,
      "grad_norm": 0.8427324687372726,
      "learning_rate": 3.206732154657194e-07,
      "loss": 0.0994,
      "step": 30467
    },
    {
      "epoch": 0.8888499912480308,
      "grad_norm": 0.8416467101814705,
      "learning_rate": 3.2050677048417577e-07,
      "loss": 0.112,
      "step": 30468
    },
    {
      "epoch": 0.8888791644786743,
      "grad_norm": 0.7900828921500531,
      "learning_rate": 3.203403672800309e-07,
      "loss": 0.1002,
      "step": 30469
    },
    {
      "epoch": 0.8889083377093179,
      "grad_norm": 0.8763892121160695,
      "learning_rate": 3.2017400585476923e-07,
      "loss": 0.1045,
      "step": 30470
    },
    {
      "epoch": 0.8889375109399615,
      "grad_norm": 0.8571175985732294,
      "learning_rate": 3.2000768620987776e-07,
      "loss": 0.0971,
      "step": 30471
    },
    {
      "epoch": 0.8889666841706051,
      "grad_norm": 0.8307611178054767,
      "learning_rate": 3.198414083468404e-07,
      "loss": 0.114,
      "step": 30472
    },
    {
      "epoch": 0.8889958574012486,
      "grad_norm": 1.315998495462903,
      "learning_rate": 3.19675172267141e-07,
      "loss": 0.1126,
      "step": 30473
    },
    {
      "epoch": 0.8890250306318922,
      "grad_norm": 1.105312974477422,
      "learning_rate": 3.195089779722643e-07,
      "loss": 0.0932,
      "step": 30474
    },
    {
      "epoch": 0.8890542038625358,
      "grad_norm": 0.9735997191167188,
      "learning_rate": 3.193428254636949e-07,
      "loss": 0.1035,
      "step": 30475
    },
    {
      "epoch": 0.8890833770931793,
      "grad_norm": 0.7936149384272677,
      "learning_rate": 3.191767147429159e-07,
      "loss": 0.1118,
      "step": 30476
    },
    {
      "epoch": 0.8891125503238229,
      "grad_norm": 1.138799021036221,
      "learning_rate": 3.190106458114084e-07,
      "loss": 0.1215,
      "step": 30477
    },
    {
      "epoch": 0.8891417235544664,
      "grad_norm": 1.2291060948656234,
      "learning_rate": 3.188446186706573e-07,
      "loss": 0.1187,
      "step": 30478
    },
    {
      "epoch": 0.88917089678511,
      "grad_norm": 1.1005788815254303,
      "learning_rate": 3.186786333221431e-07,
      "loss": 0.1232,
      "step": 30479
    },
    {
      "epoch": 0.8892000700157535,
      "grad_norm": 0.6889063645022628,
      "learning_rate": 3.185126897673485e-07,
      "loss": 0.114,
      "step": 30480
    },
    {
      "epoch": 0.8892292432463971,
      "grad_norm": 0.983870192076746,
      "learning_rate": 3.183467880077562e-07,
      "loss": 0.1312,
      "step": 30481
    },
    {
      "epoch": 0.8892584164770406,
      "grad_norm": 0.6741017738651957,
      "learning_rate": 3.1818092804484556e-07,
      "loss": 0.0919,
      "step": 30482
    },
    {
      "epoch": 0.8892875897076842,
      "grad_norm": 0.7882326232819421,
      "learning_rate": 3.1801510988009765e-07,
      "loss": 0.1177,
      "step": 30483
    },
    {
      "epoch": 0.8893167629383278,
      "grad_norm": 0.8376701441824327,
      "learning_rate": 3.1784933351499404e-07,
      "loss": 0.1158,
      "step": 30484
    },
    {
      "epoch": 0.8893459361689714,
      "grad_norm": 1.136097784752444,
      "learning_rate": 3.1768359895101296e-07,
      "loss": 0.1146,
      "step": 30485
    },
    {
      "epoch": 0.889375109399615,
      "grad_norm": 0.932393242138555,
      "learning_rate": 3.175179061896355e-07,
      "loss": 0.1224,
      "step": 30486
    },
    {
      "epoch": 0.8894042826302585,
      "grad_norm": 0.748704135637435,
      "learning_rate": 3.173522552323399e-07,
      "loss": 0.1164,
      "step": 30487
    },
    {
      "epoch": 0.8894334558609021,
      "grad_norm": 0.8891019213388898,
      "learning_rate": 3.171866460806061e-07,
      "loss": 0.0986,
      "step": 30488
    },
    {
      "epoch": 0.8894626290915456,
      "grad_norm": 0.7344867986012511,
      "learning_rate": 3.170210787359118e-07,
      "loss": 0.1016,
      "step": 30489
    },
    {
      "epoch": 0.8894918023221892,
      "grad_norm": 1.1247175630906892,
      "learning_rate": 3.1685555319973525e-07,
      "loss": 0.1264,
      "step": 30490
    },
    {
      "epoch": 0.8895209755528327,
      "grad_norm": 0.94693035646125,
      "learning_rate": 3.166900694735542e-07,
      "loss": 0.1247,
      "step": 30491
    },
    {
      "epoch": 0.8895501487834763,
      "grad_norm": 0.882752799471745,
      "learning_rate": 3.1652462755884686e-07,
      "loss": 0.118,
      "step": 30492
    },
    {
      "epoch": 0.8895793220141198,
      "grad_norm": 0.8356541380491931,
      "learning_rate": 3.1635922745708927e-07,
      "loss": 0.1211,
      "step": 30493
    },
    {
      "epoch": 0.8896084952447634,
      "grad_norm": 0.9078331283278978,
      "learning_rate": 3.1619386916975804e-07,
      "loss": 0.0996,
      "step": 30494
    },
    {
      "epoch": 0.8896376684754069,
      "grad_norm": 0.988811410334127,
      "learning_rate": 3.160285526983303e-07,
      "loss": 0.1261,
      "step": 30495
    },
    {
      "epoch": 0.8896668417060505,
      "grad_norm": 1.0003746410143617,
      "learning_rate": 3.158632780442816e-07,
      "loss": 0.1246,
      "step": 30496
    },
    {
      "epoch": 0.889696014936694,
      "grad_norm": 0.8954152287289935,
      "learning_rate": 3.1569804520908633e-07,
      "loss": 0.0992,
      "step": 30497
    },
    {
      "epoch": 0.8897251881673377,
      "grad_norm": 0.8224363469838618,
      "learning_rate": 3.1553285419422153e-07,
      "loss": 0.1197,
      "step": 30498
    },
    {
      "epoch": 0.8897543613979813,
      "grad_norm": 1.0507848059549514,
      "learning_rate": 3.1536770500116164e-07,
      "loss": 0.1346,
      "step": 30499
    },
    {
      "epoch": 0.8897835346286248,
      "grad_norm": 0.8836399621266284,
      "learning_rate": 3.152025976313794e-07,
      "loss": 0.1221,
      "step": 30500
    },
    {
      "epoch": 0.8898127078592684,
      "grad_norm": 0.6925661010777691,
      "learning_rate": 3.150375320863508e-07,
      "loss": 0.0937,
      "step": 30501
    },
    {
      "epoch": 0.8898418810899119,
      "grad_norm": 0.8388241395615271,
      "learning_rate": 3.1487250836754915e-07,
      "loss": 0.1088,
      "step": 30502
    },
    {
      "epoch": 0.8898710543205555,
      "grad_norm": 0.9641978352144771,
      "learning_rate": 3.147075264764465e-07,
      "loss": 0.1265,
      "step": 30503
    },
    {
      "epoch": 0.889900227551199,
      "grad_norm": 0.8059912009626359,
      "learning_rate": 3.145425864145163e-07,
      "loss": 0.1036,
      "step": 30504
    },
    {
      "epoch": 0.8899294007818426,
      "grad_norm": 0.8709323443371206,
      "learning_rate": 3.143776881832322e-07,
      "loss": 0.1259,
      "step": 30505
    },
    {
      "epoch": 0.8899585740124861,
      "grad_norm": 0.7596751859457278,
      "learning_rate": 3.1421283178406537e-07,
      "loss": 0.1075,
      "step": 30506
    },
    {
      "epoch": 0.8899877472431297,
      "grad_norm": 0.8746650117572208,
      "learning_rate": 3.140480172184873e-07,
      "loss": 0.1099,
      "step": 30507
    },
    {
      "epoch": 0.8900169204737732,
      "grad_norm": 0.8399294210591024,
      "learning_rate": 3.1388324448797083e-07,
      "loss": 0.1325,
      "step": 30508
    },
    {
      "epoch": 0.8900460937044168,
      "grad_norm": 1.0775169203826622,
      "learning_rate": 3.1371851359398465e-07,
      "loss": 0.1154,
      "step": 30509
    },
    {
      "epoch": 0.8900752669350603,
      "grad_norm": 1.135219210523027,
      "learning_rate": 3.1355382453800155e-07,
      "loss": 0.1014,
      "step": 30510
    },
    {
      "epoch": 0.890104440165704,
      "grad_norm": 0.9120801805597152,
      "learning_rate": 3.133891773214914e-07,
      "loss": 0.1263,
      "step": 30511
    },
    {
      "epoch": 0.8901336133963476,
      "grad_norm": 0.8937106915785827,
      "learning_rate": 3.1322457194592426e-07,
      "loss": 0.1296,
      "step": 30512
    },
    {
      "epoch": 0.8901627866269911,
      "grad_norm": 0.8927612096903995,
      "learning_rate": 3.130600084127683e-07,
      "loss": 0.1092,
      "step": 30513
    },
    {
      "epoch": 0.8901919598576347,
      "grad_norm": 0.9258014059962174,
      "learning_rate": 3.1289548672349514e-07,
      "loss": 0.0989,
      "step": 30514
    },
    {
      "epoch": 0.8902211330882782,
      "grad_norm": 0.8992620417502106,
      "learning_rate": 3.127310068795708e-07,
      "loss": 0.1403,
      "step": 30515
    },
    {
      "epoch": 0.8902503063189218,
      "grad_norm": 0.8881986317519851,
      "learning_rate": 3.1256656888246586e-07,
      "loss": 0.1094,
      "step": 30516
    },
    {
      "epoch": 0.8902794795495653,
      "grad_norm": 0.8222217825382221,
      "learning_rate": 3.124021727336468e-07,
      "loss": 0.1184,
      "step": 30517
    },
    {
      "epoch": 0.8903086527802089,
      "grad_norm": 0.7635861878730664,
      "learning_rate": 3.1223781843458314e-07,
      "loss": 0.0984,
      "step": 30518
    },
    {
      "epoch": 0.8903378260108524,
      "grad_norm": 0.9682910058529923,
      "learning_rate": 3.1207350598674137e-07,
      "loss": 0.13,
      "step": 30519
    },
    {
      "epoch": 0.890366999241496,
      "grad_norm": 0.8115903491035434,
      "learning_rate": 3.11909235391587e-07,
      "loss": 0.1362,
      "step": 30520
    },
    {
      "epoch": 0.8903961724721395,
      "grad_norm": 0.7768085823349589,
      "learning_rate": 3.117450066505878e-07,
      "loss": 0.099,
      "step": 30521
    },
    {
      "epoch": 0.8904253457027831,
      "grad_norm": 0.8773722598141154,
      "learning_rate": 3.1158081976521094e-07,
      "loss": 0.1294,
      "step": 30522
    },
    {
      "epoch": 0.8904545189334266,
      "grad_norm": 0.8846059299708734,
      "learning_rate": 3.114166747369218e-07,
      "loss": 0.1125,
      "step": 30523
    },
    {
      "epoch": 0.8904836921640702,
      "grad_norm": 0.8924039041252876,
      "learning_rate": 3.112525715671838e-07,
      "loss": 0.1339,
      "step": 30524
    },
    {
      "epoch": 0.8905128653947139,
      "grad_norm": 0.9959765749310289,
      "learning_rate": 3.1108851025746457e-07,
      "loss": 0.1294,
      "step": 30525
    },
    {
      "epoch": 0.8905420386253574,
      "grad_norm": 0.8864124216035949,
      "learning_rate": 3.109244908092279e-07,
      "loss": 0.1142,
      "step": 30526
    },
    {
      "epoch": 0.890571211856001,
      "grad_norm": 1.0028369673101463,
      "learning_rate": 3.1076051322393663e-07,
      "loss": 0.1179,
      "step": 30527
    },
    {
      "epoch": 0.8906003850866445,
      "grad_norm": 0.9746235679010936,
      "learning_rate": 3.105965775030573e-07,
      "loss": 0.1197,
      "step": 30528
    },
    {
      "epoch": 0.8906295583172881,
      "grad_norm": 0.7617722022236654,
      "learning_rate": 3.1043268364805257e-07,
      "loss": 0.1113,
      "step": 30529
    },
    {
      "epoch": 0.8906587315479316,
      "grad_norm": 0.649378711927433,
      "learning_rate": 3.1026883166038413e-07,
      "loss": 0.1209,
      "step": 30530
    },
    {
      "epoch": 0.8906879047785752,
      "grad_norm": 0.8936101132561894,
      "learning_rate": 3.1010502154151743e-07,
      "loss": 0.1055,
      "step": 30531
    },
    {
      "epoch": 0.8907170780092187,
      "grad_norm": 0.997456688702685,
      "learning_rate": 3.09941253292913e-07,
      "loss": 0.112,
      "step": 30532
    },
    {
      "epoch": 0.8907462512398623,
      "grad_norm": 0.8087439596778104,
      "learning_rate": 3.0977752691603303e-07,
      "loss": 0.1236,
      "step": 30533
    },
    {
      "epoch": 0.8907754244705058,
      "grad_norm": 0.7940552257410859,
      "learning_rate": 3.0961384241233907e-07,
      "loss": 0.1104,
      "step": 30534
    },
    {
      "epoch": 0.8908045977011494,
      "grad_norm": 1.3486098578327579,
      "learning_rate": 3.094501997832944e-07,
      "loss": 0.1031,
      "step": 30535
    },
    {
      "epoch": 0.890833770931793,
      "grad_norm": 1.070137905667782,
      "learning_rate": 3.092865990303584e-07,
      "loss": 0.1328,
      "step": 30536
    },
    {
      "epoch": 0.8908629441624365,
      "grad_norm": 0.7014482598401466,
      "learning_rate": 3.0912304015499106e-07,
      "loss": 0.1088,
      "step": 30537
    },
    {
      "epoch": 0.8908921173930802,
      "grad_norm": 0.8931822380794611,
      "learning_rate": 3.089595231586545e-07,
      "loss": 0.1118,
      "step": 30538
    },
    {
      "epoch": 0.8909212906237237,
      "grad_norm": 0.7487271169363175,
      "learning_rate": 3.087960480428065e-07,
      "loss": 0.0883,
      "step": 30539
    },
    {
      "epoch": 0.8909504638543673,
      "grad_norm": 0.7756367771297727,
      "learning_rate": 3.086326148089075e-07,
      "loss": 0.1131,
      "step": 30540
    },
    {
      "epoch": 0.8909796370850108,
      "grad_norm": 0.8286140357712191,
      "learning_rate": 3.0846922345841746e-07,
      "loss": 0.1071,
      "step": 30541
    },
    {
      "epoch": 0.8910088103156544,
      "grad_norm": 0.8281252707009609,
      "learning_rate": 3.083058739927941e-07,
      "loss": 0.1344,
      "step": 30542
    },
    {
      "epoch": 0.8910379835462979,
      "grad_norm": 0.8690984771551428,
      "learning_rate": 3.0814256641349517e-07,
      "loss": 0.0929,
      "step": 30543
    },
    {
      "epoch": 0.8910671567769415,
      "grad_norm": 1.0453484297338385,
      "learning_rate": 3.0797930072198e-07,
      "loss": 0.1216,
      "step": 30544
    },
    {
      "epoch": 0.891096330007585,
      "grad_norm": 1.211958748447629,
      "learning_rate": 3.0781607691970474e-07,
      "loss": 0.1067,
      "step": 30545
    },
    {
      "epoch": 0.8911255032382286,
      "grad_norm": 0.960417382087224,
      "learning_rate": 3.0765289500812866e-07,
      "loss": 0.1116,
      "step": 30546
    },
    {
      "epoch": 0.8911546764688721,
      "grad_norm": 0.8513883921626092,
      "learning_rate": 3.0748975498870627e-07,
      "loss": 0.1019,
      "step": 30547
    },
    {
      "epoch": 0.8911838496995157,
      "grad_norm": 0.8598663139293854,
      "learning_rate": 3.0732665686289574e-07,
      "loss": 0.1111,
      "step": 30548
    },
    {
      "epoch": 0.8912130229301592,
      "grad_norm": 1.0060394657794325,
      "learning_rate": 3.071636006321527e-07,
      "loss": 0.0974,
      "step": 30549
    },
    {
      "epoch": 0.8912421961608028,
      "grad_norm": 0.9333911478303079,
      "learning_rate": 3.070005862979325e-07,
      "loss": 0.1098,
      "step": 30550
    },
    {
      "epoch": 0.8912713693914464,
      "grad_norm": 0.8556123932859099,
      "learning_rate": 3.068376138616902e-07,
      "loss": 0.1133,
      "step": 30551
    },
    {
      "epoch": 0.89130054262209,
      "grad_norm": 0.8275808135699773,
      "learning_rate": 3.0667468332488237e-07,
      "loss": 0.1081,
      "step": 30552
    },
    {
      "epoch": 0.8913297158527336,
      "grad_norm": 1.247262579244288,
      "learning_rate": 3.065117946889623e-07,
      "loss": 0.0892,
      "step": 30553
    },
    {
      "epoch": 0.8913588890833771,
      "grad_norm": 0.8238164952677675,
      "learning_rate": 3.0634894795538385e-07,
      "loss": 0.1198,
      "step": 30554
    },
    {
      "epoch": 0.8913880623140207,
      "grad_norm": 0.7707484983912157,
      "learning_rate": 3.0618614312560244e-07,
      "loss": 0.1196,
      "step": 30555
    },
    {
      "epoch": 0.8914172355446642,
      "grad_norm": 0.8191705907142699,
      "learning_rate": 3.060233802010709e-07,
      "loss": 0.1,
      "step": 30556
    },
    {
      "epoch": 0.8914464087753078,
      "grad_norm": 0.9204114270653742,
      "learning_rate": 3.0586065918324025e-07,
      "loss": 0.1119,
      "step": 30557
    },
    {
      "epoch": 0.8914755820059513,
      "grad_norm": 0.6168227730592508,
      "learning_rate": 3.0569798007356653e-07,
      "loss": 0.0896,
      "step": 30558
    },
    {
      "epoch": 0.8915047552365949,
      "grad_norm": 0.7855472813687441,
      "learning_rate": 3.055353428735003e-07,
      "loss": 0.1006,
      "step": 30559
    },
    {
      "epoch": 0.8915339284672384,
      "grad_norm": 0.7277658231052057,
      "learning_rate": 3.0537274758449366e-07,
      "loss": 0.0903,
      "step": 30560
    },
    {
      "epoch": 0.891563101697882,
      "grad_norm": 0.7969145948110496,
      "learning_rate": 3.052101942079988e-07,
      "loss": 0.1249,
      "step": 30561
    },
    {
      "epoch": 0.8915922749285256,
      "grad_norm": 0.877067568969228,
      "learning_rate": 3.050476827454668e-07,
      "loss": 0.1064,
      "step": 30562
    },
    {
      "epoch": 0.8916214481591691,
      "grad_norm": 0.8154884683646071,
      "learning_rate": 3.048852131983476e-07,
      "loss": 0.1162,
      "step": 30563
    },
    {
      "epoch": 0.8916506213898127,
      "grad_norm": 0.7883368860871747,
      "learning_rate": 3.0472278556809233e-07,
      "loss": 0.1112,
      "step": 30564
    },
    {
      "epoch": 0.8916797946204563,
      "grad_norm": 0.6825588540517084,
      "learning_rate": 3.0456039985615193e-07,
      "loss": 0.1182,
      "step": 30565
    },
    {
      "epoch": 0.8917089678510999,
      "grad_norm": 0.9066670376722872,
      "learning_rate": 3.0439805606397533e-07,
      "loss": 0.1112,
      "step": 30566
    },
    {
      "epoch": 0.8917381410817434,
      "grad_norm": 0.9240938686339571,
      "learning_rate": 3.042357541930113e-07,
      "loss": 0.0867,
      "step": 30567
    },
    {
      "epoch": 0.891767314312387,
      "grad_norm": 0.9674166330384046,
      "learning_rate": 3.0407349424471043e-07,
      "loss": 0.0945,
      "step": 30568
    },
    {
      "epoch": 0.8917964875430305,
      "grad_norm": 0.8731843201559136,
      "learning_rate": 3.039112762205193e-07,
      "loss": 0.0938,
      "step": 30569
    },
    {
      "epoch": 0.8918256607736741,
      "grad_norm": 0.7800262294296054,
      "learning_rate": 3.037491001218873e-07,
      "loss": 0.119,
      "step": 30570
    },
    {
      "epoch": 0.8918548340043176,
      "grad_norm": 0.9072854256014472,
      "learning_rate": 3.0358696595026327e-07,
      "loss": 0.1109,
      "step": 30571
    },
    {
      "epoch": 0.8918840072349612,
      "grad_norm": 0.7739425465977036,
      "learning_rate": 3.034248737070933e-07,
      "loss": 0.1044,
      "step": 30572
    },
    {
      "epoch": 0.8919131804656047,
      "grad_norm": 0.8250757646433379,
      "learning_rate": 3.0326282339382453e-07,
      "loss": 0.1087,
      "step": 30573
    },
    {
      "epoch": 0.8919423536962483,
      "grad_norm": 0.7924070911825599,
      "learning_rate": 3.0310081501190415e-07,
      "loss": 0.0985,
      "step": 30574
    },
    {
      "epoch": 0.8919715269268919,
      "grad_norm": 1.0470126728379414,
      "learning_rate": 3.029388485627782e-07,
      "loss": 0.1127,
      "step": 30575
    },
    {
      "epoch": 0.8920007001575354,
      "grad_norm": 0.7333108222090367,
      "learning_rate": 3.027769240478939e-07,
      "loss": 0.1336,
      "step": 30576
    },
    {
      "epoch": 0.892029873388179,
      "grad_norm": 0.8361327535722722,
      "learning_rate": 3.0261504146869457e-07,
      "loss": 0.134,
      "step": 30577
    },
    {
      "epoch": 0.8920590466188225,
      "grad_norm": 0.811324091203435,
      "learning_rate": 3.024532008266279e-07,
      "loss": 0.1014,
      "step": 30578
    },
    {
      "epoch": 0.8920882198494662,
      "grad_norm": 0.8761814206887861,
      "learning_rate": 3.0229140212313767e-07,
      "loss": 0.1054,
      "step": 30579
    },
    {
      "epoch": 0.8921173930801097,
      "grad_norm": 0.8650337702531615,
      "learning_rate": 3.021296453596678e-07,
      "loss": 0.1161,
      "step": 30580
    },
    {
      "epoch": 0.8921465663107533,
      "grad_norm": 0.8423918525707391,
      "learning_rate": 3.019679305376627e-07,
      "loss": 0.1031,
      "step": 30581
    },
    {
      "epoch": 0.8921757395413968,
      "grad_norm": 1.008493869777864,
      "learning_rate": 3.018062576585673e-07,
      "loss": 0.114,
      "step": 30582
    },
    {
      "epoch": 0.8922049127720404,
      "grad_norm": 0.8460407679440864,
      "learning_rate": 3.016446267238238e-07,
      "loss": 0.1039,
      "step": 30583
    },
    {
      "epoch": 0.892234086002684,
      "grad_norm": 0.8650983055723295,
      "learning_rate": 3.0148303773487486e-07,
      "loss": 0.0898,
      "step": 30584
    },
    {
      "epoch": 0.8922632592333275,
      "grad_norm": 0.7098856616095556,
      "learning_rate": 3.0132149069316497e-07,
      "loss": 0.1034,
      "step": 30585
    },
    {
      "epoch": 0.892292432463971,
      "grad_norm": 0.7638130427984393,
      "learning_rate": 3.0115998560013404e-07,
      "loss": 0.1122,
      "step": 30586
    },
    {
      "epoch": 0.8923216056946146,
      "grad_norm": 0.9767240295345735,
      "learning_rate": 3.0099852245722483e-07,
      "loss": 0.1272,
      "step": 30587
    },
    {
      "epoch": 0.8923507789252582,
      "grad_norm": 1.1747269220045504,
      "learning_rate": 3.0083710126588005e-07,
      "loss": 0.1167,
      "step": 30588
    },
    {
      "epoch": 0.8923799521559017,
      "grad_norm": 0.9038430151497058,
      "learning_rate": 3.006757220275397e-07,
      "loss": 0.0973,
      "step": 30589
    },
    {
      "epoch": 0.8924091253865453,
      "grad_norm": 0.7636481866103193,
      "learning_rate": 3.005143847436437e-07,
      "loss": 0.1106,
      "step": 30590
    },
    {
      "epoch": 0.8924382986171888,
      "grad_norm": 1.0234036792038594,
      "learning_rate": 3.003530894156348e-07,
      "loss": 0.1155,
      "step": 30591
    },
    {
      "epoch": 0.8924674718478325,
      "grad_norm": 0.8167228866756323,
      "learning_rate": 3.0019183604495075e-07,
      "loss": 0.1213,
      "step": 30592
    },
    {
      "epoch": 0.892496645078476,
      "grad_norm": 0.6343908880006888,
      "learning_rate": 3.0003062463303257e-07,
      "loss": 0.0835,
      "step": 30593
    },
    {
      "epoch": 0.8925258183091196,
      "grad_norm": 0.67414439241472,
      "learning_rate": 2.99869455181318e-07,
      "loss": 0.0921,
      "step": 30594
    },
    {
      "epoch": 0.8925549915397631,
      "grad_norm": 0.7684257627189851,
      "learning_rate": 2.9970832769124823e-07,
      "loss": 0.1224,
      "step": 30595
    },
    {
      "epoch": 0.8925841647704067,
      "grad_norm": 0.7164831969761788,
      "learning_rate": 2.995472421642598e-07,
      "loss": 0.0952,
      "step": 30596
    },
    {
      "epoch": 0.8926133380010502,
      "grad_norm": 0.774801751984761,
      "learning_rate": 2.993861986017915e-07,
      "loss": 0.0964,
      "step": 30597
    },
    {
      "epoch": 0.8926425112316938,
      "grad_norm": 0.8492196878393536,
      "learning_rate": 2.992251970052806e-07,
      "loss": 0.1069,
      "step": 30598
    },
    {
      "epoch": 0.8926716844623374,
      "grad_norm": 0.816706175559454,
      "learning_rate": 2.9906423737616595e-07,
      "loss": 0.1139,
      "step": 30599
    },
    {
      "epoch": 0.8927008576929809,
      "grad_norm": 1.1452740993489874,
      "learning_rate": 2.989033197158825e-07,
      "loss": 0.11,
      "step": 30600
    },
    {
      "epoch": 0.8927300309236245,
      "grad_norm": 0.7472179425536273,
      "learning_rate": 2.9874244402586903e-07,
      "loss": 0.1386,
      "step": 30601
    },
    {
      "epoch": 0.892759204154268,
      "grad_norm": 0.8886819051906715,
      "learning_rate": 2.985816103075606e-07,
      "loss": 0.1148,
      "step": 30602
    },
    {
      "epoch": 0.8927883773849116,
      "grad_norm": 1.1445926068268717,
      "learning_rate": 2.984208185623927e-07,
      "loss": 0.1115,
      "step": 30603
    },
    {
      "epoch": 0.8928175506155551,
      "grad_norm": 0.7815328900810175,
      "learning_rate": 2.982600687918014e-07,
      "loss": 0.1243,
      "step": 30604
    },
    {
      "epoch": 0.8928467238461987,
      "grad_norm": 0.7975707815032408,
      "learning_rate": 2.980993609972221e-07,
      "loss": 0.0996,
      "step": 30605
    },
    {
      "epoch": 0.8928758970768423,
      "grad_norm": 0.8032606289605577,
      "learning_rate": 2.9793869518009e-07,
      "loss": 0.0782,
      "step": 30606
    },
    {
      "epoch": 0.8929050703074859,
      "grad_norm": 0.8663623225654811,
      "learning_rate": 2.9777807134183714e-07,
      "loss": 0.1096,
      "step": 30607
    },
    {
      "epoch": 0.8929342435381294,
      "grad_norm": 0.7444972697751595,
      "learning_rate": 2.976174894839007e-07,
      "loss": 0.1195,
      "step": 30608
    },
    {
      "epoch": 0.892963416768773,
      "grad_norm": 0.8804128856649871,
      "learning_rate": 2.974569496077123e-07,
      "loss": 0.1303,
      "step": 30609
    },
    {
      "epoch": 0.8929925899994166,
      "grad_norm": 0.8628877034987134,
      "learning_rate": 2.972964517147048e-07,
      "loss": 0.1054,
      "step": 30610
    },
    {
      "epoch": 0.8930217632300601,
      "grad_norm": 0.8268818707012316,
      "learning_rate": 2.971359958063125e-07,
      "loss": 0.131,
      "step": 30611
    },
    {
      "epoch": 0.8930509364607037,
      "grad_norm": 0.9250978819681452,
      "learning_rate": 2.9697558188396757e-07,
      "loss": 0.12,
      "step": 30612
    },
    {
      "epoch": 0.8930801096913472,
      "grad_norm": 0.8044794877353744,
      "learning_rate": 2.968152099491023e-07,
      "loss": 0.113,
      "step": 30613
    },
    {
      "epoch": 0.8931092829219908,
      "grad_norm": 0.8176311630704783,
      "learning_rate": 2.966548800031471e-07,
      "loss": 0.0922,
      "step": 30614
    },
    {
      "epoch": 0.8931384561526343,
      "grad_norm": 0.7626515244155622,
      "learning_rate": 2.964945920475354e-07,
      "loss": 0.116,
      "step": 30615
    },
    {
      "epoch": 0.8931676293832779,
      "grad_norm": 0.7250689703709754,
      "learning_rate": 2.9633434608369596e-07,
      "loss": 0.1064,
      "step": 30616
    },
    {
      "epoch": 0.8931968026139214,
      "grad_norm": 2.6020126383282434,
      "learning_rate": 2.9617414211306093e-07,
      "loss": 0.1268,
      "step": 30617
    },
    {
      "epoch": 0.893225975844565,
      "grad_norm": 0.9140808306146535,
      "learning_rate": 2.9601398013706094e-07,
      "loss": 0.1186,
      "step": 30618
    },
    {
      "epoch": 0.8932551490752086,
      "grad_norm": 0.7610128111653633,
      "learning_rate": 2.9585386015712537e-07,
      "loss": 0.0968,
      "step": 30619
    },
    {
      "epoch": 0.8932843223058522,
      "grad_norm": 0.9511054648297602,
      "learning_rate": 2.9569378217468247e-07,
      "loss": 0.1079,
      "step": 30620
    },
    {
      "epoch": 0.8933134955364957,
      "grad_norm": 0.908847078825639,
      "learning_rate": 2.9553374619116335e-07,
      "loss": 0.1329,
      "step": 30621
    },
    {
      "epoch": 0.8933426687671393,
      "grad_norm": 0.8692358887319792,
      "learning_rate": 2.953737522079952e-07,
      "loss": 0.1308,
      "step": 30622
    },
    {
      "epoch": 0.8933718419977829,
      "grad_norm": 0.7723239781444972,
      "learning_rate": 2.952138002266081e-07,
      "loss": 0.1421,
      "step": 30623
    },
    {
      "epoch": 0.8934010152284264,
      "grad_norm": 0.8969370724316609,
      "learning_rate": 2.950538902484279e-07,
      "loss": 0.1169,
      "step": 30624
    },
    {
      "epoch": 0.89343018845907,
      "grad_norm": 0.7501624440476841,
      "learning_rate": 2.9489402227488474e-07,
      "loss": 0.0857,
      "step": 30625
    },
    {
      "epoch": 0.8934593616897135,
      "grad_norm": 0.8291425232771156,
      "learning_rate": 2.9473419630740405e-07,
      "loss": 0.1151,
      "step": 30626
    },
    {
      "epoch": 0.8934885349203571,
      "grad_norm": 0.8029671988119853,
      "learning_rate": 2.9457441234741256e-07,
      "loss": 0.0864,
      "step": 30627
    },
    {
      "epoch": 0.8935177081510006,
      "grad_norm": 0.7704961735744835,
      "learning_rate": 2.944146703963374e-07,
      "loss": 0.0901,
      "step": 30628
    },
    {
      "epoch": 0.8935468813816442,
      "grad_norm": 0.8741711341205287,
      "learning_rate": 2.942549704556058e-07,
      "loss": 0.1128,
      "step": 30629
    },
    {
      "epoch": 0.8935760546122877,
      "grad_norm": 0.9295472757177593,
      "learning_rate": 2.9409531252664105e-07,
      "loss": 0.1079,
      "step": 30630
    },
    {
      "epoch": 0.8936052278429313,
      "grad_norm": 0.9614111491802272,
      "learning_rate": 2.9393569661087143e-07,
      "loss": 0.1105,
      "step": 30631
    },
    {
      "epoch": 0.8936344010735748,
      "grad_norm": 1.0608409579751146,
      "learning_rate": 2.937761227097202e-07,
      "loss": 0.0896,
      "step": 30632
    },
    {
      "epoch": 0.8936635743042185,
      "grad_norm": 0.9306061138796965,
      "learning_rate": 2.9361659082461137e-07,
      "loss": 0.1199,
      "step": 30633
    },
    {
      "epoch": 0.893692747534862,
      "grad_norm": 2.088853597013049,
      "learning_rate": 2.9345710095697036e-07,
      "loss": 0.1011,
      "step": 30634
    },
    {
      "epoch": 0.8937219207655056,
      "grad_norm": 0.9394738653616855,
      "learning_rate": 2.9329765310822156e-07,
      "loss": 0.1088,
      "step": 30635
    },
    {
      "epoch": 0.8937510939961492,
      "grad_norm": 0.7388414370064872,
      "learning_rate": 2.931382472797878e-07,
      "loss": 0.1107,
      "step": 30636
    },
    {
      "epoch": 0.8937802672267927,
      "grad_norm": 0.6401330911219283,
      "learning_rate": 2.9297888347309124e-07,
      "loss": 0.1186,
      "step": 30637
    },
    {
      "epoch": 0.8938094404574363,
      "grad_norm": 0.7369857127213442,
      "learning_rate": 2.928195616895563e-07,
      "loss": 0.103,
      "step": 30638
    },
    {
      "epoch": 0.8938386136880798,
      "grad_norm": 1.0939213670571821,
      "learning_rate": 2.926602819306046e-07,
      "loss": 0.1145,
      "step": 30639
    },
    {
      "epoch": 0.8938677869187234,
      "grad_norm": 0.7832643718469073,
      "learning_rate": 2.9250104419765724e-07,
      "loss": 0.1139,
      "step": 30640
    },
    {
      "epoch": 0.8938969601493669,
      "grad_norm": 0.8266742583807348,
      "learning_rate": 2.9234184849213696e-07,
      "loss": 0.1126,
      "step": 30641
    },
    {
      "epoch": 0.8939261333800105,
      "grad_norm": 0.9793590388440535,
      "learning_rate": 2.9218269481546545e-07,
      "loss": 0.0995,
      "step": 30642
    },
    {
      "epoch": 0.893955306610654,
      "grad_norm": 0.7229233293580005,
      "learning_rate": 2.920235831690632e-07,
      "loss": 0.1271,
      "step": 30643
    },
    {
      "epoch": 0.8939844798412976,
      "grad_norm": 0.9154397096787203,
      "learning_rate": 2.9186451355435017e-07,
      "loss": 0.1333,
      "step": 30644
    },
    {
      "epoch": 0.8940136530719411,
      "grad_norm": 0.9308696874881223,
      "learning_rate": 2.9170548597274697e-07,
      "loss": 0.1305,
      "step": 30645
    },
    {
      "epoch": 0.8940428263025848,
      "grad_norm": 0.8685920500446513,
      "learning_rate": 2.915465004256729e-07,
      "loss": 0.0946,
      "step": 30646
    },
    {
      "epoch": 0.8940719995332284,
      "grad_norm": 0.9372730465917704,
      "learning_rate": 2.9138755691454745e-07,
      "loss": 0.1112,
      "step": 30647
    },
    {
      "epoch": 0.8941011727638719,
      "grad_norm": 0.8874817740803603,
      "learning_rate": 2.912286554407906e-07,
      "loss": 0.1358,
      "step": 30648
    },
    {
      "epoch": 0.8941303459945155,
      "grad_norm": 0.9400008207788705,
      "learning_rate": 2.910697960058201e-07,
      "loss": 0.1025,
      "step": 30649
    },
    {
      "epoch": 0.894159519225159,
      "grad_norm": 0.9180184949259226,
      "learning_rate": 2.9091097861105365e-07,
      "loss": 0.1202,
      "step": 30650
    },
    {
      "epoch": 0.8941886924558026,
      "grad_norm": 0.7490221131587833,
      "learning_rate": 2.9075220325791076e-07,
      "loss": 0.0969,
      "step": 30651
    },
    {
      "epoch": 0.8942178656864461,
      "grad_norm": 0.869059582480006,
      "learning_rate": 2.905934699478069e-07,
      "loss": 0.0943,
      "step": 30652
    },
    {
      "epoch": 0.8942470389170897,
      "grad_norm": 0.9224576121602542,
      "learning_rate": 2.9043477868216154e-07,
      "loss": 0.0965,
      "step": 30653
    },
    {
      "epoch": 0.8942762121477332,
      "grad_norm": 3.962025518557501,
      "learning_rate": 2.9027612946238906e-07,
      "loss": 0.125,
      "step": 30654
    },
    {
      "epoch": 0.8943053853783768,
      "grad_norm": 0.8240901935222994,
      "learning_rate": 2.901175222899083e-07,
      "loss": 0.1219,
      "step": 30655
    },
    {
      "epoch": 0.8943345586090203,
      "grad_norm": 1.1371779334655232,
      "learning_rate": 2.899589571661332e-07,
      "loss": 0.0914,
      "step": 30656
    },
    {
      "epoch": 0.8943637318396639,
      "grad_norm": 0.8406002026070913,
      "learning_rate": 2.898004340924798e-07,
      "loss": 0.1017,
      "step": 30657
    },
    {
      "epoch": 0.8943929050703074,
      "grad_norm": 0.77795371350629,
      "learning_rate": 2.896419530703637e-07,
      "loss": 0.1167,
      "step": 30658
    },
    {
      "epoch": 0.894422078300951,
      "grad_norm": 1.038122229345276,
      "learning_rate": 2.894835141012009e-07,
      "loss": 0.1116,
      "step": 30659
    },
    {
      "epoch": 0.8944512515315947,
      "grad_norm": 0.7155179875183134,
      "learning_rate": 2.8932511718640366e-07,
      "loss": 0.0913,
      "step": 30660
    },
    {
      "epoch": 0.8944804247622382,
      "grad_norm": 0.9850546589207486,
      "learning_rate": 2.891667623273881e-07,
      "loss": 0.1044,
      "step": 30661
    },
    {
      "epoch": 0.8945095979928818,
      "grad_norm": 0.9246766592180333,
      "learning_rate": 2.8900844952556685e-07,
      "loss": 0.1404,
      "step": 30662
    },
    {
      "epoch": 0.8945387712235253,
      "grad_norm": 0.8832666813312848,
      "learning_rate": 2.888501787823533e-07,
      "loss": 0.1197,
      "step": 30663
    },
    {
      "epoch": 0.8945679444541689,
      "grad_norm": 0.7829364176625517,
      "learning_rate": 2.886919500991603e-07,
      "loss": 0.1011,
      "step": 30664
    },
    {
      "epoch": 0.8945971176848124,
      "grad_norm": 0.8653227660922241,
      "learning_rate": 2.885337634774016e-07,
      "loss": 0.1069,
      "step": 30665
    },
    {
      "epoch": 0.894626290915456,
      "grad_norm": 0.7824465549723113,
      "learning_rate": 2.883756189184889e-07,
      "loss": 0.1057,
      "step": 30666
    },
    {
      "epoch": 0.8946554641460995,
      "grad_norm": 0.9997744609921497,
      "learning_rate": 2.882175164238332e-07,
      "loss": 0.0947,
      "step": 30667
    },
    {
      "epoch": 0.8946846373767431,
      "grad_norm": 0.7816483007528533,
      "learning_rate": 2.8805945599484743e-07,
      "loss": 0.1004,
      "step": 30668
    },
    {
      "epoch": 0.8947138106073866,
      "grad_norm": 0.7103235354537379,
      "learning_rate": 2.87901437632942e-07,
      "loss": 0.1237,
      "step": 30669
    },
    {
      "epoch": 0.8947429838380302,
      "grad_norm": 0.7618700438722354,
      "learning_rate": 2.877434613395269e-07,
      "loss": 0.1019,
      "step": 30670
    },
    {
      "epoch": 0.8947721570686737,
      "grad_norm": 0.8066672420541794,
      "learning_rate": 2.875855271160133e-07,
      "loss": 0.0943,
      "step": 30671
    },
    {
      "epoch": 0.8948013302993173,
      "grad_norm": 0.9799411310826015,
      "learning_rate": 2.874276349638122e-07,
      "loss": 0.1095,
      "step": 30672
    },
    {
      "epoch": 0.894830503529961,
      "grad_norm": 0.8487921395001287,
      "learning_rate": 2.87269784884332e-07,
      "loss": 0.1159,
      "step": 30673
    },
    {
      "epoch": 0.8948596767606045,
      "grad_norm": 1.1195891799567557,
      "learning_rate": 2.8711197687898097e-07,
      "loss": 0.1378,
      "step": 30674
    },
    {
      "epoch": 0.8948888499912481,
      "grad_norm": 0.7989028904936224,
      "learning_rate": 2.869542109491702e-07,
      "loss": 0.1056,
      "step": 30675
    },
    {
      "epoch": 0.8949180232218916,
      "grad_norm": 0.7346770494945787,
      "learning_rate": 2.867964870963069e-07,
      "loss": 0.0971,
      "step": 30676
    },
    {
      "epoch": 0.8949471964525352,
      "grad_norm": 1.0206758932178501,
      "learning_rate": 2.8663880532179887e-07,
      "loss": 0.1043,
      "step": 30677
    },
    {
      "epoch": 0.8949763696831787,
      "grad_norm": 0.8518157081409864,
      "learning_rate": 2.8648116562705494e-07,
      "loss": 0.1208,
      "step": 30678
    },
    {
      "epoch": 0.8950055429138223,
      "grad_norm": 0.9966931330463132,
      "learning_rate": 2.863235680134824e-07,
      "loss": 0.1239,
      "step": 30679
    },
    {
      "epoch": 0.8950347161444658,
      "grad_norm": 0.7942440447797972,
      "learning_rate": 2.861660124824872e-07,
      "loss": 0.1207,
      "step": 30680
    },
    {
      "epoch": 0.8950638893751094,
      "grad_norm": 0.780495041468004,
      "learning_rate": 2.8600849903547666e-07,
      "loss": 0.1128,
      "step": 30681
    },
    {
      "epoch": 0.8950930626057529,
      "grad_norm": 0.9115147619165473,
      "learning_rate": 2.8585102767385685e-07,
      "loss": 0.1214,
      "step": 30682
    },
    {
      "epoch": 0.8951222358363965,
      "grad_norm": 0.7222577368355971,
      "learning_rate": 2.856935983990339e-07,
      "loss": 0.1054,
      "step": 30683
    },
    {
      "epoch": 0.89515140906704,
      "grad_norm": 0.7301611366280484,
      "learning_rate": 2.855362112124127e-07,
      "loss": 0.1142,
      "step": 30684
    },
    {
      "epoch": 0.8951805822976836,
      "grad_norm": 0.7552815843178827,
      "learning_rate": 2.8537886611539945e-07,
      "loss": 0.1079,
      "step": 30685
    },
    {
      "epoch": 0.8952097555283272,
      "grad_norm": 0.8885221919914154,
      "learning_rate": 2.8522156310939797e-07,
      "loss": 0.1102,
      "step": 30686
    },
    {
      "epoch": 0.8952389287589708,
      "grad_norm": 0.6438831330036408,
      "learning_rate": 2.850643021958127e-07,
      "loss": 0.1161,
      "step": 30687
    },
    {
      "epoch": 0.8952681019896144,
      "grad_norm": 1.0867915301485433,
      "learning_rate": 2.8490708337604756e-07,
      "loss": 0.1039,
      "step": 30688
    },
    {
      "epoch": 0.8952972752202579,
      "grad_norm": 0.7095812392346565,
      "learning_rate": 2.847499066515069e-07,
      "loss": 0.1149,
      "step": 30689
    },
    {
      "epoch": 0.8953264484509015,
      "grad_norm": 0.8682813865885494,
      "learning_rate": 2.84592772023593e-07,
      "loss": 0.118,
      "step": 30690
    },
    {
      "epoch": 0.895355621681545,
      "grad_norm": 0.8883751743992266,
      "learning_rate": 2.8443567949370974e-07,
      "loss": 0.1098,
      "step": 30691
    },
    {
      "epoch": 0.8953847949121886,
      "grad_norm": 1.3194172476721755,
      "learning_rate": 2.8427862906325875e-07,
      "loss": 0.1219,
      "step": 30692
    },
    {
      "epoch": 0.8954139681428321,
      "grad_norm": 0.8805669145462485,
      "learning_rate": 2.8412162073364227e-07,
      "loss": 0.1039,
      "step": 30693
    },
    {
      "epoch": 0.8954431413734757,
      "grad_norm": 0.7842008713991359,
      "learning_rate": 2.8396465450626186e-07,
      "loss": 0.1162,
      "step": 30694
    },
    {
      "epoch": 0.8954723146041192,
      "grad_norm": 0.927920248075283,
      "learning_rate": 2.8380773038251984e-07,
      "loss": 0.1288,
      "step": 30695
    },
    {
      "epoch": 0.8955014878347628,
      "grad_norm": 0.8649481687855042,
      "learning_rate": 2.836508483638167e-07,
      "loss": 0.1096,
      "step": 30696
    },
    {
      "epoch": 0.8955306610654064,
      "grad_norm": 0.7721159957993788,
      "learning_rate": 2.8349400845155193e-07,
      "loss": 0.0965,
      "step": 30697
    },
    {
      "epoch": 0.8955598342960499,
      "grad_norm": 0.8514232986269841,
      "learning_rate": 2.833372106471277e-07,
      "loss": 0.1059,
      "step": 30698
    },
    {
      "epoch": 0.8955890075266935,
      "grad_norm": 0.8185380777442179,
      "learning_rate": 2.8318045495194293e-07,
      "loss": 0.1026,
      "step": 30699
    },
    {
      "epoch": 0.895618180757337,
      "grad_norm": 0.680964602932964,
      "learning_rate": 2.8302374136739643e-07,
      "loss": 0.0835,
      "step": 30700
    },
    {
      "epoch": 0.8956473539879807,
      "grad_norm": 0.7553616726414424,
      "learning_rate": 2.8286706989488766e-07,
      "loss": 0.1148,
      "step": 30701
    },
    {
      "epoch": 0.8956765272186242,
      "grad_norm": 0.760820917165124,
      "learning_rate": 2.8271044053581666e-07,
      "loss": 0.1132,
      "step": 30702
    },
    {
      "epoch": 0.8957057004492678,
      "grad_norm": 0.7823646909128376,
      "learning_rate": 2.8255385329158056e-07,
      "loss": 0.1031,
      "step": 30703
    },
    {
      "epoch": 0.8957348736799113,
      "grad_norm": 0.8388846348531964,
      "learning_rate": 2.823973081635767e-07,
      "loss": 0.1039,
      "step": 30704
    },
    {
      "epoch": 0.8957640469105549,
      "grad_norm": 0.6541002407280917,
      "learning_rate": 2.822408051532044e-07,
      "loss": 0.114,
      "step": 30705
    },
    {
      "epoch": 0.8957932201411984,
      "grad_norm": 0.7585429260662029,
      "learning_rate": 2.8208434426185926e-07,
      "loss": 0.1107,
      "step": 30706
    },
    {
      "epoch": 0.895822393371842,
      "grad_norm": 0.8984268695878658,
      "learning_rate": 2.819279254909385e-07,
      "loss": 0.0971,
      "step": 30707
    },
    {
      "epoch": 0.8958515666024855,
      "grad_norm": 0.9978469007664811,
      "learning_rate": 2.8177154884183986e-07,
      "loss": 0.1216,
      "step": 30708
    },
    {
      "epoch": 0.8958807398331291,
      "grad_norm": 0.8137297061545247,
      "learning_rate": 2.8161521431595897e-07,
      "loss": 0.1117,
      "step": 30709
    },
    {
      "epoch": 0.8959099130637727,
      "grad_norm": 0.7365263239636884,
      "learning_rate": 2.814589219146896e-07,
      "loss": 0.1022,
      "step": 30710
    },
    {
      "epoch": 0.8959390862944162,
      "grad_norm": 1.0540794711851977,
      "learning_rate": 2.813026716394296e-07,
      "loss": 0.0966,
      "step": 30711
    },
    {
      "epoch": 0.8959682595250598,
      "grad_norm": 0.7269967391048545,
      "learning_rate": 2.8114646349157227e-07,
      "loss": 0.0908,
      "step": 30712
    },
    {
      "epoch": 0.8959974327557033,
      "grad_norm": 0.793578367137509,
      "learning_rate": 2.8099029747251314e-07,
      "loss": 0.1286,
      "step": 30713
    },
    {
      "epoch": 0.896026605986347,
      "grad_norm": 0.8365626249923098,
      "learning_rate": 2.8083417358364615e-07,
      "loss": 0.0899,
      "step": 30714
    },
    {
      "epoch": 0.8960557792169905,
      "grad_norm": 0.8866360400757096,
      "learning_rate": 2.806780918263652e-07,
      "loss": 0.1257,
      "step": 30715
    },
    {
      "epoch": 0.8960849524476341,
      "grad_norm": 0.8700735130147824,
      "learning_rate": 2.8052205220206406e-07,
      "loss": 0.1288,
      "step": 30716
    },
    {
      "epoch": 0.8961141256782776,
      "grad_norm": 0.8824283698640039,
      "learning_rate": 2.8036605471213453e-07,
      "loss": 0.1095,
      "step": 30717
    },
    {
      "epoch": 0.8961432989089212,
      "grad_norm": 0.9074946088251287,
      "learning_rate": 2.802100993579698e-07,
      "loss": 0.1142,
      "step": 30718
    },
    {
      "epoch": 0.8961724721395647,
      "grad_norm": 0.9216507187468439,
      "learning_rate": 2.800541861409639e-07,
      "loss": 0.1228,
      "step": 30719
    },
    {
      "epoch": 0.8962016453702083,
      "grad_norm": 0.8283107882034496,
      "learning_rate": 2.798983150625062e-07,
      "loss": 0.1107,
      "step": 30720
    },
    {
      "epoch": 0.8962308186008519,
      "grad_norm": 0.9998284171102396,
      "learning_rate": 2.797424861239906e-07,
      "loss": 0.1166,
      "step": 30721
    },
    {
      "epoch": 0.8962599918314954,
      "grad_norm": 0.681729079454811,
      "learning_rate": 2.795866993268076e-07,
      "loss": 0.0987,
      "step": 30722
    },
    {
      "epoch": 0.896289165062139,
      "grad_norm": 0.7205330169882552,
      "learning_rate": 2.794309546723467e-07,
      "loss": 0.0987,
      "step": 30723
    },
    {
      "epoch": 0.8963183382927825,
      "grad_norm": 0.943005385774912,
      "learning_rate": 2.79275252161999e-07,
      "loss": 0.1013,
      "step": 30724
    },
    {
      "epoch": 0.8963475115234261,
      "grad_norm": 0.7708921117820327,
      "learning_rate": 2.791195917971562e-07,
      "loss": 0.0857,
      "step": 30725
    },
    {
      "epoch": 0.8963766847540696,
      "grad_norm": 0.710933459659192,
      "learning_rate": 2.7896397357920655e-07,
      "loss": 0.1099,
      "step": 30726
    },
    {
      "epoch": 0.8964058579847132,
      "grad_norm": 0.9451970832846561,
      "learning_rate": 2.788083975095385e-07,
      "loss": 0.1309,
      "step": 30727
    },
    {
      "epoch": 0.8964350312153568,
      "grad_norm": 0.8845957460741791,
      "learning_rate": 2.78652863589543e-07,
      "loss": 0.0908,
      "step": 30728
    },
    {
      "epoch": 0.8964642044460004,
      "grad_norm": 0.7861611751527217,
      "learning_rate": 2.7849737182060743e-07,
      "loss": 0.0954,
      "step": 30729
    },
    {
      "epoch": 0.8964933776766439,
      "grad_norm": 0.9881368903368798,
      "learning_rate": 2.7834192220412004e-07,
      "loss": 0.1213,
      "step": 30730
    },
    {
      "epoch": 0.8965225509072875,
      "grad_norm": 1.0046237997108478,
      "learning_rate": 2.7818651474146865e-07,
      "loss": 0.1232,
      "step": 30731
    },
    {
      "epoch": 0.896551724137931,
      "grad_norm": 0.8559461826711967,
      "learning_rate": 2.7803114943404096e-07,
      "loss": 0.0804,
      "step": 30732
    },
    {
      "epoch": 0.8965808973685746,
      "grad_norm": 0.8640787998027005,
      "learning_rate": 2.7787582628322484e-07,
      "loss": 0.1227,
      "step": 30733
    },
    {
      "epoch": 0.8966100705992182,
      "grad_norm": 0.765354985120999,
      "learning_rate": 2.777205452904047e-07,
      "loss": 0.1108,
      "step": 30734
    },
    {
      "epoch": 0.8966392438298617,
      "grad_norm": 0.8463059268957756,
      "learning_rate": 2.775653064569689e-07,
      "loss": 0.0985,
      "step": 30735
    },
    {
      "epoch": 0.8966684170605053,
      "grad_norm": 0.7819341705243084,
      "learning_rate": 2.774101097843024e-07,
      "loss": 0.104,
      "step": 30736
    },
    {
      "epoch": 0.8966975902911488,
      "grad_norm": 0.7381435723679869,
      "learning_rate": 2.7725495527379075e-07,
      "loss": 0.107,
      "step": 30737
    },
    {
      "epoch": 0.8967267635217924,
      "grad_norm": 0.9283154934314369,
      "learning_rate": 2.7709984292682067e-07,
      "loss": 0.1262,
      "step": 30738
    },
    {
      "epoch": 0.8967559367524359,
      "grad_norm": 0.8106571715015493,
      "learning_rate": 2.7694477274477547e-07,
      "loss": 0.0994,
      "step": 30739
    },
    {
      "epoch": 0.8967851099830795,
      "grad_norm": 0.7198788558290798,
      "learning_rate": 2.767897447290391e-07,
      "loss": 0.0992,
      "step": 30740
    },
    {
      "epoch": 0.8968142832137231,
      "grad_norm": 0.9750558874247081,
      "learning_rate": 2.76634758880997e-07,
      "loss": 0.1103,
      "step": 30741
    },
    {
      "epoch": 0.8968434564443667,
      "grad_norm": 0.9876056999943881,
      "learning_rate": 2.764798152020315e-07,
      "loss": 0.1046,
      "step": 30742
    },
    {
      "epoch": 0.8968726296750102,
      "grad_norm": 0.833314343991956,
      "learning_rate": 2.7632491369352756e-07,
      "loss": 0.0982,
      "step": 30743
    },
    {
      "epoch": 0.8969018029056538,
      "grad_norm": 0.8603811947496085,
      "learning_rate": 2.7617005435686626e-07,
      "loss": 0.1087,
      "step": 30744
    },
    {
      "epoch": 0.8969309761362974,
      "grad_norm": 0.8700389349659293,
      "learning_rate": 2.760152371934316e-07,
      "loss": 0.1125,
      "step": 30745
    },
    {
      "epoch": 0.8969601493669409,
      "grad_norm": 0.8254819574187079,
      "learning_rate": 2.758604622046057e-07,
      "loss": 0.0942,
      "step": 30746
    },
    {
      "epoch": 0.8969893225975845,
      "grad_norm": 0.6604077936248297,
      "learning_rate": 2.7570572939176866e-07,
      "loss": 0.1055,
      "step": 30747
    },
    {
      "epoch": 0.897018495828228,
      "grad_norm": 0.8877528725600868,
      "learning_rate": 2.755510387563032e-07,
      "loss": 0.1215,
      "step": 30748
    },
    {
      "epoch": 0.8970476690588716,
      "grad_norm": 0.924881130670953,
      "learning_rate": 2.7539639029959097e-07,
      "loss": 0.0986,
      "step": 30749
    },
    {
      "epoch": 0.8970768422895151,
      "grad_norm": 1.092146565671295,
      "learning_rate": 2.752417840230115e-07,
      "loss": 0.1184,
      "step": 30750
    },
    {
      "epoch": 0.8971060155201587,
      "grad_norm": 0.8404472563530829,
      "learning_rate": 2.7508721992794586e-07,
      "loss": 0.1135,
      "step": 30751
    },
    {
      "epoch": 0.8971351887508022,
      "grad_norm": 0.6649637523160014,
      "learning_rate": 2.749326980157735e-07,
      "loss": 0.1087,
      "step": 30752
    },
    {
      "epoch": 0.8971643619814458,
      "grad_norm": 0.7348002229460175,
      "learning_rate": 2.7477821828787333e-07,
      "loss": 0.1138,
      "step": 30753
    },
    {
      "epoch": 0.8971935352120893,
      "grad_norm": 0.8516718049135685,
      "learning_rate": 2.746237807456259e-07,
      "loss": 0.1132,
      "step": 30754
    },
    {
      "epoch": 0.897222708442733,
      "grad_norm": 0.7731775570235779,
      "learning_rate": 2.744693853904096e-07,
      "loss": 0.1218,
      "step": 30755
    },
    {
      "epoch": 0.8972518816733766,
      "grad_norm": 0.6942492578301084,
      "learning_rate": 2.743150322236021e-07,
      "loss": 0.1262,
      "step": 30756
    },
    {
      "epoch": 0.8972810549040201,
      "grad_norm": 0.9903813547306958,
      "learning_rate": 2.7416072124658186e-07,
      "loss": 0.089,
      "step": 30757
    },
    {
      "epoch": 0.8973102281346637,
      "grad_norm": 0.8046959363603583,
      "learning_rate": 2.740064524607267e-07,
      "loss": 0.1009,
      "step": 30758
    },
    {
      "epoch": 0.8973394013653072,
      "grad_norm": 0.8655088682922392,
      "learning_rate": 2.738522258674142e-07,
      "loss": 0.1156,
      "step": 30759
    },
    {
      "epoch": 0.8973685745959508,
      "grad_norm": 0.9014478945337064,
      "learning_rate": 2.736980414680196e-07,
      "loss": 0.1041,
      "step": 30760
    },
    {
      "epoch": 0.8973977478265943,
      "grad_norm": 0.8983628128044445,
      "learning_rate": 2.7354389926392113e-07,
      "loss": 0.1093,
      "step": 30761
    },
    {
      "epoch": 0.8974269210572379,
      "grad_norm": 0.8696555401269471,
      "learning_rate": 2.733897992564949e-07,
      "loss": 0.1236,
      "step": 30762
    },
    {
      "epoch": 0.8974560942878814,
      "grad_norm": 0.9487568686646244,
      "learning_rate": 2.732357414471165e-07,
      "loss": 0.1057,
      "step": 30763
    },
    {
      "epoch": 0.897485267518525,
      "grad_norm": 0.7836511441328771,
      "learning_rate": 2.7308172583715984e-07,
      "loss": 0.1099,
      "step": 30764
    },
    {
      "epoch": 0.8975144407491685,
      "grad_norm": 0.8953019495718707,
      "learning_rate": 2.729277524280022e-07,
      "loss": 0.1129,
      "step": 30765
    },
    {
      "epoch": 0.8975436139798121,
      "grad_norm": 0.7815891728054658,
      "learning_rate": 2.7277382122101627e-07,
      "loss": 0.1155,
      "step": 30766
    },
    {
      "epoch": 0.8975727872104556,
      "grad_norm": 0.9841914813041843,
      "learning_rate": 2.726199322175771e-07,
      "loss": 0.119,
      "step": 30767
    },
    {
      "epoch": 0.8976019604410993,
      "grad_norm": 0.721315614571614,
      "learning_rate": 2.7246608541905975e-07,
      "loss": 0.0907,
      "step": 30768
    },
    {
      "epoch": 0.8976311336717429,
      "grad_norm": 0.9775726056696618,
      "learning_rate": 2.7231228082683634e-07,
      "loss": 0.1115,
      "step": 30769
    },
    {
      "epoch": 0.8976603069023864,
      "grad_norm": 0.8163172049338536,
      "learning_rate": 2.7215851844227925e-07,
      "loss": 0.1174,
      "step": 30770
    },
    {
      "epoch": 0.89768948013303,
      "grad_norm": 1.0424985541986709,
      "learning_rate": 2.720047982667634e-07,
      "loss": 0.1033,
      "step": 30771
    },
    {
      "epoch": 0.8977186533636735,
      "grad_norm": 0.9386592665034721,
      "learning_rate": 2.718511203016594e-07,
      "loss": 0.1101,
      "step": 30772
    },
    {
      "epoch": 0.8977478265943171,
      "grad_norm": 0.6801122392266861,
      "learning_rate": 2.7169748454834055e-07,
      "loss": 0.1025,
      "step": 30773
    },
    {
      "epoch": 0.8977769998249606,
      "grad_norm": 0.782668735345411,
      "learning_rate": 2.715438910081769e-07,
      "loss": 0.0888,
      "step": 30774
    },
    {
      "epoch": 0.8978061730556042,
      "grad_norm": 0.8612139763576521,
      "learning_rate": 2.713903396825418e-07,
      "loss": 0.1076,
      "step": 30775
    },
    {
      "epoch": 0.8978353462862477,
      "grad_norm": 0.6433592810114508,
      "learning_rate": 2.712368305728047e-07,
      "loss": 0.0999,
      "step": 30776
    },
    {
      "epoch": 0.8978645195168913,
      "grad_norm": 0.8775201550517686,
      "learning_rate": 2.7108336368033505e-07,
      "loss": 0.1068,
      "step": 30777
    },
    {
      "epoch": 0.8978936927475348,
      "grad_norm": 0.9999598708750298,
      "learning_rate": 2.709299390065051e-07,
      "loss": 0.1337,
      "step": 30778
    },
    {
      "epoch": 0.8979228659781784,
      "grad_norm": 0.7312506172836786,
      "learning_rate": 2.7077655655268375e-07,
      "loss": 0.088,
      "step": 30779
    },
    {
      "epoch": 0.8979520392088219,
      "grad_norm": 1.1768203471175818,
      "learning_rate": 2.706232163202405e-07,
      "loss": 0.1097,
      "step": 30780
    },
    {
      "epoch": 0.8979812124394655,
      "grad_norm": 0.7983772650753197,
      "learning_rate": 2.704699183105441e-07,
      "loss": 0.1066,
      "step": 30781
    },
    {
      "epoch": 0.8980103856701092,
      "grad_norm": 0.877758334830019,
      "learning_rate": 2.7031666252496367e-07,
      "loss": 0.0904,
      "step": 30782
    },
    {
      "epoch": 0.8980395589007527,
      "grad_norm": 1.0949126353961296,
      "learning_rate": 2.701634489648658e-07,
      "loss": 0.1134,
      "step": 30783
    },
    {
      "epoch": 0.8980687321313963,
      "grad_norm": 0.8147996950949038,
      "learning_rate": 2.700102776316199e-07,
      "loss": 0.1035,
      "step": 30784
    },
    {
      "epoch": 0.8980979053620398,
      "grad_norm": 0.9267408923909373,
      "learning_rate": 2.6985714852659386e-07,
      "loss": 0.1177,
      "step": 30785
    },
    {
      "epoch": 0.8981270785926834,
      "grad_norm": 1.0054067499868213,
      "learning_rate": 2.6970406165115425e-07,
      "loss": 0.118,
      "step": 30786
    },
    {
      "epoch": 0.8981562518233269,
      "grad_norm": 0.852842728820599,
      "learning_rate": 2.695510170066662e-07,
      "loss": 0.1134,
      "step": 30787
    },
    {
      "epoch": 0.8981854250539705,
      "grad_norm": 0.9854147793022071,
      "learning_rate": 2.6939801459449856e-07,
      "loss": 0.1362,
      "step": 30788
    },
    {
      "epoch": 0.898214598284614,
      "grad_norm": 0.891330330666457,
      "learning_rate": 2.692450544160152e-07,
      "loss": 0.0972,
      "step": 30789
    },
    {
      "epoch": 0.8982437715152576,
      "grad_norm": 0.7731519930964349,
      "learning_rate": 2.6909213647258404e-07,
      "loss": 0.1103,
      "step": 30790
    },
    {
      "epoch": 0.8982729447459011,
      "grad_norm": 1.002568118988472,
      "learning_rate": 2.6893926076556774e-07,
      "loss": 0.1372,
      "step": 30791
    },
    {
      "epoch": 0.8983021179765447,
      "grad_norm": 0.8203451141084636,
      "learning_rate": 2.6878642729633307e-07,
      "loss": 0.1301,
      "step": 30792
    },
    {
      "epoch": 0.8983312912071882,
      "grad_norm": 0.9201695529116912,
      "learning_rate": 2.686336360662434e-07,
      "loss": 0.1026,
      "step": 30793
    },
    {
      "epoch": 0.8983604644378318,
      "grad_norm": 0.6820850081623775,
      "learning_rate": 2.6848088707666307e-07,
      "loss": 0.117,
      "step": 30794
    },
    {
      "epoch": 0.8983896376684755,
      "grad_norm": 0.8914170321809327,
      "learning_rate": 2.683281803289556e-07,
      "loss": 0.1249,
      "step": 30795
    },
    {
      "epoch": 0.898418810899119,
      "grad_norm": 0.9629246405962423,
      "learning_rate": 2.681755158244853e-07,
      "loss": 0.0944,
      "step": 30796
    },
    {
      "epoch": 0.8984479841297626,
      "grad_norm": 0.7348982561750177,
      "learning_rate": 2.680228935646134e-07,
      "loss": 0.115,
      "step": 30797
    },
    {
      "epoch": 0.8984771573604061,
      "grad_norm": 0.864887716885459,
      "learning_rate": 2.6787031355070435e-07,
      "loss": 0.1004,
      "step": 30798
    },
    {
      "epoch": 0.8985063305910497,
      "grad_norm": 0.7809241301837349,
      "learning_rate": 2.6771777578411983e-07,
      "loss": 0.1068,
      "step": 30799
    },
    {
      "epoch": 0.8985355038216932,
      "grad_norm": 0.8871663708259863,
      "learning_rate": 2.6756528026622043e-07,
      "loss": 0.1067,
      "step": 30800
    },
    {
      "epoch": 0.8985646770523368,
      "grad_norm": 0.8317968895716673,
      "learning_rate": 2.6741282699836837e-07,
      "loss": 0.1183,
      "step": 30801
    },
    {
      "epoch": 0.8985938502829803,
      "grad_norm": 0.8902794853332741,
      "learning_rate": 2.6726041598192585e-07,
      "loss": 0.1132,
      "step": 30802
    },
    {
      "epoch": 0.8986230235136239,
      "grad_norm": 0.8742007970379547,
      "learning_rate": 2.6710804721825246e-07,
      "loss": 0.087,
      "step": 30803
    },
    {
      "epoch": 0.8986521967442674,
      "grad_norm": 0.9510818055178514,
      "learning_rate": 2.669557207087076e-07,
      "loss": 0.1238,
      "step": 30804
    },
    {
      "epoch": 0.898681369974911,
      "grad_norm": 0.7604341231543608,
      "learning_rate": 2.668034364546529e-07,
      "loss": 0.1206,
      "step": 30805
    },
    {
      "epoch": 0.8987105432055545,
      "grad_norm": 0.8376054704151209,
      "learning_rate": 2.6665119445744736e-07,
      "loss": 0.1054,
      "step": 30806
    },
    {
      "epoch": 0.8987397164361981,
      "grad_norm": 1.085951157882634,
      "learning_rate": 2.6649899471844875e-07,
      "loss": 0.1024,
      "step": 30807
    },
    {
      "epoch": 0.8987688896668417,
      "grad_norm": 0.7417634980802448,
      "learning_rate": 2.663468372390182e-07,
      "loss": 0.0976,
      "step": 30808
    },
    {
      "epoch": 0.8987980628974853,
      "grad_norm": 0.8267745571301569,
      "learning_rate": 2.6619472202051356e-07,
      "loss": 0.1098,
      "step": 30809
    },
    {
      "epoch": 0.8988272361281289,
      "grad_norm": 0.9105857106462436,
      "learning_rate": 2.6604264906429143e-07,
      "loss": 0.0986,
      "step": 30810
    },
    {
      "epoch": 0.8988564093587724,
      "grad_norm": 1.2090116911069029,
      "learning_rate": 2.658906183717108e-07,
      "loss": 0.1303,
      "step": 30811
    },
    {
      "epoch": 0.898885582589416,
      "grad_norm": 0.7385412598763644,
      "learning_rate": 2.657386299441289e-07,
      "loss": 0.1109,
      "step": 30812
    },
    {
      "epoch": 0.8989147558200595,
      "grad_norm": 0.813675632854998,
      "learning_rate": 2.655866837829019e-07,
      "loss": 0.1125,
      "step": 30813
    },
    {
      "epoch": 0.8989439290507031,
      "grad_norm": 0.9246775517224919,
      "learning_rate": 2.654347798893864e-07,
      "loss": 0.1004,
      "step": 30814
    },
    {
      "epoch": 0.8989731022813466,
      "grad_norm": 0.9039317095109196,
      "learning_rate": 2.652829182649397e-07,
      "loss": 0.1098,
      "step": 30815
    },
    {
      "epoch": 0.8990022755119902,
      "grad_norm": 0.8844314416017702,
      "learning_rate": 2.651310989109174e-07,
      "loss": 0.1233,
      "step": 30816
    },
    {
      "epoch": 0.8990314487426337,
      "grad_norm": 0.8500331010090957,
      "learning_rate": 2.649793218286728e-07,
      "loss": 0.1022,
      "step": 30817
    },
    {
      "epoch": 0.8990606219732773,
      "grad_norm": 0.8048821621514879,
      "learning_rate": 2.6482758701956377e-07,
      "loss": 0.0893,
      "step": 30818
    },
    {
      "epoch": 0.8990897952039208,
      "grad_norm": 0.7685700013816277,
      "learning_rate": 2.6467589448494255e-07,
      "loss": 0.1259,
      "step": 30819
    },
    {
      "epoch": 0.8991189684345644,
      "grad_norm": 0.9148224900349895,
      "learning_rate": 2.645242442261659e-07,
      "loss": 0.1175,
      "step": 30820
    },
    {
      "epoch": 0.899148141665208,
      "grad_norm": 0.7447199833000645,
      "learning_rate": 2.6437263624458474e-07,
      "loss": 0.1107,
      "step": 30821
    },
    {
      "epoch": 0.8991773148958516,
      "grad_norm": 0.6002362659498117,
      "learning_rate": 2.642210705415554e-07,
      "loss": 0.1035,
      "step": 30822
    },
    {
      "epoch": 0.8992064881264952,
      "grad_norm": 0.9959436406921786,
      "learning_rate": 2.6406954711843014e-07,
      "loss": 0.1231,
      "step": 30823
    },
    {
      "epoch": 0.8992356613571387,
      "grad_norm": 0.9342524108593679,
      "learning_rate": 2.6391806597656003e-07,
      "loss": 0.1154,
      "step": 30824
    },
    {
      "epoch": 0.8992648345877823,
      "grad_norm": 0.9400089022570661,
      "learning_rate": 2.637666271172995e-07,
      "loss": 0.109,
      "step": 30825
    },
    {
      "epoch": 0.8992940078184258,
      "grad_norm": 0.8065494860891471,
      "learning_rate": 2.636152305419998e-07,
      "loss": 0.1314,
      "step": 30826
    },
    {
      "epoch": 0.8993231810490694,
      "grad_norm": 0.7908419503663388,
      "learning_rate": 2.634638762520125e-07,
      "loss": 0.117,
      "step": 30827
    },
    {
      "epoch": 0.8993523542797129,
      "grad_norm": 1.0302652218095083,
      "learning_rate": 2.6331256424869e-07,
      "loss": 0.0926,
      "step": 30828
    },
    {
      "epoch": 0.8993815275103565,
      "grad_norm": 0.7005029814400058,
      "learning_rate": 2.631612945333817e-07,
      "loss": 0.1076,
      "step": 30829
    },
    {
      "epoch": 0.899410700741,
      "grad_norm": 0.8424244710245236,
      "learning_rate": 2.630100671074376e-07,
      "loss": 0.1141,
      "step": 30830
    },
    {
      "epoch": 0.8994398739716436,
      "grad_norm": 1.0077946107225784,
      "learning_rate": 2.628588819722094e-07,
      "loss": 0.1099,
      "step": 30831
    },
    {
      "epoch": 0.8994690472022872,
      "grad_norm": 0.8653977940915419,
      "learning_rate": 2.627077391290467e-07,
      "loss": 0.0934,
      "step": 30832
    },
    {
      "epoch": 0.8994982204329307,
      "grad_norm": 0.6753000315672723,
      "learning_rate": 2.625566385792988e-07,
      "loss": 0.1042,
      "step": 30833
    },
    {
      "epoch": 0.8995273936635743,
      "grad_norm": 0.8426563397021365,
      "learning_rate": 2.6240558032431307e-07,
      "loss": 0.1162,
      "step": 30834
    },
    {
      "epoch": 0.8995565668942178,
      "grad_norm": 0.8772241797615372,
      "learning_rate": 2.622545643654401e-07,
      "loss": 0.0923,
      "step": 30835
    },
    {
      "epoch": 0.8995857401248615,
      "grad_norm": 0.9294651252284601,
      "learning_rate": 2.621035907040276e-07,
      "loss": 0.1218,
      "step": 30836
    },
    {
      "epoch": 0.899614913355505,
      "grad_norm": 0.7503295861919965,
      "learning_rate": 2.6195265934142177e-07,
      "loss": 0.1339,
      "step": 30837
    },
    {
      "epoch": 0.8996440865861486,
      "grad_norm": 0.9234185871140549,
      "learning_rate": 2.6180177027897326e-07,
      "loss": 0.1176,
      "step": 30838
    },
    {
      "epoch": 0.8996732598167921,
      "grad_norm": 0.8301220374617003,
      "learning_rate": 2.61650923518027e-07,
      "loss": 0.0955,
      "step": 30839
    },
    {
      "epoch": 0.8997024330474357,
      "grad_norm": 1.0647598043586948,
      "learning_rate": 2.6150011905992977e-07,
      "loss": 0.1012,
      "step": 30840
    },
    {
      "epoch": 0.8997316062780792,
      "grad_norm": 0.8196386934894001,
      "learning_rate": 2.613493569060288e-07,
      "loss": 0.1471,
      "step": 30841
    },
    {
      "epoch": 0.8997607795087228,
      "grad_norm": 0.7867605259723952,
      "learning_rate": 2.6119863705766967e-07,
      "loss": 0.0898,
      "step": 30842
    },
    {
      "epoch": 0.8997899527393663,
      "grad_norm": 0.8799963901929425,
      "learning_rate": 2.610479595161969e-07,
      "loss": 0.1258,
      "step": 30843
    },
    {
      "epoch": 0.8998191259700099,
      "grad_norm": 0.6158166063517552,
      "learning_rate": 2.6089732428295654e-07,
      "loss": 0.1008,
      "step": 30844
    },
    {
      "epoch": 0.8998482992006535,
      "grad_norm": 1.0886071944431155,
      "learning_rate": 2.6074673135929486e-07,
      "loss": 0.1368,
      "step": 30845
    },
    {
      "epoch": 0.899877472431297,
      "grad_norm": 0.8235006316894611,
      "learning_rate": 2.6059618074655457e-07,
      "loss": 0.1114,
      "step": 30846
    },
    {
      "epoch": 0.8999066456619406,
      "grad_norm": 0.7278380548351747,
      "learning_rate": 2.6044567244607963e-07,
      "loss": 0.106,
      "step": 30847
    },
    {
      "epoch": 0.8999358188925841,
      "grad_norm": 0.8271609162705068,
      "learning_rate": 2.6029520645921515e-07,
      "loss": 0.1188,
      "step": 30848
    },
    {
      "epoch": 0.8999649921232278,
      "grad_norm": 0.8698388363891452,
      "learning_rate": 2.601447827873027e-07,
      "loss": 0.1279,
      "step": 30849
    },
    {
      "epoch": 0.8999941653538713,
      "grad_norm": 0.7373099240975741,
      "learning_rate": 2.5999440143168686e-07,
      "loss": 0.0866,
      "step": 30850
    },
    {
      "epoch": 0.9000233385845149,
      "grad_norm": 0.7588799540457818,
      "learning_rate": 2.5984406239370874e-07,
      "loss": 0.1118,
      "step": 30851
    },
    {
      "epoch": 0.9000525118151584,
      "grad_norm": 0.920814935558577,
      "learning_rate": 2.5969376567471226e-07,
      "loss": 0.1166,
      "step": 30852
    },
    {
      "epoch": 0.900081685045802,
      "grad_norm": 0.8258080195124249,
      "learning_rate": 2.5954351127603807e-07,
      "loss": 0.1226,
      "step": 30853
    },
    {
      "epoch": 0.9001108582764455,
      "grad_norm": 1.0723653998016915,
      "learning_rate": 2.593932991990272e-07,
      "loss": 0.1029,
      "step": 30854
    },
    {
      "epoch": 0.9001400315070891,
      "grad_norm": 0.8500662747471033,
      "learning_rate": 2.5924312944502095e-07,
      "loss": 0.0984,
      "step": 30855
    },
    {
      "epoch": 0.9001692047377327,
      "grad_norm": 0.8728339231385889,
      "learning_rate": 2.590930020153609e-07,
      "loss": 0.1193,
      "step": 30856
    },
    {
      "epoch": 0.9001983779683762,
      "grad_norm": 0.8822052485185008,
      "learning_rate": 2.589429169113866e-07,
      "loss": 0.0893,
      "step": 30857
    },
    {
      "epoch": 0.9002275511990198,
      "grad_norm": 0.7788704914360438,
      "learning_rate": 2.5879287413443863e-07,
      "loss": 0.1235,
      "step": 30858
    },
    {
      "epoch": 0.9002567244296633,
      "grad_norm": 0.8237787340704235,
      "learning_rate": 2.5864287368585596e-07,
      "loss": 0.108,
      "step": 30859
    },
    {
      "epoch": 0.9002858976603069,
      "grad_norm": 0.955336411671347,
      "learning_rate": 2.584929155669774e-07,
      "loss": 0.1245,
      "step": 30860
    },
    {
      "epoch": 0.9003150708909504,
      "grad_norm": 0.8322752354288175,
      "learning_rate": 2.5834299977914203e-07,
      "loss": 0.0947,
      "step": 30861
    },
    {
      "epoch": 0.900344244121594,
      "grad_norm": 0.8808347603018455,
      "learning_rate": 2.581931263236892e-07,
      "loss": 0.139,
      "step": 30862
    },
    {
      "epoch": 0.9003734173522376,
      "grad_norm": 0.7896792013506789,
      "learning_rate": 2.5804329520195625e-07,
      "loss": 0.1091,
      "step": 30863
    },
    {
      "epoch": 0.9004025905828812,
      "grad_norm": 0.7428391325210728,
      "learning_rate": 2.5789350641527987e-07,
      "loss": 0.1038,
      "step": 30864
    },
    {
      "epoch": 0.9004317638135247,
      "grad_norm": 0.8425847751115024,
      "learning_rate": 2.57743759964999e-07,
      "loss": 0.1187,
      "step": 30865
    },
    {
      "epoch": 0.9004609370441683,
      "grad_norm": 0.7751035477123707,
      "learning_rate": 2.575940558524498e-07,
      "loss": 0.1127,
      "step": 30866
    },
    {
      "epoch": 0.9004901102748119,
      "grad_norm": 1.1087439040171116,
      "learning_rate": 2.5744439407896725e-07,
      "loss": 0.119,
      "step": 30867
    },
    {
      "epoch": 0.9005192835054554,
      "grad_norm": 0.844292477344265,
      "learning_rate": 2.5729477464589037e-07,
      "loss": 0.099,
      "step": 30868
    },
    {
      "epoch": 0.900548456736099,
      "grad_norm": 0.7187965661116755,
      "learning_rate": 2.5714519755455416e-07,
      "loss": 0.1004,
      "step": 30869
    },
    {
      "epoch": 0.9005776299667425,
      "grad_norm": 0.8663774904698612,
      "learning_rate": 2.5699566280629196e-07,
      "loss": 0.1334,
      "step": 30870
    },
    {
      "epoch": 0.9006068031973861,
      "grad_norm": 0.8035676158163162,
      "learning_rate": 2.568461704024411e-07,
      "loss": 0.1072,
      "step": 30871
    },
    {
      "epoch": 0.9006359764280296,
      "grad_norm": 1.0698248448789052,
      "learning_rate": 2.5669672034433544e-07,
      "loss": 0.1221,
      "step": 30872
    },
    {
      "epoch": 0.9006651496586732,
      "grad_norm": 0.8177173393092028,
      "learning_rate": 2.56547312633309e-07,
      "loss": 0.1058,
      "step": 30873
    },
    {
      "epoch": 0.9006943228893167,
      "grad_norm": 0.8131376079609332,
      "learning_rate": 2.563979472706951e-07,
      "loss": 0.1116,
      "step": 30874
    },
    {
      "epoch": 0.9007234961199603,
      "grad_norm": 0.9209480791815651,
      "learning_rate": 2.562486242578288e-07,
      "loss": 0.111,
      "step": 30875
    },
    {
      "epoch": 0.9007526693506039,
      "grad_norm": 0.6814739903364504,
      "learning_rate": 2.560993435960424e-07,
      "loss": 0.1054,
      "step": 30876
    },
    {
      "epoch": 0.9007818425812475,
      "grad_norm": 1.072332176891597,
      "learning_rate": 2.559501052866681e-07,
      "loss": 0.1447,
      "step": 30877
    },
    {
      "epoch": 0.900811015811891,
      "grad_norm": 0.7242888196953966,
      "learning_rate": 2.558009093310393e-07,
      "loss": 0.1091,
      "step": 30878
    },
    {
      "epoch": 0.9008401890425346,
      "grad_norm": 0.826829439552671,
      "learning_rate": 2.556517557304866e-07,
      "loss": 0.0992,
      "step": 30879
    },
    {
      "epoch": 0.9008693622731782,
      "grad_norm": 0.6926146048625293,
      "learning_rate": 2.5550264448634285e-07,
      "loss": 0.1045,
      "step": 30880
    },
    {
      "epoch": 0.9008985355038217,
      "grad_norm": 0.8374251096933836,
      "learning_rate": 2.553535755999387e-07,
      "loss": 0.1059,
      "step": 30881
    },
    {
      "epoch": 0.9009277087344653,
      "grad_norm": 0.8812956683022092,
      "learning_rate": 2.552045490726057e-07,
      "loss": 0.0972,
      "step": 30882
    },
    {
      "epoch": 0.9009568819651088,
      "grad_norm": 0.8180317243090963,
      "learning_rate": 2.5505556490567405e-07,
      "loss": 0.1059,
      "step": 30883
    },
    {
      "epoch": 0.9009860551957524,
      "grad_norm": 0.9969662856760693,
      "learning_rate": 2.5490662310047264e-07,
      "loss": 0.1173,
      "step": 30884
    },
    {
      "epoch": 0.9010152284263959,
      "grad_norm": 1.2642738594841663,
      "learning_rate": 2.547577236583326e-07,
      "loss": 0.0814,
      "step": 30885
    },
    {
      "epoch": 0.9010444016570395,
      "grad_norm": 0.8798486010831635,
      "learning_rate": 2.5460886658058295e-07,
      "loss": 0.1076,
      "step": 30886
    },
    {
      "epoch": 0.901073574887683,
      "grad_norm": 0.6230444000273161,
      "learning_rate": 2.544600518685519e-07,
      "loss": 0.0998,
      "step": 30887
    },
    {
      "epoch": 0.9011027481183266,
      "grad_norm": 0.7321902723437084,
      "learning_rate": 2.543112795235697e-07,
      "loss": 0.108,
      "step": 30888
    },
    {
      "epoch": 0.9011319213489701,
      "grad_norm": 0.7923578488218115,
      "learning_rate": 2.541625495469635e-07,
      "loss": 0.1009,
      "step": 30889
    },
    {
      "epoch": 0.9011610945796138,
      "grad_norm": 0.7111146089392812,
      "learning_rate": 2.5401386194006005e-07,
      "loss": 0.0948,
      "step": 30890
    },
    {
      "epoch": 0.9011902678102574,
      "grad_norm": 0.7253050259418145,
      "learning_rate": 2.538652167041883e-07,
      "loss": 0.1153,
      "step": 30891
    },
    {
      "epoch": 0.9012194410409009,
      "grad_norm": 0.7411366341909867,
      "learning_rate": 2.5371661384067546e-07,
      "loss": 0.1037,
      "step": 30892
    },
    {
      "epoch": 0.9012486142715445,
      "grad_norm": 0.8317204427851583,
      "learning_rate": 2.5356805335084776e-07,
      "loss": 0.0949,
      "step": 30893
    },
    {
      "epoch": 0.901277787502188,
      "grad_norm": 0.6538123182560942,
      "learning_rate": 2.5341953523603024e-07,
      "loss": 0.1088,
      "step": 30894
    },
    {
      "epoch": 0.9013069607328316,
      "grad_norm": 0.8232839203533794,
      "learning_rate": 2.5327105949755125e-07,
      "loss": 0.103,
      "step": 30895
    },
    {
      "epoch": 0.9013361339634751,
      "grad_norm": 0.9527635326361865,
      "learning_rate": 2.5312262613673476e-07,
      "loss": 0.119,
      "step": 30896
    },
    {
      "epoch": 0.9013653071941187,
      "grad_norm": 0.7507188860357058,
      "learning_rate": 2.5297423515490584e-07,
      "loss": 0.1225,
      "step": 30897
    },
    {
      "epoch": 0.9013944804247622,
      "grad_norm": 0.6477173770002925,
      "learning_rate": 2.5282588655338947e-07,
      "loss": 0.1153,
      "step": 30898
    },
    {
      "epoch": 0.9014236536554058,
      "grad_norm": 0.7759081563079759,
      "learning_rate": 2.526775803335113e-07,
      "loss": 0.1176,
      "step": 30899
    },
    {
      "epoch": 0.9014528268860493,
      "grad_norm": 0.7528650119604344,
      "learning_rate": 2.525293164965936e-07,
      "loss": 0.1,
      "step": 30900
    },
    {
      "epoch": 0.9014820001166929,
      "grad_norm": 0.8382682887800795,
      "learning_rate": 2.523810950439615e-07,
      "loss": 0.1159,
      "step": 30901
    },
    {
      "epoch": 0.9015111733473364,
      "grad_norm": 0.9790493988981085,
      "learning_rate": 2.5223291597693764e-07,
      "loss": 0.1206,
      "step": 30902
    },
    {
      "epoch": 0.9015403465779801,
      "grad_norm": 0.9686947625084419,
      "learning_rate": 2.520847792968445e-07,
      "loss": 0.0947,
      "step": 30903
    },
    {
      "epoch": 0.9015695198086237,
      "grad_norm": 1.0043684611504828,
      "learning_rate": 2.519366850050048e-07,
      "loss": 0.1195,
      "step": 30904
    },
    {
      "epoch": 0.9015986930392672,
      "grad_norm": 1.1410283074329841,
      "learning_rate": 2.5178863310274136e-07,
      "loss": 0.108,
      "step": 30905
    },
    {
      "epoch": 0.9016278662699108,
      "grad_norm": 0.822753373595886,
      "learning_rate": 2.51640623591376e-07,
      "loss": 0.0961,
      "step": 30906
    },
    {
      "epoch": 0.9016570395005543,
      "grad_norm": 0.7734796717613033,
      "learning_rate": 2.5149265647222863e-07,
      "loss": 0.0935,
      "step": 30907
    },
    {
      "epoch": 0.9016862127311979,
      "grad_norm": 0.8922035087707321,
      "learning_rate": 2.513447317466222e-07,
      "loss": 0.1136,
      "step": 30908
    },
    {
      "epoch": 0.9017153859618414,
      "grad_norm": 0.9762875819094442,
      "learning_rate": 2.511968494158751e-07,
      "loss": 0.1387,
      "step": 30909
    },
    {
      "epoch": 0.901744559192485,
      "grad_norm": 0.7317975276067623,
      "learning_rate": 2.510490094813101e-07,
      "loss": 0.0967,
      "step": 30910
    },
    {
      "epoch": 0.9017737324231285,
      "grad_norm": 0.7514206912013227,
      "learning_rate": 2.5090121194424554e-07,
      "loss": 0.1044,
      "step": 30911
    },
    {
      "epoch": 0.9018029056537721,
      "grad_norm": 1.0323943227713905,
      "learning_rate": 2.5075345680600107e-07,
      "loss": 0.102,
      "step": 30912
    },
    {
      "epoch": 0.9018320788844156,
      "grad_norm": 0.8356574035684364,
      "learning_rate": 2.5060574406789664e-07,
      "loss": 0.1103,
      "step": 30913
    },
    {
      "epoch": 0.9018612521150592,
      "grad_norm": 0.661884839685752,
      "learning_rate": 2.504580737312495e-07,
      "loss": 0.0862,
      "step": 30914
    },
    {
      "epoch": 0.9018904253457027,
      "grad_norm": 0.7698925091478908,
      "learning_rate": 2.503104457973787e-07,
      "loss": 0.1014,
      "step": 30915
    },
    {
      "epoch": 0.9019195985763463,
      "grad_norm": 1.1653480649798673,
      "learning_rate": 2.501628602676037e-07,
      "loss": 0.1072,
      "step": 30916
    },
    {
      "epoch": 0.90194877180699,
      "grad_norm": 0.8357527387352833,
      "learning_rate": 2.500153171432396e-07,
      "loss": 0.1254,
      "step": 30917
    },
    {
      "epoch": 0.9019779450376335,
      "grad_norm": 0.7601997967500363,
      "learning_rate": 2.498678164256052e-07,
      "loss": 0.1405,
      "step": 30918
    },
    {
      "epoch": 0.9020071182682771,
      "grad_norm": 0.9844474328297425,
      "learning_rate": 2.497203581160174e-07,
      "loss": 0.1058,
      "step": 30919
    },
    {
      "epoch": 0.9020362914989206,
      "grad_norm": 0.9306600426511832,
      "learning_rate": 2.4957294221579166e-07,
      "loss": 0.1323,
      "step": 30920
    },
    {
      "epoch": 0.9020654647295642,
      "grad_norm": 0.811693818410758,
      "learning_rate": 2.4942556872624477e-07,
      "loss": 0.1107,
      "step": 30921
    },
    {
      "epoch": 0.9020946379602077,
      "grad_norm": 0.813405748983144,
      "learning_rate": 2.4927823764869296e-07,
      "loss": 0.1039,
      "step": 30922
    },
    {
      "epoch": 0.9021238111908513,
      "grad_norm": 0.7775052672368832,
      "learning_rate": 2.4913094898445066e-07,
      "loss": 0.121,
      "step": 30923
    },
    {
      "epoch": 0.9021529844214948,
      "grad_norm": 1.168129538557138,
      "learning_rate": 2.489837027348324e-07,
      "loss": 0.1124,
      "step": 30924
    },
    {
      "epoch": 0.9021821576521384,
      "grad_norm": 0.8427444648072628,
      "learning_rate": 2.488364989011544e-07,
      "loss": 0.1146,
      "step": 30925
    },
    {
      "epoch": 0.9022113308827819,
      "grad_norm": 0.7314329053327707,
      "learning_rate": 2.486893374847299e-07,
      "loss": 0.1171,
      "step": 30926
    },
    {
      "epoch": 0.9022405041134255,
      "grad_norm": 0.9614666321424571,
      "learning_rate": 2.4854221848687245e-07,
      "loss": 0.1134,
      "step": 30927
    },
    {
      "epoch": 0.902269677344069,
      "grad_norm": 0.8172701848878543,
      "learning_rate": 2.4839514190889534e-07,
      "loss": 0.0922,
      "step": 30928
    },
    {
      "epoch": 0.9022988505747126,
      "grad_norm": 0.8324606832777022,
      "learning_rate": 2.482481077521126e-07,
      "loss": 0.1075,
      "step": 30929
    },
    {
      "epoch": 0.9023280238053561,
      "grad_norm": 0.7730009621892097,
      "learning_rate": 2.481011160178365e-07,
      "loss": 0.1299,
      "step": 30930
    },
    {
      "epoch": 0.9023571970359998,
      "grad_norm": 1.0540633963722275,
      "learning_rate": 2.4795416670737925e-07,
      "loss": 0.1041,
      "step": 30931
    },
    {
      "epoch": 0.9023863702666434,
      "grad_norm": 0.7690313082884614,
      "learning_rate": 2.478072598220532e-07,
      "loss": 0.1178,
      "step": 30932
    },
    {
      "epoch": 0.9024155434972869,
      "grad_norm": 0.9766291913066324,
      "learning_rate": 2.4766039536316843e-07,
      "loss": 0.1105,
      "step": 30933
    },
    {
      "epoch": 0.9024447167279305,
      "grad_norm": 0.8108029336761172,
      "learning_rate": 2.475135733320372e-07,
      "loss": 0.1208,
      "step": 30934
    },
    {
      "epoch": 0.902473889958574,
      "grad_norm": 0.8216731486601332,
      "learning_rate": 2.473667937299712e-07,
      "loss": 0.1149,
      "step": 30935
    },
    {
      "epoch": 0.9025030631892176,
      "grad_norm": 0.7140596228755002,
      "learning_rate": 2.4722005655827995e-07,
      "loss": 0.0882,
      "step": 30936
    },
    {
      "epoch": 0.9025322364198611,
      "grad_norm": 0.9323876320571479,
      "learning_rate": 2.47073361818273e-07,
      "loss": 0.1222,
      "step": 30937
    },
    {
      "epoch": 0.9025614096505047,
      "grad_norm": 1.040450051990931,
      "learning_rate": 2.4692670951126043e-07,
      "loss": 0.1355,
      "step": 30938
    },
    {
      "epoch": 0.9025905828811482,
      "grad_norm": 1.0706327905908033,
      "learning_rate": 2.4678009963855165e-07,
      "loss": 0.0996,
      "step": 30939
    },
    {
      "epoch": 0.9026197561117918,
      "grad_norm": 0.8522603385455392,
      "learning_rate": 2.466335322014557e-07,
      "loss": 0.1033,
      "step": 30940
    },
    {
      "epoch": 0.9026489293424353,
      "grad_norm": 0.7507943013091795,
      "learning_rate": 2.4648700720128036e-07,
      "loss": 0.116,
      "step": 30941
    },
    {
      "epoch": 0.9026781025730789,
      "grad_norm": 0.6786135439008352,
      "learning_rate": 2.4634052463933466e-07,
      "loss": 0.1013,
      "step": 30942
    },
    {
      "epoch": 0.9027072758037225,
      "grad_norm": 0.8883417035087033,
      "learning_rate": 2.4619408451692584e-07,
      "loss": 0.1081,
      "step": 30943
    },
    {
      "epoch": 0.9027364490343661,
      "grad_norm": 0.847454523909641,
      "learning_rate": 2.460476868353612e-07,
      "loss": 0.1239,
      "step": 30944
    },
    {
      "epoch": 0.9027656222650097,
      "grad_norm": 0.6801634304033013,
      "learning_rate": 2.45901331595948e-07,
      "loss": 0.1167,
      "step": 30945
    },
    {
      "epoch": 0.9027947954956532,
      "grad_norm": 0.7889849025518476,
      "learning_rate": 2.4575501879999295e-07,
      "loss": 0.101,
      "step": 30946
    },
    {
      "epoch": 0.9028239687262968,
      "grad_norm": 0.6949769325179381,
      "learning_rate": 2.456087484488018e-07,
      "loss": 0.1186,
      "step": 30947
    },
    {
      "epoch": 0.9028531419569403,
      "grad_norm": 0.8075382518895727,
      "learning_rate": 2.454625205436817e-07,
      "loss": 0.1208,
      "step": 30948
    },
    {
      "epoch": 0.9028823151875839,
      "grad_norm": 0.7407950225445782,
      "learning_rate": 2.4531633508593665e-07,
      "loss": 0.1166,
      "step": 30949
    },
    {
      "epoch": 0.9029114884182274,
      "grad_norm": 0.9062719691739753,
      "learning_rate": 2.451701920768723e-07,
      "loss": 0.1167,
      "step": 30950
    },
    {
      "epoch": 0.902940661648871,
      "grad_norm": 0.6634029421397257,
      "learning_rate": 2.4502409151779317e-07,
      "loss": 0.1192,
      "step": 30951
    },
    {
      "epoch": 0.9029698348795145,
      "grad_norm": 0.9013022547794278,
      "learning_rate": 2.4487803341000425e-07,
      "loss": 0.1226,
      "step": 30952
    },
    {
      "epoch": 0.9029990081101581,
      "grad_norm": 0.8704093365322719,
      "learning_rate": 2.44732017754809e-07,
      "loss": 0.0961,
      "step": 30953
    },
    {
      "epoch": 0.9030281813408017,
      "grad_norm": 0.9566523073249599,
      "learning_rate": 2.445860445535109e-07,
      "loss": 0.0978,
      "step": 30954
    },
    {
      "epoch": 0.9030573545714452,
      "grad_norm": 0.7246687325679024,
      "learning_rate": 2.4444011380741375e-07,
      "loss": 0.1432,
      "step": 30955
    },
    {
      "epoch": 0.9030865278020888,
      "grad_norm": 0.8785881788994038,
      "learning_rate": 2.4429422551782046e-07,
      "loss": 0.1089,
      "step": 30956
    },
    {
      "epoch": 0.9031157010327323,
      "grad_norm": 0.6631408368634061,
      "learning_rate": 2.4414837968603223e-07,
      "loss": 0.0962,
      "step": 30957
    },
    {
      "epoch": 0.903144874263376,
      "grad_norm": 0.7138819774688472,
      "learning_rate": 2.4400257631335136e-07,
      "loss": 0.1202,
      "step": 30958
    },
    {
      "epoch": 0.9031740474940195,
      "grad_norm": 0.8350813516994058,
      "learning_rate": 2.4385681540108117e-07,
      "loss": 0.1019,
      "step": 30959
    },
    {
      "epoch": 0.9032032207246631,
      "grad_norm": 0.8326867081345029,
      "learning_rate": 2.4371109695052185e-07,
      "loss": 0.0901,
      "step": 30960
    },
    {
      "epoch": 0.9032323939553066,
      "grad_norm": 0.9466281671113214,
      "learning_rate": 2.435654209629745e-07,
      "loss": 0.1093,
      "step": 30961
    },
    {
      "epoch": 0.9032615671859502,
      "grad_norm": 0.7263247733108613,
      "learning_rate": 2.434197874397398e-07,
      "loss": 0.1239,
      "step": 30962
    },
    {
      "epoch": 0.9032907404165937,
      "grad_norm": 1.1284205234836768,
      "learning_rate": 2.432741963821167e-07,
      "loss": 0.1144,
      "step": 30963
    },
    {
      "epoch": 0.9033199136472373,
      "grad_norm": 0.8764197879557643,
      "learning_rate": 2.4312864779140633e-07,
      "loss": 0.115,
      "step": 30964
    },
    {
      "epoch": 0.9033490868778808,
      "grad_norm": 0.8498423461680746,
      "learning_rate": 2.429831416689088e-07,
      "loss": 0.0999,
      "step": 30965
    },
    {
      "epoch": 0.9033782601085244,
      "grad_norm": 0.964680119609015,
      "learning_rate": 2.4283767801592196e-07,
      "loss": 0.1004,
      "step": 30966
    },
    {
      "epoch": 0.903407433339168,
      "grad_norm": 0.7881218667683954,
      "learning_rate": 2.426922568337442e-07,
      "loss": 0.091,
      "step": 30967
    },
    {
      "epoch": 0.9034366065698115,
      "grad_norm": 0.7746170126969204,
      "learning_rate": 2.425468781236745e-07,
      "loss": 0.0981,
      "step": 30968
    },
    {
      "epoch": 0.9034657798004551,
      "grad_norm": 0.6896526807509923,
      "learning_rate": 2.4240154188701013e-07,
      "loss": 0.1071,
      "step": 30969
    },
    {
      "epoch": 0.9034949530310986,
      "grad_norm": 0.880851038956034,
      "learning_rate": 2.422562481250501e-07,
      "loss": 0.1166,
      "step": 30970
    },
    {
      "epoch": 0.9035241262617423,
      "grad_norm": 0.8271895146708425,
      "learning_rate": 2.421109968390895e-07,
      "loss": 0.112,
      "step": 30971
    },
    {
      "epoch": 0.9035532994923858,
      "grad_norm": 0.8047694310509017,
      "learning_rate": 2.419657880304266e-07,
      "loss": 0.1297,
      "step": 30972
    },
    {
      "epoch": 0.9035824727230294,
      "grad_norm": 0.7329982488742748,
      "learning_rate": 2.418206217003577e-07,
      "loss": 0.1152,
      "step": 30973
    },
    {
      "epoch": 0.9036116459536729,
      "grad_norm": 0.8524591097579353,
      "learning_rate": 2.4167549785017676e-07,
      "loss": 0.102,
      "step": 30974
    },
    {
      "epoch": 0.9036408191843165,
      "grad_norm": 1.0279475728860563,
      "learning_rate": 2.415304164811827e-07,
      "loss": 0.1175,
      "step": 30975
    },
    {
      "epoch": 0.90366999241496,
      "grad_norm": 0.7631475659126696,
      "learning_rate": 2.4138537759466894e-07,
      "loss": 0.1055,
      "step": 30976
    },
    {
      "epoch": 0.9036991656456036,
      "grad_norm": 1.0890616148085686,
      "learning_rate": 2.4124038119193006e-07,
      "loss": 0.1283,
      "step": 30977
    },
    {
      "epoch": 0.9037283388762472,
      "grad_norm": 0.9686796762385809,
      "learning_rate": 2.410954272742616e-07,
      "loss": 0.1085,
      "step": 30978
    },
    {
      "epoch": 0.9037575121068907,
      "grad_norm": 0.7816770751882689,
      "learning_rate": 2.4095051584295704e-07,
      "loss": 0.134,
      "step": 30979
    },
    {
      "epoch": 0.9037866853375343,
      "grad_norm": 0.891180047793578,
      "learning_rate": 2.4080564689930974e-07,
      "loss": 0.1066,
      "step": 30980
    },
    {
      "epoch": 0.9038158585681778,
      "grad_norm": 0.9566231189326931,
      "learning_rate": 2.406608204446137e-07,
      "loss": 0.1225,
      "step": 30981
    },
    {
      "epoch": 0.9038450317988214,
      "grad_norm": 0.732768771886364,
      "learning_rate": 2.4051603648016176e-07,
      "loss": 0.099,
      "step": 30982
    },
    {
      "epoch": 0.9038742050294649,
      "grad_norm": 0.9489851905210597,
      "learning_rate": 2.4037129500724675e-07,
      "loss": 0.1364,
      "step": 30983
    },
    {
      "epoch": 0.9039033782601085,
      "grad_norm": 0.981945530141147,
      "learning_rate": 2.402265960271599e-07,
      "loss": 0.1373,
      "step": 30984
    },
    {
      "epoch": 0.9039325514907521,
      "grad_norm": 0.8241497624676498,
      "learning_rate": 2.400819395411946e-07,
      "loss": 0.1099,
      "step": 30985
    },
    {
      "epoch": 0.9039617247213957,
      "grad_norm": 0.8656803812664841,
      "learning_rate": 2.39937325550641e-07,
      "loss": 0.1164,
      "step": 30986
    },
    {
      "epoch": 0.9039908979520392,
      "grad_norm": 0.9454769588806509,
      "learning_rate": 2.397927540567907e-07,
      "loss": 0.1225,
      "step": 30987
    },
    {
      "epoch": 0.9040200711826828,
      "grad_norm": 0.9195737657724945,
      "learning_rate": 2.396482250609339e-07,
      "loss": 0.1211,
      "step": 30988
    },
    {
      "epoch": 0.9040492444133263,
      "grad_norm": 0.8605926343562299,
      "learning_rate": 2.395037385643623e-07,
      "loss": 0.1167,
      "step": 30989
    },
    {
      "epoch": 0.9040784176439699,
      "grad_norm": 0.7684792064288665,
      "learning_rate": 2.393592945683648e-07,
      "loss": 0.1058,
      "step": 30990
    },
    {
      "epoch": 0.9041075908746135,
      "grad_norm": 0.9725175925245941,
      "learning_rate": 2.3921489307422994e-07,
      "loss": 0.1375,
      "step": 30991
    },
    {
      "epoch": 0.904136764105257,
      "grad_norm": 0.8903827468080072,
      "learning_rate": 2.3907053408324885e-07,
      "loss": 0.1289,
      "step": 30992
    },
    {
      "epoch": 0.9041659373359006,
      "grad_norm": 0.7438117327272914,
      "learning_rate": 2.3892621759670943e-07,
      "loss": 0.1012,
      "step": 30993
    },
    {
      "epoch": 0.9041951105665441,
      "grad_norm": 0.8033488966088786,
      "learning_rate": 2.3878194361590003e-07,
      "loss": 0.1216,
      "step": 30994
    },
    {
      "epoch": 0.9042242837971877,
      "grad_norm": 0.8563267664831004,
      "learning_rate": 2.386377121421091e-07,
      "loss": 0.1022,
      "step": 30995
    },
    {
      "epoch": 0.9042534570278312,
      "grad_norm": 0.8219721406685765,
      "learning_rate": 2.3849352317662446e-07,
      "loss": 0.1125,
      "step": 30996
    },
    {
      "epoch": 0.9042826302584748,
      "grad_norm": 0.6910867898795853,
      "learning_rate": 2.3834937672073178e-07,
      "loss": 0.0953,
      "step": 30997
    },
    {
      "epoch": 0.9043118034891184,
      "grad_norm": 1.2877588495692245,
      "learning_rate": 2.3820527277571949e-07,
      "loss": 0.1343,
      "step": 30998
    },
    {
      "epoch": 0.904340976719762,
      "grad_norm": 0.9232830015681499,
      "learning_rate": 2.380612113428743e-07,
      "loss": 0.1171,
      "step": 30999
    },
    {
      "epoch": 0.9043701499504055,
      "grad_norm": 0.8927480596637919,
      "learning_rate": 2.3791719242348188e-07,
      "loss": 0.0948,
      "step": 31000
    },
    {
      "epoch": 0.9043993231810491,
      "grad_norm": 0.9465587826538443,
      "learning_rate": 2.377732160188273e-07,
      "loss": 0.1136,
      "step": 31001
    },
    {
      "epoch": 0.9044284964116927,
      "grad_norm": 0.8571432623023012,
      "learning_rate": 2.3762928213019786e-07,
      "loss": 0.0976,
      "step": 31002
    },
    {
      "epoch": 0.9044576696423362,
      "grad_norm": 0.8745611444710717,
      "learning_rate": 2.3748539075887646e-07,
      "loss": 0.1145,
      "step": 31003
    },
    {
      "epoch": 0.9044868428729798,
      "grad_norm": 0.8098827712414928,
      "learning_rate": 2.3734154190614755e-07,
      "loss": 0.1167,
      "step": 31004
    },
    {
      "epoch": 0.9045160161036233,
      "grad_norm": 0.7728558326997578,
      "learning_rate": 2.3719773557329794e-07,
      "loss": 0.1036,
      "step": 31005
    },
    {
      "epoch": 0.9045451893342669,
      "grad_norm": 0.9925806399402782,
      "learning_rate": 2.3705397176160994e-07,
      "loss": 0.1134,
      "step": 31006
    },
    {
      "epoch": 0.9045743625649104,
      "grad_norm": 0.7889660460696515,
      "learning_rate": 2.3691025047236637e-07,
      "loss": 0.1048,
      "step": 31007
    },
    {
      "epoch": 0.904603535795554,
      "grad_norm": 0.7049189496488726,
      "learning_rate": 2.367665717068518e-07,
      "loss": 0.1169,
      "step": 31008
    },
    {
      "epoch": 0.9046327090261975,
      "grad_norm": 0.834736438102681,
      "learning_rate": 2.3662293546634796e-07,
      "loss": 0.1024,
      "step": 31009
    },
    {
      "epoch": 0.9046618822568411,
      "grad_norm": 0.7792846950019244,
      "learning_rate": 2.364793417521366e-07,
      "loss": 0.1128,
      "step": 31010
    },
    {
      "epoch": 0.9046910554874846,
      "grad_norm": 0.8957140921369928,
      "learning_rate": 2.3633579056550115e-07,
      "loss": 0.1363,
      "step": 31011
    },
    {
      "epoch": 0.9047202287181283,
      "grad_norm": 0.6520278082438606,
      "learning_rate": 2.3619228190772282e-07,
      "loss": 0.1141,
      "step": 31012
    },
    {
      "epoch": 0.9047494019487718,
      "grad_norm": 0.8356939446502553,
      "learning_rate": 2.3604881578008276e-07,
      "loss": 0.1189,
      "step": 31013
    },
    {
      "epoch": 0.9047785751794154,
      "grad_norm": 0.7724826162424849,
      "learning_rate": 2.3590539218386056e-07,
      "loss": 0.1028,
      "step": 31014
    },
    {
      "epoch": 0.904807748410059,
      "grad_norm": 0.8208899102690861,
      "learning_rate": 2.3576201112033903e-07,
      "loss": 0.0979,
      "step": 31015
    },
    {
      "epoch": 0.9048369216407025,
      "grad_norm": 0.9358438814407456,
      "learning_rate": 2.3561867259079607e-07,
      "loss": 0.117,
      "step": 31016
    },
    {
      "epoch": 0.9048660948713461,
      "grad_norm": 0.952807885742543,
      "learning_rate": 2.3547537659651286e-07,
      "loss": 0.1138,
      "step": 31017
    },
    {
      "epoch": 0.9048952681019896,
      "grad_norm": 0.765959134979808,
      "learning_rate": 2.353321231387673e-07,
      "loss": 0.1204,
      "step": 31018
    },
    {
      "epoch": 0.9049244413326332,
      "grad_norm": 0.7022546939782963,
      "learning_rate": 2.3518891221884e-07,
      "loss": 0.1135,
      "step": 31019
    },
    {
      "epoch": 0.9049536145632767,
      "grad_norm": 0.831842049537802,
      "learning_rate": 2.3504574383800825e-07,
      "loss": 0.1097,
      "step": 31020
    },
    {
      "epoch": 0.9049827877939203,
      "grad_norm": 0.7555123556790414,
      "learning_rate": 2.3490261799755e-07,
      "loss": 0.0981,
      "step": 31021
    },
    {
      "epoch": 0.9050119610245638,
      "grad_norm": 0.8825737255745918,
      "learning_rate": 2.3475953469874413e-07,
      "loss": 0.1375,
      "step": 31022
    },
    {
      "epoch": 0.9050411342552074,
      "grad_norm": 0.8603160876024761,
      "learning_rate": 2.34616493942868e-07,
      "loss": 0.125,
      "step": 31023
    },
    {
      "epoch": 0.9050703074858509,
      "grad_norm": 0.7903554247217,
      "learning_rate": 2.3447349573119725e-07,
      "loss": 0.0977,
      "step": 31024
    },
    {
      "epoch": 0.9050994807164946,
      "grad_norm": 0.8445792710266081,
      "learning_rate": 2.3433054006501087e-07,
      "loss": 0.0881,
      "step": 31025
    },
    {
      "epoch": 0.9051286539471382,
      "grad_norm": 0.6327029514433683,
      "learning_rate": 2.341876269455834e-07,
      "loss": 0.0999,
      "step": 31026
    },
    {
      "epoch": 0.9051578271777817,
      "grad_norm": 0.8723141299911839,
      "learning_rate": 2.3404475637419045e-07,
      "loss": 0.1183,
      "step": 31027
    },
    {
      "epoch": 0.9051870004084253,
      "grad_norm": 0.8394980661644551,
      "learning_rate": 2.3390192835210824e-07,
      "loss": 0.0942,
      "step": 31028
    },
    {
      "epoch": 0.9052161736390688,
      "grad_norm": 0.776989093761145,
      "learning_rate": 2.337591428806124e-07,
      "loss": 0.0981,
      "step": 31029
    },
    {
      "epoch": 0.9052453468697124,
      "grad_norm": 0.8312850821874578,
      "learning_rate": 2.3361639996097697e-07,
      "loss": 0.1182,
      "step": 31030
    },
    {
      "epoch": 0.9052745201003559,
      "grad_norm": 0.804418812954818,
      "learning_rate": 2.3347369959447584e-07,
      "loss": 0.1065,
      "step": 31031
    },
    {
      "epoch": 0.9053036933309995,
      "grad_norm": 1.229864617067817,
      "learning_rate": 2.3333104178238475e-07,
      "loss": 0.0858,
      "step": 31032
    },
    {
      "epoch": 0.905332866561643,
      "grad_norm": 0.8527797931521814,
      "learning_rate": 2.3318842652597595e-07,
      "loss": 0.1266,
      "step": 31033
    },
    {
      "epoch": 0.9053620397922866,
      "grad_norm": 0.7333503546245682,
      "learning_rate": 2.3304585382652178e-07,
      "loss": 0.1215,
      "step": 31034
    },
    {
      "epoch": 0.9053912130229301,
      "grad_norm": 0.9030188895183978,
      "learning_rate": 2.3290332368529734e-07,
      "loss": 0.1002,
      "step": 31035
    },
    {
      "epoch": 0.9054203862535737,
      "grad_norm": 0.8612737379561668,
      "learning_rate": 2.3276083610357436e-07,
      "loss": 0.116,
      "step": 31036
    },
    {
      "epoch": 0.9054495594842172,
      "grad_norm": 0.8589736419008908,
      "learning_rate": 2.3261839108262353e-07,
      "loss": 0.1321,
      "step": 31037
    },
    {
      "epoch": 0.9054787327148608,
      "grad_norm": 1.0454511600397138,
      "learning_rate": 2.3247598862371878e-07,
      "loss": 0.1098,
      "step": 31038
    },
    {
      "epoch": 0.9055079059455045,
      "grad_norm": 0.8891984577817922,
      "learning_rate": 2.323336287281297e-07,
      "loss": 0.1172,
      "step": 31039
    },
    {
      "epoch": 0.905537079176148,
      "grad_norm": 0.8612033636963312,
      "learning_rate": 2.3219131139712746e-07,
      "loss": 0.1037,
      "step": 31040
    },
    {
      "epoch": 0.9055662524067916,
      "grad_norm": 1.1069143747697723,
      "learning_rate": 2.320490366319833e-07,
      "loss": 0.1265,
      "step": 31041
    },
    {
      "epoch": 0.9055954256374351,
      "grad_norm": 0.9442900554547359,
      "learning_rate": 2.3190680443396784e-07,
      "loss": 0.1238,
      "step": 31042
    },
    {
      "epoch": 0.9056245988680787,
      "grad_norm": 0.7584375799488945,
      "learning_rate": 2.3176461480434954e-07,
      "loss": 0.1181,
      "step": 31043
    },
    {
      "epoch": 0.9056537720987222,
      "grad_norm": 0.8124001834974975,
      "learning_rate": 2.3162246774439845e-07,
      "loss": 0.1161,
      "step": 31044
    },
    {
      "epoch": 0.9056829453293658,
      "grad_norm": 0.8476413654548273,
      "learning_rate": 2.3148036325538414e-07,
      "loss": 0.1176,
      "step": 31045
    },
    {
      "epoch": 0.9057121185600093,
      "grad_norm": 0.9265441993099188,
      "learning_rate": 2.3133830133857393e-07,
      "loss": 0.0995,
      "step": 31046
    },
    {
      "epoch": 0.9057412917906529,
      "grad_norm": 0.8002302248988808,
      "learning_rate": 2.3119628199523792e-07,
      "loss": 0.1017,
      "step": 31047
    },
    {
      "epoch": 0.9057704650212964,
      "grad_norm": 0.8202679698560909,
      "learning_rate": 2.310543052266423e-07,
      "loss": 0.1194,
      "step": 31048
    },
    {
      "epoch": 0.90579963825194,
      "grad_norm": 0.7965327943716995,
      "learning_rate": 2.3091237103405606e-07,
      "loss": 0.1057,
      "step": 31049
    },
    {
      "epoch": 0.9058288114825835,
      "grad_norm": 0.7776205645230267,
      "learning_rate": 2.3077047941874597e-07,
      "loss": 0.0935,
      "step": 31050
    },
    {
      "epoch": 0.9058579847132271,
      "grad_norm": 0.6608344382665353,
      "learning_rate": 2.306286303819777e-07,
      "loss": 0.1199,
      "step": 31051
    },
    {
      "epoch": 0.9058871579438708,
      "grad_norm": 0.8951919596240322,
      "learning_rate": 2.3048682392501854e-07,
      "loss": 0.1156,
      "step": 31052
    },
    {
      "epoch": 0.9059163311745143,
      "grad_norm": 0.9607789177046562,
      "learning_rate": 2.303450600491347e-07,
      "loss": 0.1322,
      "step": 31053
    },
    {
      "epoch": 0.9059455044051579,
      "grad_norm": 0.779468320438791,
      "learning_rate": 2.3020333875559132e-07,
      "loss": 0.1013,
      "step": 31054
    },
    {
      "epoch": 0.9059746776358014,
      "grad_norm": 0.6703479128871883,
      "learning_rate": 2.3006166004565454e-07,
      "loss": 0.1241,
      "step": 31055
    },
    {
      "epoch": 0.906003850866445,
      "grad_norm": 0.8106083888563351,
      "learning_rate": 2.2992002392058843e-07,
      "loss": 0.1359,
      "step": 31056
    },
    {
      "epoch": 0.9060330240970885,
      "grad_norm": 1.0194871420894678,
      "learning_rate": 2.2977843038165693e-07,
      "loss": 0.1166,
      "step": 31057
    },
    {
      "epoch": 0.9060621973277321,
      "grad_norm": 0.6440779035411326,
      "learning_rate": 2.2963687943012515e-07,
      "loss": 0.1041,
      "step": 31058
    },
    {
      "epoch": 0.9060913705583756,
      "grad_norm": 0.7043635813284241,
      "learning_rate": 2.294953710672565e-07,
      "loss": 0.0986,
      "step": 31059
    },
    {
      "epoch": 0.9061205437890192,
      "grad_norm": 0.8523291980476269,
      "learning_rate": 2.29353905294315e-07,
      "loss": 0.1145,
      "step": 31060
    },
    {
      "epoch": 0.9061497170196627,
      "grad_norm": 0.7977194639494615,
      "learning_rate": 2.2921248211256242e-07,
      "loss": 0.1101,
      "step": 31061
    },
    {
      "epoch": 0.9061788902503063,
      "grad_norm": 0.7833624685213701,
      "learning_rate": 2.2907110152326217e-07,
      "loss": 0.113,
      "step": 31062
    },
    {
      "epoch": 0.9062080634809498,
      "grad_norm": 0.9112102627067392,
      "learning_rate": 2.289297635276766e-07,
      "loss": 0.1274,
      "step": 31063
    },
    {
      "epoch": 0.9062372367115934,
      "grad_norm": 0.7919046650217728,
      "learning_rate": 2.2878846812706524e-07,
      "loss": 0.0988,
      "step": 31064
    },
    {
      "epoch": 0.906266409942237,
      "grad_norm": 1.4700491593039289,
      "learning_rate": 2.2864721532269317e-07,
      "loss": 0.1413,
      "step": 31065
    },
    {
      "epoch": 0.9062955831728806,
      "grad_norm": 0.7976698184707087,
      "learning_rate": 2.2850600511582e-07,
      "loss": 0.1366,
      "step": 31066
    },
    {
      "epoch": 0.9063247564035242,
      "grad_norm": 0.8000100874950398,
      "learning_rate": 2.283648375077052e-07,
      "loss": 0.0992,
      "step": 31067
    },
    {
      "epoch": 0.9063539296341677,
      "grad_norm": 0.6631769861970778,
      "learning_rate": 2.282237124996106e-07,
      "loss": 0.0908,
      "step": 31068
    },
    {
      "epoch": 0.9063831028648113,
      "grad_norm": 0.6905358890076473,
      "learning_rate": 2.2808263009279574e-07,
      "loss": 0.1244,
      "step": 31069
    },
    {
      "epoch": 0.9064122760954548,
      "grad_norm": 0.7779457979066686,
      "learning_rate": 2.2794159028851958e-07,
      "loss": 0.1234,
      "step": 31070
    },
    {
      "epoch": 0.9064414493260984,
      "grad_norm": 0.8832020763851377,
      "learning_rate": 2.2780059308804116e-07,
      "loss": 0.1196,
      "step": 31071
    },
    {
      "epoch": 0.9064706225567419,
      "grad_norm": 0.7292286484002232,
      "learning_rate": 2.2765963849262107e-07,
      "loss": 0.1031,
      "step": 31072
    },
    {
      "epoch": 0.9064997957873855,
      "grad_norm": 0.7474740082202941,
      "learning_rate": 2.2751872650351614e-07,
      "loss": 0.1218,
      "step": 31073
    },
    {
      "epoch": 0.906528969018029,
      "grad_norm": 0.9404091708721877,
      "learning_rate": 2.2737785712198423e-07,
      "loss": 0.0952,
      "step": 31074
    },
    {
      "epoch": 0.9065581422486726,
      "grad_norm": 0.8599232304803229,
      "learning_rate": 2.2723703034928435e-07,
      "loss": 0.1165,
      "step": 31075
    },
    {
      "epoch": 0.9065873154793161,
      "grad_norm": 0.6120221323168671,
      "learning_rate": 2.2709624618667159e-07,
      "loss": 0.1024,
      "step": 31076
    },
    {
      "epoch": 0.9066164887099597,
      "grad_norm": 0.7979718073030723,
      "learning_rate": 2.269555046354055e-07,
      "loss": 0.1167,
      "step": 31077
    },
    {
      "epoch": 0.9066456619406033,
      "grad_norm": 1.2851241355542093,
      "learning_rate": 2.2681480569674007e-07,
      "loss": 0.1044,
      "step": 31078
    },
    {
      "epoch": 0.9066748351712469,
      "grad_norm": 0.8137264629751338,
      "learning_rate": 2.2667414937193378e-07,
      "loss": 0.1069,
      "step": 31079
    },
    {
      "epoch": 0.9067040084018905,
      "grad_norm": 0.8026044382238032,
      "learning_rate": 2.2653353566224058e-07,
      "loss": 0.0959,
      "step": 31080
    },
    {
      "epoch": 0.906733181632534,
      "grad_norm": 0.9254360566307075,
      "learning_rate": 2.2639296456891612e-07,
      "loss": 0.1094,
      "step": 31081
    },
    {
      "epoch": 0.9067623548631776,
      "grad_norm": 0.8374644847661314,
      "learning_rate": 2.262524360932161e-07,
      "loss": 0.1064,
      "step": 31082
    },
    {
      "epoch": 0.9067915280938211,
      "grad_norm": 0.8899565088495911,
      "learning_rate": 2.261119502363951e-07,
      "loss": 0.1023,
      "step": 31083
    },
    {
      "epoch": 0.9068207013244647,
      "grad_norm": 0.6607754550107457,
      "learning_rate": 2.2597150699970594e-07,
      "loss": 0.1093,
      "step": 31084
    },
    {
      "epoch": 0.9068498745551082,
      "grad_norm": 0.7933378432633355,
      "learning_rate": 2.258311063844043e-07,
      "loss": 0.1014,
      "step": 31085
    },
    {
      "epoch": 0.9068790477857518,
      "grad_norm": 0.9711117450350107,
      "learning_rate": 2.256907483917431e-07,
      "loss": 0.097,
      "step": 31086
    },
    {
      "epoch": 0.9069082210163953,
      "grad_norm": 0.7130955809590389,
      "learning_rate": 2.2555043302297464e-07,
      "loss": 0.1079,
      "step": 31087
    },
    {
      "epoch": 0.9069373942470389,
      "grad_norm": 0.9837946899650742,
      "learning_rate": 2.254101602793518e-07,
      "loss": 0.1113,
      "step": 31088
    },
    {
      "epoch": 0.9069665674776825,
      "grad_norm": 0.8483504739284345,
      "learning_rate": 2.252699301621286e-07,
      "loss": 0.1125,
      "step": 31089
    },
    {
      "epoch": 0.906995740708326,
      "grad_norm": 0.820332575581785,
      "learning_rate": 2.2512974267255517e-07,
      "loss": 0.1033,
      "step": 31090
    },
    {
      "epoch": 0.9070249139389696,
      "grad_norm": 0.7765796816180711,
      "learning_rate": 2.2498959781188267e-07,
      "loss": 0.1264,
      "step": 31091
    },
    {
      "epoch": 0.9070540871696131,
      "grad_norm": 0.8304216817017167,
      "learning_rate": 2.2484949558136405e-07,
      "loss": 0.1119,
      "step": 31092
    },
    {
      "epoch": 0.9070832604002568,
      "grad_norm": 0.859422112818967,
      "learning_rate": 2.2470943598224936e-07,
      "loss": 0.0962,
      "step": 31093
    },
    {
      "epoch": 0.9071124336309003,
      "grad_norm": 0.6354218403920345,
      "learning_rate": 2.245694190157871e-07,
      "loss": 0.1054,
      "step": 31094
    },
    {
      "epoch": 0.9071416068615439,
      "grad_norm": 0.7047697085873138,
      "learning_rate": 2.244294446832307e-07,
      "loss": 0.1197,
      "step": 31095
    },
    {
      "epoch": 0.9071707800921874,
      "grad_norm": 0.8612226207933252,
      "learning_rate": 2.24289512985828e-07,
      "loss": 0.1021,
      "step": 31096
    },
    {
      "epoch": 0.907199953322831,
      "grad_norm": 0.814602887405063,
      "learning_rate": 2.241496239248281e-07,
      "loss": 0.1203,
      "step": 31097
    },
    {
      "epoch": 0.9072291265534745,
      "grad_norm": 0.8157422395148158,
      "learning_rate": 2.24009777501481e-07,
      "loss": 0.1115,
      "step": 31098
    },
    {
      "epoch": 0.9072582997841181,
      "grad_norm": 0.9433895599957898,
      "learning_rate": 2.2386997371703413e-07,
      "loss": 0.1034,
      "step": 31099
    },
    {
      "epoch": 0.9072874730147616,
      "grad_norm": 0.9194868283420881,
      "learning_rate": 2.237302125727353e-07,
      "loss": 0.1002,
      "step": 31100
    },
    {
      "epoch": 0.9073166462454052,
      "grad_norm": 0.9143603771129105,
      "learning_rate": 2.2359049406983358e-07,
      "loss": 0.0983,
      "step": 31101
    },
    {
      "epoch": 0.9073458194760488,
      "grad_norm": 0.893875061229067,
      "learning_rate": 2.234508182095757e-07,
      "loss": 0.125,
      "step": 31102
    },
    {
      "epoch": 0.9073749927066923,
      "grad_norm": 0.8063523363762451,
      "learning_rate": 2.2331118499320904e-07,
      "loss": 0.116,
      "step": 31103
    },
    {
      "epoch": 0.9074041659373359,
      "grad_norm": 0.9565084545115901,
      "learning_rate": 2.2317159442197868e-07,
      "loss": 0.1179,
      "step": 31104
    },
    {
      "epoch": 0.9074333391679794,
      "grad_norm": 0.8637748883300185,
      "learning_rate": 2.2303204649713305e-07,
      "loss": 0.1137,
      "step": 31105
    },
    {
      "epoch": 0.9074625123986231,
      "grad_norm": 0.8914017938165445,
      "learning_rate": 2.228925412199162e-07,
      "loss": 0.1096,
      "step": 31106
    },
    {
      "epoch": 0.9074916856292666,
      "grad_norm": 0.7612287769685566,
      "learning_rate": 2.2275307859157546e-07,
      "loss": 0.0936,
      "step": 31107
    },
    {
      "epoch": 0.9075208588599102,
      "grad_norm": 0.9977039789906293,
      "learning_rate": 2.2261365861335372e-07,
      "loss": 0.12,
      "step": 31108
    },
    {
      "epoch": 0.9075500320905537,
      "grad_norm": 0.954415893967994,
      "learning_rate": 2.2247428128649717e-07,
      "loss": 0.1214,
      "step": 31109
    },
    {
      "epoch": 0.9075792053211973,
      "grad_norm": 0.7368586790932046,
      "learning_rate": 2.2233494661225042e-07,
      "loss": 0.0997,
      "step": 31110
    },
    {
      "epoch": 0.9076083785518408,
      "grad_norm": 0.8528026718875293,
      "learning_rate": 2.2219565459185578e-07,
      "loss": 0.0907,
      "step": 31111
    },
    {
      "epoch": 0.9076375517824844,
      "grad_norm": 0.7457147550208287,
      "learning_rate": 2.2205640522655725e-07,
      "loss": 0.1451,
      "step": 31112
    },
    {
      "epoch": 0.907666725013128,
      "grad_norm": 0.9140024106994731,
      "learning_rate": 2.2191719851759996e-07,
      "loss": 0.1118,
      "step": 31113
    },
    {
      "epoch": 0.9076958982437715,
      "grad_norm": 0.9136693296214689,
      "learning_rate": 2.2177803446622404e-07,
      "loss": 0.0968,
      "step": 31114
    },
    {
      "epoch": 0.9077250714744151,
      "grad_norm": 0.7244222394257598,
      "learning_rate": 2.2163891307367457e-07,
      "loss": 0.101,
      "step": 31115
    },
    {
      "epoch": 0.9077542447050586,
      "grad_norm": 0.8501133370857192,
      "learning_rate": 2.214998343411917e-07,
      "loss": 0.129,
      "step": 31116
    },
    {
      "epoch": 0.9077834179357022,
      "grad_norm": 0.8108296197082614,
      "learning_rate": 2.2136079827001666e-07,
      "loss": 0.1176,
      "step": 31117
    },
    {
      "epoch": 0.9078125911663457,
      "grad_norm": 0.7676470133077243,
      "learning_rate": 2.2122180486139232e-07,
      "loss": 0.1272,
      "step": 31118
    },
    {
      "epoch": 0.9078417643969893,
      "grad_norm": 0.9231091928417677,
      "learning_rate": 2.2108285411655938e-07,
      "loss": 0.1052,
      "step": 31119
    },
    {
      "epoch": 0.9078709376276329,
      "grad_norm": 0.9125915237622088,
      "learning_rate": 2.209439460367574e-07,
      "loss": 0.1269,
      "step": 31120
    },
    {
      "epoch": 0.9079001108582765,
      "grad_norm": 1.3371222585899003,
      "learning_rate": 2.2080508062322704e-07,
      "loss": 0.1045,
      "step": 31121
    },
    {
      "epoch": 0.90792928408892,
      "grad_norm": 0.7902946209997209,
      "learning_rate": 2.2066625787720842e-07,
      "loss": 0.1254,
      "step": 31122
    },
    {
      "epoch": 0.9079584573195636,
      "grad_norm": 0.6485061366155017,
      "learning_rate": 2.2052747779994055e-07,
      "loss": 0.1125,
      "step": 31123
    },
    {
      "epoch": 0.9079876305502071,
      "grad_norm": 1.104568038545473,
      "learning_rate": 2.2038874039266077e-07,
      "loss": 0.1224,
      "step": 31124
    },
    {
      "epoch": 0.9080168037808507,
      "grad_norm": 0.7622552739176846,
      "learning_rate": 2.202500456566109e-07,
      "loss": 0.1066,
      "step": 31125
    },
    {
      "epoch": 0.9080459770114943,
      "grad_norm": 0.7422091194608011,
      "learning_rate": 2.201113935930277e-07,
      "loss": 0.1074,
      "step": 31126
    },
    {
      "epoch": 0.9080751502421378,
      "grad_norm": 0.8907907529926969,
      "learning_rate": 2.1997278420314848e-07,
      "loss": 0.1117,
      "step": 31127
    },
    {
      "epoch": 0.9081043234727814,
      "grad_norm": 0.9067372482328319,
      "learning_rate": 2.198342174882112e-07,
      "loss": 0.1036,
      "step": 31128
    },
    {
      "epoch": 0.9081334967034249,
      "grad_norm": 0.7713778168392936,
      "learning_rate": 2.196956934494532e-07,
      "loss": 0.1138,
      "step": 31129
    },
    {
      "epoch": 0.9081626699340685,
      "grad_norm": 0.6670136652004125,
      "learning_rate": 2.1955721208811066e-07,
      "loss": 0.1027,
      "step": 31130
    },
    {
      "epoch": 0.908191843164712,
      "grad_norm": 0.8619621585420121,
      "learning_rate": 2.1941877340541984e-07,
      "loss": 0.1326,
      "step": 31131
    },
    {
      "epoch": 0.9082210163953556,
      "grad_norm": 0.8390283872276735,
      "learning_rate": 2.1928037740261753e-07,
      "loss": 0.1058,
      "step": 31132
    },
    {
      "epoch": 0.9082501896259992,
      "grad_norm": 0.7881674467248587,
      "learning_rate": 2.1914202408093887e-07,
      "loss": 0.105,
      "step": 31133
    },
    {
      "epoch": 0.9082793628566428,
      "grad_norm": 0.7892069065864602,
      "learning_rate": 2.1900371344161787e-07,
      "loss": 0.0975,
      "step": 31134
    },
    {
      "epoch": 0.9083085360872863,
      "grad_norm": 0.7709085175835558,
      "learning_rate": 2.1886544548589184e-07,
      "loss": 0.1178,
      "step": 31135
    },
    {
      "epoch": 0.9083377093179299,
      "grad_norm": 0.6235509734500673,
      "learning_rate": 2.187272202149926e-07,
      "loss": 0.1023,
      "step": 31136
    },
    {
      "epoch": 0.9083668825485735,
      "grad_norm": 0.6889896345070166,
      "learning_rate": 2.1858903763015583e-07,
      "loss": 0.1144,
      "step": 31137
    },
    {
      "epoch": 0.908396055779217,
      "grad_norm": 0.8440367631430685,
      "learning_rate": 2.184508977326144e-07,
      "loss": 0.1128,
      "step": 31138
    },
    {
      "epoch": 0.9084252290098606,
      "grad_norm": 0.8555631303594844,
      "learning_rate": 2.1831280052360238e-07,
      "loss": 0.1042,
      "step": 31139
    },
    {
      "epoch": 0.9084544022405041,
      "grad_norm": 0.9960188899937072,
      "learning_rate": 2.1817474600435262e-07,
      "loss": 0.1311,
      "step": 31140
    },
    {
      "epoch": 0.9084835754711477,
      "grad_norm": 0.7604910924176862,
      "learning_rate": 2.1803673417609584e-07,
      "loss": 0.1087,
      "step": 31141
    },
    {
      "epoch": 0.9085127487017912,
      "grad_norm": 0.8788464053917843,
      "learning_rate": 2.1789876504006601e-07,
      "loss": 0.1196,
      "step": 31142
    },
    {
      "epoch": 0.9085419219324348,
      "grad_norm": 0.8429605800895439,
      "learning_rate": 2.1776083859749498e-07,
      "loss": 0.0924,
      "step": 31143
    },
    {
      "epoch": 0.9085710951630783,
      "grad_norm": 0.9128799379517005,
      "learning_rate": 2.176229548496134e-07,
      "loss": 0.1232,
      "step": 31144
    },
    {
      "epoch": 0.9086002683937219,
      "grad_norm": 0.9937007117197812,
      "learning_rate": 2.1748511379765247e-07,
      "loss": 0.1039,
      "step": 31145
    },
    {
      "epoch": 0.9086294416243654,
      "grad_norm": 1.179439316220124,
      "learning_rate": 2.1734731544284293e-07,
      "loss": 0.1173,
      "step": 31146
    },
    {
      "epoch": 0.9086586148550091,
      "grad_norm": 0.8566170913287037,
      "learning_rate": 2.1720955978641433e-07,
      "loss": 0.1144,
      "step": 31147
    },
    {
      "epoch": 0.9086877880856526,
      "grad_norm": 0.8013007543558315,
      "learning_rate": 2.170718468295968e-07,
      "loss": 0.1093,
      "step": 31148
    },
    {
      "epoch": 0.9087169613162962,
      "grad_norm": 1.1102425067230988,
      "learning_rate": 2.1693417657362048e-07,
      "loss": 0.1153,
      "step": 31149
    },
    {
      "epoch": 0.9087461345469398,
      "grad_norm": 0.7486575885918128,
      "learning_rate": 2.1679654901971436e-07,
      "loss": 0.1024,
      "step": 31150
    },
    {
      "epoch": 0.9087753077775833,
      "grad_norm": 0.8329626115948966,
      "learning_rate": 2.1665896416910638e-07,
      "loss": 0.1171,
      "step": 31151
    },
    {
      "epoch": 0.9088044810082269,
      "grad_norm": 0.7603322985587933,
      "learning_rate": 2.165214220230255e-07,
      "loss": 0.1066,
      "step": 31152
    },
    {
      "epoch": 0.9088336542388704,
      "grad_norm": 0.7644941059406194,
      "learning_rate": 2.163839225826997e-07,
      "loss": 0.0884,
      "step": 31153
    },
    {
      "epoch": 0.908862827469514,
      "grad_norm": 0.8187194717508908,
      "learning_rate": 2.1624646584935515e-07,
      "loss": 0.1062,
      "step": 31154
    },
    {
      "epoch": 0.9088920007001575,
      "grad_norm": 0.8938377716105418,
      "learning_rate": 2.161090518242215e-07,
      "loss": 0.1072,
      "step": 31155
    },
    {
      "epoch": 0.9089211739308011,
      "grad_norm": 0.7978155172171842,
      "learning_rate": 2.159716805085238e-07,
      "loss": 0.1165,
      "step": 31156
    },
    {
      "epoch": 0.9089503471614446,
      "grad_norm": 0.7328847864974878,
      "learning_rate": 2.1583435190348833e-07,
      "loss": 0.096,
      "step": 31157
    },
    {
      "epoch": 0.9089795203920882,
      "grad_norm": 0.6593593177615167,
      "learning_rate": 2.1569706601034246e-07,
      "loss": 0.1249,
      "step": 31158
    },
    {
      "epoch": 0.9090086936227317,
      "grad_norm": 0.855227361992073,
      "learning_rate": 2.155598228303113e-07,
      "loss": 0.1173,
      "step": 31159
    },
    {
      "epoch": 0.9090378668533754,
      "grad_norm": 0.813050944243014,
      "learning_rate": 2.1542262236461887e-07,
      "loss": 0.0982,
      "step": 31160
    },
    {
      "epoch": 0.909067040084019,
      "grad_norm": 0.9921805848528199,
      "learning_rate": 2.152854646144914e-07,
      "loss": 0.098,
      "step": 31161
    },
    {
      "epoch": 0.9090962133146625,
      "grad_norm": 0.7507611875558359,
      "learning_rate": 2.151483495811535e-07,
      "loss": 0.1068,
      "step": 31162
    },
    {
      "epoch": 0.9091253865453061,
      "grad_norm": 0.822681609407171,
      "learning_rate": 2.1501127726582916e-07,
      "loss": 0.1313,
      "step": 31163
    },
    {
      "epoch": 0.9091545597759496,
      "grad_norm": 0.976159035189303,
      "learning_rate": 2.148742476697413e-07,
      "loss": 0.1231,
      "step": 31164
    },
    {
      "epoch": 0.9091837330065932,
      "grad_norm": 0.7753825272428042,
      "learning_rate": 2.1473726079411394e-07,
      "loss": 0.1147,
      "step": 31165
    },
    {
      "epoch": 0.9092129062372367,
      "grad_norm": 1.1635609513305534,
      "learning_rate": 2.1460031664017002e-07,
      "loss": 0.1184,
      "step": 31166
    },
    {
      "epoch": 0.9092420794678803,
      "grad_norm": 0.9433089715287637,
      "learning_rate": 2.1446341520913238e-07,
      "loss": 0.1118,
      "step": 31167
    },
    {
      "epoch": 0.9092712526985238,
      "grad_norm": 0.6221398742753601,
      "learning_rate": 2.1432655650222234e-07,
      "loss": 0.1106,
      "step": 31168
    },
    {
      "epoch": 0.9093004259291674,
      "grad_norm": 0.811485942160595,
      "learning_rate": 2.1418974052066276e-07,
      "loss": 0.1407,
      "step": 31169
    },
    {
      "epoch": 0.9093295991598109,
      "grad_norm": 0.8165279238446315,
      "learning_rate": 2.1405296726567493e-07,
      "loss": 0.125,
      "step": 31170
    },
    {
      "epoch": 0.9093587723904545,
      "grad_norm": 0.9310940873584964,
      "learning_rate": 2.139162367384784e-07,
      "loss": 0.1085,
      "step": 31171
    },
    {
      "epoch": 0.909387945621098,
      "grad_norm": 0.6928230019094935,
      "learning_rate": 2.1377954894029662e-07,
      "loss": 0.0968,
      "step": 31172
    },
    {
      "epoch": 0.9094171188517416,
      "grad_norm": 0.7918110263515088,
      "learning_rate": 2.1364290387234864e-07,
      "loss": 0.1196,
      "step": 31173
    },
    {
      "epoch": 0.9094462920823853,
      "grad_norm": 0.8102359101197163,
      "learning_rate": 2.135063015358535e-07,
      "loss": 0.1077,
      "step": 31174
    },
    {
      "epoch": 0.9094754653130288,
      "grad_norm": 0.9933870456354755,
      "learning_rate": 2.1336974193203185e-07,
      "loss": 0.0799,
      "step": 31175
    },
    {
      "epoch": 0.9095046385436724,
      "grad_norm": 0.768122671744198,
      "learning_rate": 2.132332250621022e-07,
      "loss": 0.1067,
      "step": 31176
    },
    {
      "epoch": 0.9095338117743159,
      "grad_norm": 0.8469302239599373,
      "learning_rate": 2.1309675092728353e-07,
      "loss": 0.0991,
      "step": 31177
    },
    {
      "epoch": 0.9095629850049595,
      "grad_norm": 0.8711834960374405,
      "learning_rate": 2.1296031952879437e-07,
      "loss": 0.1015,
      "step": 31178
    },
    {
      "epoch": 0.909592158235603,
      "grad_norm": 0.8359144380449869,
      "learning_rate": 2.1282393086785313e-07,
      "loss": 0.108,
      "step": 31179
    },
    {
      "epoch": 0.9096213314662466,
      "grad_norm": 0.771065559095617,
      "learning_rate": 2.1268758494567666e-07,
      "loss": 0.13,
      "step": 31180
    },
    {
      "epoch": 0.9096505046968901,
      "grad_norm": 0.8667820910889501,
      "learning_rate": 2.1255128176348283e-07,
      "loss": 0.1193,
      "step": 31181
    },
    {
      "epoch": 0.9096796779275337,
      "grad_norm": 0.7451349820479439,
      "learning_rate": 2.1241502132248848e-07,
      "loss": 0.1084,
      "step": 31182
    },
    {
      "epoch": 0.9097088511581772,
      "grad_norm": 0.7770633929395803,
      "learning_rate": 2.122788036239093e-07,
      "loss": 0.1314,
      "step": 31183
    },
    {
      "epoch": 0.9097380243888208,
      "grad_norm": 0.8238839116750369,
      "learning_rate": 2.1214262866896208e-07,
      "loss": 0.0927,
      "step": 31184
    },
    {
      "epoch": 0.9097671976194643,
      "grad_norm": 0.8207904619219264,
      "learning_rate": 2.1200649645886308e-07,
      "loss": 0.1215,
      "step": 31185
    },
    {
      "epoch": 0.9097963708501079,
      "grad_norm": 0.905414138949186,
      "learning_rate": 2.1187040699482685e-07,
      "loss": 0.1098,
      "step": 31186
    },
    {
      "epoch": 0.9098255440807514,
      "grad_norm": 0.8058840126251363,
      "learning_rate": 2.117343602780686e-07,
      "loss": 0.1078,
      "step": 31187
    },
    {
      "epoch": 0.9098547173113951,
      "grad_norm": 0.8401359675866493,
      "learning_rate": 2.1159835630980286e-07,
      "loss": 0.1306,
      "step": 31188
    },
    {
      "epoch": 0.9098838905420387,
      "grad_norm": 0.8153818141633182,
      "learning_rate": 2.1146239509124365e-07,
      "loss": 0.097,
      "step": 31189
    },
    {
      "epoch": 0.9099130637726822,
      "grad_norm": 0.9347981040478424,
      "learning_rate": 2.1132647662360562e-07,
      "loss": 0.0835,
      "step": 31190
    },
    {
      "epoch": 0.9099422370033258,
      "grad_norm": 0.88370779193522,
      "learning_rate": 2.1119060090810106e-07,
      "loss": 0.102,
      "step": 31191
    },
    {
      "epoch": 0.9099714102339693,
      "grad_norm": 0.9322960137650994,
      "learning_rate": 2.110547679459446e-07,
      "loss": 0.1082,
      "step": 31192
    },
    {
      "epoch": 0.9100005834646129,
      "grad_norm": 0.7839507296136591,
      "learning_rate": 2.1091897773834746e-07,
      "loss": 0.0998,
      "step": 31193
    },
    {
      "epoch": 0.9100297566952564,
      "grad_norm": 0.7603850833206659,
      "learning_rate": 2.1078323028652203e-07,
      "loss": 0.1055,
      "step": 31194
    },
    {
      "epoch": 0.9100589299259,
      "grad_norm": 0.6832395959495823,
      "learning_rate": 2.1064752559168067e-07,
      "loss": 0.1009,
      "step": 31195
    },
    {
      "epoch": 0.9100881031565435,
      "grad_norm": 0.8325881847933028,
      "learning_rate": 2.1051186365503517e-07,
      "loss": 0.128,
      "step": 31196
    },
    {
      "epoch": 0.9101172763871871,
      "grad_norm": 0.6845476421445118,
      "learning_rate": 2.1037624447779682e-07,
      "loss": 0.1121,
      "step": 31197
    },
    {
      "epoch": 0.9101464496178306,
      "grad_norm": 0.7073666026472823,
      "learning_rate": 2.1024066806117515e-07,
      "loss": 0.1075,
      "step": 31198
    },
    {
      "epoch": 0.9101756228484742,
      "grad_norm": 1.0179873335506506,
      "learning_rate": 2.1010513440638203e-07,
      "loss": 0.1141,
      "step": 31199
    },
    {
      "epoch": 0.9102047960791178,
      "grad_norm": 0.8199139157129577,
      "learning_rate": 2.09969643514627e-07,
      "loss": 0.0954,
      "step": 31200
    },
    {
      "epoch": 0.9102339693097614,
      "grad_norm": 1.125528827216129,
      "learning_rate": 2.0983419538711803e-07,
      "loss": 0.1098,
      "step": 31201
    },
    {
      "epoch": 0.910263142540405,
      "grad_norm": 0.7962419334239833,
      "learning_rate": 2.0969879002506742e-07,
      "loss": 0.1239,
      "step": 31202
    },
    {
      "epoch": 0.9102923157710485,
      "grad_norm": 0.7826052581298216,
      "learning_rate": 2.095634274296826e-07,
      "loss": 0.0953,
      "step": 31203
    },
    {
      "epoch": 0.9103214890016921,
      "grad_norm": 0.9102591702918901,
      "learning_rate": 2.0942810760217092e-07,
      "loss": 0.1033,
      "step": 31204
    },
    {
      "epoch": 0.9103506622323356,
      "grad_norm": 0.8062521425848791,
      "learning_rate": 2.0929283054374193e-07,
      "loss": 0.1047,
      "step": 31205
    },
    {
      "epoch": 0.9103798354629792,
      "grad_norm": 0.8918295512444327,
      "learning_rate": 2.0915759625560306e-07,
      "loss": 0.1228,
      "step": 31206
    },
    {
      "epoch": 0.9104090086936227,
      "grad_norm": 1.0958800628665835,
      "learning_rate": 2.0902240473896106e-07,
      "loss": 0.1021,
      "step": 31207
    },
    {
      "epoch": 0.9104381819242663,
      "grad_norm": 0.8979327014990276,
      "learning_rate": 2.0888725599502335e-07,
      "loss": 0.1171,
      "step": 31208
    },
    {
      "epoch": 0.9104673551549098,
      "grad_norm": 0.8538666372363959,
      "learning_rate": 2.0875215002499727e-07,
      "loss": 0.1012,
      "step": 31209
    },
    {
      "epoch": 0.9104965283855534,
      "grad_norm": 1.3116497012888577,
      "learning_rate": 2.0861708683008796e-07,
      "loss": 0.1114,
      "step": 31210
    },
    {
      "epoch": 0.910525701616197,
      "grad_norm": 0.8829732928385619,
      "learning_rate": 2.084820664115006e-07,
      "loss": 0.1142,
      "step": 31211
    },
    {
      "epoch": 0.9105548748468405,
      "grad_norm": 0.8813183058437354,
      "learning_rate": 2.0834708877044252e-07,
      "loss": 0.1215,
      "step": 31212
    },
    {
      "epoch": 0.910584048077484,
      "grad_norm": 0.8921581346672733,
      "learning_rate": 2.0821215390811722e-07,
      "loss": 0.1125,
      "step": 31213
    },
    {
      "epoch": 0.9106132213081276,
      "grad_norm": 0.8727611817341433,
      "learning_rate": 2.0807726182572984e-07,
      "loss": 0.1025,
      "step": 31214
    },
    {
      "epoch": 0.9106423945387713,
      "grad_norm": 0.7044576566301238,
      "learning_rate": 2.0794241252448554e-07,
      "loss": 0.107,
      "step": 31215
    },
    {
      "epoch": 0.9106715677694148,
      "grad_norm": 0.7849471879663136,
      "learning_rate": 2.0780760600558724e-07,
      "loss": 0.1085,
      "step": 31216
    },
    {
      "epoch": 0.9107007410000584,
      "grad_norm": 0.8331097134214188,
      "learning_rate": 2.0767284227023786e-07,
      "loss": 0.1132,
      "step": 31217
    },
    {
      "epoch": 0.9107299142307019,
      "grad_norm": 0.6959226899889344,
      "learning_rate": 2.0753812131964202e-07,
      "loss": 0.1337,
      "step": 31218
    },
    {
      "epoch": 0.9107590874613455,
      "grad_norm": 0.967610349999905,
      "learning_rate": 2.0740344315500093e-07,
      "loss": 0.1025,
      "step": 31219
    },
    {
      "epoch": 0.910788260691989,
      "grad_norm": 0.9678259605853304,
      "learning_rate": 2.0726880777751922e-07,
      "loss": 0.1306,
      "step": 31220
    },
    {
      "epoch": 0.9108174339226326,
      "grad_norm": 0.8755955747118267,
      "learning_rate": 2.0713421518839595e-07,
      "loss": 0.1215,
      "step": 31221
    },
    {
      "epoch": 0.9108466071532761,
      "grad_norm": 0.7458522518925815,
      "learning_rate": 2.0699966538883565e-07,
      "loss": 0.125,
      "step": 31222
    },
    {
      "epoch": 0.9108757803839197,
      "grad_norm": 0.9182134613131028,
      "learning_rate": 2.068651583800374e-07,
      "loss": 0.1196,
      "step": 31223
    },
    {
      "epoch": 0.9109049536145633,
      "grad_norm": 0.8866340951449045,
      "learning_rate": 2.0673069416320303e-07,
      "loss": 0.1035,
      "step": 31224
    },
    {
      "epoch": 0.9109341268452068,
      "grad_norm": 0.8933500504473191,
      "learning_rate": 2.065962727395321e-07,
      "loss": 0.1116,
      "step": 31225
    },
    {
      "epoch": 0.9109633000758504,
      "grad_norm": 0.7485970038879802,
      "learning_rate": 2.0646189411022588e-07,
      "loss": 0.1031,
      "step": 31226
    },
    {
      "epoch": 0.9109924733064939,
      "grad_norm": 0.8148759799355173,
      "learning_rate": 2.0632755827648397e-07,
      "loss": 0.1201,
      "step": 31227
    },
    {
      "epoch": 0.9110216465371376,
      "grad_norm": 0.7876042273869697,
      "learning_rate": 2.061932652395049e-07,
      "loss": 0.0992,
      "step": 31228
    },
    {
      "epoch": 0.9110508197677811,
      "grad_norm": 0.9348614192922458,
      "learning_rate": 2.060590150004882e-07,
      "loss": 0.1073,
      "step": 31229
    },
    {
      "epoch": 0.9110799929984247,
      "grad_norm": 0.8014039247808129,
      "learning_rate": 2.0592480756063237e-07,
      "loss": 0.1041,
      "step": 31230
    },
    {
      "epoch": 0.9111091662290682,
      "grad_norm": 0.8874166261027333,
      "learning_rate": 2.057906429211337e-07,
      "loss": 0.1228,
      "step": 31231
    },
    {
      "epoch": 0.9111383394597118,
      "grad_norm": 0.7143287227728006,
      "learning_rate": 2.0565652108319344e-07,
      "loss": 0.1181,
      "step": 31232
    },
    {
      "epoch": 0.9111675126903553,
      "grad_norm": 0.7281203535430113,
      "learning_rate": 2.055224420480073e-07,
      "loss": 0.1188,
      "step": 31233
    },
    {
      "epoch": 0.9111966859209989,
      "grad_norm": 0.763141740829035,
      "learning_rate": 2.0538840581677156e-07,
      "loss": 0.0935,
      "step": 31234
    },
    {
      "epoch": 0.9112258591516424,
      "grad_norm": 0.9045996524010984,
      "learning_rate": 2.052544123906841e-07,
      "loss": 0.112,
      "step": 31235
    },
    {
      "epoch": 0.911255032382286,
      "grad_norm": 0.7547713704952629,
      "learning_rate": 2.051204617709407e-07,
      "loss": 0.106,
      "step": 31236
    },
    {
      "epoch": 0.9112842056129296,
      "grad_norm": 1.0281046979497552,
      "learning_rate": 2.0498655395873645e-07,
      "loss": 0.1105,
      "step": 31237
    },
    {
      "epoch": 0.9113133788435731,
      "grad_norm": 0.9824969572957485,
      "learning_rate": 2.0485268895526766e-07,
      "loss": 0.1236,
      "step": 31238
    },
    {
      "epoch": 0.9113425520742167,
      "grad_norm": 0.8476378577194715,
      "learning_rate": 2.0471886676173002e-07,
      "loss": 0.0903,
      "step": 31239
    },
    {
      "epoch": 0.9113717253048602,
      "grad_norm": 0.7982521157682128,
      "learning_rate": 2.045850873793176e-07,
      "loss": 0.1229,
      "step": 31240
    },
    {
      "epoch": 0.9114008985355038,
      "grad_norm": 0.9620654671412657,
      "learning_rate": 2.044513508092244e-07,
      "loss": 0.1272,
      "step": 31241
    },
    {
      "epoch": 0.9114300717661474,
      "grad_norm": 0.7299604073402126,
      "learning_rate": 2.0431765705264505e-07,
      "loss": 0.0799,
      "step": 31242
    },
    {
      "epoch": 0.911459244996791,
      "grad_norm": 0.7012347569197621,
      "learning_rate": 2.0418400611077194e-07,
      "loss": 0.1011,
      "step": 31243
    },
    {
      "epoch": 0.9114884182274345,
      "grad_norm": 0.7623281519241979,
      "learning_rate": 2.0405039798479964e-07,
      "loss": 0.0953,
      "step": 31244
    },
    {
      "epoch": 0.9115175914580781,
      "grad_norm": 0.8991371714453419,
      "learning_rate": 2.039168326759211e-07,
      "loss": 0.1127,
      "step": 31245
    },
    {
      "epoch": 0.9115467646887216,
      "grad_norm": 1.036221718478377,
      "learning_rate": 2.0378331018532814e-07,
      "loss": 0.1039,
      "step": 31246
    },
    {
      "epoch": 0.9115759379193652,
      "grad_norm": 0.794707890425934,
      "learning_rate": 2.0364983051421204e-07,
      "loss": 0.0994,
      "step": 31247
    },
    {
      "epoch": 0.9116051111500088,
      "grad_norm": 0.9886205039691118,
      "learning_rate": 2.0351639366376575e-07,
      "loss": 0.1216,
      "step": 31248
    },
    {
      "epoch": 0.9116342843806523,
      "grad_norm": 0.9432682229351107,
      "learning_rate": 2.0338299963517993e-07,
      "loss": 0.1286,
      "step": 31249
    },
    {
      "epoch": 0.9116634576112959,
      "grad_norm": 0.8112227455028246,
      "learning_rate": 2.0324964842964589e-07,
      "loss": 0.0921,
      "step": 31250
    },
    {
      "epoch": 0.9116926308419394,
      "grad_norm": 0.7402480177845739,
      "learning_rate": 2.0311634004835324e-07,
      "loss": 0.1389,
      "step": 31251
    },
    {
      "epoch": 0.911721804072583,
      "grad_norm": 0.8635522058155916,
      "learning_rate": 2.0298307449249377e-07,
      "loss": 0.1064,
      "step": 31252
    },
    {
      "epoch": 0.9117509773032265,
      "grad_norm": 0.9832749334854058,
      "learning_rate": 2.02849851763256e-07,
      "loss": 0.1093,
      "step": 31253
    },
    {
      "epoch": 0.9117801505338701,
      "grad_norm": 1.0136663990298818,
      "learning_rate": 2.0271667186182897e-07,
      "loss": 0.1018,
      "step": 31254
    },
    {
      "epoch": 0.9118093237645137,
      "grad_norm": 0.7260314617532732,
      "learning_rate": 2.025835347894023e-07,
      "loss": 0.1097,
      "step": 31255
    },
    {
      "epoch": 0.9118384969951573,
      "grad_norm": 0.9468722254681623,
      "learning_rate": 2.0245044054716557e-07,
      "loss": 0.1295,
      "step": 31256
    },
    {
      "epoch": 0.9118676702258008,
      "grad_norm": 1.0606643474312076,
      "learning_rate": 2.023173891363056e-07,
      "loss": 0.1195,
      "step": 31257
    },
    {
      "epoch": 0.9118968434564444,
      "grad_norm": 0.9831047885398735,
      "learning_rate": 2.0218438055801038e-07,
      "loss": 0.1126,
      "step": 31258
    },
    {
      "epoch": 0.911926016687088,
      "grad_norm": 0.859583230323244,
      "learning_rate": 2.0205141481346835e-07,
      "loss": 0.1127,
      "step": 31259
    },
    {
      "epoch": 0.9119551899177315,
      "grad_norm": 0.8819680571912979,
      "learning_rate": 2.0191849190386526e-07,
      "loss": 0.1071,
      "step": 31260
    },
    {
      "epoch": 0.911984363148375,
      "grad_norm": 0.7757292719689446,
      "learning_rate": 2.0178561183038793e-07,
      "loss": 0.1175,
      "step": 31261
    },
    {
      "epoch": 0.9120135363790186,
      "grad_norm": 0.8075015649567129,
      "learning_rate": 2.0165277459422428e-07,
      "loss": 0.1094,
      "step": 31262
    },
    {
      "epoch": 0.9120427096096622,
      "grad_norm": 0.8592848982920227,
      "learning_rate": 2.0151998019655895e-07,
      "loss": 0.1265,
      "step": 31263
    },
    {
      "epoch": 0.9120718828403057,
      "grad_norm": 1.12269639300866,
      "learning_rate": 2.0138722863857762e-07,
      "loss": 0.1202,
      "step": 31264
    },
    {
      "epoch": 0.9121010560709493,
      "grad_norm": 0.8409779073218775,
      "learning_rate": 2.0125451992146606e-07,
      "loss": 0.1012,
      "step": 31265
    },
    {
      "epoch": 0.9121302293015928,
      "grad_norm": 0.7299165332371558,
      "learning_rate": 2.0112185404640827e-07,
      "loss": 0.102,
      "step": 31266
    },
    {
      "epoch": 0.9121594025322364,
      "grad_norm": 0.8173740844966054,
      "learning_rate": 2.0098923101458833e-07,
      "loss": 0.1041,
      "step": 31267
    },
    {
      "epoch": 0.9121885757628799,
      "grad_norm": 0.6394795081855168,
      "learning_rate": 2.0085665082719142e-07,
      "loss": 0.0918,
      "step": 31268
    },
    {
      "epoch": 0.9122177489935236,
      "grad_norm": 0.7054351889259936,
      "learning_rate": 2.00724113485401e-07,
      "loss": 0.1102,
      "step": 31269
    },
    {
      "epoch": 0.9122469222241671,
      "grad_norm": 0.8141005998471933,
      "learning_rate": 2.0059161899040001e-07,
      "loss": 0.1159,
      "step": 31270
    },
    {
      "epoch": 0.9122760954548107,
      "grad_norm": 0.9182301652023297,
      "learning_rate": 2.004591673433709e-07,
      "loss": 0.1069,
      "step": 31271
    },
    {
      "epoch": 0.9123052686854543,
      "grad_norm": 0.7432654349729967,
      "learning_rate": 2.003267585454971e-07,
      "loss": 0.11,
      "step": 31272
    },
    {
      "epoch": 0.9123344419160978,
      "grad_norm": 0.8057741532574665,
      "learning_rate": 2.0019439259795935e-07,
      "loss": 0.1161,
      "step": 31273
    },
    {
      "epoch": 0.9123636151467414,
      "grad_norm": 1.125519038579545,
      "learning_rate": 2.0006206950194063e-07,
      "loss": 0.1175,
      "step": 31274
    },
    {
      "epoch": 0.9123927883773849,
      "grad_norm": 0.8786397529807448,
      "learning_rate": 1.9992978925862215e-07,
      "loss": 0.113,
      "step": 31275
    },
    {
      "epoch": 0.9124219616080285,
      "grad_norm": 0.8508726654735528,
      "learning_rate": 1.9979755186918525e-07,
      "loss": 0.1181,
      "step": 31276
    },
    {
      "epoch": 0.912451134838672,
      "grad_norm": 0.916797857464992,
      "learning_rate": 1.9966535733480897e-07,
      "loss": 0.1113,
      "step": 31277
    },
    {
      "epoch": 0.9124803080693156,
      "grad_norm": 0.8103247524664272,
      "learning_rate": 1.9953320565667457e-07,
      "loss": 0.1079,
      "step": 31278
    },
    {
      "epoch": 0.9125094812999591,
      "grad_norm": 0.8911431660576439,
      "learning_rate": 1.9940109683596165e-07,
      "loss": 0.1151,
      "step": 31279
    },
    {
      "epoch": 0.9125386545306027,
      "grad_norm": 0.768540094033884,
      "learning_rate": 1.9926903087385042e-07,
      "loss": 0.111,
      "step": 31280
    },
    {
      "epoch": 0.9125678277612462,
      "grad_norm": 0.8326238160989231,
      "learning_rate": 1.9913700777151823e-07,
      "loss": 0.1025,
      "step": 31281
    },
    {
      "epoch": 0.9125970009918899,
      "grad_norm": 1.2324609342680006,
      "learning_rate": 1.9900502753014584e-07,
      "loss": 0.0899,
      "step": 31282
    },
    {
      "epoch": 0.9126261742225334,
      "grad_norm": 0.7990047517751675,
      "learning_rate": 1.988730901509106e-07,
      "loss": 0.1146,
      "step": 31283
    },
    {
      "epoch": 0.912655347453177,
      "grad_norm": 0.7296273030813718,
      "learning_rate": 1.987411956349894e-07,
      "loss": 0.1181,
      "step": 31284
    },
    {
      "epoch": 0.9126845206838206,
      "grad_norm": 0.8432077346000755,
      "learning_rate": 1.9860934398356013e-07,
      "loss": 0.117,
      "step": 31285
    },
    {
      "epoch": 0.9127136939144641,
      "grad_norm": 0.8749049709019081,
      "learning_rate": 1.9847753519780188e-07,
      "loss": 0.1075,
      "step": 31286
    },
    {
      "epoch": 0.9127428671451077,
      "grad_norm": 0.7978980913542358,
      "learning_rate": 1.983457692788898e-07,
      "loss": 0.1059,
      "step": 31287
    },
    {
      "epoch": 0.9127720403757512,
      "grad_norm": 0.7783015059887668,
      "learning_rate": 1.9821404622799966e-07,
      "loss": 0.1023,
      "step": 31288
    },
    {
      "epoch": 0.9128012136063948,
      "grad_norm": 0.7633953813240251,
      "learning_rate": 1.9808236604630882e-07,
      "loss": 0.1187,
      "step": 31289
    },
    {
      "epoch": 0.9128303868370383,
      "grad_norm": 0.6597935910223036,
      "learning_rate": 1.9795072873499245e-07,
      "loss": 0.0849,
      "step": 31290
    },
    {
      "epoch": 0.9128595600676819,
      "grad_norm": 0.8487592715030142,
      "learning_rate": 1.9781913429522403e-07,
      "loss": 0.1089,
      "step": 31291
    },
    {
      "epoch": 0.9128887332983254,
      "grad_norm": 0.9464287978142161,
      "learning_rate": 1.9768758272818155e-07,
      "loss": 0.1104,
      "step": 31292
    },
    {
      "epoch": 0.912917906528969,
      "grad_norm": 1.1129414968464362,
      "learning_rate": 1.9755607403503797e-07,
      "loss": 0.1168,
      "step": 31293
    },
    {
      "epoch": 0.9129470797596125,
      "grad_norm": 0.7808060210399638,
      "learning_rate": 1.9742460821696674e-07,
      "loss": 0.1284,
      "step": 31294
    },
    {
      "epoch": 0.9129762529902561,
      "grad_norm": 0.9637865048798887,
      "learning_rate": 1.972931852751425e-07,
      "loss": 0.1215,
      "step": 31295
    },
    {
      "epoch": 0.9130054262208998,
      "grad_norm": 0.6754609672884183,
      "learning_rate": 1.9716180521073823e-07,
      "loss": 0.0873,
      "step": 31296
    },
    {
      "epoch": 0.9130345994515433,
      "grad_norm": 0.7668595822596354,
      "learning_rate": 1.9703046802492687e-07,
      "loss": 0.1092,
      "step": 31297
    },
    {
      "epoch": 0.9130637726821869,
      "grad_norm": 0.8351857025860321,
      "learning_rate": 1.9689917371888024e-07,
      "loss": 0.1223,
      "step": 31298
    },
    {
      "epoch": 0.9130929459128304,
      "grad_norm": 0.9598557356092702,
      "learning_rate": 1.9676792229377184e-07,
      "loss": 0.1364,
      "step": 31299
    },
    {
      "epoch": 0.913122119143474,
      "grad_norm": 0.799230935536841,
      "learning_rate": 1.9663671375077298e-07,
      "loss": 0.1027,
      "step": 31300
    },
    {
      "epoch": 0.9131512923741175,
      "grad_norm": 0.6777469754416956,
      "learning_rate": 1.9650554809105438e-07,
      "loss": 0.1189,
      "step": 31301
    },
    {
      "epoch": 0.9131804656047611,
      "grad_norm": 0.9142433031010697,
      "learning_rate": 1.9637442531578787e-07,
      "loss": 0.1191,
      "step": 31302
    },
    {
      "epoch": 0.9132096388354046,
      "grad_norm": 0.906970151087242,
      "learning_rate": 1.962433454261431e-07,
      "loss": 0.1158,
      "step": 31303
    },
    {
      "epoch": 0.9132388120660482,
      "grad_norm": 0.8526403777218867,
      "learning_rate": 1.9611230842329133e-07,
      "loss": 0.0898,
      "step": 31304
    },
    {
      "epoch": 0.9132679852966917,
      "grad_norm": 0.5530258071756962,
      "learning_rate": 1.9598131430840272e-07,
      "loss": 0.1151,
      "step": 31305
    },
    {
      "epoch": 0.9132971585273353,
      "grad_norm": 0.7692251461411396,
      "learning_rate": 1.9585036308264582e-07,
      "loss": 0.094,
      "step": 31306
    },
    {
      "epoch": 0.9133263317579788,
      "grad_norm": 0.9758808536373275,
      "learning_rate": 1.9571945474718967e-07,
      "loss": 0.0996,
      "step": 31307
    },
    {
      "epoch": 0.9133555049886224,
      "grad_norm": 0.8248060555101733,
      "learning_rate": 1.955885893032039e-07,
      "loss": 0.1169,
      "step": 31308
    },
    {
      "epoch": 0.913384678219266,
      "grad_norm": 0.7753417412953666,
      "learning_rate": 1.954577667518559e-07,
      "loss": 0.1001,
      "step": 31309
    },
    {
      "epoch": 0.9134138514499096,
      "grad_norm": 0.8813853837993885,
      "learning_rate": 1.953269870943142e-07,
      "loss": 0.1028,
      "step": 31310
    },
    {
      "epoch": 0.9134430246805532,
      "grad_norm": 0.7867172767948929,
      "learning_rate": 1.9519625033174562e-07,
      "loss": 0.093,
      "step": 31311
    },
    {
      "epoch": 0.9134721979111967,
      "grad_norm": 0.6112482339889289,
      "learning_rate": 1.9506555646531867e-07,
      "loss": 0.1046,
      "step": 31312
    },
    {
      "epoch": 0.9135013711418403,
      "grad_norm": 0.9858858148612809,
      "learning_rate": 1.9493490549619965e-07,
      "loss": 0.1101,
      "step": 31313
    },
    {
      "epoch": 0.9135305443724838,
      "grad_norm": 0.8754013133805054,
      "learning_rate": 1.9480429742555374e-07,
      "loss": 0.1102,
      "step": 31314
    },
    {
      "epoch": 0.9135597176031274,
      "grad_norm": 0.9901464111457875,
      "learning_rate": 1.9467373225454832e-07,
      "loss": 0.1031,
      "step": 31315
    },
    {
      "epoch": 0.9135888908337709,
      "grad_norm": 0.7140144003983445,
      "learning_rate": 1.9454320998434918e-07,
      "loss": 0.1245,
      "step": 31316
    },
    {
      "epoch": 0.9136180640644145,
      "grad_norm": 0.9792188827829292,
      "learning_rate": 1.9441273061612087e-07,
      "loss": 0.1102,
      "step": 31317
    },
    {
      "epoch": 0.913647237295058,
      "grad_norm": 0.7464639286634547,
      "learning_rate": 1.9428229415102807e-07,
      "loss": 0.1128,
      "step": 31318
    },
    {
      "epoch": 0.9136764105257016,
      "grad_norm": 0.6927326790382387,
      "learning_rate": 1.9415190059023647e-07,
      "loss": 0.1166,
      "step": 31319
    },
    {
      "epoch": 0.9137055837563451,
      "grad_norm": 0.8408633978323875,
      "learning_rate": 1.9402154993490962e-07,
      "loss": 0.1198,
      "step": 31320
    },
    {
      "epoch": 0.9137347569869887,
      "grad_norm": 1.1110077527922588,
      "learning_rate": 1.9389124218620937e-07,
      "loss": 0.1052,
      "step": 31321
    },
    {
      "epoch": 0.9137639302176322,
      "grad_norm": 1.0907989034999677,
      "learning_rate": 1.9376097734530196e-07,
      "loss": 0.1177,
      "step": 31322
    },
    {
      "epoch": 0.9137931034482759,
      "grad_norm": 0.842923815502877,
      "learning_rate": 1.9363075541334986e-07,
      "loss": 0.1135,
      "step": 31323
    },
    {
      "epoch": 0.9138222766789195,
      "grad_norm": 0.8011246284710944,
      "learning_rate": 1.9350057639151377e-07,
      "loss": 0.1004,
      "step": 31324
    },
    {
      "epoch": 0.913851449909563,
      "grad_norm": 0.8584964658344592,
      "learning_rate": 1.933704402809583e-07,
      "loss": 0.113,
      "step": 31325
    },
    {
      "epoch": 0.9138806231402066,
      "grad_norm": 0.9878084824197102,
      "learning_rate": 1.9324034708284368e-07,
      "loss": 0.111,
      "step": 31326
    },
    {
      "epoch": 0.9139097963708501,
      "grad_norm": 1.0150341445004687,
      "learning_rate": 1.9311029679833115e-07,
      "loss": 0.1242,
      "step": 31327
    },
    {
      "epoch": 0.9139389696014937,
      "grad_norm": 0.749833970785854,
      "learning_rate": 1.929802894285826e-07,
      "loss": 0.0944,
      "step": 31328
    },
    {
      "epoch": 0.9139681428321372,
      "grad_norm": 0.8103429378662382,
      "learning_rate": 1.9285032497475876e-07,
      "loss": 0.1373,
      "step": 31329
    },
    {
      "epoch": 0.9139973160627808,
      "grad_norm": 0.6616439365790598,
      "learning_rate": 1.927204034380198e-07,
      "loss": 0.0981,
      "step": 31330
    },
    {
      "epoch": 0.9140264892934243,
      "grad_norm": 0.7877981710024274,
      "learning_rate": 1.9259052481952534e-07,
      "loss": 0.1087,
      "step": 31331
    },
    {
      "epoch": 0.9140556625240679,
      "grad_norm": 1.1921717562156433,
      "learning_rate": 1.9246068912043504e-07,
      "loss": 0.1068,
      "step": 31332
    },
    {
      "epoch": 0.9140848357547114,
      "grad_norm": 0.674428734976857,
      "learning_rate": 1.9233089634190794e-07,
      "loss": 0.1142,
      "step": 31333
    },
    {
      "epoch": 0.914114008985355,
      "grad_norm": 0.9418225486542654,
      "learning_rate": 1.9220114648510259e-07,
      "loss": 0.136,
      "step": 31334
    },
    {
      "epoch": 0.9141431822159986,
      "grad_norm": 0.819986673302682,
      "learning_rate": 1.9207143955117858e-07,
      "loss": 0.0952,
      "step": 31335
    },
    {
      "epoch": 0.9141723554466422,
      "grad_norm": 0.9348377373039002,
      "learning_rate": 1.919417755412928e-07,
      "loss": 0.114,
      "step": 31336
    },
    {
      "epoch": 0.9142015286772858,
      "grad_norm": 0.833627907377239,
      "learning_rate": 1.918121544566026e-07,
      "loss": 0.1363,
      "step": 31337
    },
    {
      "epoch": 0.9142307019079293,
      "grad_norm": 0.9850077049898679,
      "learning_rate": 1.9168257629826604e-07,
      "loss": 0.123,
      "step": 31338
    },
    {
      "epoch": 0.9142598751385729,
      "grad_norm": 0.6545767772470965,
      "learning_rate": 1.9155304106743932e-07,
      "loss": 0.0977,
      "step": 31339
    },
    {
      "epoch": 0.9142890483692164,
      "grad_norm": 1.2227961436415566,
      "learning_rate": 1.9142354876527935e-07,
      "loss": 0.1189,
      "step": 31340
    },
    {
      "epoch": 0.91431822159986,
      "grad_norm": 0.6897087811078801,
      "learning_rate": 1.9129409939294185e-07,
      "loss": 0.1173,
      "step": 31341
    },
    {
      "epoch": 0.9143473948305035,
      "grad_norm": 1.1786573132138842,
      "learning_rate": 1.9116469295158312e-07,
      "loss": 0.1249,
      "step": 31342
    },
    {
      "epoch": 0.9143765680611471,
      "grad_norm": 0.9441161938170052,
      "learning_rate": 1.9103532944235781e-07,
      "loss": 0.0996,
      "step": 31343
    },
    {
      "epoch": 0.9144057412917906,
      "grad_norm": 0.7107152263994747,
      "learning_rate": 1.9090600886642109e-07,
      "loss": 0.1031,
      "step": 31344
    },
    {
      "epoch": 0.9144349145224342,
      "grad_norm": 0.9142669952677954,
      "learning_rate": 1.9077673122492702e-07,
      "loss": 0.107,
      "step": 31345
    },
    {
      "epoch": 0.9144640877530777,
      "grad_norm": 0.8485615026657264,
      "learning_rate": 1.906474965190308e-07,
      "loss": 0.1267,
      "step": 31346
    },
    {
      "epoch": 0.9144932609837213,
      "grad_norm": 0.6411471885901804,
      "learning_rate": 1.9051830474988597e-07,
      "loss": 0.1069,
      "step": 31347
    },
    {
      "epoch": 0.9145224342143649,
      "grad_norm": 0.8686416439086416,
      "learning_rate": 1.9038915591864493e-07,
      "loss": 0.1032,
      "step": 31348
    },
    {
      "epoch": 0.9145516074450084,
      "grad_norm": 0.8858180694698129,
      "learning_rate": 1.9026005002646174e-07,
      "loss": 0.1239,
      "step": 31349
    },
    {
      "epoch": 0.9145807806756521,
      "grad_norm": 0.7773263938989426,
      "learning_rate": 1.9013098707448885e-07,
      "loss": 0.1051,
      "step": 31350
    },
    {
      "epoch": 0.9146099539062956,
      "grad_norm": 0.9766792669376659,
      "learning_rate": 1.9000196706387697e-07,
      "loss": 0.1083,
      "step": 31351
    },
    {
      "epoch": 0.9146391271369392,
      "grad_norm": 0.8726764601428715,
      "learning_rate": 1.8987298999578076e-07,
      "loss": 0.1061,
      "step": 31352
    },
    {
      "epoch": 0.9146683003675827,
      "grad_norm": 0.8840248440184265,
      "learning_rate": 1.897440558713498e-07,
      "loss": 0.0984,
      "step": 31353
    },
    {
      "epoch": 0.9146974735982263,
      "grad_norm": 0.8373228389039238,
      "learning_rate": 1.8961516469173547e-07,
      "loss": 0.1066,
      "step": 31354
    },
    {
      "epoch": 0.9147266468288698,
      "grad_norm": 0.771490868722007,
      "learning_rate": 1.89486316458089e-07,
      "loss": 0.1168,
      "step": 31355
    },
    {
      "epoch": 0.9147558200595134,
      "grad_norm": 0.894960769756662,
      "learning_rate": 1.8935751117156008e-07,
      "loss": 0.1254,
      "step": 31356
    },
    {
      "epoch": 0.914784993290157,
      "grad_norm": 0.7806313323142715,
      "learning_rate": 1.8922874883329888e-07,
      "loss": 0.0965,
      "step": 31357
    },
    {
      "epoch": 0.9148141665208005,
      "grad_norm": 0.6557312970640437,
      "learning_rate": 1.8910002944445448e-07,
      "loss": 0.1062,
      "step": 31358
    },
    {
      "epoch": 0.914843339751444,
      "grad_norm": 0.6793510244450381,
      "learning_rate": 1.8897135300617708e-07,
      "loss": 0.1101,
      "step": 31359
    },
    {
      "epoch": 0.9148725129820876,
      "grad_norm": 0.9202773589720421,
      "learning_rate": 1.888427195196152e-07,
      "loss": 0.1169,
      "step": 31360
    },
    {
      "epoch": 0.9149016862127312,
      "grad_norm": 0.6585451880768798,
      "learning_rate": 1.8871412898591678e-07,
      "loss": 0.1034,
      "step": 31361
    },
    {
      "epoch": 0.9149308594433747,
      "grad_norm": 0.947702899641979,
      "learning_rate": 1.8858558140622928e-07,
      "loss": 0.1189,
      "step": 31362
    },
    {
      "epoch": 0.9149600326740184,
      "grad_norm": 0.7113266911015266,
      "learning_rate": 1.8845707678170232e-07,
      "loss": 0.0929,
      "step": 31363
    },
    {
      "epoch": 0.9149892059046619,
      "grad_norm": 0.7883772252736441,
      "learning_rate": 1.883286151134811e-07,
      "loss": 0.1026,
      "step": 31364
    },
    {
      "epoch": 0.9150183791353055,
      "grad_norm": 0.6954327366328499,
      "learning_rate": 1.8820019640271414e-07,
      "loss": 0.1091,
      "step": 31365
    },
    {
      "epoch": 0.915047552365949,
      "grad_norm": 0.8511500789027039,
      "learning_rate": 1.880718206505472e-07,
      "loss": 0.1197,
      "step": 31366
    },
    {
      "epoch": 0.9150767255965926,
      "grad_norm": 1.0019832829025392,
      "learning_rate": 1.8794348785812545e-07,
      "loss": 0.1044,
      "step": 31367
    },
    {
      "epoch": 0.9151058988272361,
      "grad_norm": 0.790297852887975,
      "learning_rate": 1.8781519802659577e-07,
      "loss": 0.1213,
      "step": 31368
    },
    {
      "epoch": 0.9151350720578797,
      "grad_norm": 0.6716434249793395,
      "learning_rate": 1.876869511571039e-07,
      "loss": 0.1045,
      "step": 31369
    },
    {
      "epoch": 0.9151642452885232,
      "grad_norm": 0.922704496735113,
      "learning_rate": 1.8755874725079394e-07,
      "loss": 0.1276,
      "step": 31370
    },
    {
      "epoch": 0.9151934185191668,
      "grad_norm": 0.7622788651070335,
      "learning_rate": 1.8743058630880993e-07,
      "loss": 0.0853,
      "step": 31371
    },
    {
      "epoch": 0.9152225917498104,
      "grad_norm": 0.8387169493634814,
      "learning_rate": 1.8730246833229772e-07,
      "loss": 0.1099,
      "step": 31372
    },
    {
      "epoch": 0.9152517649804539,
      "grad_norm": 0.6763488529931732,
      "learning_rate": 1.8717439332240017e-07,
      "loss": 0.111,
      "step": 31373
    },
    {
      "epoch": 0.9152809382110975,
      "grad_norm": 0.8951673166253913,
      "learning_rate": 1.8704636128025978e-07,
      "loss": 0.1099,
      "step": 31374
    },
    {
      "epoch": 0.915310111441741,
      "grad_norm": 0.8297555505125109,
      "learning_rate": 1.8691837220702113e-07,
      "loss": 0.1041,
      "step": 31375
    },
    {
      "epoch": 0.9153392846723846,
      "grad_norm": 0.9665615025615534,
      "learning_rate": 1.8679042610382613e-07,
      "loss": 0.1201,
      "step": 31376
    },
    {
      "epoch": 0.9153684579030282,
      "grad_norm": 0.9289330274306193,
      "learning_rate": 1.8666252297181776e-07,
      "loss": 0.1184,
      "step": 31377
    },
    {
      "epoch": 0.9153976311336718,
      "grad_norm": 1.014049638060475,
      "learning_rate": 1.865346628121367e-07,
      "loss": 0.1215,
      "step": 31378
    },
    {
      "epoch": 0.9154268043643153,
      "grad_norm": 0.9387538042209965,
      "learning_rate": 1.8640684562592548e-07,
      "loss": 0.1107,
      "step": 31379
    },
    {
      "epoch": 0.9154559775949589,
      "grad_norm": 0.7807314757172528,
      "learning_rate": 1.8627907141432422e-07,
      "loss": 0.143,
      "step": 31380
    },
    {
      "epoch": 0.9154851508256024,
      "grad_norm": 0.8032832425950431,
      "learning_rate": 1.8615134017847426e-07,
      "loss": 0.1143,
      "step": 31381
    },
    {
      "epoch": 0.915514324056246,
      "grad_norm": 0.9384815469369953,
      "learning_rate": 1.8602365191951687e-07,
      "loss": 0.1098,
      "step": 31382
    },
    {
      "epoch": 0.9155434972868896,
      "grad_norm": 1.1222528703641497,
      "learning_rate": 1.858960066385912e-07,
      "loss": 0.0858,
      "step": 31383
    },
    {
      "epoch": 0.9155726705175331,
      "grad_norm": 0.7615160881334084,
      "learning_rate": 1.8576840433683574e-07,
      "loss": 0.1175,
      "step": 31384
    },
    {
      "epoch": 0.9156018437481767,
      "grad_norm": 1.0065607394870228,
      "learning_rate": 1.8564084501539181e-07,
      "loss": 0.0987,
      "step": 31385
    },
    {
      "epoch": 0.9156310169788202,
      "grad_norm": 0.9255308678433942,
      "learning_rate": 1.8551332867539572e-07,
      "loss": 0.0969,
      "step": 31386
    },
    {
      "epoch": 0.9156601902094638,
      "grad_norm": 0.930750996718049,
      "learning_rate": 1.8538585531798881e-07,
      "loss": 0.1391,
      "step": 31387
    },
    {
      "epoch": 0.9156893634401073,
      "grad_norm": 0.8228319762684867,
      "learning_rate": 1.852584249443068e-07,
      "loss": 0.1052,
      "step": 31388
    },
    {
      "epoch": 0.9157185366707509,
      "grad_norm": 0.8235112268013984,
      "learning_rate": 1.8513103755548822e-07,
      "loss": 0.0928,
      "step": 31389
    },
    {
      "epoch": 0.9157477099013945,
      "grad_norm": 0.7145745651504304,
      "learning_rate": 1.8500369315267108e-07,
      "loss": 0.0915,
      "step": 31390
    },
    {
      "epoch": 0.9157768831320381,
      "grad_norm": 0.7421971743172168,
      "learning_rate": 1.8487639173699057e-07,
      "loss": 0.1101,
      "step": 31391
    },
    {
      "epoch": 0.9158060563626816,
      "grad_norm": 0.9867879653682757,
      "learning_rate": 1.847491333095841e-07,
      "loss": 0.1162,
      "step": 31392
    },
    {
      "epoch": 0.9158352295933252,
      "grad_norm": 0.843077088935799,
      "learning_rate": 1.8462191787158855e-07,
      "loss": 0.0963,
      "step": 31393
    },
    {
      "epoch": 0.9158644028239687,
      "grad_norm": 0.7220111672319596,
      "learning_rate": 1.8449474542413858e-07,
      "loss": 0.1055,
      "step": 31394
    },
    {
      "epoch": 0.9158935760546123,
      "grad_norm": 0.8945048968438312,
      "learning_rate": 1.843676159683705e-07,
      "loss": 0.1162,
      "step": 31395
    },
    {
      "epoch": 0.9159227492852559,
      "grad_norm": 0.8184157935974352,
      "learning_rate": 1.8424052950541892e-07,
      "loss": 0.1001,
      "step": 31396
    },
    {
      "epoch": 0.9159519225158994,
      "grad_norm": 0.9280777362350853,
      "learning_rate": 1.8411348603641743e-07,
      "loss": 0.1094,
      "step": 31397
    },
    {
      "epoch": 0.915981095746543,
      "grad_norm": 0.7392705056496901,
      "learning_rate": 1.8398648556250122e-07,
      "loss": 0.1035,
      "step": 31398
    },
    {
      "epoch": 0.9160102689771865,
      "grad_norm": 0.8085351305562487,
      "learning_rate": 1.8385952808480434e-07,
      "loss": 0.1192,
      "step": 31399
    },
    {
      "epoch": 0.9160394422078301,
      "grad_norm": 0.927287870661808,
      "learning_rate": 1.8373261360445983e-07,
      "loss": 0.1095,
      "step": 31400
    },
    {
      "epoch": 0.9160686154384736,
      "grad_norm": 0.8250135573400705,
      "learning_rate": 1.8360574212260063e-07,
      "loss": 0.121,
      "step": 31401
    },
    {
      "epoch": 0.9160977886691172,
      "grad_norm": 0.8695358461892482,
      "learning_rate": 1.8347891364035974e-07,
      "loss": 0.1178,
      "step": 31402
    },
    {
      "epoch": 0.9161269618997607,
      "grad_norm": 0.9794220199794262,
      "learning_rate": 1.833521281588696e-07,
      "loss": 0.1341,
      "step": 31403
    },
    {
      "epoch": 0.9161561351304044,
      "grad_norm": 0.9536576826698145,
      "learning_rate": 1.8322538567926152e-07,
      "loss": 0.1042,
      "step": 31404
    },
    {
      "epoch": 0.916185308361048,
      "grad_norm": 0.773398645910214,
      "learning_rate": 1.830986862026668e-07,
      "loss": 0.1068,
      "step": 31405
    },
    {
      "epoch": 0.9162144815916915,
      "grad_norm": 0.834665420479556,
      "learning_rate": 1.8297202973021787e-07,
      "loss": 0.1204,
      "step": 31406
    },
    {
      "epoch": 0.916243654822335,
      "grad_norm": 1.12506311343205,
      "learning_rate": 1.8284541626304496e-07,
      "loss": 0.0881,
      "step": 31407
    },
    {
      "epoch": 0.9162728280529786,
      "grad_norm": 0.7596616311493886,
      "learning_rate": 1.8271884580227716e-07,
      "loss": 0.1263,
      "step": 31408
    },
    {
      "epoch": 0.9163020012836222,
      "grad_norm": 0.7707059374752233,
      "learning_rate": 1.8259231834904689e-07,
      "loss": 0.1047,
      "step": 31409
    },
    {
      "epoch": 0.9163311745142657,
      "grad_norm": 0.7854974420155609,
      "learning_rate": 1.8246583390448102e-07,
      "loss": 0.1105,
      "step": 31410
    },
    {
      "epoch": 0.9163603477449093,
      "grad_norm": 0.8750514815335055,
      "learning_rate": 1.823393924697109e-07,
      "loss": 0.0884,
      "step": 31411
    },
    {
      "epoch": 0.9163895209755528,
      "grad_norm": 0.947681277184962,
      "learning_rate": 1.8221299404586445e-07,
      "loss": 0.1129,
      "step": 31412
    },
    {
      "epoch": 0.9164186942061964,
      "grad_norm": 0.9993209605677023,
      "learning_rate": 1.8208663863407083e-07,
      "loss": 0.121,
      "step": 31413
    },
    {
      "epoch": 0.9164478674368399,
      "grad_norm": 0.8050607414401262,
      "learning_rate": 1.819603262354569e-07,
      "loss": 0.0999,
      "step": 31414
    },
    {
      "epoch": 0.9164770406674835,
      "grad_norm": 0.8008160192839413,
      "learning_rate": 1.818340568511512e-07,
      "loss": 0.1305,
      "step": 31415
    },
    {
      "epoch": 0.916506213898127,
      "grad_norm": 0.875097615447291,
      "learning_rate": 1.8170783048228057e-07,
      "loss": 0.1149,
      "step": 31416
    },
    {
      "epoch": 0.9165353871287707,
      "grad_norm": 0.8675509678375348,
      "learning_rate": 1.8158164712997306e-07,
      "loss": 0.1067,
      "step": 31417
    },
    {
      "epoch": 0.9165645603594142,
      "grad_norm": 0.7910112209571196,
      "learning_rate": 1.8145550679535329e-07,
      "loss": 0.1177,
      "step": 31418
    },
    {
      "epoch": 0.9165937335900578,
      "grad_norm": 0.8016354528718228,
      "learning_rate": 1.8132940947954924e-07,
      "loss": 0.1315,
      "step": 31419
    },
    {
      "epoch": 0.9166229068207014,
      "grad_norm": 0.981834198358148,
      "learning_rate": 1.8120335518368614e-07,
      "loss": 0.1057,
      "step": 31420
    },
    {
      "epoch": 0.9166520800513449,
      "grad_norm": 0.823750186569676,
      "learning_rate": 1.8107734390888809e-07,
      "loss": 0.1175,
      "step": 31421
    },
    {
      "epoch": 0.9166812532819885,
      "grad_norm": 0.9265404485278054,
      "learning_rate": 1.809513756562814e-07,
      "loss": 0.1117,
      "step": 31422
    },
    {
      "epoch": 0.916710426512632,
      "grad_norm": 0.8217818917326766,
      "learning_rate": 1.808254504269913e-07,
      "loss": 0.0889,
      "step": 31423
    },
    {
      "epoch": 0.9167395997432756,
      "grad_norm": 0.7781014107621114,
      "learning_rate": 1.8069956822214018e-07,
      "loss": 0.108,
      "step": 31424
    },
    {
      "epoch": 0.9167687729739191,
      "grad_norm": 0.8778356214948702,
      "learning_rate": 1.805737290428533e-07,
      "loss": 0.1091,
      "step": 31425
    },
    {
      "epoch": 0.9167979462045627,
      "grad_norm": 0.6689752395268365,
      "learning_rate": 1.804479328902542e-07,
      "loss": 0.1044,
      "step": 31426
    },
    {
      "epoch": 0.9168271194352062,
      "grad_norm": 0.9883087329036726,
      "learning_rate": 1.8032217976546418e-07,
      "loss": 0.1035,
      "step": 31427
    },
    {
      "epoch": 0.9168562926658498,
      "grad_norm": 0.928078768675476,
      "learning_rate": 1.801964696696079e-07,
      "loss": 0.1163,
      "step": 31428
    },
    {
      "epoch": 0.9168854658964933,
      "grad_norm": 0.8242058167547271,
      "learning_rate": 1.8007080260380727e-07,
      "loss": 0.1364,
      "step": 31429
    },
    {
      "epoch": 0.9169146391271369,
      "grad_norm": 0.9462811646216613,
      "learning_rate": 1.7994517856918359e-07,
      "loss": 0.1061,
      "step": 31430
    },
    {
      "epoch": 0.9169438123577806,
      "grad_norm": 0.721219548430664,
      "learning_rate": 1.7981959756685875e-07,
      "loss": 0.1091,
      "step": 31431
    },
    {
      "epoch": 0.9169729855884241,
      "grad_norm": 0.848426286099118,
      "learning_rate": 1.7969405959795404e-07,
      "loss": 0.1011,
      "step": 31432
    },
    {
      "epoch": 0.9170021588190677,
      "grad_norm": 0.7847501625730108,
      "learning_rate": 1.7956856466358974e-07,
      "loss": 0.1313,
      "step": 31433
    },
    {
      "epoch": 0.9170313320497112,
      "grad_norm": 0.8439447752271549,
      "learning_rate": 1.7944311276488656e-07,
      "loss": 0.1344,
      "step": 31434
    },
    {
      "epoch": 0.9170605052803548,
      "grad_norm": 0.886942847069556,
      "learning_rate": 1.7931770390296423e-07,
      "loss": 0.1318,
      "step": 31435
    },
    {
      "epoch": 0.9170896785109983,
      "grad_norm": 0.9204156745615503,
      "learning_rate": 1.7919233807894343e-07,
      "loss": 0.0984,
      "step": 31436
    },
    {
      "epoch": 0.9171188517416419,
      "grad_norm": 0.9114936709997368,
      "learning_rate": 1.7906701529394277e-07,
      "loss": 0.1084,
      "step": 31437
    },
    {
      "epoch": 0.9171480249722854,
      "grad_norm": 1.0084742204612493,
      "learning_rate": 1.7894173554907967e-07,
      "loss": 0.1289,
      "step": 31438
    },
    {
      "epoch": 0.917177198202929,
      "grad_norm": 1.079762771034041,
      "learning_rate": 1.7881649884547492e-07,
      "loss": 0.0979,
      "step": 31439
    },
    {
      "epoch": 0.9172063714335725,
      "grad_norm": 0.8832897504243681,
      "learning_rate": 1.7869130518424538e-07,
      "loss": 0.1246,
      "step": 31440
    },
    {
      "epoch": 0.9172355446642161,
      "grad_norm": 0.7904649565451088,
      "learning_rate": 1.785661545665085e-07,
      "loss": 0.1275,
      "step": 31441
    },
    {
      "epoch": 0.9172647178948596,
      "grad_norm": 0.9835395240531951,
      "learning_rate": 1.7844104699338228e-07,
      "loss": 0.1032,
      "step": 31442
    },
    {
      "epoch": 0.9172938911255032,
      "grad_norm": 0.7235054219206365,
      "learning_rate": 1.783159824659836e-07,
      "loss": 0.0801,
      "step": 31443
    },
    {
      "epoch": 0.9173230643561467,
      "grad_norm": 0.829841051846416,
      "learning_rate": 1.7819096098542876e-07,
      "loss": 0.1281,
      "step": 31444
    },
    {
      "epoch": 0.9173522375867904,
      "grad_norm": 0.9330809494992617,
      "learning_rate": 1.7806598255283415e-07,
      "loss": 0.1278,
      "step": 31445
    },
    {
      "epoch": 0.917381410817434,
      "grad_norm": 0.7713121767513006,
      "learning_rate": 1.7794104716931437e-07,
      "loss": 0.0967,
      "step": 31446
    },
    {
      "epoch": 0.9174105840480775,
      "grad_norm": 0.7427712675609687,
      "learning_rate": 1.778161548359869e-07,
      "loss": 0.0993,
      "step": 31447
    },
    {
      "epoch": 0.9174397572787211,
      "grad_norm": 0.7985539646604289,
      "learning_rate": 1.7769130555396476e-07,
      "loss": 0.1081,
      "step": 31448
    },
    {
      "epoch": 0.9174689305093646,
      "grad_norm": 0.745822331716892,
      "learning_rate": 1.775664993243642e-07,
      "loss": 0.1267,
      "step": 31449
    },
    {
      "epoch": 0.9174981037400082,
      "grad_norm": 0.6665221359510427,
      "learning_rate": 1.7744173614829885e-07,
      "loss": 0.1091,
      "step": 31450
    },
    {
      "epoch": 0.9175272769706517,
      "grad_norm": 0.7913045617358165,
      "learning_rate": 1.7731701602688168e-07,
      "loss": 0.1176,
      "step": 31451
    },
    {
      "epoch": 0.9175564502012953,
      "grad_norm": 0.8724016247432931,
      "learning_rate": 1.7719233896122733e-07,
      "loss": 0.1168,
      "step": 31452
    },
    {
      "epoch": 0.9175856234319388,
      "grad_norm": 0.8798424447611202,
      "learning_rate": 1.7706770495244884e-07,
      "loss": 0.1,
      "step": 31453
    },
    {
      "epoch": 0.9176147966625824,
      "grad_norm": 0.9129538205177682,
      "learning_rate": 1.7694311400165753e-07,
      "loss": 0.1011,
      "step": 31454
    },
    {
      "epoch": 0.9176439698932259,
      "grad_norm": 0.7626105906124192,
      "learning_rate": 1.768185661099675e-07,
      "loss": 0.1063,
      "step": 31455
    },
    {
      "epoch": 0.9176731431238695,
      "grad_norm": 0.9591430922037872,
      "learning_rate": 1.766940612784901e-07,
      "loss": 0.1225,
      "step": 31456
    },
    {
      "epoch": 0.917702316354513,
      "grad_norm": 0.8823448331257628,
      "learning_rate": 1.7656959950833608e-07,
      "loss": 0.0987,
      "step": 31457
    },
    {
      "epoch": 0.9177314895851567,
      "grad_norm": 0.6726474082165065,
      "learning_rate": 1.7644518080061735e-07,
      "loss": 0.1016,
      "step": 31458
    },
    {
      "epoch": 0.9177606628158003,
      "grad_norm": 0.9953081672778715,
      "learning_rate": 1.7632080515644523e-07,
      "loss": 0.1399,
      "step": 31459
    },
    {
      "epoch": 0.9177898360464438,
      "grad_norm": 1.0520249333383132,
      "learning_rate": 1.761964725769294e-07,
      "loss": 0.0915,
      "step": 31460
    },
    {
      "epoch": 0.9178190092770874,
      "grad_norm": 0.7312001105015509,
      "learning_rate": 1.7607218306317896e-07,
      "loss": 0.09,
      "step": 31461
    },
    {
      "epoch": 0.9178481825077309,
      "grad_norm": 1.160657903192297,
      "learning_rate": 1.7594793661630526e-07,
      "loss": 0.1025,
      "step": 31462
    },
    {
      "epoch": 0.9178773557383745,
      "grad_norm": 0.8381060018723425,
      "learning_rate": 1.7582373323741686e-07,
      "loss": 0.1258,
      "step": 31463
    },
    {
      "epoch": 0.917906528969018,
      "grad_norm": 0.8742176123686887,
      "learning_rate": 1.756995729276223e-07,
      "loss": 0.118,
      "step": 31464
    },
    {
      "epoch": 0.9179357021996616,
      "grad_norm": 0.6928916489087649,
      "learning_rate": 1.7557545568803014e-07,
      "loss": 0.1119,
      "step": 31465
    },
    {
      "epoch": 0.9179648754303051,
      "grad_norm": 0.7375050529998568,
      "learning_rate": 1.7545138151974895e-07,
      "loss": 0.1241,
      "step": 31466
    },
    {
      "epoch": 0.9179940486609487,
      "grad_norm": 1.665030995723457,
      "learning_rate": 1.7532735042388617e-07,
      "loss": 0.1073,
      "step": 31467
    },
    {
      "epoch": 0.9180232218915922,
      "grad_norm": 0.8717161143773435,
      "learning_rate": 1.7520336240154867e-07,
      "loss": 0.095,
      "step": 31468
    },
    {
      "epoch": 0.9180523951222358,
      "grad_norm": 0.870272092108847,
      "learning_rate": 1.7507941745384394e-07,
      "loss": 0.1024,
      "step": 31469
    },
    {
      "epoch": 0.9180815683528794,
      "grad_norm": 0.940668690315811,
      "learning_rate": 1.749555155818783e-07,
      "loss": 0.1045,
      "step": 31470
    },
    {
      "epoch": 0.9181107415835229,
      "grad_norm": 0.6265336298709263,
      "learning_rate": 1.748316567867575e-07,
      "loss": 0.0996,
      "step": 31471
    },
    {
      "epoch": 0.9181399148141666,
      "grad_norm": 0.8199566455457374,
      "learning_rate": 1.7470784106958903e-07,
      "loss": 0.1025,
      "step": 31472
    },
    {
      "epoch": 0.9181690880448101,
      "grad_norm": 0.8002080741057659,
      "learning_rate": 1.7458406843147647e-07,
      "loss": 0.134,
      "step": 31473
    },
    {
      "epoch": 0.9181982612754537,
      "grad_norm": 0.8333135633377379,
      "learning_rate": 1.7446033887352498e-07,
      "loss": 0.121,
      "step": 31474
    },
    {
      "epoch": 0.9182274345060972,
      "grad_norm": 0.8724163486160376,
      "learning_rate": 1.7433665239684038e-07,
      "loss": 0.1135,
      "step": 31475
    },
    {
      "epoch": 0.9182566077367408,
      "grad_norm": 0.7607951231388181,
      "learning_rate": 1.742130090025257e-07,
      "loss": 0.1351,
      "step": 31476
    },
    {
      "epoch": 0.9182857809673843,
      "grad_norm": 0.866945882665733,
      "learning_rate": 1.7408940869168556e-07,
      "loss": 0.1216,
      "step": 31477
    },
    {
      "epoch": 0.9183149541980279,
      "grad_norm": 0.901991959683693,
      "learning_rate": 1.7396585146542245e-07,
      "loss": 0.1126,
      "step": 31478
    },
    {
      "epoch": 0.9183441274286714,
      "grad_norm": 0.7517972495851978,
      "learning_rate": 1.73842337324841e-07,
      "loss": 0.1195,
      "step": 31479
    },
    {
      "epoch": 0.918373300659315,
      "grad_norm": 0.9077314335051077,
      "learning_rate": 1.7371886627104317e-07,
      "loss": 0.1282,
      "step": 31480
    },
    {
      "epoch": 0.9184024738899585,
      "grad_norm": 0.719193242973939,
      "learning_rate": 1.7359543830513027e-07,
      "loss": 0.1065,
      "step": 31481
    },
    {
      "epoch": 0.9184316471206021,
      "grad_norm": 0.9507072030075335,
      "learning_rate": 1.734720534282053e-07,
      "loss": 0.1167,
      "step": 31482
    },
    {
      "epoch": 0.9184608203512457,
      "grad_norm": 0.8482801160596916,
      "learning_rate": 1.7334871164137013e-07,
      "loss": 0.1118,
      "step": 31483
    },
    {
      "epoch": 0.9184899935818892,
      "grad_norm": 0.7959575808115095,
      "learning_rate": 1.7322541294572505e-07,
      "loss": 0.1274,
      "step": 31484
    },
    {
      "epoch": 0.9185191668125329,
      "grad_norm": 0.8047235547623197,
      "learning_rate": 1.731021573423719e-07,
      "loss": 0.093,
      "step": 31485
    },
    {
      "epoch": 0.9185483400431764,
      "grad_norm": 0.7961735318616792,
      "learning_rate": 1.7297894483240984e-07,
      "loss": 0.1374,
      "step": 31486
    },
    {
      "epoch": 0.91857751327382,
      "grad_norm": 0.6548827610332445,
      "learning_rate": 1.7285577541693966e-07,
      "loss": 0.118,
      "step": 31487
    },
    {
      "epoch": 0.9186066865044635,
      "grad_norm": 0.8131044492909674,
      "learning_rate": 1.7273264909706043e-07,
      "loss": 0.117,
      "step": 31488
    },
    {
      "epoch": 0.9186358597351071,
      "grad_norm": 0.7032682394703215,
      "learning_rate": 1.7260956587387245e-07,
      "loss": 0.1158,
      "step": 31489
    },
    {
      "epoch": 0.9186650329657506,
      "grad_norm": 0.8437393397753546,
      "learning_rate": 1.7248652574847367e-07,
      "loss": 0.1176,
      "step": 31490
    },
    {
      "epoch": 0.9186942061963942,
      "grad_norm": 0.8217591225454703,
      "learning_rate": 1.7236352872196216e-07,
      "loss": 0.1155,
      "step": 31491
    },
    {
      "epoch": 0.9187233794270377,
      "grad_norm": 0.7316706954362365,
      "learning_rate": 1.72240574795437e-07,
      "loss": 0.1017,
      "step": 31492
    },
    {
      "epoch": 0.9187525526576813,
      "grad_norm": 0.7630958289001806,
      "learning_rate": 1.721176639699962e-07,
      "loss": 0.1119,
      "step": 31493
    },
    {
      "epoch": 0.9187817258883249,
      "grad_norm": 0.9153354818383868,
      "learning_rate": 1.7199479624673498e-07,
      "loss": 0.1219,
      "step": 31494
    },
    {
      "epoch": 0.9188108991189684,
      "grad_norm": 0.898372333136174,
      "learning_rate": 1.7187197162675252e-07,
      "loss": 0.1242,
      "step": 31495
    },
    {
      "epoch": 0.918840072349612,
      "grad_norm": 0.9563962679915206,
      "learning_rate": 1.7174919011114455e-07,
      "loss": 0.131,
      "step": 31496
    },
    {
      "epoch": 0.9188692455802555,
      "grad_norm": 0.9190040010802156,
      "learning_rate": 1.7162645170100746e-07,
      "loss": 0.1096,
      "step": 31497
    },
    {
      "epoch": 0.9188984188108991,
      "grad_norm": 0.9572626954655384,
      "learning_rate": 1.715037563974359e-07,
      "loss": 0.1085,
      "step": 31498
    },
    {
      "epoch": 0.9189275920415427,
      "grad_norm": 0.839287077203837,
      "learning_rate": 1.7138110420152676e-07,
      "loss": 0.106,
      "step": 31499
    },
    {
      "epoch": 0.9189567652721863,
      "grad_norm": 0.827734241275159,
      "learning_rate": 1.712584951143742e-07,
      "loss": 0.1079,
      "step": 31500
    },
    {
      "epoch": 0.9189859385028298,
      "grad_norm": 0.8295905862872595,
      "learning_rate": 1.711359291370729e-07,
      "loss": 0.1103,
      "step": 31501
    },
    {
      "epoch": 0.9190151117334734,
      "grad_norm": 0.9027734528844755,
      "learning_rate": 1.7101340627071804e-07,
      "loss": 0.0934,
      "step": 31502
    },
    {
      "epoch": 0.9190442849641169,
      "grad_norm": 0.8794472254828379,
      "learning_rate": 1.708909265164027e-07,
      "loss": 0.0978,
      "step": 31503
    },
    {
      "epoch": 0.9190734581947605,
      "grad_norm": 0.7755228381034948,
      "learning_rate": 1.7076848987521933e-07,
      "loss": 0.1121,
      "step": 31504
    },
    {
      "epoch": 0.919102631425404,
      "grad_norm": 0.8267621166910578,
      "learning_rate": 1.706460963482631e-07,
      "loss": 0.1178,
      "step": 31505
    },
    {
      "epoch": 0.9191318046560476,
      "grad_norm": 0.9177075774316056,
      "learning_rate": 1.7052374593662492e-07,
      "loss": 0.1037,
      "step": 31506
    },
    {
      "epoch": 0.9191609778866912,
      "grad_norm": 1.0129853969583604,
      "learning_rate": 1.7040143864139825e-07,
      "loss": 0.1022,
      "step": 31507
    },
    {
      "epoch": 0.9191901511173347,
      "grad_norm": 0.9014312892017495,
      "learning_rate": 1.7027917446367447e-07,
      "loss": 0.1245,
      "step": 31508
    },
    {
      "epoch": 0.9192193243479783,
      "grad_norm": 0.9386499379140206,
      "learning_rate": 1.7015695340454552e-07,
      "loss": 0.0983,
      "step": 31509
    },
    {
      "epoch": 0.9192484975786218,
      "grad_norm": 1.12692336892604,
      "learning_rate": 1.7003477546510217e-07,
      "loss": 0.1137,
      "step": 31510
    },
    {
      "epoch": 0.9192776708092654,
      "grad_norm": 0.8824618055641689,
      "learning_rate": 1.699126406464352e-07,
      "loss": 0.1049,
      "step": 31511
    },
    {
      "epoch": 0.919306844039909,
      "grad_norm": 0.7877852882881141,
      "learning_rate": 1.6979054894963486e-07,
      "loss": 0.1189,
      "step": 31512
    },
    {
      "epoch": 0.9193360172705526,
      "grad_norm": 1.2274157217371404,
      "learning_rate": 1.6966850037579196e-07,
      "loss": 0.1175,
      "step": 31513
    },
    {
      "epoch": 0.9193651905011961,
      "grad_norm": 0.8035846976351573,
      "learning_rate": 1.6954649492599507e-07,
      "loss": 0.1046,
      "step": 31514
    },
    {
      "epoch": 0.9193943637318397,
      "grad_norm": 0.7122975510823543,
      "learning_rate": 1.6942453260133497e-07,
      "loss": 0.1246,
      "step": 31515
    },
    {
      "epoch": 0.9194235369624832,
      "grad_norm": 0.8803363288778558,
      "learning_rate": 1.693026134028991e-07,
      "loss": 0.1143,
      "step": 31516
    },
    {
      "epoch": 0.9194527101931268,
      "grad_norm": 0.8039546970931579,
      "learning_rate": 1.6918073733177554e-07,
      "loss": 0.0965,
      "step": 31517
    },
    {
      "epoch": 0.9194818834237704,
      "grad_norm": 0.8576721603822425,
      "learning_rate": 1.6905890438905338e-07,
      "loss": 0.1258,
      "step": 31518
    },
    {
      "epoch": 0.9195110566544139,
      "grad_norm": 0.9595800848719139,
      "learning_rate": 1.6893711457582064e-07,
      "loss": 0.1138,
      "step": 31519
    },
    {
      "epoch": 0.9195402298850575,
      "grad_norm": 0.7927836583174125,
      "learning_rate": 1.6881536789316422e-07,
      "loss": 0.1252,
      "step": 31520
    },
    {
      "epoch": 0.919569403115701,
      "grad_norm": 0.7674785520740165,
      "learning_rate": 1.6869366434216993e-07,
      "loss": 0.0888,
      "step": 31521
    },
    {
      "epoch": 0.9195985763463446,
      "grad_norm": 0.8523844486809289,
      "learning_rate": 1.6857200392392635e-07,
      "loss": 0.1336,
      "step": 31522
    },
    {
      "epoch": 0.9196277495769881,
      "grad_norm": 0.8203410314200877,
      "learning_rate": 1.684503866395182e-07,
      "loss": 0.104,
      "step": 31523
    },
    {
      "epoch": 0.9196569228076317,
      "grad_norm": 1.0542733564281603,
      "learning_rate": 1.683288124900312e-07,
      "loss": 0.1096,
      "step": 31524
    },
    {
      "epoch": 0.9196860960382752,
      "grad_norm": 0.8929220501399922,
      "learning_rate": 1.682072814765512e-07,
      "loss": 0.1203,
      "step": 31525
    },
    {
      "epoch": 0.9197152692689189,
      "grad_norm": 0.9651186180879701,
      "learning_rate": 1.6808579360016343e-07,
      "loss": 0.1158,
      "step": 31526
    },
    {
      "epoch": 0.9197444424995624,
      "grad_norm": 0.9016379181013806,
      "learning_rate": 1.6796434886195256e-07,
      "loss": 0.1308,
      "step": 31527
    },
    {
      "epoch": 0.919773615730206,
      "grad_norm": 0.8093931544203599,
      "learning_rate": 1.6784294726300166e-07,
      "loss": 0.1066,
      "step": 31528
    },
    {
      "epoch": 0.9198027889608495,
      "grad_norm": 0.8010758614571243,
      "learning_rate": 1.6772158880439594e-07,
      "loss": 0.0962,
      "step": 31529
    },
    {
      "epoch": 0.9198319621914931,
      "grad_norm": 0.8112238434650944,
      "learning_rate": 1.6760027348721785e-07,
      "loss": 0.1079,
      "step": 31530
    },
    {
      "epoch": 0.9198611354221367,
      "grad_norm": 0.8691568220414676,
      "learning_rate": 1.6747900131255102e-07,
      "loss": 0.1323,
      "step": 31531
    },
    {
      "epoch": 0.9198903086527802,
      "grad_norm": 0.7292816387389669,
      "learning_rate": 1.6735777228147842e-07,
      "loss": 0.1101,
      "step": 31532
    },
    {
      "epoch": 0.9199194818834238,
      "grad_norm": 0.7633244929188836,
      "learning_rate": 1.6723658639508257e-07,
      "loss": 0.1122,
      "step": 31533
    },
    {
      "epoch": 0.9199486551140673,
      "grad_norm": 1.0608089952896051,
      "learning_rate": 1.6711544365444367e-07,
      "loss": 0.1312,
      "step": 31534
    },
    {
      "epoch": 0.9199778283447109,
      "grad_norm": 0.8281099543989686,
      "learning_rate": 1.669943440606453e-07,
      "loss": 0.1193,
      "step": 31535
    },
    {
      "epoch": 0.9200070015753544,
      "grad_norm": 0.7216056971282659,
      "learning_rate": 1.668732876147666e-07,
      "loss": 0.1164,
      "step": 31536
    },
    {
      "epoch": 0.920036174805998,
      "grad_norm": 0.7507557229293694,
      "learning_rate": 1.6675227431789009e-07,
      "loss": 0.1079,
      "step": 31537
    },
    {
      "epoch": 0.9200653480366415,
      "grad_norm": 0.8338918816597148,
      "learning_rate": 1.666313041710954e-07,
      "loss": 0.1182,
      "step": 31538
    },
    {
      "epoch": 0.9200945212672852,
      "grad_norm": 0.7484368235347966,
      "learning_rate": 1.6651037717546281e-07,
      "loss": 0.1114,
      "step": 31539
    },
    {
      "epoch": 0.9201236944979287,
      "grad_norm": 0.95879932611911,
      "learning_rate": 1.66389493332072e-07,
      "loss": 0.0989,
      "step": 31540
    },
    {
      "epoch": 0.9201528677285723,
      "grad_norm": 0.8048620998555469,
      "learning_rate": 1.6626865264200097e-07,
      "loss": 0.0904,
      "step": 31541
    },
    {
      "epoch": 0.9201820409592159,
      "grad_norm": 0.8090812381727864,
      "learning_rate": 1.6614785510633002e-07,
      "loss": 0.1112,
      "step": 31542
    },
    {
      "epoch": 0.9202112141898594,
      "grad_norm": 0.6789774324548513,
      "learning_rate": 1.6602710072613715e-07,
      "loss": 0.1107,
      "step": 31543
    },
    {
      "epoch": 0.920240387420503,
      "grad_norm": 1.1174933592605976,
      "learning_rate": 1.6590638950249982e-07,
      "loss": 0.1067,
      "step": 31544
    },
    {
      "epoch": 0.9202695606511465,
      "grad_norm": 0.7905427998092589,
      "learning_rate": 1.657857214364972e-07,
      "loss": 0.1029,
      "step": 31545
    },
    {
      "epoch": 0.9202987338817901,
      "grad_norm": 0.9453729966948536,
      "learning_rate": 1.656650965292056e-07,
      "loss": 0.1135,
      "step": 31546
    },
    {
      "epoch": 0.9203279071124336,
      "grad_norm": 0.8392599511973942,
      "learning_rate": 1.6554451478170085e-07,
      "loss": 0.1242,
      "step": 31547
    },
    {
      "epoch": 0.9203570803430772,
      "grad_norm": 0.5591915862294311,
      "learning_rate": 1.65423976195061e-07,
      "loss": 0.0989,
      "step": 31548
    },
    {
      "epoch": 0.9203862535737207,
      "grad_norm": 0.9739971293006879,
      "learning_rate": 1.653034807703624e-07,
      "loss": 0.1131,
      "step": 31549
    },
    {
      "epoch": 0.9204154268043643,
      "grad_norm": 0.8296837190875574,
      "learning_rate": 1.651830285086803e-07,
      "loss": 0.0911,
      "step": 31550
    },
    {
      "epoch": 0.9204446000350078,
      "grad_norm": 0.7338766359170078,
      "learning_rate": 1.650626194110888e-07,
      "loss": 0.1122,
      "step": 31551
    },
    {
      "epoch": 0.9204737732656514,
      "grad_norm": 0.9081774787175226,
      "learning_rate": 1.6494225347866543e-07,
      "loss": 0.1236,
      "step": 31552
    },
    {
      "epoch": 0.920502946496295,
      "grad_norm": 1.1705428651744463,
      "learning_rate": 1.6482193071248264e-07,
      "loss": 0.1046,
      "step": 31553
    },
    {
      "epoch": 0.9205321197269386,
      "grad_norm": 0.7857833395439243,
      "learning_rate": 1.6470165111361514e-07,
      "loss": 0.1125,
      "step": 31554
    },
    {
      "epoch": 0.9205612929575822,
      "grad_norm": 0.7386346121884209,
      "learning_rate": 1.6458141468313705e-07,
      "loss": 0.1016,
      "step": 31555
    },
    {
      "epoch": 0.9205904661882257,
      "grad_norm": 0.9834408545286342,
      "learning_rate": 1.644612214221225e-07,
      "loss": 0.097,
      "step": 31556
    },
    {
      "epoch": 0.9206196394188693,
      "grad_norm": 0.8244546780176799,
      "learning_rate": 1.6434107133164402e-07,
      "loss": 0.0978,
      "step": 31557
    },
    {
      "epoch": 0.9206488126495128,
      "grad_norm": 0.7044948386735506,
      "learning_rate": 1.6422096441277292e-07,
      "loss": 0.1168,
      "step": 31558
    },
    {
      "epoch": 0.9206779858801564,
      "grad_norm": 0.820548093941712,
      "learning_rate": 1.641009006665828e-07,
      "loss": 0.1092,
      "step": 31559
    },
    {
      "epoch": 0.9207071591107999,
      "grad_norm": 0.963253671691206,
      "learning_rate": 1.6398088009414616e-07,
      "loss": 0.1206,
      "step": 31560
    },
    {
      "epoch": 0.9207363323414435,
      "grad_norm": 0.7454790088032144,
      "learning_rate": 1.6386090269653322e-07,
      "loss": 0.1006,
      "step": 31561
    },
    {
      "epoch": 0.920765505572087,
      "grad_norm": 0.8767584099696616,
      "learning_rate": 1.637409684748159e-07,
      "loss": 0.1263,
      "step": 31562
    },
    {
      "epoch": 0.9207946788027306,
      "grad_norm": 0.808533787829623,
      "learning_rate": 1.6362107743006507e-07,
      "loss": 0.1131,
      "step": 31563
    },
    {
      "epoch": 0.9208238520333741,
      "grad_norm": 0.8194379113041063,
      "learning_rate": 1.6350122956335035e-07,
      "loss": 0.1065,
      "step": 31564
    },
    {
      "epoch": 0.9208530252640177,
      "grad_norm": 0.6831599856540861,
      "learning_rate": 1.633814248757415e-07,
      "loss": 0.1133,
      "step": 31565
    },
    {
      "epoch": 0.9208821984946614,
      "grad_norm": 0.784619164142305,
      "learning_rate": 1.6326166336830985e-07,
      "loss": 0.1008,
      "step": 31566
    },
    {
      "epoch": 0.9209113717253049,
      "grad_norm": 0.8300106688756127,
      "learning_rate": 1.6314194504212287e-07,
      "loss": 0.1122,
      "step": 31567
    },
    {
      "epoch": 0.9209405449559485,
      "grad_norm": 0.8176939053385948,
      "learning_rate": 1.6302226989824976e-07,
      "loss": 0.1037,
      "step": 31568
    },
    {
      "epoch": 0.920969718186592,
      "grad_norm": 0.6868479781352995,
      "learning_rate": 1.6290263793775962e-07,
      "loss": 0.1127,
      "step": 31569
    },
    {
      "epoch": 0.9209988914172356,
      "grad_norm": 0.7253165730691489,
      "learning_rate": 1.6278304916171995e-07,
      "loss": 0.1474,
      "step": 31570
    },
    {
      "epoch": 0.9210280646478791,
      "grad_norm": 0.7775839000205057,
      "learning_rate": 1.6266350357119765e-07,
      "loss": 0.1039,
      "step": 31571
    },
    {
      "epoch": 0.9210572378785227,
      "grad_norm": 0.6301357172828128,
      "learning_rate": 1.6254400116726133e-07,
      "loss": 0.0894,
      "step": 31572
    },
    {
      "epoch": 0.9210864111091662,
      "grad_norm": 0.8042375168854584,
      "learning_rate": 1.624245419509779e-07,
      "loss": 0.109,
      "step": 31573
    },
    {
      "epoch": 0.9211155843398098,
      "grad_norm": 0.8331649296079149,
      "learning_rate": 1.6230512592341263e-07,
      "loss": 0.1196,
      "step": 31574
    },
    {
      "epoch": 0.9211447575704533,
      "grad_norm": 0.7302033742048548,
      "learning_rate": 1.62185753085633e-07,
      "loss": 0.1123,
      "step": 31575
    },
    {
      "epoch": 0.9211739308010969,
      "grad_norm": 0.9631957716768756,
      "learning_rate": 1.6206642343870427e-07,
      "loss": 0.1191,
      "step": 31576
    },
    {
      "epoch": 0.9212031040317404,
      "grad_norm": 0.9610146717478596,
      "learning_rate": 1.6194713698369057e-07,
      "loss": 0.1328,
      "step": 31577
    },
    {
      "epoch": 0.921232277262384,
      "grad_norm": 0.6558015522080165,
      "learning_rate": 1.618278937216583e-07,
      "loss": 0.1077,
      "step": 31578
    },
    {
      "epoch": 0.9212614504930275,
      "grad_norm": 0.7104860258779933,
      "learning_rate": 1.6170869365367158e-07,
      "loss": 0.1055,
      "step": 31579
    },
    {
      "epoch": 0.9212906237236712,
      "grad_norm": 0.8201677319994595,
      "learning_rate": 1.6158953678079515e-07,
      "loss": 0.12,
      "step": 31580
    },
    {
      "epoch": 0.9213197969543148,
      "grad_norm": 1.1372388083613736,
      "learning_rate": 1.61470423104092e-07,
      "loss": 0.1189,
      "step": 31581
    },
    {
      "epoch": 0.9213489701849583,
      "grad_norm": 2.693507197644251,
      "learning_rate": 1.6135135262462577e-07,
      "loss": 0.0907,
      "step": 31582
    },
    {
      "epoch": 0.9213781434156019,
      "grad_norm": 0.8207127203449859,
      "learning_rate": 1.6123232534345946e-07,
      "loss": 0.1374,
      "step": 31583
    },
    {
      "epoch": 0.9214073166462454,
      "grad_norm": 0.7659081986364706,
      "learning_rate": 1.6111334126165611e-07,
      "loss": 0.1311,
      "step": 31584
    },
    {
      "epoch": 0.921436489876889,
      "grad_norm": 0.8268252639756813,
      "learning_rate": 1.609944003802777e-07,
      "loss": 0.1046,
      "step": 31585
    },
    {
      "epoch": 0.9214656631075325,
      "grad_norm": 0.7053070577774613,
      "learning_rate": 1.608755027003861e-07,
      "loss": 0.098,
      "step": 31586
    },
    {
      "epoch": 0.9214948363381761,
      "grad_norm": 0.7269891485623712,
      "learning_rate": 1.607566482230427e-07,
      "loss": 0.104,
      "step": 31587
    },
    {
      "epoch": 0.9215240095688196,
      "grad_norm": 0.7715076818314276,
      "learning_rate": 1.606378369493089e-07,
      "loss": 0.1146,
      "step": 31588
    },
    {
      "epoch": 0.9215531827994632,
      "grad_norm": 0.9468994735051999,
      "learning_rate": 1.6051906888024494e-07,
      "loss": 0.13,
      "step": 31589
    },
    {
      "epoch": 0.9215823560301067,
      "grad_norm": 0.9145211725139617,
      "learning_rate": 1.6040034401691163e-07,
      "loss": 0.1309,
      "step": 31590
    },
    {
      "epoch": 0.9216115292607503,
      "grad_norm": 0.8534177807613806,
      "learning_rate": 1.6028166236036868e-07,
      "loss": 0.1193,
      "step": 31591
    },
    {
      "epoch": 0.9216407024913938,
      "grad_norm": 0.640869112211262,
      "learning_rate": 1.601630239116758e-07,
      "loss": 0.0996,
      "step": 31592
    },
    {
      "epoch": 0.9216698757220375,
      "grad_norm": 0.8611550427653011,
      "learning_rate": 1.6004442867189217e-07,
      "loss": 0.1234,
      "step": 31593
    },
    {
      "epoch": 0.9216990489526811,
      "grad_norm": 0.8012778753854138,
      "learning_rate": 1.5992587664207638e-07,
      "loss": 0.0917,
      "step": 31594
    },
    {
      "epoch": 0.9217282221833246,
      "grad_norm": 0.7906154408473884,
      "learning_rate": 1.5980736782328644e-07,
      "loss": 0.1189,
      "step": 31595
    },
    {
      "epoch": 0.9217573954139682,
      "grad_norm": 0.9313369846310038,
      "learning_rate": 1.5968890221658207e-07,
      "loss": 0.1253,
      "step": 31596
    },
    {
      "epoch": 0.9217865686446117,
      "grad_norm": 0.73495900928635,
      "learning_rate": 1.595704798230191e-07,
      "loss": 0.1123,
      "step": 31597
    },
    {
      "epoch": 0.9218157418752553,
      "grad_norm": 0.8927840892463856,
      "learning_rate": 1.5945210064365503e-07,
      "loss": 0.1566,
      "step": 31598
    },
    {
      "epoch": 0.9218449151058988,
      "grad_norm": 0.8984768986685986,
      "learning_rate": 1.593337646795473e-07,
      "loss": 0.1018,
      "step": 31599
    },
    {
      "epoch": 0.9218740883365424,
      "grad_norm": 0.8765152866815265,
      "learning_rate": 1.5921547193175292e-07,
      "loss": 0.0899,
      "step": 31600
    },
    {
      "epoch": 0.9219032615671859,
      "grad_norm": 0.7967989330865396,
      "learning_rate": 1.5909722240132542e-07,
      "loss": 0.1142,
      "step": 31601
    },
    {
      "epoch": 0.9219324347978295,
      "grad_norm": 0.7442665459955101,
      "learning_rate": 1.5897901608932342e-07,
      "loss": 0.1072,
      "step": 31602
    },
    {
      "epoch": 0.921961608028473,
      "grad_norm": 0.9868247398129755,
      "learning_rate": 1.5886085299680166e-07,
      "loss": 0.1185,
      "step": 31603
    },
    {
      "epoch": 0.9219907812591166,
      "grad_norm": 0.78649939067307,
      "learning_rate": 1.5874273312481368e-07,
      "loss": 0.0949,
      "step": 31604
    },
    {
      "epoch": 0.9220199544897602,
      "grad_norm": 1.1103592657883514,
      "learning_rate": 1.5862465647441537e-07,
      "loss": 0.127,
      "step": 31605
    },
    {
      "epoch": 0.9220491277204037,
      "grad_norm": 0.8698085582139836,
      "learning_rate": 1.585066230466603e-07,
      "loss": 0.1126,
      "step": 31606
    },
    {
      "epoch": 0.9220783009510474,
      "grad_norm": 0.8990790270924715,
      "learning_rate": 1.5838863284260208e-07,
      "loss": 0.1079,
      "step": 31607
    },
    {
      "epoch": 0.9221074741816909,
      "grad_norm": 0.8297752819122285,
      "learning_rate": 1.582706858632943e-07,
      "loss": 0.1154,
      "step": 31608
    },
    {
      "epoch": 0.9221366474123345,
      "grad_norm": 0.846981054375405,
      "learning_rate": 1.5815278210979056e-07,
      "loss": 0.1038,
      "step": 31609
    },
    {
      "epoch": 0.922165820642978,
      "grad_norm": 0.8276836928239499,
      "learning_rate": 1.5803492158314283e-07,
      "loss": 0.1,
      "step": 31610
    },
    {
      "epoch": 0.9221949938736216,
      "grad_norm": 0.798405376271041,
      "learning_rate": 1.57917104284403e-07,
      "loss": 0.1101,
      "step": 31611
    },
    {
      "epoch": 0.9222241671042651,
      "grad_norm": 0.7716831291840625,
      "learning_rate": 1.5779933021462357e-07,
      "loss": 0.0903,
      "step": 31612
    },
    {
      "epoch": 0.9222533403349087,
      "grad_norm": 0.7221244000074717,
      "learning_rate": 1.5768159937485538e-07,
      "loss": 0.0964,
      "step": 31613
    },
    {
      "epoch": 0.9222825135655522,
      "grad_norm": 0.6758694466963024,
      "learning_rate": 1.5756391176615092e-07,
      "loss": 0.1065,
      "step": 31614
    },
    {
      "epoch": 0.9223116867961958,
      "grad_norm": 0.7512265184986077,
      "learning_rate": 1.5744626738955883e-07,
      "loss": 0.0868,
      "step": 31615
    },
    {
      "epoch": 0.9223408600268393,
      "grad_norm": 0.8501602567042736,
      "learning_rate": 1.5732866624613152e-07,
      "loss": 0.1137,
      "step": 31616
    },
    {
      "epoch": 0.9223700332574829,
      "grad_norm": 0.7045020634778086,
      "learning_rate": 1.572111083369171e-07,
      "loss": 0.0969,
      "step": 31617
    },
    {
      "epoch": 0.9223992064881265,
      "grad_norm": 0.8468483635821392,
      "learning_rate": 1.5709359366296583e-07,
      "loss": 0.1157,
      "step": 31618
    },
    {
      "epoch": 0.92242837971877,
      "grad_norm": 0.8278444364184621,
      "learning_rate": 1.5697612222532687e-07,
      "loss": 0.0975,
      "step": 31619
    },
    {
      "epoch": 0.9224575529494137,
      "grad_norm": 0.7522274086515124,
      "learning_rate": 1.5685869402504938e-07,
      "loss": 0.1062,
      "step": 31620
    },
    {
      "epoch": 0.9224867261800572,
      "grad_norm": 1.0404808666507726,
      "learning_rate": 1.5674130906318085e-07,
      "loss": 0.0988,
      "step": 31621
    },
    {
      "epoch": 0.9225158994107008,
      "grad_norm": 0.9457242979288193,
      "learning_rate": 1.566239673407699e-07,
      "loss": 0.106,
      "step": 31622
    },
    {
      "epoch": 0.9225450726413443,
      "grad_norm": 0.7968748994921456,
      "learning_rate": 1.5650666885886457e-07,
      "loss": 0.1583,
      "step": 31623
    },
    {
      "epoch": 0.9225742458719879,
      "grad_norm": 0.9325826393941028,
      "learning_rate": 1.5638941361851069e-07,
      "loss": 0.1074,
      "step": 31624
    },
    {
      "epoch": 0.9226034191026314,
      "grad_norm": 1.063030955184054,
      "learning_rate": 1.5627220162075574e-07,
      "loss": 0.1351,
      "step": 31625
    },
    {
      "epoch": 0.922632592333275,
      "grad_norm": 1.026518357218933,
      "learning_rate": 1.5615503286664668e-07,
      "loss": 0.1059,
      "step": 31626
    },
    {
      "epoch": 0.9226617655639185,
      "grad_norm": 0.7980818277717988,
      "learning_rate": 1.5603790735722933e-07,
      "loss": 0.1176,
      "step": 31627
    },
    {
      "epoch": 0.9226909387945621,
      "grad_norm": 0.8576736422214318,
      "learning_rate": 1.5592082509354845e-07,
      "loss": 0.1167,
      "step": 31628
    },
    {
      "epoch": 0.9227201120252057,
      "grad_norm": 0.7265952864485458,
      "learning_rate": 1.5580378607665092e-07,
      "loss": 0.1081,
      "step": 31629
    },
    {
      "epoch": 0.9227492852558492,
      "grad_norm": 0.7084197499329727,
      "learning_rate": 1.5568679030758095e-07,
      "loss": 0.1181,
      "step": 31630
    },
    {
      "epoch": 0.9227784584864928,
      "grad_norm": 0.836058832165904,
      "learning_rate": 1.555698377873821e-07,
      "loss": 0.1036,
      "step": 31631
    },
    {
      "epoch": 0.9228076317171363,
      "grad_norm": 0.7518532359470982,
      "learning_rate": 1.5545292851709915e-07,
      "loss": 0.0979,
      "step": 31632
    },
    {
      "epoch": 0.9228368049477799,
      "grad_norm": 0.7655427662612161,
      "learning_rate": 1.5533606249777677e-07,
      "loss": 0.1162,
      "step": 31633
    },
    {
      "epoch": 0.9228659781784235,
      "grad_norm": 0.7174580099324258,
      "learning_rate": 1.5521923973045694e-07,
      "loss": 0.1197,
      "step": 31634
    },
    {
      "epoch": 0.9228951514090671,
      "grad_norm": 0.9234571164611187,
      "learning_rate": 1.5510246021618325e-07,
      "loss": 0.1072,
      "step": 31635
    },
    {
      "epoch": 0.9229243246397106,
      "grad_norm": 1.3094640955070806,
      "learning_rate": 1.5498572395599877e-07,
      "loss": 0.133,
      "step": 31636
    },
    {
      "epoch": 0.9229534978703542,
      "grad_norm": 0.7505909050299393,
      "learning_rate": 1.548690309509443e-07,
      "loss": 0.1269,
      "step": 31637
    },
    {
      "epoch": 0.9229826711009977,
      "grad_norm": 0.6915615289355302,
      "learning_rate": 1.5475238120206293e-07,
      "loss": 0.0924,
      "step": 31638
    },
    {
      "epoch": 0.9230118443316413,
      "grad_norm": 0.9557879590278532,
      "learning_rate": 1.5463577471039548e-07,
      "loss": 0.1054,
      "step": 31639
    },
    {
      "epoch": 0.9230410175622848,
      "grad_norm": 0.9334558976212776,
      "learning_rate": 1.545192114769839e-07,
      "loss": 0.1501,
      "step": 31640
    },
    {
      "epoch": 0.9230701907929284,
      "grad_norm": 0.8468658970243811,
      "learning_rate": 1.5440269150286734e-07,
      "loss": 0.1224,
      "step": 31641
    },
    {
      "epoch": 0.923099364023572,
      "grad_norm": 0.9010276123425714,
      "learning_rate": 1.5428621478908723e-07,
      "loss": 0.1194,
      "step": 31642
    },
    {
      "epoch": 0.9231285372542155,
      "grad_norm": 0.7955231366219253,
      "learning_rate": 1.5416978133668213e-07,
      "loss": 0.0969,
      "step": 31643
    },
    {
      "epoch": 0.9231577104848591,
      "grad_norm": 0.7320602127955743,
      "learning_rate": 1.5405339114669348e-07,
      "loss": 0.1338,
      "step": 31644
    },
    {
      "epoch": 0.9231868837155026,
      "grad_norm": 0.860636207288666,
      "learning_rate": 1.5393704422015875e-07,
      "loss": 0.1185,
      "step": 31645
    },
    {
      "epoch": 0.9232160569461462,
      "grad_norm": 0.7553259095559253,
      "learning_rate": 1.5382074055811768e-07,
      "loss": 0.1141,
      "step": 31646
    },
    {
      "epoch": 0.9232452301767898,
      "grad_norm": 1.1257911236360727,
      "learning_rate": 1.5370448016160778e-07,
      "loss": 0.0884,
      "step": 31647
    },
    {
      "epoch": 0.9232744034074334,
      "grad_norm": 0.764563354961511,
      "learning_rate": 1.5358826303166764e-07,
      "loss": 0.1183,
      "step": 31648
    },
    {
      "epoch": 0.9233035766380769,
      "grad_norm": 0.8925355857901404,
      "learning_rate": 1.5347208916933366e-07,
      "loss": 0.097,
      "step": 31649
    },
    {
      "epoch": 0.9233327498687205,
      "grad_norm": 0.7161358890200636,
      "learning_rate": 1.5335595857564501e-07,
      "loss": 0.0823,
      "step": 31650
    },
    {
      "epoch": 0.923361923099364,
      "grad_norm": 1.0281336572021094,
      "learning_rate": 1.5323987125163697e-07,
      "loss": 0.122,
      "step": 31651
    },
    {
      "epoch": 0.9233910963300076,
      "grad_norm": 0.9173171067427917,
      "learning_rate": 1.531238271983465e-07,
      "loss": 0.113,
      "step": 31652
    },
    {
      "epoch": 0.9234202695606512,
      "grad_norm": 0.8164228517657877,
      "learning_rate": 1.5300782641680945e-07,
      "loss": 0.119,
      "step": 31653
    },
    {
      "epoch": 0.9234494427912947,
      "grad_norm": 0.7005962818840026,
      "learning_rate": 1.5289186890806108e-07,
      "loss": 0.1141,
      "step": 31654
    },
    {
      "epoch": 0.9234786160219383,
      "grad_norm": 0.8436412188491318,
      "learning_rate": 1.5277595467313723e-07,
      "loss": 0.1278,
      "step": 31655
    },
    {
      "epoch": 0.9235077892525818,
      "grad_norm": 1.0043470679257775,
      "learning_rate": 1.5266008371307262e-07,
      "loss": 0.0983,
      "step": 31656
    },
    {
      "epoch": 0.9235369624832254,
      "grad_norm": 0.9269625408708297,
      "learning_rate": 1.52544256028902e-07,
      "loss": 0.1022,
      "step": 31657
    },
    {
      "epoch": 0.9235661357138689,
      "grad_norm": 0.8423703202891744,
      "learning_rate": 1.5242847162165843e-07,
      "loss": 0.1178,
      "step": 31658
    },
    {
      "epoch": 0.9235953089445125,
      "grad_norm": 0.7931191425893267,
      "learning_rate": 1.523127304923766e-07,
      "loss": 0.1071,
      "step": 31659
    },
    {
      "epoch": 0.923624482175156,
      "grad_norm": 0.8677367734275767,
      "learning_rate": 1.5219703264208963e-07,
      "loss": 0.1022,
      "step": 31660
    },
    {
      "epoch": 0.9236536554057997,
      "grad_norm": 0.9929099802499494,
      "learning_rate": 1.5208137807183e-07,
      "loss": 0.1114,
      "step": 31661
    },
    {
      "epoch": 0.9236828286364432,
      "grad_norm": 0.6626511392472205,
      "learning_rate": 1.519657667826302e-07,
      "loss": 0.1134,
      "step": 31662
    },
    {
      "epoch": 0.9237120018670868,
      "grad_norm": 0.8092447279789307,
      "learning_rate": 1.518501987755233e-07,
      "loss": 0.1024,
      "step": 31663
    },
    {
      "epoch": 0.9237411750977303,
      "grad_norm": 1.0462278416088158,
      "learning_rate": 1.517346740515402e-07,
      "loss": 0.1233,
      "step": 31664
    },
    {
      "epoch": 0.9237703483283739,
      "grad_norm": 0.7076361377472974,
      "learning_rate": 1.5161919261171275e-07,
      "loss": 0.0986,
      "step": 31665
    },
    {
      "epoch": 0.9237995215590175,
      "grad_norm": 0.5744799080853549,
      "learning_rate": 1.5150375445707188e-07,
      "loss": 0.0979,
      "step": 31666
    },
    {
      "epoch": 0.923828694789661,
      "grad_norm": 1.062507853611053,
      "learning_rate": 1.5138835958864728e-07,
      "loss": 0.0956,
      "step": 31667
    },
    {
      "epoch": 0.9238578680203046,
      "grad_norm": 0.9327357428416654,
      "learning_rate": 1.5127300800747036e-07,
      "loss": 0.1027,
      "step": 31668
    },
    {
      "epoch": 0.9238870412509481,
      "grad_norm": 0.8754677669518118,
      "learning_rate": 1.5115769971457084e-07,
      "loss": 0.1177,
      "step": 31669
    },
    {
      "epoch": 0.9239162144815917,
      "grad_norm": 0.8856063982246912,
      "learning_rate": 1.510424347109779e-07,
      "loss": 0.1191,
      "step": 31670
    },
    {
      "epoch": 0.9239453877122352,
      "grad_norm": 0.7518185981710238,
      "learning_rate": 1.5092721299772017e-07,
      "loss": 0.0836,
      "step": 31671
    },
    {
      "epoch": 0.9239745609428788,
      "grad_norm": 0.799150001983761,
      "learning_rate": 1.5081203457582738e-07,
      "loss": 0.1002,
      "step": 31672
    },
    {
      "epoch": 0.9240037341735223,
      "grad_norm": 0.9102159400402953,
      "learning_rate": 1.5069689944632648e-07,
      "loss": 0.104,
      "step": 31673
    },
    {
      "epoch": 0.9240329074041659,
      "grad_norm": 0.9043552789685778,
      "learning_rate": 1.505818076102461e-07,
      "loss": 0.1379,
      "step": 31674
    },
    {
      "epoch": 0.9240620806348095,
      "grad_norm": 0.815463861856309,
      "learning_rate": 1.5046675906861374e-07,
      "loss": 0.1027,
      "step": 31675
    },
    {
      "epoch": 0.9240912538654531,
      "grad_norm": 0.7249940916778375,
      "learning_rate": 1.5035175382245692e-07,
      "loss": 0.112,
      "step": 31676
    },
    {
      "epoch": 0.9241204270960967,
      "grad_norm": 1.0881781103280483,
      "learning_rate": 1.502367918728015e-07,
      "loss": 0.1342,
      "step": 31677
    },
    {
      "epoch": 0.9241496003267402,
      "grad_norm": 0.9374697187390513,
      "learning_rate": 1.5012187322067439e-07,
      "loss": 0.133,
      "step": 31678
    },
    {
      "epoch": 0.9241787735573838,
      "grad_norm": 0.9271405452120017,
      "learning_rate": 1.5000699786710092e-07,
      "loss": 0.1134,
      "step": 31679
    },
    {
      "epoch": 0.9242079467880273,
      "grad_norm": 0.8913088739471651,
      "learning_rate": 1.4989216581310805e-07,
      "loss": 0.1128,
      "step": 31680
    },
    {
      "epoch": 0.9242371200186709,
      "grad_norm": 0.8566853906111136,
      "learning_rate": 1.497773770597194e-07,
      "loss": 0.0892,
      "step": 31681
    },
    {
      "epoch": 0.9242662932493144,
      "grad_norm": 0.8363044898995028,
      "learning_rate": 1.496626316079608e-07,
      "loss": 0.1206,
      "step": 31682
    },
    {
      "epoch": 0.924295466479958,
      "grad_norm": 0.8974881530619577,
      "learning_rate": 1.4954792945885643e-07,
      "loss": 0.1088,
      "step": 31683
    },
    {
      "epoch": 0.9243246397106015,
      "grad_norm": 0.8425431127384165,
      "learning_rate": 1.4943327061342993e-07,
      "loss": 0.1229,
      "step": 31684
    },
    {
      "epoch": 0.9243538129412451,
      "grad_norm": 0.9562055036842013,
      "learning_rate": 1.4931865507270548e-07,
      "loss": 0.1183,
      "step": 31685
    },
    {
      "epoch": 0.9243829861718886,
      "grad_norm": 0.7798587627156494,
      "learning_rate": 1.4920408283770616e-07,
      "loss": 0.0912,
      "step": 31686
    },
    {
      "epoch": 0.9244121594025322,
      "grad_norm": 1.0245868531379574,
      "learning_rate": 1.49089553909455e-07,
      "loss": 0.1396,
      "step": 31687
    },
    {
      "epoch": 0.9244413326331758,
      "grad_norm": 0.8113574996612866,
      "learning_rate": 1.48975068288974e-07,
      "loss": 0.1302,
      "step": 31688
    },
    {
      "epoch": 0.9244705058638194,
      "grad_norm": 0.8802965898818487,
      "learning_rate": 1.4886062597728567e-07,
      "loss": 0.1198,
      "step": 31689
    },
    {
      "epoch": 0.924499679094463,
      "grad_norm": 0.8279329466765452,
      "learning_rate": 1.4874622697541196e-07,
      "loss": 0.1119,
      "step": 31690
    },
    {
      "epoch": 0.9245288523251065,
      "grad_norm": 0.957264706814189,
      "learning_rate": 1.4863187128437317e-07,
      "loss": 0.1263,
      "step": 31691
    },
    {
      "epoch": 0.9245580255557501,
      "grad_norm": 0.8328102267929881,
      "learning_rate": 1.4851755890519125e-07,
      "loss": 0.0919,
      "step": 31692
    },
    {
      "epoch": 0.9245871987863936,
      "grad_norm": 0.7208471124902135,
      "learning_rate": 1.4840328983888653e-07,
      "loss": 0.0953,
      "step": 31693
    },
    {
      "epoch": 0.9246163720170372,
      "grad_norm": 0.801460191767315,
      "learning_rate": 1.482890640864787e-07,
      "loss": 0.135,
      "step": 31694
    },
    {
      "epoch": 0.9246455452476807,
      "grad_norm": 0.7721325867798572,
      "learning_rate": 1.4817488164898863e-07,
      "loss": 0.1061,
      "step": 31695
    },
    {
      "epoch": 0.9246747184783243,
      "grad_norm": 0.9239235113400591,
      "learning_rate": 1.480607425274344e-07,
      "loss": 0.0998,
      "step": 31696
    },
    {
      "epoch": 0.9247038917089678,
      "grad_norm": 0.8160326047347986,
      "learning_rate": 1.4794664672283577e-07,
      "loss": 0.1242,
      "step": 31697
    },
    {
      "epoch": 0.9247330649396114,
      "grad_norm": 0.748262937428363,
      "learning_rate": 1.4783259423621076e-07,
      "loss": 0.1087,
      "step": 31698
    },
    {
      "epoch": 0.9247622381702549,
      "grad_norm": 0.8645288476102676,
      "learning_rate": 1.4771858506857862e-07,
      "loss": 0.1115,
      "step": 31699
    },
    {
      "epoch": 0.9247914114008985,
      "grad_norm": 0.7152663028836039,
      "learning_rate": 1.476046192209568e-07,
      "loss": 0.0943,
      "step": 31700
    },
    {
      "epoch": 0.924820584631542,
      "grad_norm": 0.6501551081778637,
      "learning_rate": 1.4749069669436179e-07,
      "loss": 0.117,
      "step": 31701
    },
    {
      "epoch": 0.9248497578621857,
      "grad_norm": 0.6584951160164141,
      "learning_rate": 1.4737681748981214e-07,
      "loss": 0.0845,
      "step": 31702
    },
    {
      "epoch": 0.9248789310928293,
      "grad_norm": 0.7890857304082028,
      "learning_rate": 1.472629816083232e-07,
      "loss": 0.103,
      "step": 31703
    },
    {
      "epoch": 0.9249081043234728,
      "grad_norm": 0.8117915023448221,
      "learning_rate": 1.4714918905091246e-07,
      "loss": 0.1131,
      "step": 31704
    },
    {
      "epoch": 0.9249372775541164,
      "grad_norm": 0.7919583720680227,
      "learning_rate": 1.4703543981859524e-07,
      "loss": 0.1245,
      "step": 31705
    },
    {
      "epoch": 0.9249664507847599,
      "grad_norm": 0.7812810231682721,
      "learning_rate": 1.4692173391238684e-07,
      "loss": 0.1107,
      "step": 31706
    },
    {
      "epoch": 0.9249956240154035,
      "grad_norm": 0.8852469426470593,
      "learning_rate": 1.4680807133330312e-07,
      "loss": 0.1178,
      "step": 31707
    },
    {
      "epoch": 0.925024797246047,
      "grad_norm": 0.7730858926753635,
      "learning_rate": 1.466944520823582e-07,
      "loss": 0.0879,
      "step": 31708
    },
    {
      "epoch": 0.9250539704766906,
      "grad_norm": 0.8014526540941422,
      "learning_rate": 1.4658087616056582e-07,
      "loss": 0.1083,
      "step": 31709
    },
    {
      "epoch": 0.9250831437073341,
      "grad_norm": 0.865674395824994,
      "learning_rate": 1.4646734356894177e-07,
      "loss": 0.1022,
      "step": 31710
    },
    {
      "epoch": 0.9251123169379777,
      "grad_norm": 0.7474366567978811,
      "learning_rate": 1.4635385430849857e-07,
      "loss": 0.1113,
      "step": 31711
    },
    {
      "epoch": 0.9251414901686212,
      "grad_norm": 1.0057173331831166,
      "learning_rate": 1.4624040838024933e-07,
      "loss": 0.1215,
      "step": 31712
    },
    {
      "epoch": 0.9251706633992648,
      "grad_norm": 0.7728605512356185,
      "learning_rate": 1.461270057852071e-07,
      "loss": 0.1333,
      "step": 31713
    },
    {
      "epoch": 0.9251998366299083,
      "grad_norm": 0.7762412372580629,
      "learning_rate": 1.4601364652438387e-07,
      "loss": 0.1045,
      "step": 31714
    },
    {
      "epoch": 0.925229009860552,
      "grad_norm": 1.0697619655151576,
      "learning_rate": 1.4590033059879216e-07,
      "loss": 0.1314,
      "step": 31715
    },
    {
      "epoch": 0.9252581830911956,
      "grad_norm": 0.7180033330029212,
      "learning_rate": 1.4578705800944392e-07,
      "loss": 0.102,
      "step": 31716
    },
    {
      "epoch": 0.9252873563218391,
      "grad_norm": 0.9335884476519792,
      "learning_rate": 1.4567382875735002e-07,
      "loss": 0.1018,
      "step": 31717
    },
    {
      "epoch": 0.9253165295524827,
      "grad_norm": 0.6776272690764072,
      "learning_rate": 1.4556064284352135e-07,
      "loss": 0.0834,
      "step": 31718
    },
    {
      "epoch": 0.9253457027831262,
      "grad_norm": 0.6304707389915937,
      "learning_rate": 1.4544750026896814e-07,
      "loss": 0.1015,
      "step": 31719
    },
    {
      "epoch": 0.9253748760137698,
      "grad_norm": 0.8153619968939354,
      "learning_rate": 1.4533440103470132e-07,
      "loss": 0.1043,
      "step": 31720
    },
    {
      "epoch": 0.9254040492444133,
      "grad_norm": 0.9327875910475767,
      "learning_rate": 1.4522134514172948e-07,
      "loss": 0.1093,
      "step": 31721
    },
    {
      "epoch": 0.9254332224750569,
      "grad_norm": 0.9585100548820297,
      "learning_rate": 1.451083325910624e-07,
      "loss": 0.1055,
      "step": 31722
    },
    {
      "epoch": 0.9254623957057004,
      "grad_norm": 0.8694365073157463,
      "learning_rate": 1.449953633837098e-07,
      "loss": 0.1133,
      "step": 31723
    },
    {
      "epoch": 0.925491568936344,
      "grad_norm": 0.8312840869919882,
      "learning_rate": 1.448824375206792e-07,
      "loss": 0.1024,
      "step": 31724
    },
    {
      "epoch": 0.9255207421669875,
      "grad_norm": 0.7670830538040962,
      "learning_rate": 1.447695550029793e-07,
      "loss": 0.0885,
      "step": 31725
    },
    {
      "epoch": 0.9255499153976311,
      "grad_norm": 0.7136171008459236,
      "learning_rate": 1.4465671583161755e-07,
      "loss": 0.1015,
      "step": 31726
    },
    {
      "epoch": 0.9255790886282746,
      "grad_norm": 0.7312121747966311,
      "learning_rate": 1.4454392000760154e-07,
      "loss": 0.1108,
      "step": 31727
    },
    {
      "epoch": 0.9256082618589182,
      "grad_norm": 1.0217869543902838,
      "learning_rate": 1.444311675319382e-07,
      "loss": 0.0977,
      "step": 31728
    },
    {
      "epoch": 0.9256374350895619,
      "grad_norm": 0.734487254337658,
      "learning_rate": 1.4431845840563508e-07,
      "loss": 0.1235,
      "step": 31729
    },
    {
      "epoch": 0.9256666083202054,
      "grad_norm": 0.8334042837425122,
      "learning_rate": 1.4420579262969748e-07,
      "loss": 0.1168,
      "step": 31730
    },
    {
      "epoch": 0.925695781550849,
      "grad_norm": 0.7957090527856719,
      "learning_rate": 1.440931702051307e-07,
      "loss": 0.1004,
      "step": 31731
    },
    {
      "epoch": 0.9257249547814925,
      "grad_norm": 0.8122803162626553,
      "learning_rate": 1.439805911329417e-07,
      "loss": 0.108,
      "step": 31732
    },
    {
      "epoch": 0.9257541280121361,
      "grad_norm": 0.722345553644814,
      "learning_rate": 1.4386805541413361e-07,
      "loss": 0.1185,
      "step": 31733
    },
    {
      "epoch": 0.9257833012427796,
      "grad_norm": 0.8192274853642497,
      "learning_rate": 1.4375556304971338e-07,
      "loss": 0.1207,
      "step": 31734
    },
    {
      "epoch": 0.9258124744734232,
      "grad_norm": 0.8564909861136769,
      "learning_rate": 1.4364311404068355e-07,
      "loss": 0.0914,
      "step": 31735
    },
    {
      "epoch": 0.9258416477040667,
      "grad_norm": 1.0361703726960494,
      "learning_rate": 1.435307083880494e-07,
      "loss": 0.0932,
      "step": 31736
    },
    {
      "epoch": 0.9258708209347103,
      "grad_norm": 0.9041171044957181,
      "learning_rate": 1.4341834609281346e-07,
      "loss": 0.1138,
      "step": 31737
    },
    {
      "epoch": 0.9258999941653538,
      "grad_norm": 0.8281386330147424,
      "learning_rate": 1.4330602715597886e-07,
      "loss": 0.1159,
      "step": 31738
    },
    {
      "epoch": 0.9259291673959974,
      "grad_norm": 0.855771458799659,
      "learning_rate": 1.431937515785481e-07,
      "loss": 0.096,
      "step": 31739
    },
    {
      "epoch": 0.925958340626641,
      "grad_norm": 0.7884953687926183,
      "learning_rate": 1.4308151936152537e-07,
      "loss": 0.1097,
      "step": 31740
    },
    {
      "epoch": 0.9259875138572845,
      "grad_norm": 0.8772604667798529,
      "learning_rate": 1.4296933050591043e-07,
      "loss": 0.1147,
      "step": 31741
    },
    {
      "epoch": 0.9260166870879282,
      "grad_norm": 1.0410341867643633,
      "learning_rate": 1.428571850127064e-07,
      "loss": 0.121,
      "step": 31742
    },
    {
      "epoch": 0.9260458603185717,
      "grad_norm": 1.055490587638585,
      "learning_rate": 1.4274508288291411e-07,
      "loss": 0.109,
      "step": 31743
    },
    {
      "epoch": 0.9260750335492153,
      "grad_norm": 0.6718414874251398,
      "learning_rate": 1.4263302411753388e-07,
      "loss": 0.1284,
      "step": 31744
    },
    {
      "epoch": 0.9261042067798588,
      "grad_norm": 0.8776701654134024,
      "learning_rate": 1.42521008717566e-07,
      "loss": 0.1138,
      "step": 31745
    },
    {
      "epoch": 0.9261333800105024,
      "grad_norm": 0.8780409718093165,
      "learning_rate": 1.424090366840114e-07,
      "loss": 0.1165,
      "step": 31746
    },
    {
      "epoch": 0.9261625532411459,
      "grad_norm": 0.8087318675701313,
      "learning_rate": 1.422971080178698e-07,
      "loss": 0.1235,
      "step": 31747
    },
    {
      "epoch": 0.9261917264717895,
      "grad_norm": 0.8035138993306693,
      "learning_rate": 1.4218522272013924e-07,
      "loss": 0.1323,
      "step": 31748
    },
    {
      "epoch": 0.926220899702433,
      "grad_norm": 1.3027535291669212,
      "learning_rate": 1.420733807918201e-07,
      "loss": 0.1018,
      "step": 31749
    },
    {
      "epoch": 0.9262500729330766,
      "grad_norm": 0.7610153643861877,
      "learning_rate": 1.4196158223390987e-07,
      "loss": 0.1081,
      "step": 31750
    },
    {
      "epoch": 0.9262792461637201,
      "grad_norm": 0.8453250451507696,
      "learning_rate": 1.4184982704740668e-07,
      "loss": 0.1146,
      "step": 31751
    },
    {
      "epoch": 0.9263084193943637,
      "grad_norm": 0.9790357627712215,
      "learning_rate": 1.4173811523330804e-07,
      "loss": 0.1124,
      "step": 31752
    },
    {
      "epoch": 0.9263375926250073,
      "grad_norm": 0.9046145975244893,
      "learning_rate": 1.4162644679261262e-07,
      "loss": 0.0958,
      "step": 31753
    },
    {
      "epoch": 0.9263667658556508,
      "grad_norm": 1.1355467678113773,
      "learning_rate": 1.4151482172631627e-07,
      "loss": 0.1199,
      "step": 31754
    },
    {
      "epoch": 0.9263959390862944,
      "grad_norm": 1.057588745451631,
      "learning_rate": 1.4140324003541538e-07,
      "loss": 0.1414,
      "step": 31755
    },
    {
      "epoch": 0.926425112316938,
      "grad_norm": 0.9310083548881187,
      "learning_rate": 1.4129170172090645e-07,
      "loss": 0.1246,
      "step": 31756
    },
    {
      "epoch": 0.9264542855475816,
      "grad_norm": 0.6704608895088169,
      "learning_rate": 1.411802067837864e-07,
      "loss": 0.0913,
      "step": 31757
    },
    {
      "epoch": 0.9264834587782251,
      "grad_norm": 0.7102930648508062,
      "learning_rate": 1.4106875522504836e-07,
      "loss": 0.1242,
      "step": 31758
    },
    {
      "epoch": 0.9265126320088687,
      "grad_norm": 0.7892150966461705,
      "learning_rate": 1.409573470456893e-07,
      "loss": 0.1254,
      "step": 31759
    },
    {
      "epoch": 0.9265418052395122,
      "grad_norm": 0.890289439329342,
      "learning_rate": 1.4084598224670343e-07,
      "loss": 0.1324,
      "step": 31760
    },
    {
      "epoch": 0.9265709784701558,
      "grad_norm": 0.9709724081723901,
      "learning_rate": 1.4073466082908382e-07,
      "loss": 0.1005,
      "step": 31761
    },
    {
      "epoch": 0.9266001517007993,
      "grad_norm": 0.8535624983542714,
      "learning_rate": 1.406233827938247e-07,
      "loss": 0.1012,
      "step": 31762
    },
    {
      "epoch": 0.9266293249314429,
      "grad_norm": 0.830784851460545,
      "learning_rate": 1.405121481419214e-07,
      "loss": 0.109,
      "step": 31763
    },
    {
      "epoch": 0.9266584981620865,
      "grad_norm": 0.7923320312471503,
      "learning_rate": 1.4040095687436473e-07,
      "loss": 0.1017,
      "step": 31764
    },
    {
      "epoch": 0.92668767139273,
      "grad_norm": 0.8768415708836962,
      "learning_rate": 1.402898089921484e-07,
      "loss": 0.1191,
      "step": 31765
    },
    {
      "epoch": 0.9267168446233736,
      "grad_norm": 0.8864873670478928,
      "learning_rate": 1.4017870449626492e-07,
      "loss": 0.1401,
      "step": 31766
    },
    {
      "epoch": 0.9267460178540171,
      "grad_norm": 1.097276375567561,
      "learning_rate": 1.4006764338770573e-07,
      "loss": 0.1342,
      "step": 31767
    },
    {
      "epoch": 0.9267751910846607,
      "grad_norm": 0.7614307100585425,
      "learning_rate": 1.3995662566746115e-07,
      "loss": 0.0982,
      "step": 31768
    },
    {
      "epoch": 0.9268043643153043,
      "grad_norm": 0.6780695007409673,
      "learning_rate": 1.3984565133652484e-07,
      "loss": 0.0997,
      "step": 31769
    },
    {
      "epoch": 0.9268335375459479,
      "grad_norm": 0.88304632227594,
      "learning_rate": 1.3973472039588654e-07,
      "loss": 0.1148,
      "step": 31770
    },
    {
      "epoch": 0.9268627107765914,
      "grad_norm": 0.8076494746165436,
      "learning_rate": 1.39623832846536e-07,
      "loss": 0.0999,
      "step": 31771
    },
    {
      "epoch": 0.926891884007235,
      "grad_norm": 0.9200410378993642,
      "learning_rate": 1.395129886894636e-07,
      "loss": 0.106,
      "step": 31772
    },
    {
      "epoch": 0.9269210572378785,
      "grad_norm": 0.8777988146574237,
      "learning_rate": 1.3940218792565964e-07,
      "loss": 0.1227,
      "step": 31773
    },
    {
      "epoch": 0.9269502304685221,
      "grad_norm": 0.8797267955743507,
      "learning_rate": 1.3929143055611162e-07,
      "loss": 0.1173,
      "step": 31774
    },
    {
      "epoch": 0.9269794036991656,
      "grad_norm": 0.8029341141164189,
      "learning_rate": 1.391807165818093e-07,
      "loss": 0.1108,
      "step": 31775
    },
    {
      "epoch": 0.9270085769298092,
      "grad_norm": 0.8464957889930641,
      "learning_rate": 1.3907004600374198e-07,
      "loss": 0.106,
      "step": 31776
    },
    {
      "epoch": 0.9270377501604528,
      "grad_norm": 0.7510489981223771,
      "learning_rate": 1.389594188228971e-07,
      "loss": 0.1138,
      "step": 31777
    },
    {
      "epoch": 0.9270669233910963,
      "grad_norm": 0.7993096754734151,
      "learning_rate": 1.3884883504026116e-07,
      "loss": 0.0964,
      "step": 31778
    },
    {
      "epoch": 0.9270960966217399,
      "grad_norm": 0.7978983676680128,
      "learning_rate": 1.3873829465682277e-07,
      "loss": 0.098,
      "step": 31779
    },
    {
      "epoch": 0.9271252698523834,
      "grad_norm": 0.8653884600183934,
      "learning_rate": 1.3862779767356838e-07,
      "loss": 0.1233,
      "step": 31780
    },
    {
      "epoch": 0.927154443083027,
      "grad_norm": 0.7238549802724723,
      "learning_rate": 1.3851734409148443e-07,
      "loss": 0.1054,
      "step": 31781
    },
    {
      "epoch": 0.9271836163136705,
      "grad_norm": 0.9286978961897876,
      "learning_rate": 1.3840693391155735e-07,
      "loss": 0.0951,
      "step": 31782
    },
    {
      "epoch": 0.9272127895443142,
      "grad_norm": 0.6889075601075261,
      "learning_rate": 1.3829656713477247e-07,
      "loss": 0.1067,
      "step": 31783
    },
    {
      "epoch": 0.9272419627749577,
      "grad_norm": 0.8273047894290693,
      "learning_rate": 1.3818624376211564e-07,
      "loss": 0.1265,
      "step": 31784
    },
    {
      "epoch": 0.9272711360056013,
      "grad_norm": 0.8189016133973054,
      "learning_rate": 1.3807596379457056e-07,
      "loss": 0.099,
      "step": 31785
    },
    {
      "epoch": 0.9273003092362448,
      "grad_norm": 0.925306070894806,
      "learning_rate": 1.3796572723312308e-07,
      "loss": 0.1255,
      "step": 31786
    },
    {
      "epoch": 0.9273294824668884,
      "grad_norm": 0.7317591662671944,
      "learning_rate": 1.3785553407875685e-07,
      "loss": 0.1076,
      "step": 31787
    },
    {
      "epoch": 0.927358655697532,
      "grad_norm": 0.8200400377904487,
      "learning_rate": 1.3774538433245555e-07,
      "loss": 0.0947,
      "step": 31788
    },
    {
      "epoch": 0.9273878289281755,
      "grad_norm": 0.8756006166518264,
      "learning_rate": 1.376352779952034e-07,
      "loss": 0.1294,
      "step": 31789
    },
    {
      "epoch": 0.9274170021588191,
      "grad_norm": 0.6485610255946412,
      "learning_rate": 1.3752521506798233e-07,
      "loss": 0.0814,
      "step": 31790
    },
    {
      "epoch": 0.9274461753894626,
      "grad_norm": 0.9764972503010176,
      "learning_rate": 1.374151955517755e-07,
      "loss": 0.1108,
      "step": 31791
    },
    {
      "epoch": 0.9274753486201062,
      "grad_norm": 0.8023872679716414,
      "learning_rate": 1.3730521944756437e-07,
      "loss": 0.1228,
      "step": 31792
    },
    {
      "epoch": 0.9275045218507497,
      "grad_norm": 0.7619632234424019,
      "learning_rate": 1.3719528675633254e-07,
      "loss": 0.1042,
      "step": 31793
    },
    {
      "epoch": 0.9275336950813933,
      "grad_norm": 1.0058169030589184,
      "learning_rate": 1.370853974790598e-07,
      "loss": 0.1115,
      "step": 31794
    },
    {
      "epoch": 0.9275628683120368,
      "grad_norm": 0.8253121798893776,
      "learning_rate": 1.369755516167276e-07,
      "loss": 0.1266,
      "step": 31795
    },
    {
      "epoch": 0.9275920415426805,
      "grad_norm": 1.0321014996611595,
      "learning_rate": 1.368657491703168e-07,
      "loss": 0.0953,
      "step": 31796
    },
    {
      "epoch": 0.927621214773324,
      "grad_norm": 0.9970188652347979,
      "learning_rate": 1.3675599014080832e-07,
      "loss": 0.0953,
      "step": 31797
    },
    {
      "epoch": 0.9276503880039676,
      "grad_norm": 0.909214366606785,
      "learning_rate": 1.3664627452918021e-07,
      "loss": 0.0975,
      "step": 31798
    },
    {
      "epoch": 0.9276795612346111,
      "grad_norm": 0.8532918532153793,
      "learning_rate": 1.3653660233641397e-07,
      "loss": 0.0843,
      "step": 31799
    },
    {
      "epoch": 0.9277087344652547,
      "grad_norm": 0.9876479818329683,
      "learning_rate": 1.364269735634882e-07,
      "loss": 0.1055,
      "step": 31800
    },
    {
      "epoch": 0.9277379076958983,
      "grad_norm": 0.6862723268211314,
      "learning_rate": 1.363173882113805e-07,
      "loss": 0.0961,
      "step": 31801
    },
    {
      "epoch": 0.9277670809265418,
      "grad_norm": 0.8376938152866861,
      "learning_rate": 1.3620784628107065e-07,
      "loss": 0.1088,
      "step": 31802
    },
    {
      "epoch": 0.9277962541571854,
      "grad_norm": 0.8994906102792165,
      "learning_rate": 1.3609834777353669e-07,
      "loss": 0.1069,
      "step": 31803
    },
    {
      "epoch": 0.9278254273878289,
      "grad_norm": 1.007395043983961,
      "learning_rate": 1.3598889268975457e-07,
      "loss": 0.1073,
      "step": 31804
    },
    {
      "epoch": 0.9278546006184725,
      "grad_norm": 0.7227518113123437,
      "learning_rate": 1.3587948103070237e-07,
      "loss": 0.1094,
      "step": 31805
    },
    {
      "epoch": 0.927883773849116,
      "grad_norm": 0.849188319709201,
      "learning_rate": 1.3577011279735763e-07,
      "loss": 0.0976,
      "step": 31806
    },
    {
      "epoch": 0.9279129470797596,
      "grad_norm": 0.7660060966761473,
      "learning_rate": 1.3566078799069625e-07,
      "loss": 0.1007,
      "step": 31807
    },
    {
      "epoch": 0.9279421203104031,
      "grad_norm": 0.703551761846541,
      "learning_rate": 1.3555150661169358e-07,
      "loss": 0.1146,
      "step": 31808
    },
    {
      "epoch": 0.9279712935410467,
      "grad_norm": 0.9981733909666994,
      "learning_rate": 1.3544226866132658e-07,
      "loss": 0.1049,
      "step": 31809
    },
    {
      "epoch": 0.9280004667716903,
      "grad_norm": 0.8990268865529819,
      "learning_rate": 1.3533307414056894e-07,
      "loss": 0.1067,
      "step": 31810
    },
    {
      "epoch": 0.9280296400023339,
      "grad_norm": 0.8876133612278219,
      "learning_rate": 1.3522392305039656e-07,
      "loss": 0.1215,
      "step": 31811
    },
    {
      "epoch": 0.9280588132329775,
      "grad_norm": 0.8881889152314952,
      "learning_rate": 1.3511481539178362e-07,
      "loss": 0.1132,
      "step": 31812
    },
    {
      "epoch": 0.928087986463621,
      "grad_norm": 0.9820246173766248,
      "learning_rate": 1.350057511657049e-07,
      "loss": 0.1052,
      "step": 31813
    },
    {
      "epoch": 0.9281171596942646,
      "grad_norm": 0.868547077141067,
      "learning_rate": 1.34896730373133e-07,
      "loss": 0.1432,
      "step": 31814
    },
    {
      "epoch": 0.9281463329249081,
      "grad_norm": 0.7482346363401683,
      "learning_rate": 1.3478775301504154e-07,
      "loss": 0.1048,
      "step": 31815
    },
    {
      "epoch": 0.9281755061555517,
      "grad_norm": 0.7306615085695191,
      "learning_rate": 1.346788190924031e-07,
      "loss": 0.1037,
      "step": 31816
    },
    {
      "epoch": 0.9282046793861952,
      "grad_norm": 0.8326001612957701,
      "learning_rate": 1.3456992860619188e-07,
      "loss": 0.112,
      "step": 31817
    },
    {
      "epoch": 0.9282338526168388,
      "grad_norm": 0.9366844088472931,
      "learning_rate": 1.3446108155737826e-07,
      "loss": 0.1194,
      "step": 31818
    },
    {
      "epoch": 0.9282630258474823,
      "grad_norm": 0.872850129913893,
      "learning_rate": 1.3435227794693472e-07,
      "loss": 0.107,
      "step": 31819
    },
    {
      "epoch": 0.9282921990781259,
      "grad_norm": 0.8333367549240172,
      "learning_rate": 1.3424351777583278e-07,
      "loss": 0.1209,
      "step": 31820
    },
    {
      "epoch": 0.9283213723087694,
      "grad_norm": 0.7771479190120988,
      "learning_rate": 1.3413480104504272e-07,
      "loss": 0.1027,
      "step": 31821
    },
    {
      "epoch": 0.928350545539413,
      "grad_norm": 0.790493073916453,
      "learning_rate": 1.3402612775553546e-07,
      "loss": 0.107,
      "step": 31822
    },
    {
      "epoch": 0.9283797187700567,
      "grad_norm": 0.900113995987599,
      "learning_rate": 1.339174979082819e-07,
      "loss": 0.1121,
      "step": 31823
    },
    {
      "epoch": 0.9284088920007002,
      "grad_norm": 0.7251102759118273,
      "learning_rate": 1.3380891150425068e-07,
      "loss": 0.0845,
      "step": 31824
    },
    {
      "epoch": 0.9284380652313438,
      "grad_norm": 1.0832926473263882,
      "learning_rate": 1.3370036854441216e-07,
      "loss": 0.1111,
      "step": 31825
    },
    {
      "epoch": 0.9284672384619873,
      "grad_norm": 0.7568245320348015,
      "learning_rate": 1.3359186902973554e-07,
      "loss": 0.1318,
      "step": 31826
    },
    {
      "epoch": 0.9284964116926309,
      "grad_norm": 0.8860534980102193,
      "learning_rate": 1.334834129611884e-07,
      "loss": 0.1185,
      "step": 31827
    },
    {
      "epoch": 0.9285255849232744,
      "grad_norm": 0.8883106338757215,
      "learning_rate": 1.3337500033973882e-07,
      "loss": 0.1006,
      "step": 31828
    },
    {
      "epoch": 0.928554758153918,
      "grad_norm": 0.9404115406259522,
      "learning_rate": 1.3326663116635717e-07,
      "loss": 0.1163,
      "step": 31829
    },
    {
      "epoch": 0.9285839313845615,
      "grad_norm": 0.7562662765938616,
      "learning_rate": 1.3315830544200826e-07,
      "loss": 0.1209,
      "step": 31830
    },
    {
      "epoch": 0.9286131046152051,
      "grad_norm": 0.7330848003186264,
      "learning_rate": 1.3305002316766013e-07,
      "loss": 0.1243,
      "step": 31831
    },
    {
      "epoch": 0.9286422778458486,
      "grad_norm": 0.9121372954925305,
      "learning_rate": 1.329417843442804e-07,
      "loss": 0.1208,
      "step": 31832
    },
    {
      "epoch": 0.9286714510764922,
      "grad_norm": 0.8451668065558846,
      "learning_rate": 1.3283358897283438e-07,
      "loss": 0.1244,
      "step": 31833
    },
    {
      "epoch": 0.9287006243071357,
      "grad_norm": 0.8449992401771372,
      "learning_rate": 1.3272543705428742e-07,
      "loss": 0.1137,
      "step": 31834
    },
    {
      "epoch": 0.9287297975377793,
      "grad_norm": 0.9208350676847564,
      "learning_rate": 1.3261732858960598e-07,
      "loss": 0.0831,
      "step": 31835
    },
    {
      "epoch": 0.9287589707684228,
      "grad_norm": 0.6965852645334017,
      "learning_rate": 1.3250926357975537e-07,
      "loss": 0.1007,
      "step": 31836
    },
    {
      "epoch": 0.9287881439990665,
      "grad_norm": 0.72587681724953,
      "learning_rate": 1.3240124202570038e-07,
      "loss": 0.1049,
      "step": 31837
    },
    {
      "epoch": 0.9288173172297101,
      "grad_norm": 0.6481746470484258,
      "learning_rate": 1.3229326392840468e-07,
      "loss": 0.1028,
      "step": 31838
    },
    {
      "epoch": 0.9288464904603536,
      "grad_norm": 0.8006667847317643,
      "learning_rate": 1.321853292888331e-07,
      "loss": 0.099,
      "step": 31839
    },
    {
      "epoch": 0.9288756636909972,
      "grad_norm": 0.7546421097337805,
      "learning_rate": 1.3207743810794815e-07,
      "loss": 0.1228,
      "step": 31840
    },
    {
      "epoch": 0.9289048369216407,
      "grad_norm": 0.6913794747741638,
      "learning_rate": 1.3196959038671464e-07,
      "loss": 0.1136,
      "step": 31841
    },
    {
      "epoch": 0.9289340101522843,
      "grad_norm": 0.9952210474587667,
      "learning_rate": 1.3186178612609346e-07,
      "loss": 0.1047,
      "step": 31842
    },
    {
      "epoch": 0.9289631833829278,
      "grad_norm": 0.9197226240457339,
      "learning_rate": 1.317540253270494e-07,
      "loss": 0.1214,
      "step": 31843
    },
    {
      "epoch": 0.9289923566135714,
      "grad_norm": 0.7698174611805969,
      "learning_rate": 1.316463079905428e-07,
      "loss": 0.1196,
      "step": 31844
    },
    {
      "epoch": 0.9290215298442149,
      "grad_norm": 0.9122035865888464,
      "learning_rate": 1.3153863411753508e-07,
      "loss": 0.1147,
      "step": 31845
    },
    {
      "epoch": 0.9290507030748585,
      "grad_norm": 0.7490563563043272,
      "learning_rate": 1.3143100370898886e-07,
      "loss": 0.0893,
      "step": 31846
    },
    {
      "epoch": 0.929079876305502,
      "grad_norm": 0.8431775605464544,
      "learning_rate": 1.3132341676586447e-07,
      "loss": 0.1234,
      "step": 31847
    },
    {
      "epoch": 0.9291090495361456,
      "grad_norm": 0.7153092145860344,
      "learning_rate": 1.3121587328912222e-07,
      "loss": 0.1118,
      "step": 31848
    },
    {
      "epoch": 0.9291382227667891,
      "grad_norm": 1.19776123598946,
      "learning_rate": 1.3110837327972248e-07,
      "loss": 0.1406,
      "step": 31849
    },
    {
      "epoch": 0.9291673959974328,
      "grad_norm": 0.8648257833374993,
      "learning_rate": 1.3100091673862502e-07,
      "loss": 0.1015,
      "step": 31850
    },
    {
      "epoch": 0.9291965692280764,
      "grad_norm": 0.6255037728739352,
      "learning_rate": 1.3089350366678855e-07,
      "loss": 0.1013,
      "step": 31851
    },
    {
      "epoch": 0.9292257424587199,
      "grad_norm": 0.8270467120913118,
      "learning_rate": 1.3078613406517228e-07,
      "loss": 0.095,
      "step": 31852
    },
    {
      "epoch": 0.9292549156893635,
      "grad_norm": 0.9490322085209174,
      "learning_rate": 1.3067880793473597e-07,
      "loss": 0.1041,
      "step": 31853
    },
    {
      "epoch": 0.929284088920007,
      "grad_norm": 0.7924175106467733,
      "learning_rate": 1.3057152527643668e-07,
      "loss": 0.1159,
      "step": 31854
    },
    {
      "epoch": 0.9293132621506506,
      "grad_norm": 0.8274527911865962,
      "learning_rate": 1.3046428609123196e-07,
      "loss": 0.1125,
      "step": 31855
    },
    {
      "epoch": 0.9293424353812941,
      "grad_norm": 0.9026162693608782,
      "learning_rate": 1.3035709038007993e-07,
      "loss": 0.1152,
      "step": 31856
    },
    {
      "epoch": 0.9293716086119377,
      "grad_norm": 0.780596824956173,
      "learning_rate": 1.302499381439376e-07,
      "loss": 0.1127,
      "step": 31857
    },
    {
      "epoch": 0.9294007818425812,
      "grad_norm": 0.5554970680070257,
      "learning_rate": 1.3014282938376034e-07,
      "loss": 0.1046,
      "step": 31858
    },
    {
      "epoch": 0.9294299550732248,
      "grad_norm": 0.7311824802092963,
      "learning_rate": 1.3003576410050623e-07,
      "loss": 0.1093,
      "step": 31859
    },
    {
      "epoch": 0.9294591283038683,
      "grad_norm": 0.8817108602131031,
      "learning_rate": 1.299287422951301e-07,
      "loss": 0.096,
      "step": 31860
    },
    {
      "epoch": 0.9294883015345119,
      "grad_norm": 0.7418745675867275,
      "learning_rate": 1.2982176396858725e-07,
      "loss": 0.0966,
      "step": 31861
    },
    {
      "epoch": 0.9295174747651554,
      "grad_norm": 0.8576244592350082,
      "learning_rate": 1.2971482912183363e-07,
      "loss": 0.1137,
      "step": 31862
    },
    {
      "epoch": 0.929546647995799,
      "grad_norm": 0.9123038104314924,
      "learning_rate": 1.2960793775582347e-07,
      "loss": 0.1107,
      "step": 31863
    },
    {
      "epoch": 0.9295758212264427,
      "grad_norm": 0.6741393165961056,
      "learning_rate": 1.2950108987151045e-07,
      "loss": 0.0852,
      "step": 31864
    },
    {
      "epoch": 0.9296049944570862,
      "grad_norm": 0.7158006914295666,
      "learning_rate": 1.2939428546984878e-07,
      "loss": 0.099,
      "step": 31865
    },
    {
      "epoch": 0.9296341676877298,
      "grad_norm": 0.8480768058484065,
      "learning_rate": 1.292875245517927e-07,
      "loss": 0.104,
      "step": 31866
    },
    {
      "epoch": 0.9296633409183733,
      "grad_norm": 1.2313390211905544,
      "learning_rate": 1.2918080711829483e-07,
      "loss": 0.1127,
      "step": 31867
    },
    {
      "epoch": 0.9296925141490169,
      "grad_norm": 0.8906095587323312,
      "learning_rate": 1.2907413317030771e-07,
      "loss": 0.1576,
      "step": 31868
    },
    {
      "epoch": 0.9297216873796604,
      "grad_norm": 0.6460838578639841,
      "learning_rate": 1.2896750270878445e-07,
      "loss": 0.101,
      "step": 31869
    },
    {
      "epoch": 0.929750860610304,
      "grad_norm": 0.9412761702099182,
      "learning_rate": 1.2886091573467597e-07,
      "loss": 0.1133,
      "step": 31870
    },
    {
      "epoch": 0.9297800338409475,
      "grad_norm": 0.8029726891533026,
      "learning_rate": 1.2875437224893485e-07,
      "loss": 0.1114,
      "step": 31871
    },
    {
      "epoch": 0.9298092070715911,
      "grad_norm": 0.7234335704268551,
      "learning_rate": 1.2864787225251141e-07,
      "loss": 0.1216,
      "step": 31872
    },
    {
      "epoch": 0.9298383803022346,
      "grad_norm": 0.8342462671527551,
      "learning_rate": 1.2854141574635714e-07,
      "loss": 0.1079,
      "step": 31873
    },
    {
      "epoch": 0.9298675535328782,
      "grad_norm": 0.8613265968178566,
      "learning_rate": 1.284350027314224e-07,
      "loss": 0.1159,
      "step": 31874
    },
    {
      "epoch": 0.9298967267635218,
      "grad_norm": 0.8961476307437986,
      "learning_rate": 1.2832863320865696e-07,
      "loss": 0.1247,
      "step": 31875
    },
    {
      "epoch": 0.9299258999941653,
      "grad_norm": 0.7027444420307065,
      "learning_rate": 1.282223071790101e-07,
      "loss": 0.1247,
      "step": 31876
    },
    {
      "epoch": 0.929955073224809,
      "grad_norm": 0.9564352845338436,
      "learning_rate": 1.2811602464343155e-07,
      "loss": 0.1157,
      "step": 31877
    },
    {
      "epoch": 0.9299842464554525,
      "grad_norm": 1.1018576709758896,
      "learning_rate": 1.2800978560287002e-07,
      "loss": 0.1266,
      "step": 31878
    },
    {
      "epoch": 0.9300134196860961,
      "grad_norm": 0.8748241148358271,
      "learning_rate": 1.279035900582748e-07,
      "loss": 0.1055,
      "step": 31879
    },
    {
      "epoch": 0.9300425929167396,
      "grad_norm": 0.9006780298953522,
      "learning_rate": 1.2779743801059285e-07,
      "loss": 0.1082,
      "step": 31880
    },
    {
      "epoch": 0.9300717661473832,
      "grad_norm": 0.8733716203451908,
      "learning_rate": 1.2769132946077235e-07,
      "loss": 0.1543,
      "step": 31881
    },
    {
      "epoch": 0.9301009393780267,
      "grad_norm": 0.7921478917179678,
      "learning_rate": 1.2758526440976028e-07,
      "loss": 0.1141,
      "step": 31882
    },
    {
      "epoch": 0.9301301126086703,
      "grad_norm": 0.716895583888058,
      "learning_rate": 1.274792428585042e-07,
      "loss": 0.093,
      "step": 31883
    },
    {
      "epoch": 0.9301592858393138,
      "grad_norm": 0.9013773962196722,
      "learning_rate": 1.273732648079501e-07,
      "loss": 0.118,
      "step": 31884
    },
    {
      "epoch": 0.9301884590699574,
      "grad_norm": 0.8308549976620867,
      "learning_rate": 1.2726733025904436e-07,
      "loss": 0.1038,
      "step": 31885
    },
    {
      "epoch": 0.930217632300601,
      "grad_norm": 0.796908700369008,
      "learning_rate": 1.271614392127324e-07,
      "loss": 0.1128,
      "step": 31886
    },
    {
      "epoch": 0.9302468055312445,
      "grad_norm": 0.7871456170316263,
      "learning_rate": 1.2705559166996063e-07,
      "loss": 0.1173,
      "step": 31887
    },
    {
      "epoch": 0.9302759787618881,
      "grad_norm": 0.9454585672221922,
      "learning_rate": 1.2694978763167165e-07,
      "loss": 0.1364,
      "step": 31888
    },
    {
      "epoch": 0.9303051519925316,
      "grad_norm": 1.0150272764416546,
      "learning_rate": 1.2684402709881305e-07,
      "loss": 0.1072,
      "step": 31889
    },
    {
      "epoch": 0.9303343252231752,
      "grad_norm": 0.7229796377824613,
      "learning_rate": 1.2673831007232795e-07,
      "loss": 0.1134,
      "step": 31890
    },
    {
      "epoch": 0.9303634984538188,
      "grad_norm": 0.8158135053267521,
      "learning_rate": 1.2663263655315894e-07,
      "loss": 0.1112,
      "step": 31891
    },
    {
      "epoch": 0.9303926716844624,
      "grad_norm": 0.8555090184417553,
      "learning_rate": 1.265270065422508e-07,
      "loss": 0.1061,
      "step": 31892
    },
    {
      "epoch": 0.9304218449151059,
      "grad_norm": 0.771408734266717,
      "learning_rate": 1.2642142004054615e-07,
      "loss": 0.1042,
      "step": 31893
    },
    {
      "epoch": 0.9304510181457495,
      "grad_norm": 0.8500817337899947,
      "learning_rate": 1.2631587704898752e-07,
      "loss": 0.1167,
      "step": 31894
    },
    {
      "epoch": 0.930480191376393,
      "grad_norm": 0.8788507385964561,
      "learning_rate": 1.2621037756851695e-07,
      "loss": 0.1069,
      "step": 31895
    },
    {
      "epoch": 0.9305093646070366,
      "grad_norm": 0.8389933634147309,
      "learning_rate": 1.261049216000776e-07,
      "loss": 0.1039,
      "step": 31896
    },
    {
      "epoch": 0.9305385378376801,
      "grad_norm": 0.6556630091108445,
      "learning_rate": 1.259995091446098e-07,
      "loss": 0.1062,
      "step": 31897
    },
    {
      "epoch": 0.9305677110683237,
      "grad_norm": 0.7419564977416073,
      "learning_rate": 1.258941402030539e-07,
      "loss": 0.1197,
      "step": 31898
    },
    {
      "epoch": 0.9305968842989673,
      "grad_norm": 0.9155257068684064,
      "learning_rate": 1.2578881477635252e-07,
      "loss": 0.1234,
      "step": 31899
    },
    {
      "epoch": 0.9306260575296108,
      "grad_norm": 0.9316605553815461,
      "learning_rate": 1.2568353286544432e-07,
      "loss": 0.1127,
      "step": 31900
    },
    {
      "epoch": 0.9306552307602544,
      "grad_norm": 0.7153606449528723,
      "learning_rate": 1.2557829447127078e-07,
      "loss": 0.1421,
      "step": 31901
    },
    {
      "epoch": 0.9306844039908979,
      "grad_norm": 0.8417139113878996,
      "learning_rate": 1.2547309959477006e-07,
      "loss": 0.121,
      "step": 31902
    },
    {
      "epoch": 0.9307135772215415,
      "grad_norm": 0.8498931817356064,
      "learning_rate": 1.253679482368819e-07,
      "loss": 0.1217,
      "step": 31903
    },
    {
      "epoch": 0.9307427504521851,
      "grad_norm": 0.8064080262598519,
      "learning_rate": 1.2526284039854563e-07,
      "loss": 0.1281,
      "step": 31904
    },
    {
      "epoch": 0.9307719236828287,
      "grad_norm": 0.6760580647127779,
      "learning_rate": 1.2515777608069823e-07,
      "loss": 0.1054,
      "step": 31905
    },
    {
      "epoch": 0.9308010969134722,
      "grad_norm": 0.8039425443445063,
      "learning_rate": 1.250527552842784e-07,
      "loss": 0.1087,
      "step": 31906
    },
    {
      "epoch": 0.9308302701441158,
      "grad_norm": 0.954453858923193,
      "learning_rate": 1.2494777801022427e-07,
      "loss": 0.0989,
      "step": 31907
    },
    {
      "epoch": 0.9308594433747593,
      "grad_norm": 0.9314655885064037,
      "learning_rate": 1.2484284425947236e-07,
      "loss": 0.1338,
      "step": 31908
    },
    {
      "epoch": 0.9308886166054029,
      "grad_norm": 1.0844910436201352,
      "learning_rate": 1.2473795403296018e-07,
      "loss": 0.1432,
      "step": 31909
    },
    {
      "epoch": 0.9309177898360464,
      "grad_norm": 0.8381180465942429,
      "learning_rate": 1.2463310733162371e-07,
      "loss": 0.1106,
      "step": 31910
    },
    {
      "epoch": 0.93094696306669,
      "grad_norm": 0.6859338086230792,
      "learning_rate": 1.2452830415639882e-07,
      "loss": 0.1147,
      "step": 31911
    },
    {
      "epoch": 0.9309761362973336,
      "grad_norm": 0.7367652410515695,
      "learning_rate": 1.2442354450822092e-07,
      "loss": 0.1113,
      "step": 31912
    },
    {
      "epoch": 0.9310053095279771,
      "grad_norm": 0.9388791727098107,
      "learning_rate": 1.2431882838802646e-07,
      "loss": 0.0942,
      "step": 31913
    },
    {
      "epoch": 0.9310344827586207,
      "grad_norm": 1.6551175408790229,
      "learning_rate": 1.242141557967491e-07,
      "loss": 0.1083,
      "step": 31914
    },
    {
      "epoch": 0.9310636559892642,
      "grad_norm": 0.9551090036043243,
      "learning_rate": 1.2410952673532372e-07,
      "loss": 0.1214,
      "step": 31915
    },
    {
      "epoch": 0.9310928292199078,
      "grad_norm": 1.22625024778241,
      "learning_rate": 1.240049412046851e-07,
      "loss": 0.1086,
      "step": 31916
    },
    {
      "epoch": 0.9311220024505513,
      "grad_norm": 0.8530652157131269,
      "learning_rate": 1.2390039920576636e-07,
      "loss": 0.112,
      "step": 31917
    },
    {
      "epoch": 0.931151175681195,
      "grad_norm": 0.8429285448486027,
      "learning_rate": 1.2379590073949953e-07,
      "loss": 0.1048,
      "step": 31918
    },
    {
      "epoch": 0.9311803489118385,
      "grad_norm": 0.8755016187817724,
      "learning_rate": 1.2369144580682002e-07,
      "loss": 0.11,
      "step": 31919
    },
    {
      "epoch": 0.9312095221424821,
      "grad_norm": 0.826503120525939,
      "learning_rate": 1.2358703440865928e-07,
      "loss": 0.1269,
      "step": 31920
    },
    {
      "epoch": 0.9312386953731256,
      "grad_norm": 0.8628695192756125,
      "learning_rate": 1.2348266654594932e-07,
      "loss": 0.1065,
      "step": 31921
    },
    {
      "epoch": 0.9312678686037692,
      "grad_norm": 0.8469502563510405,
      "learning_rate": 1.2337834221962165e-07,
      "loss": 0.1195,
      "step": 31922
    },
    {
      "epoch": 0.9312970418344128,
      "grad_norm": 1.1229549960110636,
      "learning_rate": 1.2327406143060826e-07,
      "loss": 0.1244,
      "step": 31923
    },
    {
      "epoch": 0.9313262150650563,
      "grad_norm": 0.7639869690161986,
      "learning_rate": 1.2316982417983958e-07,
      "loss": 0.1116,
      "step": 31924
    },
    {
      "epoch": 0.9313553882956999,
      "grad_norm": 0.7455086304433893,
      "learning_rate": 1.230656304682465e-07,
      "loss": 0.1109,
      "step": 31925
    },
    {
      "epoch": 0.9313845615263434,
      "grad_norm": 1.0352930780386584,
      "learning_rate": 1.229614802967599e-07,
      "loss": 0.1054,
      "step": 31926
    },
    {
      "epoch": 0.931413734756987,
      "grad_norm": 0.7285119701188629,
      "learning_rate": 1.2285737366630857e-07,
      "loss": 0.1099,
      "step": 31927
    },
    {
      "epoch": 0.9314429079876305,
      "grad_norm": 0.8975598205261752,
      "learning_rate": 1.2275331057782224e-07,
      "loss": 0.0959,
      "step": 31928
    },
    {
      "epoch": 0.9314720812182741,
      "grad_norm": 0.870180636556917,
      "learning_rate": 1.226492910322302e-07,
      "loss": 0.1271,
      "step": 31929
    },
    {
      "epoch": 0.9315012544489176,
      "grad_norm": 0.8956178959689346,
      "learning_rate": 1.2254531503046062e-07,
      "loss": 0.1095,
      "step": 31930
    },
    {
      "epoch": 0.9315304276795612,
      "grad_norm": 0.6840812003072003,
      "learning_rate": 1.2244138257344275e-07,
      "loss": 0.0945,
      "step": 31931
    },
    {
      "epoch": 0.9315596009102048,
      "grad_norm": 0.7504785332715114,
      "learning_rate": 1.22337493662103e-07,
      "loss": 0.1007,
      "step": 31932
    },
    {
      "epoch": 0.9315887741408484,
      "grad_norm": 0.7472896064001563,
      "learning_rate": 1.2223364829737072e-07,
      "loss": 0.1053,
      "step": 31933
    },
    {
      "epoch": 0.931617947371492,
      "grad_norm": 0.8041613913350402,
      "learning_rate": 1.221298464801718e-07,
      "loss": 0.104,
      "step": 31934
    },
    {
      "epoch": 0.9316471206021355,
      "grad_norm": 0.915643845045573,
      "learning_rate": 1.220260882114327e-07,
      "loss": 0.0918,
      "step": 31935
    },
    {
      "epoch": 0.9316762938327791,
      "grad_norm": 0.6798992345577578,
      "learning_rate": 1.2192237349207993e-07,
      "loss": 0.1321,
      "step": 31936
    },
    {
      "epoch": 0.9317054670634226,
      "grad_norm": 0.8095701910911953,
      "learning_rate": 1.218187023230405e-07,
      "loss": 0.1049,
      "step": 31937
    },
    {
      "epoch": 0.9317346402940662,
      "grad_norm": 0.7939759489021841,
      "learning_rate": 1.2171507470523868e-07,
      "loss": 0.1138,
      "step": 31938
    },
    {
      "epoch": 0.9317638135247097,
      "grad_norm": 0.9562131842452575,
      "learning_rate": 1.2161149063960042e-07,
      "loss": 0.1215,
      "step": 31939
    },
    {
      "epoch": 0.9317929867553533,
      "grad_norm": 0.6817020416031795,
      "learning_rate": 1.2150795012705053e-07,
      "loss": 0.0997,
      "step": 31940
    },
    {
      "epoch": 0.9318221599859968,
      "grad_norm": 0.8691699055196034,
      "learning_rate": 1.2140445316851212e-07,
      "loss": 0.1235,
      "step": 31941
    },
    {
      "epoch": 0.9318513332166404,
      "grad_norm": 0.956361779604108,
      "learning_rate": 1.2130099976491062e-07,
      "loss": 0.1195,
      "step": 31942
    },
    {
      "epoch": 0.9318805064472839,
      "grad_norm": 0.7365849996540791,
      "learning_rate": 1.2119758991716912e-07,
      "loss": 0.1021,
      "step": 31943
    },
    {
      "epoch": 0.9319096796779275,
      "grad_norm": 0.6974657489834551,
      "learning_rate": 1.2109422362621138e-07,
      "loss": 0.1101,
      "step": 31944
    },
    {
      "epoch": 0.9319388529085711,
      "grad_norm": 0.9246905293246134,
      "learning_rate": 1.2099090089295884e-07,
      "loss": 0.13,
      "step": 31945
    },
    {
      "epoch": 0.9319680261392147,
      "grad_norm": 0.8507907797179879,
      "learning_rate": 1.2088762171833579e-07,
      "loss": 0.1279,
      "step": 31946
    },
    {
      "epoch": 0.9319971993698583,
      "grad_norm": 0.7888337363170564,
      "learning_rate": 1.207843861032626e-07,
      "loss": 0.1046,
      "step": 31947
    },
    {
      "epoch": 0.9320263726005018,
      "grad_norm": 0.7420549042870341,
      "learning_rate": 1.206811940486613e-07,
      "loss": 0.1101,
      "step": 31948
    },
    {
      "epoch": 0.9320555458311454,
      "grad_norm": 0.7910105412932991,
      "learning_rate": 1.205780455554545e-07,
      "loss": 0.1016,
      "step": 31949
    },
    {
      "epoch": 0.9320847190617889,
      "grad_norm": 0.8889477829657815,
      "learning_rate": 1.2047494062456199e-07,
      "loss": 0.1001,
      "step": 31950
    },
    {
      "epoch": 0.9321138922924325,
      "grad_norm": 0.7223779251255619,
      "learning_rate": 1.2037187925690364e-07,
      "loss": 0.1186,
      "step": 31951
    },
    {
      "epoch": 0.932143065523076,
      "grad_norm": 0.7825001685835803,
      "learning_rate": 1.2026886145340088e-07,
      "loss": 0.1219,
      "step": 31952
    },
    {
      "epoch": 0.9321722387537196,
      "grad_norm": 1.058982674765109,
      "learning_rate": 1.2016588721497247e-07,
      "loss": 0.1181,
      "step": 31953
    },
    {
      "epoch": 0.9322014119843631,
      "grad_norm": 0.9374541064444769,
      "learning_rate": 1.200629565425382e-07,
      "loss": 0.11,
      "step": 31954
    },
    {
      "epoch": 0.9322305852150067,
      "grad_norm": 0.9975085468401156,
      "learning_rate": 1.1996006943701676e-07,
      "loss": 0.1447,
      "step": 31955
    },
    {
      "epoch": 0.9322597584456502,
      "grad_norm": 0.8064543591715934,
      "learning_rate": 1.1985722589932747e-07,
      "loss": 0.1047,
      "step": 31956
    },
    {
      "epoch": 0.9322889316762938,
      "grad_norm": 0.7057703760831878,
      "learning_rate": 1.1975442593038788e-07,
      "loss": 0.1044,
      "step": 31957
    },
    {
      "epoch": 0.9323181049069373,
      "grad_norm": 0.6704747895430312,
      "learning_rate": 1.1965166953111508e-07,
      "loss": 0.0972,
      "step": 31958
    },
    {
      "epoch": 0.932347278137581,
      "grad_norm": 0.7856997979761003,
      "learning_rate": 1.1954895670242717e-07,
      "loss": 0.1125,
      "step": 31959
    },
    {
      "epoch": 0.9323764513682246,
      "grad_norm": 0.8636025047379615,
      "learning_rate": 1.194462874452418e-07,
      "loss": 0.1028,
      "step": 31960
    },
    {
      "epoch": 0.9324056245988681,
      "grad_norm": 0.9302947967398764,
      "learning_rate": 1.193436617604743e-07,
      "loss": 0.1224,
      "step": 31961
    },
    {
      "epoch": 0.9324347978295117,
      "grad_norm": 0.8765440605059343,
      "learning_rate": 1.1924107964904175e-07,
      "loss": 0.1388,
      "step": 31962
    },
    {
      "epoch": 0.9324639710601552,
      "grad_norm": 0.852805482050188,
      "learning_rate": 1.1913854111186008e-07,
      "loss": 0.133,
      "step": 31963
    },
    {
      "epoch": 0.9324931442907988,
      "grad_norm": 0.8567529364954466,
      "learning_rate": 1.190360461498441e-07,
      "loss": 0.1119,
      "step": 31964
    },
    {
      "epoch": 0.9325223175214423,
      "grad_norm": 0.8774635317691548,
      "learning_rate": 1.1893359476390809e-07,
      "loss": 0.1172,
      "step": 31965
    },
    {
      "epoch": 0.9325514907520859,
      "grad_norm": 0.7730345767223594,
      "learning_rate": 1.1883118695496853e-07,
      "loss": 0.1099,
      "step": 31966
    },
    {
      "epoch": 0.9325806639827294,
      "grad_norm": 0.954948814954049,
      "learning_rate": 1.1872882272393915e-07,
      "loss": 0.1122,
      "step": 31967
    },
    {
      "epoch": 0.932609837213373,
      "grad_norm": 0.8080294698130455,
      "learning_rate": 1.1862650207173365e-07,
      "loss": 0.1199,
      "step": 31968
    },
    {
      "epoch": 0.9326390104440165,
      "grad_norm": 1.2116619766730552,
      "learning_rate": 1.1852422499926519e-07,
      "loss": 0.0941,
      "step": 31969
    },
    {
      "epoch": 0.9326681836746601,
      "grad_norm": 0.8350327503632,
      "learning_rate": 1.1842199150744749e-07,
      "loss": 0.1017,
      "step": 31970
    },
    {
      "epoch": 0.9326973569053036,
      "grad_norm": 0.8437080265607998,
      "learning_rate": 1.1831980159719203e-07,
      "loss": 0.0813,
      "step": 31971
    },
    {
      "epoch": 0.9327265301359473,
      "grad_norm": 0.8397235501594001,
      "learning_rate": 1.1821765526941254e-07,
      "loss": 0.1091,
      "step": 31972
    },
    {
      "epoch": 0.9327557033665909,
      "grad_norm": 0.7604132266193332,
      "learning_rate": 1.1811555252502105e-07,
      "loss": 0.114,
      "step": 31973
    },
    {
      "epoch": 0.9327848765972344,
      "grad_norm": 0.842547410919885,
      "learning_rate": 1.1801349336492796e-07,
      "loss": 0.1102,
      "step": 31974
    },
    {
      "epoch": 0.932814049827878,
      "grad_norm": 1.0731862014744726,
      "learning_rate": 1.1791147779004474e-07,
      "loss": 0.0986,
      "step": 31975
    },
    {
      "epoch": 0.9328432230585215,
      "grad_norm": 0.696695142887009,
      "learning_rate": 1.1780950580128292e-07,
      "loss": 0.1141,
      "step": 31976
    },
    {
      "epoch": 0.9328723962891651,
      "grad_norm": 0.8698705517419311,
      "learning_rate": 1.1770757739955174e-07,
      "loss": 0.1044,
      "step": 31977
    },
    {
      "epoch": 0.9329015695198086,
      "grad_norm": 0.9605724397268341,
      "learning_rate": 1.1760569258576215e-07,
      "loss": 0.1014,
      "step": 31978
    },
    {
      "epoch": 0.9329307427504522,
      "grad_norm": 0.9189215535029513,
      "learning_rate": 1.1750385136082343e-07,
      "loss": 0.1069,
      "step": 31979
    },
    {
      "epoch": 0.9329599159810957,
      "grad_norm": 0.8198878868640496,
      "learning_rate": 1.1740205372564484e-07,
      "loss": 0.1059,
      "step": 31980
    },
    {
      "epoch": 0.9329890892117393,
      "grad_norm": 0.8415747852264213,
      "learning_rate": 1.173002996811351e-07,
      "loss": 0.1285,
      "step": 31981
    },
    {
      "epoch": 0.9330182624423828,
      "grad_norm": 0.7685357106287352,
      "learning_rate": 1.1719858922820293e-07,
      "loss": 0.1131,
      "step": 31982
    },
    {
      "epoch": 0.9330474356730264,
      "grad_norm": 0.830730054729226,
      "learning_rate": 1.1709692236775538e-07,
      "loss": 0.1162,
      "step": 31983
    },
    {
      "epoch": 0.93307660890367,
      "grad_norm": 0.7540497253570982,
      "learning_rate": 1.1699529910070173e-07,
      "loss": 0.1008,
      "step": 31984
    },
    {
      "epoch": 0.9331057821343135,
      "grad_norm": 0.924277777353587,
      "learning_rate": 1.1689371942794791e-07,
      "loss": 0.1048,
      "step": 31985
    },
    {
      "epoch": 0.9331349553649572,
      "grad_norm": 0.7920417138843862,
      "learning_rate": 1.1679218335040155e-07,
      "loss": 0.0971,
      "step": 31986
    },
    {
      "epoch": 0.9331641285956007,
      "grad_norm": 0.7963482116421964,
      "learning_rate": 1.1669069086896911e-07,
      "loss": 0.1026,
      "step": 31987
    },
    {
      "epoch": 0.9331933018262443,
      "grad_norm": 0.807518314704998,
      "learning_rate": 1.1658924198455546e-07,
      "loss": 0.0989,
      "step": 31988
    },
    {
      "epoch": 0.9332224750568878,
      "grad_norm": 0.9167264203577618,
      "learning_rate": 1.1648783669806762e-07,
      "loss": 0.0817,
      "step": 31989
    },
    {
      "epoch": 0.9332516482875314,
      "grad_norm": 0.7444747618564228,
      "learning_rate": 1.16386475010411e-07,
      "loss": 0.1103,
      "step": 31990
    },
    {
      "epoch": 0.9332808215181749,
      "grad_norm": 0.8915099216220743,
      "learning_rate": 1.1628515692249042e-07,
      "loss": 0.1158,
      "step": 31991
    },
    {
      "epoch": 0.9333099947488185,
      "grad_norm": 0.8595692223499609,
      "learning_rate": 1.1618388243520906e-07,
      "loss": 0.1202,
      "step": 31992
    },
    {
      "epoch": 0.933339167979462,
      "grad_norm": 0.7402023856329026,
      "learning_rate": 1.1608265154947285e-07,
      "loss": 0.1016,
      "step": 31993
    },
    {
      "epoch": 0.9333683412101056,
      "grad_norm": 0.8873259396295621,
      "learning_rate": 1.1598146426618495e-07,
      "loss": 0.1211,
      "step": 31994
    },
    {
      "epoch": 0.9333975144407491,
      "grad_norm": 0.9479931842533931,
      "learning_rate": 1.1588032058624798e-07,
      "loss": 0.1532,
      "step": 31995
    },
    {
      "epoch": 0.9334266876713927,
      "grad_norm": 0.898471466202595,
      "learning_rate": 1.1577922051056622e-07,
      "loss": 0.1103,
      "step": 31996
    },
    {
      "epoch": 0.9334558609020362,
      "grad_norm": 0.7137174191426734,
      "learning_rate": 1.1567816404004173e-07,
      "loss": 0.1149,
      "step": 31997
    },
    {
      "epoch": 0.9334850341326798,
      "grad_norm": 1.0439192802552064,
      "learning_rate": 1.15577151175576e-07,
      "loss": 0.1083,
      "step": 31998
    },
    {
      "epoch": 0.9335142073633235,
      "grad_norm": 0.9115704089747924,
      "learning_rate": 1.1547618191807164e-07,
      "loss": 0.0964,
      "step": 31999
    },
    {
      "epoch": 0.933543380593967,
      "grad_norm": 0.9143799232175939,
      "learning_rate": 1.1537525626843016e-07,
      "loss": 0.1159,
      "step": 32000
    },
    {
      "epoch": 0.9335725538246106,
      "grad_norm": 0.7971847368233306,
      "learning_rate": 1.1527437422755194e-07,
      "loss": 0.1197,
      "step": 32001
    },
    {
      "epoch": 0.9336017270552541,
      "grad_norm": 0.8082456541975708,
      "learning_rate": 1.1517353579633795e-07,
      "loss": 0.1226,
      "step": 32002
    },
    {
      "epoch": 0.9336309002858977,
      "grad_norm": 1.0664364553710668,
      "learning_rate": 1.150727409756891e-07,
      "loss": 0.1239,
      "step": 32003
    },
    {
      "epoch": 0.9336600735165412,
      "grad_norm": 0.7235267375604488,
      "learning_rate": 1.149719897665047e-07,
      "loss": 0.0996,
      "step": 32004
    },
    {
      "epoch": 0.9336892467471848,
      "grad_norm": 0.7508616798731628,
      "learning_rate": 1.1487128216968346e-07,
      "loss": 0.1029,
      "step": 32005
    },
    {
      "epoch": 0.9337184199778283,
      "grad_norm": 0.8663010406096253,
      "learning_rate": 1.1477061818612634e-07,
      "loss": 0.1005,
      "step": 32006
    },
    {
      "epoch": 0.9337475932084719,
      "grad_norm": 0.790146975826344,
      "learning_rate": 1.1466999781672982e-07,
      "loss": 0.1071,
      "step": 32007
    },
    {
      "epoch": 0.9337767664391154,
      "grad_norm": 0.7646811019953647,
      "learning_rate": 1.1456942106239377e-07,
      "loss": 0.1154,
      "step": 32008
    },
    {
      "epoch": 0.933805939669759,
      "grad_norm": 0.8885198983739938,
      "learning_rate": 1.1446888792401578e-07,
      "loss": 0.1067,
      "step": 32009
    },
    {
      "epoch": 0.9338351129004026,
      "grad_norm": 0.8334308513836821,
      "learning_rate": 1.1436839840249347e-07,
      "loss": 0.1062,
      "step": 32010
    },
    {
      "epoch": 0.9338642861310461,
      "grad_norm": 0.7974042968390833,
      "learning_rate": 1.1426795249872335e-07,
      "loss": 0.1165,
      "step": 32011
    },
    {
      "epoch": 0.9338934593616897,
      "grad_norm": 0.844353420312567,
      "learning_rate": 1.1416755021360304e-07,
      "loss": 0.1104,
      "step": 32012
    },
    {
      "epoch": 0.9339226325923333,
      "grad_norm": 1.0480805887133557,
      "learning_rate": 1.1406719154802848e-07,
      "loss": 0.1339,
      "step": 32013
    },
    {
      "epoch": 0.9339518058229769,
      "grad_norm": 0.9778261300554228,
      "learning_rate": 1.1396687650289561e-07,
      "loss": 0.1067,
      "step": 32014
    },
    {
      "epoch": 0.9339809790536204,
      "grad_norm": 0.9799128343500716,
      "learning_rate": 1.1386660507909986e-07,
      "loss": 0.1121,
      "step": 32015
    },
    {
      "epoch": 0.934010152284264,
      "grad_norm": 0.7926375659453858,
      "learning_rate": 1.1376637727753658e-07,
      "loss": 0.1157,
      "step": 32016
    },
    {
      "epoch": 0.9340393255149075,
      "grad_norm": 0.9591995660342114,
      "learning_rate": 1.136661930991012e-07,
      "loss": 0.1117,
      "step": 32017
    },
    {
      "epoch": 0.9340684987455511,
      "grad_norm": 0.8650595079341008,
      "learning_rate": 1.135660525446869e-07,
      "loss": 0.1177,
      "step": 32018
    },
    {
      "epoch": 0.9340976719761946,
      "grad_norm": 0.8103428031610451,
      "learning_rate": 1.1346595561518848e-07,
      "loss": 0.1335,
      "step": 32019
    },
    {
      "epoch": 0.9341268452068382,
      "grad_norm": 1.015601796023541,
      "learning_rate": 1.1336590231150024e-07,
      "loss": 0.1404,
      "step": 32020
    },
    {
      "epoch": 0.9341560184374817,
      "grad_norm": 1.02095304218318,
      "learning_rate": 1.1326589263451427e-07,
      "loss": 0.1121,
      "step": 32021
    },
    {
      "epoch": 0.9341851916681253,
      "grad_norm": 0.7554192565240885,
      "learning_rate": 1.1316592658512371e-07,
      "loss": 0.0964,
      "step": 32022
    },
    {
      "epoch": 0.9342143648987689,
      "grad_norm": 0.6385913709875719,
      "learning_rate": 1.130660041642212e-07,
      "loss": 0.1118,
      "step": 32023
    },
    {
      "epoch": 0.9342435381294124,
      "grad_norm": 0.9340508884192655,
      "learning_rate": 1.1296612537269935e-07,
      "loss": 0.1145,
      "step": 32024
    },
    {
      "epoch": 0.934272711360056,
      "grad_norm": 0.8276418139511901,
      "learning_rate": 1.1286629021144802e-07,
      "loss": 0.112,
      "step": 32025
    },
    {
      "epoch": 0.9343018845906996,
      "grad_norm": 0.8096684738787501,
      "learning_rate": 1.1276649868136091e-07,
      "loss": 0.1191,
      "step": 32026
    },
    {
      "epoch": 0.9343310578213432,
      "grad_norm": 0.9568940920296325,
      "learning_rate": 1.1266675078332734e-07,
      "loss": 0.1004,
      "step": 32027
    },
    {
      "epoch": 0.9343602310519867,
      "grad_norm": 0.7776215396400586,
      "learning_rate": 1.1256704651823825e-07,
      "loss": 0.1171,
      "step": 32028
    },
    {
      "epoch": 0.9343894042826303,
      "grad_norm": 0.7800938299421314,
      "learning_rate": 1.1246738588698458e-07,
      "loss": 0.0976,
      "step": 32029
    },
    {
      "epoch": 0.9344185775132738,
      "grad_norm": 0.6713518889285239,
      "learning_rate": 1.1236776889045508e-07,
      "loss": 0.1123,
      "step": 32030
    },
    {
      "epoch": 0.9344477507439174,
      "grad_norm": 1.3320616497280364,
      "learning_rate": 1.1226819552953849e-07,
      "loss": 0.1193,
      "step": 32031
    },
    {
      "epoch": 0.934476923974561,
      "grad_norm": 0.8440007410344869,
      "learning_rate": 1.121686658051252e-07,
      "loss": 0.1149,
      "step": 32032
    },
    {
      "epoch": 0.9345060972052045,
      "grad_norm": 0.7885423931377564,
      "learning_rate": 1.1206917971810339e-07,
      "loss": 0.098,
      "step": 32033
    },
    {
      "epoch": 0.934535270435848,
      "grad_norm": 1.829464074456043,
      "learning_rate": 1.1196973726936122e-07,
      "loss": 0.1248,
      "step": 32034
    },
    {
      "epoch": 0.9345644436664916,
      "grad_norm": 0.7603648793100546,
      "learning_rate": 1.1187033845978635e-07,
      "loss": 0.0902,
      "step": 32035
    },
    {
      "epoch": 0.9345936168971352,
      "grad_norm": 0.8812911824695407,
      "learning_rate": 1.1177098329026581e-07,
      "loss": 0.1017,
      "step": 32036
    },
    {
      "epoch": 0.9346227901277787,
      "grad_norm": 0.8232236047285875,
      "learning_rate": 1.1167167176168725e-07,
      "loss": 0.1251,
      "step": 32037
    },
    {
      "epoch": 0.9346519633584223,
      "grad_norm": 0.8740217291491303,
      "learning_rate": 1.1157240387493662e-07,
      "loss": 0.1219,
      "step": 32038
    },
    {
      "epoch": 0.9346811365890658,
      "grad_norm": 0.8123527785586346,
      "learning_rate": 1.1147317963090154e-07,
      "loss": 0.1116,
      "step": 32039
    },
    {
      "epoch": 0.9347103098197095,
      "grad_norm": 0.889147925885064,
      "learning_rate": 1.113739990304663e-07,
      "loss": 0.1074,
      "step": 32040
    },
    {
      "epoch": 0.934739483050353,
      "grad_norm": 0.796997302515763,
      "learning_rate": 1.1127486207451687e-07,
      "loss": 0.1272,
      "step": 32041
    },
    {
      "epoch": 0.9347686562809966,
      "grad_norm": 0.866471962490944,
      "learning_rate": 1.1117576876393921e-07,
      "loss": 0.1015,
      "step": 32042
    },
    {
      "epoch": 0.9347978295116401,
      "grad_norm": 0.7993024059883606,
      "learning_rate": 1.1107671909961648e-07,
      "loss": 0.0878,
      "step": 32043
    },
    {
      "epoch": 0.9348270027422837,
      "grad_norm": 0.8144702879758206,
      "learning_rate": 1.109777130824341e-07,
      "loss": 0.1101,
      "step": 32044
    },
    {
      "epoch": 0.9348561759729273,
      "grad_norm": 0.8887016433567662,
      "learning_rate": 1.1087875071327525e-07,
      "loss": 0.0909,
      "step": 32045
    },
    {
      "epoch": 0.9348853492035708,
      "grad_norm": 0.974323575414125,
      "learning_rate": 1.1077983199302422e-07,
      "loss": 0.1086,
      "step": 32046
    },
    {
      "epoch": 0.9349145224342144,
      "grad_norm": 0.700641465039684,
      "learning_rate": 1.1068095692256364e-07,
      "loss": 0.1202,
      "step": 32047
    },
    {
      "epoch": 0.9349436956648579,
      "grad_norm": 0.8344842765064419,
      "learning_rate": 1.1058212550277558e-07,
      "loss": 0.1092,
      "step": 32048
    },
    {
      "epoch": 0.9349728688955015,
      "grad_norm": 0.8276408686580056,
      "learning_rate": 1.1048333773454378e-07,
      "loss": 0.1098,
      "step": 32049
    },
    {
      "epoch": 0.935002042126145,
      "grad_norm": 0.6993079332475102,
      "learning_rate": 1.103845936187492e-07,
      "loss": 0.0848,
      "step": 32050
    },
    {
      "epoch": 0.9350312153567886,
      "grad_norm": 0.779769502283324,
      "learning_rate": 1.1028589315627448e-07,
      "loss": 0.1039,
      "step": 32051
    },
    {
      "epoch": 0.9350603885874321,
      "grad_norm": 1.1898230035142983,
      "learning_rate": 1.1018723634799888e-07,
      "loss": 0.1305,
      "step": 32052
    },
    {
      "epoch": 0.9350895618180758,
      "grad_norm": 0.77744475191222,
      "learning_rate": 1.1008862319480562e-07,
      "loss": 0.1017,
      "step": 32053
    },
    {
      "epoch": 0.9351187350487193,
      "grad_norm": 0.8275057891511748,
      "learning_rate": 1.0999005369757287e-07,
      "loss": 0.1094,
      "step": 32054
    },
    {
      "epoch": 0.9351479082793629,
      "grad_norm": 0.7830985358322079,
      "learning_rate": 1.098915278571816e-07,
      "loss": 0.1077,
      "step": 32055
    },
    {
      "epoch": 0.9351770815100064,
      "grad_norm": 1.038007306517236,
      "learning_rate": 1.0979304567451166e-07,
      "loss": 0.1104,
      "step": 32056
    },
    {
      "epoch": 0.93520625474065,
      "grad_norm": 0.7998792968146583,
      "learning_rate": 1.0969460715044234e-07,
      "loss": 0.1038,
      "step": 32057
    },
    {
      "epoch": 0.9352354279712936,
      "grad_norm": 0.7130960643140338,
      "learning_rate": 1.0959621228585126e-07,
      "loss": 0.0875,
      "step": 32058
    },
    {
      "epoch": 0.9352646012019371,
      "grad_norm": 0.7991544545494476,
      "learning_rate": 1.0949786108161885e-07,
      "loss": 0.1205,
      "step": 32059
    },
    {
      "epoch": 0.9352937744325807,
      "grad_norm": 0.9583770351838974,
      "learning_rate": 1.0939955353862164e-07,
      "loss": 0.1215,
      "step": 32060
    },
    {
      "epoch": 0.9353229476632242,
      "grad_norm": 0.708747277978358,
      "learning_rate": 1.0930128965773723e-07,
      "loss": 0.1091,
      "step": 32061
    },
    {
      "epoch": 0.9353521208938678,
      "grad_norm": 0.6758522069728627,
      "learning_rate": 1.0920306943984383e-07,
      "loss": 0.0796,
      "step": 32062
    },
    {
      "epoch": 0.9353812941245113,
      "grad_norm": 0.7543564942859913,
      "learning_rate": 1.0910489288581794e-07,
      "loss": 0.0836,
      "step": 32063
    },
    {
      "epoch": 0.9354104673551549,
      "grad_norm": 1.2929445917883506,
      "learning_rate": 1.0900675999653609e-07,
      "loss": 0.096,
      "step": 32064
    },
    {
      "epoch": 0.9354396405857984,
      "grad_norm": 0.8529796359826995,
      "learning_rate": 1.0890867077287425e-07,
      "loss": 0.0917,
      "step": 32065
    },
    {
      "epoch": 0.935468813816442,
      "grad_norm": 0.7370111459922979,
      "learning_rate": 1.088106252157084e-07,
      "loss": 0.1283,
      "step": 32066
    },
    {
      "epoch": 0.9354979870470856,
      "grad_norm": 0.9501203927021816,
      "learning_rate": 1.0871262332591281e-07,
      "loss": 0.1104,
      "step": 32067
    },
    {
      "epoch": 0.9355271602777292,
      "grad_norm": 0.8333221115080217,
      "learning_rate": 1.0861466510436347e-07,
      "loss": 0.1006,
      "step": 32068
    },
    {
      "epoch": 0.9355563335083728,
      "grad_norm": 0.7896071035671067,
      "learning_rate": 1.0851675055193579e-07,
      "loss": 0.1054,
      "step": 32069
    },
    {
      "epoch": 0.9355855067390163,
      "grad_norm": 0.8726117427575014,
      "learning_rate": 1.0841887966950237e-07,
      "loss": 0.1,
      "step": 32070
    },
    {
      "epoch": 0.9356146799696599,
      "grad_norm": 0.8472957512982819,
      "learning_rate": 1.08321052457937e-07,
      "loss": 0.1025,
      "step": 32071
    },
    {
      "epoch": 0.9356438532003034,
      "grad_norm": 0.9050049817836707,
      "learning_rate": 1.0822326891811396e-07,
      "loss": 0.1128,
      "step": 32072
    },
    {
      "epoch": 0.935673026430947,
      "grad_norm": 0.7629335989022237,
      "learning_rate": 1.0812552905090534e-07,
      "loss": 0.142,
      "step": 32073
    },
    {
      "epoch": 0.9357021996615905,
      "grad_norm": 0.7569119322303541,
      "learning_rate": 1.0802783285718488e-07,
      "loss": 0.0998,
      "step": 32074
    },
    {
      "epoch": 0.9357313728922341,
      "grad_norm": 0.8736446102508948,
      "learning_rate": 1.0793018033782355e-07,
      "loss": 0.107,
      "step": 32075
    },
    {
      "epoch": 0.9357605461228776,
      "grad_norm": 0.9946571567558669,
      "learning_rate": 1.0783257149369453e-07,
      "loss": 0.1203,
      "step": 32076
    },
    {
      "epoch": 0.9357897193535212,
      "grad_norm": 1.1956383419050371,
      "learning_rate": 1.0773500632566769e-07,
      "loss": 0.1192,
      "step": 32077
    },
    {
      "epoch": 0.9358188925841647,
      "grad_norm": 0.8033344768674102,
      "learning_rate": 1.0763748483461511e-07,
      "loss": 0.1095,
      "step": 32078
    },
    {
      "epoch": 0.9358480658148083,
      "grad_norm": 0.8577610732697442,
      "learning_rate": 1.0754000702140666e-07,
      "loss": 0.1083,
      "step": 32079
    },
    {
      "epoch": 0.935877239045452,
      "grad_norm": 1.2640094165415217,
      "learning_rate": 1.0744257288691384e-07,
      "loss": 0.1301,
      "step": 32080
    },
    {
      "epoch": 0.9359064122760955,
      "grad_norm": 0.88340311691982,
      "learning_rate": 1.0734518243200598e-07,
      "loss": 0.1192,
      "step": 32081
    },
    {
      "epoch": 0.935935585506739,
      "grad_norm": 0.8064643768696518,
      "learning_rate": 1.0724783565755126e-07,
      "loss": 0.112,
      "step": 32082
    },
    {
      "epoch": 0.9359647587373826,
      "grad_norm": 0.8475180988623313,
      "learning_rate": 1.071505325644212e-07,
      "loss": 0.1187,
      "step": 32083
    },
    {
      "epoch": 0.9359939319680262,
      "grad_norm": 0.9446665934044493,
      "learning_rate": 1.0705327315348235e-07,
      "loss": 0.1235,
      "step": 32084
    },
    {
      "epoch": 0.9360231051986697,
      "grad_norm": 0.768170012115103,
      "learning_rate": 1.0695605742560345e-07,
      "loss": 0.1149,
      "step": 32085
    },
    {
      "epoch": 0.9360522784293133,
      "grad_norm": 0.7996103309772392,
      "learning_rate": 1.0685888538165323e-07,
      "loss": 0.1085,
      "step": 32086
    },
    {
      "epoch": 0.9360814516599568,
      "grad_norm": 0.8840239057508736,
      "learning_rate": 1.0676175702249936e-07,
      "loss": 0.1163,
      "step": 32087
    },
    {
      "epoch": 0.9361106248906004,
      "grad_norm": 1.0532790900297067,
      "learning_rate": 1.0666467234900779e-07,
      "loss": 0.1052,
      "step": 32088
    },
    {
      "epoch": 0.9361397981212439,
      "grad_norm": 0.8433485382052007,
      "learning_rate": 1.0656763136204617e-07,
      "loss": 0.1038,
      "step": 32089
    },
    {
      "epoch": 0.9361689713518875,
      "grad_norm": 0.7560474299323872,
      "learning_rate": 1.0647063406248048e-07,
      "loss": 0.1131,
      "step": 32090
    },
    {
      "epoch": 0.936198144582531,
      "grad_norm": 0.7042252536882668,
      "learning_rate": 1.0637368045117669e-07,
      "loss": 0.1216,
      "step": 32091
    },
    {
      "epoch": 0.9362273178131746,
      "grad_norm": 0.9983765184405167,
      "learning_rate": 1.062767705290002e-07,
      "loss": 0.1196,
      "step": 32092
    },
    {
      "epoch": 0.9362564910438181,
      "grad_norm": 0.8783148249243051,
      "learning_rate": 1.0617990429681702e-07,
      "loss": 0.1095,
      "step": 32093
    },
    {
      "epoch": 0.9362856642744618,
      "grad_norm": 0.7706835504619733,
      "learning_rate": 1.0608308175549142e-07,
      "loss": 0.1227,
      "step": 32094
    },
    {
      "epoch": 0.9363148375051054,
      "grad_norm": 0.8939887644792357,
      "learning_rate": 1.0598630290588718e-07,
      "loss": 0.1011,
      "step": 32095
    },
    {
      "epoch": 0.9363440107357489,
      "grad_norm": 0.7309322511998092,
      "learning_rate": 1.0588956774886971e-07,
      "loss": 0.1088,
      "step": 32096
    },
    {
      "epoch": 0.9363731839663925,
      "grad_norm": 0.6661812416048095,
      "learning_rate": 1.057928762853011e-07,
      "loss": 0.1095,
      "step": 32097
    },
    {
      "epoch": 0.936402357197036,
      "grad_norm": 0.8456105650420225,
      "learning_rate": 1.0569622851604567e-07,
      "loss": 0.109,
      "step": 32098
    },
    {
      "epoch": 0.9364315304276796,
      "grad_norm": 0.86400129011861,
      "learning_rate": 1.0559962444196603e-07,
      "loss": 0.1139,
      "step": 32099
    },
    {
      "epoch": 0.9364607036583231,
      "grad_norm": 1.114747802139998,
      "learning_rate": 1.0550306406392486e-07,
      "loss": 0.0981,
      "step": 32100
    },
    {
      "epoch": 0.9364898768889667,
      "grad_norm": 0.8562614775002866,
      "learning_rate": 1.0540654738278366e-07,
      "loss": 0.1225,
      "step": 32101
    },
    {
      "epoch": 0.9365190501196102,
      "grad_norm": 0.8197540439884026,
      "learning_rate": 1.0531007439940455e-07,
      "loss": 0.0977,
      "step": 32102
    },
    {
      "epoch": 0.9365482233502538,
      "grad_norm": 1.1298989345727763,
      "learning_rate": 1.0521364511464794e-07,
      "loss": 0.1151,
      "step": 32103
    },
    {
      "epoch": 0.9365773965808973,
      "grad_norm": 0.833695601396288,
      "learning_rate": 1.051172595293759e-07,
      "loss": 0.1046,
      "step": 32104
    },
    {
      "epoch": 0.9366065698115409,
      "grad_norm": 0.9868865547606661,
      "learning_rate": 1.0502091764444833e-07,
      "loss": 0.1126,
      "step": 32105
    },
    {
      "epoch": 0.9366357430421844,
      "grad_norm": 0.8271959984514432,
      "learning_rate": 1.0492461946072563e-07,
      "loss": 0.1155,
      "step": 32106
    },
    {
      "epoch": 0.9366649162728281,
      "grad_norm": 0.8410155090686664,
      "learning_rate": 1.0482836497906768e-07,
      "loss": 0.1013,
      "step": 32107
    },
    {
      "epoch": 0.9366940895034717,
      "grad_norm": 0.7599988649280618,
      "learning_rate": 1.0473215420033322e-07,
      "loss": 0.123,
      "step": 32108
    },
    {
      "epoch": 0.9367232627341152,
      "grad_norm": 0.7464152785830234,
      "learning_rate": 1.0463598712538104e-07,
      "loss": 0.1349,
      "step": 32109
    },
    {
      "epoch": 0.9367524359647588,
      "grad_norm": 0.9067006560858255,
      "learning_rate": 1.0453986375507097e-07,
      "loss": 0.0916,
      "step": 32110
    },
    {
      "epoch": 0.9367816091954023,
      "grad_norm": 0.7312901611799437,
      "learning_rate": 1.0444378409026012e-07,
      "loss": 0.1003,
      "step": 32111
    },
    {
      "epoch": 0.9368107824260459,
      "grad_norm": 0.9593404712318456,
      "learning_rate": 1.0434774813180615e-07,
      "loss": 0.1263,
      "step": 32112
    },
    {
      "epoch": 0.9368399556566894,
      "grad_norm": 0.9865768206964413,
      "learning_rate": 1.0425175588056724e-07,
      "loss": 0.1046,
      "step": 32113
    },
    {
      "epoch": 0.936869128887333,
      "grad_norm": 0.9644272670639106,
      "learning_rate": 1.0415580733739994e-07,
      "loss": 0.0898,
      "step": 32114
    },
    {
      "epoch": 0.9368983021179765,
      "grad_norm": 0.7781047636323862,
      "learning_rate": 1.0405990250315967e-07,
      "loss": 0.0851,
      "step": 32115
    },
    {
      "epoch": 0.9369274753486201,
      "grad_norm": 0.9933907973999181,
      "learning_rate": 1.039640413787052e-07,
      "loss": 0.1071,
      "step": 32116
    },
    {
      "epoch": 0.9369566485792636,
      "grad_norm": 0.8128984664761882,
      "learning_rate": 1.0386822396489027e-07,
      "loss": 0.1204,
      "step": 32117
    },
    {
      "epoch": 0.9369858218099072,
      "grad_norm": 0.8148280473503925,
      "learning_rate": 1.0377245026257143e-07,
      "loss": 0.1088,
      "step": 32118
    },
    {
      "epoch": 0.9370149950405507,
      "grad_norm": 0.6600777179891715,
      "learning_rate": 1.0367672027260356e-07,
      "loss": 0.1235,
      "step": 32119
    },
    {
      "epoch": 0.9370441682711943,
      "grad_norm": 0.7702166296209475,
      "learning_rate": 1.0358103399584096e-07,
      "loss": 0.0957,
      "step": 32120
    },
    {
      "epoch": 0.937073341501838,
      "grad_norm": 0.8574890822227507,
      "learning_rate": 1.0348539143313741e-07,
      "loss": 0.103,
      "step": 32121
    },
    {
      "epoch": 0.9371025147324815,
      "grad_norm": 0.7744931859645473,
      "learning_rate": 1.0338979258534776e-07,
      "loss": 0.102,
      "step": 32122
    },
    {
      "epoch": 0.9371316879631251,
      "grad_norm": 0.7518278067773781,
      "learning_rate": 1.0329423745332523e-07,
      "loss": 0.107,
      "step": 32123
    },
    {
      "epoch": 0.9371608611937686,
      "grad_norm": 1.0942535864638883,
      "learning_rate": 1.0319872603792302e-07,
      "loss": 0.1049,
      "step": 32124
    },
    {
      "epoch": 0.9371900344244122,
      "grad_norm": 1.0858144600217252,
      "learning_rate": 1.0310325833999269e-07,
      "loss": 0.0933,
      "step": 32125
    },
    {
      "epoch": 0.9372192076550557,
      "grad_norm": 1.026925791389885,
      "learning_rate": 1.0300783436038852e-07,
      "loss": 0.125,
      "step": 32126
    },
    {
      "epoch": 0.9372483808856993,
      "grad_norm": 0.8653665951855168,
      "learning_rate": 1.0291245409996097e-07,
      "loss": 0.1069,
      "step": 32127
    },
    {
      "epoch": 0.9372775541163428,
      "grad_norm": 1.0504917778941396,
      "learning_rate": 1.0281711755956159e-07,
      "loss": 0.0888,
      "step": 32128
    },
    {
      "epoch": 0.9373067273469864,
      "grad_norm": 0.9266732479399925,
      "learning_rate": 1.0272182474004299e-07,
      "loss": 0.1189,
      "step": 32129
    },
    {
      "epoch": 0.9373359005776299,
      "grad_norm": 0.9230162594241422,
      "learning_rate": 1.0262657564225397e-07,
      "loss": 0.1184,
      "step": 32130
    },
    {
      "epoch": 0.9373650738082735,
      "grad_norm": 0.8241398713510739,
      "learning_rate": 1.0253137026704607e-07,
      "loss": 0.1142,
      "step": 32131
    },
    {
      "epoch": 0.937394247038917,
      "grad_norm": 0.8606522985963572,
      "learning_rate": 1.0243620861526915e-07,
      "loss": 0.0991,
      "step": 32132
    },
    {
      "epoch": 0.9374234202695606,
      "grad_norm": 0.8115170765044549,
      "learning_rate": 1.0234109068777254e-07,
      "loss": 0.1125,
      "step": 32133
    },
    {
      "epoch": 0.9374525935002043,
      "grad_norm": 0.7330669259049344,
      "learning_rate": 1.0224601648540555e-07,
      "loss": 0.1186,
      "step": 32134
    },
    {
      "epoch": 0.9374817667308478,
      "grad_norm": 0.9528809566799136,
      "learning_rate": 1.021509860090164e-07,
      "loss": 0.1318,
      "step": 32135
    },
    {
      "epoch": 0.9375109399614914,
      "grad_norm": 1.2634693443948881,
      "learning_rate": 1.0205599925945442e-07,
      "loss": 0.1086,
      "step": 32136
    },
    {
      "epoch": 0.9375401131921349,
      "grad_norm": 0.861775568004273,
      "learning_rate": 1.0196105623756781e-07,
      "loss": 0.1107,
      "step": 32137
    },
    {
      "epoch": 0.9375692864227785,
      "grad_norm": 0.8751065366212872,
      "learning_rate": 1.0186615694420255e-07,
      "loss": 0.1303,
      "step": 32138
    },
    {
      "epoch": 0.937598459653422,
      "grad_norm": 0.7100950551611143,
      "learning_rate": 1.0177130138020741e-07,
      "loss": 0.0952,
      "step": 32139
    },
    {
      "epoch": 0.9376276328840656,
      "grad_norm": 0.8449839806862404,
      "learning_rate": 1.0167648954642895e-07,
      "loss": 0.127,
      "step": 32140
    },
    {
      "epoch": 0.9376568061147091,
      "grad_norm": 0.6529840587223028,
      "learning_rate": 1.0158172144371369e-07,
      "loss": 0.1051,
      "step": 32141
    },
    {
      "epoch": 0.9376859793453527,
      "grad_norm": 0.8914275159657254,
      "learning_rate": 1.0148699707290711e-07,
      "loss": 0.1235,
      "step": 32142
    },
    {
      "epoch": 0.9377151525759962,
      "grad_norm": 0.7751532525943846,
      "learning_rate": 1.013923164348557e-07,
      "loss": 0.1017,
      "step": 32143
    },
    {
      "epoch": 0.9377443258066398,
      "grad_norm": 0.9557705989379754,
      "learning_rate": 1.0129767953040326e-07,
      "loss": 0.1229,
      "step": 32144
    },
    {
      "epoch": 0.9377734990372834,
      "grad_norm": 0.8410134147210956,
      "learning_rate": 1.0120308636039632e-07,
      "loss": 0.1123,
      "step": 32145
    },
    {
      "epoch": 0.9378026722679269,
      "grad_norm": 1.1326334761987327,
      "learning_rate": 1.0110853692567924e-07,
      "loss": 0.1247,
      "step": 32146
    },
    {
      "epoch": 0.9378318454985705,
      "grad_norm": 0.9496660829235316,
      "learning_rate": 1.0101403122709518e-07,
      "loss": 0.1071,
      "step": 32147
    },
    {
      "epoch": 0.9378610187292141,
      "grad_norm": 0.8437905836275701,
      "learning_rate": 1.0091956926548852e-07,
      "loss": 0.1181,
      "step": 32148
    },
    {
      "epoch": 0.9378901919598577,
      "grad_norm": 1.0453297402604065,
      "learning_rate": 1.0082515104170243e-07,
      "loss": 0.12,
      "step": 32149
    },
    {
      "epoch": 0.9379193651905012,
      "grad_norm": 0.8428288523939395,
      "learning_rate": 1.0073077655657959e-07,
      "loss": 0.1256,
      "step": 32150
    },
    {
      "epoch": 0.9379485384211448,
      "grad_norm": 0.7548680701419213,
      "learning_rate": 1.0063644581096322e-07,
      "loss": 0.1148,
      "step": 32151
    },
    {
      "epoch": 0.9379777116517883,
      "grad_norm": 0.9450970919153946,
      "learning_rate": 1.0054215880569485e-07,
      "loss": 0.1304,
      "step": 32152
    },
    {
      "epoch": 0.9380068848824319,
      "grad_norm": 0.8915063403696859,
      "learning_rate": 1.0044791554161659e-07,
      "loss": 0.1171,
      "step": 32153
    },
    {
      "epoch": 0.9380360581130754,
      "grad_norm": 0.8769882247763162,
      "learning_rate": 1.0035371601957e-07,
      "loss": 0.0982,
      "step": 32154
    },
    {
      "epoch": 0.938065231343719,
      "grad_norm": 0.8917244343084209,
      "learning_rate": 1.0025956024039551e-07,
      "loss": 0.1008,
      "step": 32155
    },
    {
      "epoch": 0.9380944045743626,
      "grad_norm": 0.8660089227440425,
      "learning_rate": 1.0016544820493357e-07,
      "loss": 0.1061,
      "step": 32156
    },
    {
      "epoch": 0.9381235778050061,
      "grad_norm": 0.8980029735838663,
      "learning_rate": 1.0007137991402572e-07,
      "loss": 0.0905,
      "step": 32157
    },
    {
      "epoch": 0.9381527510356497,
      "grad_norm": 0.7552683733797441,
      "learning_rate": 9.997735536851017e-08,
      "loss": 0.1182,
      "step": 32158
    },
    {
      "epoch": 0.9381819242662932,
      "grad_norm": 0.9670731861815297,
      "learning_rate": 9.988337456922737e-08,
      "loss": 0.1212,
      "step": 32159
    },
    {
      "epoch": 0.9382110974969368,
      "grad_norm": 0.7434491962365531,
      "learning_rate": 9.978943751701609e-08,
      "loss": 0.1103,
      "step": 32160
    },
    {
      "epoch": 0.9382402707275804,
      "grad_norm": 0.7559641120074799,
      "learning_rate": 9.969554421271455e-08,
      "loss": 0.0968,
      "step": 32161
    },
    {
      "epoch": 0.938269443958224,
      "grad_norm": 0.8034284376231915,
      "learning_rate": 9.960169465716152e-08,
      "loss": 0.1308,
      "step": 32162
    },
    {
      "epoch": 0.9382986171888675,
      "grad_norm": 0.757018958058187,
      "learning_rate": 9.950788885119522e-08,
      "loss": 0.1431,
      "step": 32163
    },
    {
      "epoch": 0.9383277904195111,
      "grad_norm": 1.1414076423319532,
      "learning_rate": 9.941412679565276e-08,
      "loss": 0.1374,
      "step": 32164
    },
    {
      "epoch": 0.9383569636501546,
      "grad_norm": 1.070905765134784,
      "learning_rate": 9.932040849137014e-08,
      "loss": 0.0941,
      "step": 32165
    },
    {
      "epoch": 0.9383861368807982,
      "grad_norm": 0.8973320288044775,
      "learning_rate": 9.922673393918614e-08,
      "loss": 0.1241,
      "step": 32166
    },
    {
      "epoch": 0.9384153101114417,
      "grad_norm": 1.2789932825467665,
      "learning_rate": 9.913310313993562e-08,
      "loss": 0.0778,
      "step": 32167
    },
    {
      "epoch": 0.9384444833420853,
      "grad_norm": 0.8551584840952743,
      "learning_rate": 9.903951609445406e-08,
      "loss": 0.0932,
      "step": 32168
    },
    {
      "epoch": 0.9384736565727289,
      "grad_norm": 0.7892981867974093,
      "learning_rate": 9.894597280357798e-08,
      "loss": 0.1129,
      "step": 32169
    },
    {
      "epoch": 0.9385028298033724,
      "grad_norm": 1.038712545214295,
      "learning_rate": 9.885247326814285e-08,
      "loss": 0.1077,
      "step": 32170
    },
    {
      "epoch": 0.938532003034016,
      "grad_norm": 1.0693996277997444,
      "learning_rate": 9.875901748898298e-08,
      "loss": 0.0953,
      "step": 32171
    },
    {
      "epoch": 0.9385611762646595,
      "grad_norm": 0.8098148128622791,
      "learning_rate": 9.86656054669316e-08,
      "loss": 0.1142,
      "step": 32172
    },
    {
      "epoch": 0.9385903494953031,
      "grad_norm": 0.7900266748948263,
      "learning_rate": 9.857223720282472e-08,
      "loss": 0.1078,
      "step": 32173
    },
    {
      "epoch": 0.9386195227259466,
      "grad_norm": 0.9237403114889481,
      "learning_rate": 9.847891269749388e-08,
      "loss": 0.1015,
      "step": 32174
    },
    {
      "epoch": 0.9386486959565903,
      "grad_norm": 0.8923289365142516,
      "learning_rate": 9.838563195177342e-08,
      "loss": 0.1209,
      "step": 32175
    },
    {
      "epoch": 0.9386778691872338,
      "grad_norm": 0.7569550043956034,
      "learning_rate": 9.829239496649656e-08,
      "loss": 0.1195,
      "step": 32176
    },
    {
      "epoch": 0.9387070424178774,
      "grad_norm": 0.6888915327303003,
      "learning_rate": 9.819920174249486e-08,
      "loss": 0.1012,
      "step": 32177
    },
    {
      "epoch": 0.9387362156485209,
      "grad_norm": 0.7796935344380027,
      "learning_rate": 9.810605228059988e-08,
      "loss": 0.0985,
      "step": 32178
    },
    {
      "epoch": 0.9387653888791645,
      "grad_norm": 0.7837266477355513,
      "learning_rate": 9.801294658164484e-08,
      "loss": 0.1067,
      "step": 32179
    },
    {
      "epoch": 0.938794562109808,
      "grad_norm": 0.8458686648916788,
      "learning_rate": 9.791988464645907e-08,
      "loss": 0.096,
      "step": 32180
    },
    {
      "epoch": 0.9388237353404516,
      "grad_norm": 0.7529063868239642,
      "learning_rate": 9.782686647587524e-08,
      "loss": 0.1069,
      "step": 32181
    },
    {
      "epoch": 0.9388529085710952,
      "grad_norm": 0.8567471399250969,
      "learning_rate": 9.773389207072214e-08,
      "loss": 0.104,
      "step": 32182
    },
    {
      "epoch": 0.9388820818017387,
      "grad_norm": 0.7800869455553334,
      "learning_rate": 9.764096143183133e-08,
      "loss": 0.1197,
      "step": 32183
    },
    {
      "epoch": 0.9389112550323823,
      "grad_norm": 0.9397806084771332,
      "learning_rate": 9.754807456003157e-08,
      "loss": 0.1264,
      "step": 32184
    },
    {
      "epoch": 0.9389404282630258,
      "grad_norm": 0.809745770145711,
      "learning_rate": 9.745523145615166e-08,
      "loss": 0.0863,
      "step": 32185
    },
    {
      "epoch": 0.9389696014936694,
      "grad_norm": 0.9002286358801612,
      "learning_rate": 9.736243212102147e-08,
      "loss": 0.1165,
      "step": 32186
    },
    {
      "epoch": 0.9389987747243129,
      "grad_norm": 0.7805276518654509,
      "learning_rate": 9.726967655546926e-08,
      "loss": 0.122,
      "step": 32187
    },
    {
      "epoch": 0.9390279479549565,
      "grad_norm": 0.9600304009593331,
      "learning_rate": 9.717696476032267e-08,
      "loss": 0.1003,
      "step": 32188
    },
    {
      "epoch": 0.9390571211856001,
      "grad_norm": 0.754286919698732,
      "learning_rate": 9.708429673640995e-08,
      "loss": 0.1065,
      "step": 32189
    },
    {
      "epoch": 0.9390862944162437,
      "grad_norm": 0.8368083610034457,
      "learning_rate": 9.699167248455876e-08,
      "loss": 0.1218,
      "step": 32190
    },
    {
      "epoch": 0.9391154676468872,
      "grad_norm": 0.7588346705004958,
      "learning_rate": 9.689909200559455e-08,
      "loss": 0.1006,
      "step": 32191
    },
    {
      "epoch": 0.9391446408775308,
      "grad_norm": 0.8304193073111201,
      "learning_rate": 9.6806555300345e-08,
      "loss": 0.1198,
      "step": 32192
    },
    {
      "epoch": 0.9391738141081744,
      "grad_norm": 0.7431972213954714,
      "learning_rate": 9.671406236963666e-08,
      "loss": 0.1089,
      "step": 32193
    },
    {
      "epoch": 0.9392029873388179,
      "grad_norm": 0.624978760386684,
      "learning_rate": 9.662161321429441e-08,
      "loss": 0.0997,
      "step": 32194
    },
    {
      "epoch": 0.9392321605694615,
      "grad_norm": 0.6894520225294304,
      "learning_rate": 9.652920783514319e-08,
      "loss": 0.0965,
      "step": 32195
    },
    {
      "epoch": 0.939261333800105,
      "grad_norm": 0.8174341948911126,
      "learning_rate": 9.643684623300953e-08,
      "loss": 0.084,
      "step": 32196
    },
    {
      "epoch": 0.9392905070307486,
      "grad_norm": 1.0582396097798812,
      "learning_rate": 9.634452840871667e-08,
      "loss": 0.1178,
      "step": 32197
    },
    {
      "epoch": 0.9393196802613921,
      "grad_norm": 0.7454065307174922,
      "learning_rate": 9.625225436308949e-08,
      "loss": 0.1038,
      "step": 32198
    },
    {
      "epoch": 0.9393488534920357,
      "grad_norm": 0.8494845543516857,
      "learning_rate": 9.616002409695069e-08,
      "loss": 0.1187,
      "step": 32199
    },
    {
      "epoch": 0.9393780267226792,
      "grad_norm": 0.7343299135462298,
      "learning_rate": 9.60678376111257e-08,
      "loss": 0.1044,
      "step": 32200
    },
    {
      "epoch": 0.9394071999533228,
      "grad_norm": 0.8797948726980744,
      "learning_rate": 9.59756949064361e-08,
      "loss": 0.114,
      "step": 32201
    },
    {
      "epoch": 0.9394363731839664,
      "grad_norm": 0.8113452424525245,
      "learning_rate": 9.588359598370456e-08,
      "loss": 0.1128,
      "step": 32202
    },
    {
      "epoch": 0.93946554641461,
      "grad_norm": 0.7252834619600642,
      "learning_rate": 9.579154084375375e-08,
      "loss": 0.1058,
      "step": 32203
    },
    {
      "epoch": 0.9394947196452536,
      "grad_norm": 0.8894169244713941,
      "learning_rate": 9.569952948740525e-08,
      "loss": 0.1201,
      "step": 32204
    },
    {
      "epoch": 0.9395238928758971,
      "grad_norm": 0.7713508233508065,
      "learning_rate": 9.560756191548004e-08,
      "loss": 0.1009,
      "step": 32205
    },
    {
      "epoch": 0.9395530661065407,
      "grad_norm": 0.860856659261507,
      "learning_rate": 9.551563812880083e-08,
      "loss": 0.1411,
      "step": 32206
    },
    {
      "epoch": 0.9395822393371842,
      "grad_norm": 0.8472523668044764,
      "learning_rate": 9.542375812818694e-08,
      "loss": 0.0957,
      "step": 32207
    },
    {
      "epoch": 0.9396114125678278,
      "grad_norm": 0.8393016543633706,
      "learning_rate": 9.533192191445828e-08,
      "loss": 0.1207,
      "step": 32208
    },
    {
      "epoch": 0.9396405857984713,
      "grad_norm": 0.8120017488468546,
      "learning_rate": 9.524012948843586e-08,
      "loss": 0.1046,
      "step": 32209
    },
    {
      "epoch": 0.9396697590291149,
      "grad_norm": 0.8621657024627788,
      "learning_rate": 9.514838085093847e-08,
      "loss": 0.1239,
      "step": 32210
    },
    {
      "epoch": 0.9396989322597584,
      "grad_norm": 0.84849491023201,
      "learning_rate": 9.5056676002786e-08,
      "loss": 0.1414,
      "step": 32211
    },
    {
      "epoch": 0.939728105490402,
      "grad_norm": 0.6648422318792367,
      "learning_rate": 9.496501494479615e-08,
      "loss": 0.1237,
      "step": 32212
    },
    {
      "epoch": 0.9397572787210455,
      "grad_norm": 0.6795376554667742,
      "learning_rate": 9.48733976777877e-08,
      "loss": 0.1037,
      "step": 32213
    },
    {
      "epoch": 0.9397864519516891,
      "grad_norm": 0.7282466940752219,
      "learning_rate": 9.478182420257887e-08,
      "loss": 0.1184,
      "step": 32214
    },
    {
      "epoch": 0.9398156251823326,
      "grad_norm": 0.7548401752090214,
      "learning_rate": 9.46902945199868e-08,
      "loss": 0.0954,
      "step": 32215
    },
    {
      "epoch": 0.9398447984129763,
      "grad_norm": 0.8676952289040676,
      "learning_rate": 9.45988086308286e-08,
      "loss": 0.117,
      "step": 32216
    },
    {
      "epoch": 0.9398739716436199,
      "grad_norm": 0.8790333954437566,
      "learning_rate": 9.45073665359214e-08,
      "loss": 0.1184,
      "step": 32217
    },
    {
      "epoch": 0.9399031448742634,
      "grad_norm": 0.8883700766774195,
      "learning_rate": 9.441596823608123e-08,
      "loss": 0.0971,
      "step": 32218
    },
    {
      "epoch": 0.939932318104907,
      "grad_norm": 0.7900934281400136,
      "learning_rate": 9.432461373212465e-08,
      "loss": 0.1061,
      "step": 32219
    },
    {
      "epoch": 0.9399614913355505,
      "grad_norm": 0.9932949804585048,
      "learning_rate": 9.423330302486655e-08,
      "loss": 0.1203,
      "step": 32220
    },
    {
      "epoch": 0.9399906645661941,
      "grad_norm": 0.8228353632921792,
      "learning_rate": 9.41420361151224e-08,
      "loss": 0.0977,
      "step": 32221
    },
    {
      "epoch": 0.9400198377968376,
      "grad_norm": 0.7413417391788512,
      "learning_rate": 9.405081300370712e-08,
      "loss": 0.1055,
      "step": 32222
    },
    {
      "epoch": 0.9400490110274812,
      "grad_norm": 0.9451930479248437,
      "learning_rate": 9.395963369143501e-08,
      "loss": 0.1083,
      "step": 32223
    },
    {
      "epoch": 0.9400781842581247,
      "grad_norm": 0.8405664626413575,
      "learning_rate": 9.386849817912047e-08,
      "loss": 0.0938,
      "step": 32224
    },
    {
      "epoch": 0.9401073574887683,
      "grad_norm": 1.0092823366652526,
      "learning_rate": 9.377740646757616e-08,
      "loss": 0.1025,
      "step": 32225
    },
    {
      "epoch": 0.9401365307194118,
      "grad_norm": 0.7185649144012645,
      "learning_rate": 9.368635855761642e-08,
      "loss": 0.1294,
      "step": 32226
    },
    {
      "epoch": 0.9401657039500554,
      "grad_norm": 0.6742166336971404,
      "learning_rate": 9.35953544500534e-08,
      "loss": 0.1332,
      "step": 32227
    },
    {
      "epoch": 0.9401948771806989,
      "grad_norm": 0.9077843544231706,
      "learning_rate": 9.350439414569978e-08,
      "loss": 0.1196,
      "step": 32228
    },
    {
      "epoch": 0.9402240504113426,
      "grad_norm": 1.8356673550242073,
      "learning_rate": 9.341347764536768e-08,
      "loss": 0.1123,
      "step": 32229
    },
    {
      "epoch": 0.9402532236419862,
      "grad_norm": 0.8468451722893325,
      "learning_rate": 9.332260494986866e-08,
      "loss": 0.1417,
      "step": 32230
    },
    {
      "epoch": 0.9402823968726297,
      "grad_norm": 1.4832384056344383,
      "learning_rate": 9.323177606001433e-08,
      "loss": 0.1128,
      "step": 32231
    },
    {
      "epoch": 0.9403115701032733,
      "grad_norm": 0.8977563019204942,
      "learning_rate": 9.314099097661511e-08,
      "loss": 0.1274,
      "step": 32232
    },
    {
      "epoch": 0.9403407433339168,
      "grad_norm": 0.7821348721161093,
      "learning_rate": 9.30502497004826e-08,
      "loss": 0.1133,
      "step": 32233
    },
    {
      "epoch": 0.9403699165645604,
      "grad_norm": 1.015399189609931,
      "learning_rate": 9.295955223242503e-08,
      "loss": 0.1313,
      "step": 32234
    },
    {
      "epoch": 0.9403990897952039,
      "grad_norm": 0.7625042097865166,
      "learning_rate": 9.286889857325343e-08,
      "loss": 0.1284,
      "step": 32235
    },
    {
      "epoch": 0.9404282630258475,
      "grad_norm": 0.7857895787778961,
      "learning_rate": 9.277828872377714e-08,
      "loss": 0.1001,
      "step": 32236
    },
    {
      "epoch": 0.940457436256491,
      "grad_norm": 0.8786706465429232,
      "learning_rate": 9.268772268480498e-08,
      "loss": 0.1039,
      "step": 32237
    },
    {
      "epoch": 0.9404866094871346,
      "grad_norm": 0.8768587824741662,
      "learning_rate": 9.25972004571446e-08,
      "loss": 0.0987,
      "step": 32238
    },
    {
      "epoch": 0.9405157827177781,
      "grad_norm": 0.8281244894290761,
      "learning_rate": 9.250672204160538e-08,
      "loss": 0.1132,
      "step": 32239
    },
    {
      "epoch": 0.9405449559484217,
      "grad_norm": 0.8666201805007784,
      "learning_rate": 9.241628743899445e-08,
      "loss": 0.1054,
      "step": 32240
    },
    {
      "epoch": 0.9405741291790652,
      "grad_norm": 0.9872921626328762,
      "learning_rate": 9.232589665012004e-08,
      "loss": 0.1145,
      "step": 32241
    },
    {
      "epoch": 0.9406033024097088,
      "grad_norm": 0.9099156945262562,
      "learning_rate": 9.223554967578763e-08,
      "loss": 0.1263,
      "step": 32242
    },
    {
      "epoch": 0.9406324756403525,
      "grad_norm": 0.7816016344764367,
      "learning_rate": 9.214524651680545e-08,
      "loss": 0.1092,
      "step": 32243
    },
    {
      "epoch": 0.940661648870996,
      "grad_norm": 0.750991763943585,
      "learning_rate": 9.205498717397843e-08,
      "loss": 0.1075,
      "step": 32244
    },
    {
      "epoch": 0.9406908221016396,
      "grad_norm": 0.8534571402613954,
      "learning_rate": 9.196477164811313e-08,
      "loss": 0.1177,
      "step": 32245
    },
    {
      "epoch": 0.9407199953322831,
      "grad_norm": 0.9121166727582298,
      "learning_rate": 9.18745999400139e-08,
      "loss": 0.1059,
      "step": 32246
    },
    {
      "epoch": 0.9407491685629267,
      "grad_norm": 0.8343823646972929,
      "learning_rate": 9.178447205048735e-08,
      "loss": 0.1119,
      "step": 32247
    },
    {
      "epoch": 0.9407783417935702,
      "grad_norm": 0.8591376981119724,
      "learning_rate": 9.169438798033725e-08,
      "loss": 0.1116,
      "step": 32248
    },
    {
      "epoch": 0.9408075150242138,
      "grad_norm": 0.8586963944195071,
      "learning_rate": 9.160434773036797e-08,
      "loss": 0.1219,
      "step": 32249
    },
    {
      "epoch": 0.9408366882548573,
      "grad_norm": 0.720980567545608,
      "learning_rate": 9.151435130138331e-08,
      "loss": 0.0961,
      "step": 32250
    },
    {
      "epoch": 0.9408658614855009,
      "grad_norm": 0.6589360325697987,
      "learning_rate": 9.142439869418651e-08,
      "loss": 0.111,
      "step": 32251
    },
    {
      "epoch": 0.9408950347161444,
      "grad_norm": 0.953257786619246,
      "learning_rate": 9.133448990958083e-08,
      "loss": 0.1047,
      "step": 32252
    },
    {
      "epoch": 0.940924207946788,
      "grad_norm": 1.0535537006691844,
      "learning_rate": 9.12446249483695e-08,
      "loss": 0.1357,
      "step": 32253
    },
    {
      "epoch": 0.9409533811774315,
      "grad_norm": 0.9181781532984107,
      "learning_rate": 9.115480381135466e-08,
      "loss": 0.1141,
      "step": 32254
    },
    {
      "epoch": 0.9409825544080751,
      "grad_norm": 0.7004570130653078,
      "learning_rate": 9.106502649933679e-08,
      "loss": 0.1197,
      "step": 32255
    },
    {
      "epoch": 0.9410117276387188,
      "grad_norm": 0.7811468517955279,
      "learning_rate": 9.097529301311969e-08,
      "loss": 0.1161,
      "step": 32256
    },
    {
      "epoch": 0.9410409008693623,
      "grad_norm": 0.9753875994681088,
      "learning_rate": 9.088560335350272e-08,
      "loss": 0.102,
      "step": 32257
    },
    {
      "epoch": 0.9410700741000059,
      "grad_norm": 0.7541581719261251,
      "learning_rate": 9.07959575212869e-08,
      "loss": 0.12,
      "step": 32258
    },
    {
      "epoch": 0.9410992473306494,
      "grad_norm": 0.8599202291508046,
      "learning_rate": 9.07063555172727e-08,
      "loss": 0.1118,
      "step": 32259
    },
    {
      "epoch": 0.941128420561293,
      "grad_norm": 0.8262972867905722,
      "learning_rate": 9.061679734226115e-08,
      "loss": 0.0981,
      "step": 32260
    },
    {
      "epoch": 0.9411575937919365,
      "grad_norm": 0.8200818163966893,
      "learning_rate": 9.05272829970505e-08,
      "loss": 0.096,
      "step": 32261
    },
    {
      "epoch": 0.9411867670225801,
      "grad_norm": 0.6948563973969044,
      "learning_rate": 9.043781248244011e-08,
      "loss": 0.108,
      "step": 32262
    },
    {
      "epoch": 0.9412159402532236,
      "grad_norm": 0.9039716304296024,
      "learning_rate": 9.034838579922878e-08,
      "loss": 0.125,
      "step": 32263
    },
    {
      "epoch": 0.9412451134838672,
      "grad_norm": 0.948507507146162,
      "learning_rate": 9.025900294821533e-08,
      "loss": 0.1054,
      "step": 32264
    },
    {
      "epoch": 0.9412742867145107,
      "grad_norm": 0.8100205279020445,
      "learning_rate": 9.016966393019688e-08,
      "loss": 0.1169,
      "step": 32265
    },
    {
      "epoch": 0.9413034599451543,
      "grad_norm": 0.7338423233355329,
      "learning_rate": 9.008036874597226e-08,
      "loss": 0.0986,
      "step": 32266
    },
    {
      "epoch": 0.9413326331757979,
      "grad_norm": 1.026312423149906,
      "learning_rate": 8.999111739633803e-08,
      "loss": 0.1079,
      "step": 32267
    },
    {
      "epoch": 0.9413618064064414,
      "grad_norm": 0.9176621989537525,
      "learning_rate": 8.990190988209025e-08,
      "loss": 0.1179,
      "step": 32268
    },
    {
      "epoch": 0.941390979637085,
      "grad_norm": 0.7518978043031617,
      "learning_rate": 8.981274620402713e-08,
      "loss": 0.1076,
      "step": 32269
    },
    {
      "epoch": 0.9414201528677286,
      "grad_norm": 0.7782733248428133,
      "learning_rate": 8.972362636294307e-08,
      "loss": 0.1037,
      "step": 32270
    },
    {
      "epoch": 0.9414493260983722,
      "grad_norm": 0.88650877935467,
      "learning_rate": 8.963455035963409e-08,
      "loss": 0.1086,
      "step": 32271
    },
    {
      "epoch": 0.9414784993290157,
      "grad_norm": 0.8043260987467993,
      "learning_rate": 8.954551819489565e-08,
      "loss": 0.1127,
      "step": 32272
    },
    {
      "epoch": 0.9415076725596593,
      "grad_norm": 0.7528719477366806,
      "learning_rate": 8.945652986952325e-08,
      "loss": 0.0967,
      "step": 32273
    },
    {
      "epoch": 0.9415368457903028,
      "grad_norm": 0.9861613255844213,
      "learning_rate": 8.936758538431067e-08,
      "loss": 0.1141,
      "step": 32274
    },
    {
      "epoch": 0.9415660190209464,
      "grad_norm": 0.8871444967746114,
      "learning_rate": 8.92786847400512e-08,
      "loss": 0.1181,
      "step": 32275
    },
    {
      "epoch": 0.9415951922515899,
      "grad_norm": 0.7471736057557377,
      "learning_rate": 8.918982793753972e-08,
      "loss": 0.1081,
      "step": 32276
    },
    {
      "epoch": 0.9416243654822335,
      "grad_norm": 0.7884487385782475,
      "learning_rate": 8.910101497756951e-08,
      "loss": 0.0975,
      "step": 32277
    },
    {
      "epoch": 0.941653538712877,
      "grad_norm": 1.1634976749767227,
      "learning_rate": 8.901224586093271e-08,
      "loss": 0.1253,
      "step": 32278
    },
    {
      "epoch": 0.9416827119435206,
      "grad_norm": 0.8013935716951578,
      "learning_rate": 8.892352058842258e-08,
      "loss": 0.113,
      "step": 32279
    },
    {
      "epoch": 0.9417118851741642,
      "grad_norm": 0.908155959134173,
      "learning_rate": 8.883483916083068e-08,
      "loss": 0.1102,
      "step": 32280
    },
    {
      "epoch": 0.9417410584048077,
      "grad_norm": 0.926664533373589,
      "learning_rate": 8.874620157894864e-08,
      "loss": 0.1175,
      "step": 32281
    },
    {
      "epoch": 0.9417702316354513,
      "grad_norm": 0.9466495200848368,
      "learning_rate": 8.865760784356859e-08,
      "loss": 0.105,
      "step": 32282
    },
    {
      "epoch": 0.9417994048660949,
      "grad_norm": 0.9163070272330989,
      "learning_rate": 8.856905795548098e-08,
      "loss": 0.1314,
      "step": 32283
    },
    {
      "epoch": 0.9418285780967385,
      "grad_norm": 0.7883809830136383,
      "learning_rate": 8.848055191547633e-08,
      "loss": 0.1055,
      "step": 32284
    },
    {
      "epoch": 0.941857751327382,
      "grad_norm": 0.8342912192103017,
      "learning_rate": 8.839208972434455e-08,
      "loss": 0.1023,
      "step": 32285
    },
    {
      "epoch": 0.9418869245580256,
      "grad_norm": 0.8348360942403166,
      "learning_rate": 8.830367138287555e-08,
      "loss": 0.1087,
      "step": 32286
    },
    {
      "epoch": 0.9419160977886691,
      "grad_norm": 1.0003868136548781,
      "learning_rate": 8.821529689185981e-08,
      "loss": 0.1174,
      "step": 32287
    },
    {
      "epoch": 0.9419452710193127,
      "grad_norm": 0.8001076929016344,
      "learning_rate": 8.81269662520845e-08,
      "loss": 0.1091,
      "step": 32288
    },
    {
      "epoch": 0.9419744442499562,
      "grad_norm": 1.2193951438076844,
      "learning_rate": 8.803867946433897e-08,
      "loss": 0.115,
      "step": 32289
    },
    {
      "epoch": 0.9420036174805998,
      "grad_norm": 0.8640853572218443,
      "learning_rate": 8.795043652941204e-08,
      "loss": 0.1199,
      "step": 32290
    },
    {
      "epoch": 0.9420327907112434,
      "grad_norm": 0.7240457643107918,
      "learning_rate": 8.786223744809085e-08,
      "loss": 0.1039,
      "step": 32291
    },
    {
      "epoch": 0.9420619639418869,
      "grad_norm": 0.9594373398207939,
      "learning_rate": 8.777408222116257e-08,
      "loss": 0.0993,
      "step": 32292
    },
    {
      "epoch": 0.9420911371725305,
      "grad_norm": 0.7836878391748856,
      "learning_rate": 8.768597084941543e-08,
      "loss": 0.1092,
      "step": 32293
    },
    {
      "epoch": 0.942120310403174,
      "grad_norm": 0.7213913717550353,
      "learning_rate": 8.759790333363439e-08,
      "loss": 0.1087,
      "step": 32294
    },
    {
      "epoch": 0.9421494836338176,
      "grad_norm": 0.7591685255798649,
      "learning_rate": 8.750987967460711e-08,
      "loss": 0.11,
      "step": 32295
    },
    {
      "epoch": 0.9421786568644611,
      "grad_norm": 0.8033655210901949,
      "learning_rate": 8.74218998731191e-08,
      "loss": 0.112,
      "step": 32296
    },
    {
      "epoch": 0.9422078300951048,
      "grad_norm": 1.0044959807320812,
      "learning_rate": 8.733396392995531e-08,
      "loss": 0.1237,
      "step": 32297
    },
    {
      "epoch": 0.9422370033257483,
      "grad_norm": 0.7833893556060869,
      "learning_rate": 8.724607184590117e-08,
      "loss": 0.1222,
      "step": 32298
    },
    {
      "epoch": 0.9422661765563919,
      "grad_norm": 0.8650228097079342,
      "learning_rate": 8.715822362174165e-08,
      "loss": 0.1107,
      "step": 32299
    },
    {
      "epoch": 0.9422953497870354,
      "grad_norm": 0.8301736997589824,
      "learning_rate": 8.707041925826054e-08,
      "loss": 0.1245,
      "step": 32300
    },
    {
      "epoch": 0.942324523017679,
      "grad_norm": 0.7275716008408162,
      "learning_rate": 8.698265875624168e-08,
      "loss": 0.1235,
      "step": 32301
    },
    {
      "epoch": 0.9423536962483225,
      "grad_norm": 0.8993784203191924,
      "learning_rate": 8.689494211646887e-08,
      "loss": 0.1119,
      "step": 32302
    },
    {
      "epoch": 0.9423828694789661,
      "grad_norm": 1.1175801904530618,
      "learning_rate": 8.680726933972538e-08,
      "loss": 0.1101,
      "step": 32303
    },
    {
      "epoch": 0.9424120427096097,
      "grad_norm": 0.8399352229791115,
      "learning_rate": 8.671964042679392e-08,
      "loss": 0.1215,
      "step": 32304
    },
    {
      "epoch": 0.9424412159402532,
      "grad_norm": 0.7335315001881796,
      "learning_rate": 8.663205537845609e-08,
      "loss": 0.1098,
      "step": 32305
    },
    {
      "epoch": 0.9424703891708968,
      "grad_norm": 0.861878034998344,
      "learning_rate": 8.654451419549459e-08,
      "loss": 0.1251,
      "step": 32306
    },
    {
      "epoch": 0.9424995624015403,
      "grad_norm": 0.7872442627576178,
      "learning_rate": 8.645701687869046e-08,
      "loss": 0.1145,
      "step": 32307
    },
    {
      "epoch": 0.9425287356321839,
      "grad_norm": 0.7276279017712955,
      "learning_rate": 8.63695634288253e-08,
      "loss": 0.1241,
      "step": 32308
    },
    {
      "epoch": 0.9425579088628274,
      "grad_norm": 0.8494474455200676,
      "learning_rate": 8.628215384668015e-08,
      "loss": 0.0962,
      "step": 32309
    },
    {
      "epoch": 0.9425870820934711,
      "grad_norm": 0.9091544186677172,
      "learning_rate": 8.61947881330344e-08,
      "loss": 0.1053,
      "step": 32310
    },
    {
      "epoch": 0.9426162553241146,
      "grad_norm": 0.800220103593855,
      "learning_rate": 8.610746628866851e-08,
      "loss": 0.1024,
      "step": 32311
    },
    {
      "epoch": 0.9426454285547582,
      "grad_norm": 0.6635900744825833,
      "learning_rate": 8.602018831436243e-08,
      "loss": 0.0982,
      "step": 32312
    },
    {
      "epoch": 0.9426746017854017,
      "grad_norm": 0.8273136038765367,
      "learning_rate": 8.593295421089498e-08,
      "loss": 0.1251,
      "step": 32313
    },
    {
      "epoch": 0.9427037750160453,
      "grad_norm": 0.9357943378239129,
      "learning_rate": 8.584576397904498e-08,
      "loss": 0.1015,
      "step": 32314
    },
    {
      "epoch": 0.9427329482466889,
      "grad_norm": 0.7543237702459774,
      "learning_rate": 8.57586176195907e-08,
      "loss": 0.1262,
      "step": 32315
    },
    {
      "epoch": 0.9427621214773324,
      "grad_norm": 0.8876534827741925,
      "learning_rate": 8.567151513331096e-08,
      "loss": 0.1259,
      "step": 32316
    },
    {
      "epoch": 0.942791294707976,
      "grad_norm": 0.8748683143361243,
      "learning_rate": 8.558445652098291e-08,
      "loss": 0.1071,
      "step": 32317
    },
    {
      "epoch": 0.9428204679386195,
      "grad_norm": 1.019682043259005,
      "learning_rate": 8.549744178338259e-08,
      "loss": 0.1214,
      "step": 32318
    },
    {
      "epoch": 0.9428496411692631,
      "grad_norm": 0.7724503288375518,
      "learning_rate": 8.54104709212883e-08,
      "loss": 0.1062,
      "step": 32319
    },
    {
      "epoch": 0.9428788143999066,
      "grad_norm": 0.8884411022332633,
      "learning_rate": 8.53235439354766e-08,
      "loss": 0.0994,
      "step": 32320
    },
    {
      "epoch": 0.9429079876305502,
      "grad_norm": 0.9568008653287678,
      "learning_rate": 8.5236660826723e-08,
      "loss": 0.1107,
      "step": 32321
    },
    {
      "epoch": 0.9429371608611937,
      "grad_norm": 0.7016210509757499,
      "learning_rate": 8.514982159580298e-08,
      "loss": 0.1078,
      "step": 32322
    },
    {
      "epoch": 0.9429663340918373,
      "grad_norm": 0.9077299274976988,
      "learning_rate": 8.506302624349205e-08,
      "loss": 0.1139,
      "step": 32323
    },
    {
      "epoch": 0.9429955073224809,
      "grad_norm": 0.7983976891298535,
      "learning_rate": 8.497627477056514e-08,
      "loss": 0.1156,
      "step": 32324
    },
    {
      "epoch": 0.9430246805531245,
      "grad_norm": 0.887561043756048,
      "learning_rate": 8.488956717779661e-08,
      "loss": 0.104,
      "step": 32325
    },
    {
      "epoch": 0.943053853783768,
      "grad_norm": 0.8654210067457109,
      "learning_rate": 8.480290346596087e-08,
      "loss": 0.1238,
      "step": 32326
    },
    {
      "epoch": 0.9430830270144116,
      "grad_norm": 0.7727612534057382,
      "learning_rate": 8.471628363583174e-08,
      "loss": 0.1094,
      "step": 32327
    },
    {
      "epoch": 0.9431122002450552,
      "grad_norm": 0.8980402081435425,
      "learning_rate": 8.46297076881819e-08,
      "loss": 0.1149,
      "step": 32328
    },
    {
      "epoch": 0.9431413734756987,
      "grad_norm": 0.6740917503354988,
      "learning_rate": 8.454317562378467e-08,
      "loss": 0.1058,
      "step": 32329
    },
    {
      "epoch": 0.9431705467063423,
      "grad_norm": 0.7988787708196102,
      "learning_rate": 8.445668744341274e-08,
      "loss": 0.1105,
      "step": 32330
    },
    {
      "epoch": 0.9431997199369858,
      "grad_norm": 0.7638095409849585,
      "learning_rate": 8.43702431478377e-08,
      "loss": 0.1008,
      "step": 32331
    },
    {
      "epoch": 0.9432288931676294,
      "grad_norm": 0.8684555261019707,
      "learning_rate": 8.428384273783175e-08,
      "loss": 0.1109,
      "step": 32332
    },
    {
      "epoch": 0.9432580663982729,
      "grad_norm": 0.6328075299142163,
      "learning_rate": 8.419748621416646e-08,
      "loss": 0.0915,
      "step": 32333
    },
    {
      "epoch": 0.9432872396289165,
      "grad_norm": 0.9430103200458374,
      "learning_rate": 8.411117357761289e-08,
      "loss": 0.1011,
      "step": 32334
    },
    {
      "epoch": 0.94331641285956,
      "grad_norm": 0.8249591915840404,
      "learning_rate": 8.402490482893988e-08,
      "loss": 0.1203,
      "step": 32335
    },
    {
      "epoch": 0.9433455860902036,
      "grad_norm": 0.7291717954263409,
      "learning_rate": 8.393867996892014e-08,
      "loss": 0.0926,
      "step": 32336
    },
    {
      "epoch": 0.9433747593208472,
      "grad_norm": 0.8703143796152049,
      "learning_rate": 8.385249899832249e-08,
      "loss": 0.1244,
      "step": 32337
    },
    {
      "epoch": 0.9434039325514908,
      "grad_norm": 0.779070003302947,
      "learning_rate": 8.37663619179152e-08,
      "loss": 0.1155,
      "step": 32338
    },
    {
      "epoch": 0.9434331057821344,
      "grad_norm": 1.0629336366988282,
      "learning_rate": 8.36802687284688e-08,
      "loss": 0.1194,
      "step": 32339
    },
    {
      "epoch": 0.9434622790127779,
      "grad_norm": 0.7835093401142775,
      "learning_rate": 8.359421943075153e-08,
      "loss": 0.0926,
      "step": 32340
    },
    {
      "epoch": 0.9434914522434215,
      "grad_norm": 0.9044756377979435,
      "learning_rate": 8.350821402553111e-08,
      "loss": 0.1051,
      "step": 32341
    },
    {
      "epoch": 0.943520625474065,
      "grad_norm": 0.8804094413338565,
      "learning_rate": 8.342225251357527e-08,
      "loss": 0.1142,
      "step": 32342
    },
    {
      "epoch": 0.9435497987047086,
      "grad_norm": 1.3066257204772769,
      "learning_rate": 8.333633489565284e-08,
      "loss": 0.137,
      "step": 32343
    },
    {
      "epoch": 0.9435789719353521,
      "grad_norm": 0.7875741132318617,
      "learning_rate": 8.325046117252933e-08,
      "loss": 0.1035,
      "step": 32344
    },
    {
      "epoch": 0.9436081451659957,
      "grad_norm": 0.8731378998190231,
      "learning_rate": 8.316463134497188e-08,
      "loss": 0.1231,
      "step": 32345
    },
    {
      "epoch": 0.9436373183966392,
      "grad_norm": 0.8524985718756968,
      "learning_rate": 8.30788454137471e-08,
      "loss": 0.0967,
      "step": 32346
    },
    {
      "epoch": 0.9436664916272828,
      "grad_norm": 0.7584908542335126,
      "learning_rate": 8.299310337962052e-08,
      "loss": 0.1058,
      "step": 32347
    },
    {
      "epoch": 0.9436956648579263,
      "grad_norm": 0.6681495845564815,
      "learning_rate": 8.290740524335817e-08,
      "loss": 0.0966,
      "step": 32348
    },
    {
      "epoch": 0.9437248380885699,
      "grad_norm": 0.8429285285254013,
      "learning_rate": 8.282175100572387e-08,
      "loss": 0.1175,
      "step": 32349
    },
    {
      "epoch": 0.9437540113192134,
      "grad_norm": 0.8027082932232955,
      "learning_rate": 8.27361406674837e-08,
      "loss": 0.0911,
      "step": 32350
    },
    {
      "epoch": 0.9437831845498571,
      "grad_norm": 0.8567381679743021,
      "learning_rate": 8.265057422940148e-08,
      "loss": 0.1255,
      "step": 32351
    },
    {
      "epoch": 0.9438123577805007,
      "grad_norm": 0.8541403054887776,
      "learning_rate": 8.256505169224105e-08,
      "loss": 0.0995,
      "step": 32352
    },
    {
      "epoch": 0.9438415310111442,
      "grad_norm": 0.7698180543207574,
      "learning_rate": 8.247957305676568e-08,
      "loss": 0.0865,
      "step": 32353
    },
    {
      "epoch": 0.9438707042417878,
      "grad_norm": 1.0897995547477701,
      "learning_rate": 8.239413832373865e-08,
      "loss": 0.1105,
      "step": 32354
    },
    {
      "epoch": 0.9438998774724313,
      "grad_norm": 0.894631470770573,
      "learning_rate": 8.230874749392326e-08,
      "loss": 0.1217,
      "step": 32355
    },
    {
      "epoch": 0.9439290507030749,
      "grad_norm": 0.8897706786415958,
      "learning_rate": 8.222340056808109e-08,
      "loss": 0.1373,
      "step": 32356
    },
    {
      "epoch": 0.9439582239337184,
      "grad_norm": 0.8475899700246209,
      "learning_rate": 8.213809754697489e-08,
      "loss": 0.1133,
      "step": 32357
    },
    {
      "epoch": 0.943987397164362,
      "grad_norm": 0.7119139509831721,
      "learning_rate": 8.205283843136513e-08,
      "loss": 0.1076,
      "step": 32358
    },
    {
      "epoch": 0.9440165703950055,
      "grad_norm": 0.8062916356226331,
      "learning_rate": 8.196762322201401e-08,
      "loss": 0.1006,
      "step": 32359
    },
    {
      "epoch": 0.9440457436256491,
      "grad_norm": 1.0753529891432876,
      "learning_rate": 8.188245191968202e-08,
      "loss": 0.101,
      "step": 32360
    },
    {
      "epoch": 0.9440749168562926,
      "grad_norm": 0.8753470313202542,
      "learning_rate": 8.179732452512911e-08,
      "loss": 0.0974,
      "step": 32361
    },
    {
      "epoch": 0.9441040900869362,
      "grad_norm": 0.7564879364630781,
      "learning_rate": 8.17122410391158e-08,
      "loss": 0.1147,
      "step": 32362
    },
    {
      "epoch": 0.9441332633175797,
      "grad_norm": 0.7842977363139345,
      "learning_rate": 8.162720146240144e-08,
      "loss": 0.1151,
      "step": 32363
    },
    {
      "epoch": 0.9441624365482234,
      "grad_norm": 0.7961969065277302,
      "learning_rate": 8.154220579574601e-08,
      "loss": 0.1064,
      "step": 32364
    },
    {
      "epoch": 0.944191609778867,
      "grad_norm": 0.9942200818097617,
      "learning_rate": 8.145725403990668e-08,
      "loss": 0.1206,
      "step": 32365
    },
    {
      "epoch": 0.9442207830095105,
      "grad_norm": 0.8811670485871332,
      "learning_rate": 8.137234619564282e-08,
      "loss": 0.1039,
      "step": 32366
    },
    {
      "epoch": 0.9442499562401541,
      "grad_norm": 0.8439337869759281,
      "learning_rate": 8.12874822637133e-08,
      "loss": 0.1075,
      "step": 32367
    },
    {
      "epoch": 0.9442791294707976,
      "grad_norm": 0.9234172909187949,
      "learning_rate": 8.120266224487416e-08,
      "loss": 0.0889,
      "step": 32368
    },
    {
      "epoch": 0.9443083027014412,
      "grad_norm": 0.7791893482755741,
      "learning_rate": 8.111788613988369e-08,
      "loss": 0.1555,
      "step": 32369
    },
    {
      "epoch": 0.9443374759320847,
      "grad_norm": 0.8860145963518655,
      "learning_rate": 8.103315394949906e-08,
      "loss": 0.1119,
      "step": 32370
    },
    {
      "epoch": 0.9443666491627283,
      "grad_norm": 0.8118565613192408,
      "learning_rate": 8.094846567447523e-08,
      "loss": 0.0869,
      "step": 32371
    },
    {
      "epoch": 0.9443958223933718,
      "grad_norm": 1.0808754968211483,
      "learning_rate": 8.086382131556935e-08,
      "loss": 0.1027,
      "step": 32372
    },
    {
      "epoch": 0.9444249956240154,
      "grad_norm": 0.7209412532372016,
      "learning_rate": 8.077922087353751e-08,
      "loss": 0.114,
      "step": 32373
    },
    {
      "epoch": 0.9444541688546589,
      "grad_norm": 1.3381069972839603,
      "learning_rate": 8.069466434913464e-08,
      "loss": 0.1124,
      "step": 32374
    },
    {
      "epoch": 0.9444833420853025,
      "grad_norm": 0.750155217556688,
      "learning_rate": 8.06101517431146e-08,
      "loss": 0.0897,
      "step": 32375
    },
    {
      "epoch": 0.944512515315946,
      "grad_norm": 0.7369771329983555,
      "learning_rate": 8.052568305623342e-08,
      "loss": 0.1086,
      "step": 32376
    },
    {
      "epoch": 0.9445416885465896,
      "grad_norm": 0.801568927073826,
      "learning_rate": 8.044125828924442e-08,
      "loss": 0.0983,
      "step": 32377
    },
    {
      "epoch": 0.9445708617772333,
      "grad_norm": 0.781518255797064,
      "learning_rate": 8.035687744290143e-08,
      "loss": 0.1041,
      "step": 32378
    },
    {
      "epoch": 0.9446000350078768,
      "grad_norm": 0.7125332917030004,
      "learning_rate": 8.027254051795774e-08,
      "loss": 0.0979,
      "step": 32379
    },
    {
      "epoch": 0.9446292082385204,
      "grad_norm": 0.6843526691826908,
      "learning_rate": 8.018824751516663e-08,
      "loss": 0.1184,
      "step": 32380
    },
    {
      "epoch": 0.9446583814691639,
      "grad_norm": 1.002080672606367,
      "learning_rate": 8.010399843528083e-08,
      "loss": 0.0985,
      "step": 32381
    },
    {
      "epoch": 0.9446875546998075,
      "grad_norm": 0.7824026578557902,
      "learning_rate": 8.00197932790514e-08,
      "loss": 0.113,
      "step": 32382
    },
    {
      "epoch": 0.944716727930451,
      "grad_norm": 0.7873377395419179,
      "learning_rate": 7.993563204723054e-08,
      "loss": 0.113,
      "step": 32383
    },
    {
      "epoch": 0.9447459011610946,
      "grad_norm": 0.9239821817302963,
      "learning_rate": 7.98515147405704e-08,
      "loss": 0.1215,
      "step": 32384
    },
    {
      "epoch": 0.9447750743917381,
      "grad_norm": 0.8012849692276517,
      "learning_rate": 7.976744135982095e-08,
      "loss": 0.1125,
      "step": 32385
    },
    {
      "epoch": 0.9448042476223817,
      "grad_norm": 0.87859202581409,
      "learning_rate": 7.968341190573325e-08,
      "loss": 0.1211,
      "step": 32386
    },
    {
      "epoch": 0.9448334208530252,
      "grad_norm": 0.8184684077296053,
      "learning_rate": 7.959942637905783e-08,
      "loss": 0.1036,
      "step": 32387
    },
    {
      "epoch": 0.9448625940836688,
      "grad_norm": 0.8394996941678644,
      "learning_rate": 7.951548478054405e-08,
      "loss": 0.1164,
      "step": 32388
    },
    {
      "epoch": 0.9448917673143123,
      "grad_norm": 0.7561225631203814,
      "learning_rate": 7.943158711094079e-08,
      "loss": 0.0907,
      "step": 32389
    },
    {
      "epoch": 0.9449209405449559,
      "grad_norm": 0.6961283322428043,
      "learning_rate": 7.934773337099855e-08,
      "loss": 0.0987,
      "step": 32390
    },
    {
      "epoch": 0.9449501137755996,
      "grad_norm": 0.7230837570967884,
      "learning_rate": 7.926392356146507e-08,
      "loss": 0.105,
      "step": 32391
    },
    {
      "epoch": 0.9449792870062431,
      "grad_norm": 0.892162969730835,
      "learning_rate": 7.918015768308806e-08,
      "loss": 0.1104,
      "step": 32392
    },
    {
      "epoch": 0.9450084602368867,
      "grad_norm": 0.7914370249735397,
      "learning_rate": 7.90964357366164e-08,
      "loss": 0.1058,
      "step": 32393
    },
    {
      "epoch": 0.9450376334675302,
      "grad_norm": 0.7030963808324242,
      "learning_rate": 7.90127577227967e-08,
      "loss": 0.1102,
      "step": 32394
    },
    {
      "epoch": 0.9450668066981738,
      "grad_norm": 0.9540850690225302,
      "learning_rate": 7.89291236423767e-08,
      "loss": 0.1207,
      "step": 32395
    },
    {
      "epoch": 0.9450959799288173,
      "grad_norm": 1.2345368165618769,
      "learning_rate": 7.884553349610191e-08,
      "loss": 0.1145,
      "step": 32396
    },
    {
      "epoch": 0.9451251531594609,
      "grad_norm": 0.7108277104669307,
      "learning_rate": 7.876198728472062e-08,
      "loss": 0.1173,
      "step": 32397
    },
    {
      "epoch": 0.9451543263901044,
      "grad_norm": 0.7955082892917396,
      "learning_rate": 7.867848500897668e-08,
      "loss": 0.113,
      "step": 32398
    },
    {
      "epoch": 0.945183499620748,
      "grad_norm": 0.8259253011058484,
      "learning_rate": 7.859502666961672e-08,
      "loss": 0.1283,
      "step": 32399
    },
    {
      "epoch": 0.9452126728513915,
      "grad_norm": 0.7176344232509159,
      "learning_rate": 7.851161226738569e-08,
      "loss": 0.0807,
      "step": 32400
    },
    {
      "epoch": 0.9452418460820351,
      "grad_norm": 0.7244473039419826,
      "learning_rate": 7.842824180302743e-08,
      "loss": 0.1229,
      "step": 32401
    },
    {
      "epoch": 0.9452710193126787,
      "grad_norm": 0.763476028012814,
      "learning_rate": 7.834491527728694e-08,
      "loss": 0.1113,
      "step": 32402
    },
    {
      "epoch": 0.9453001925433222,
      "grad_norm": 0.9109140432203537,
      "learning_rate": 7.826163269090914e-08,
      "loss": 0.1065,
      "step": 32403
    },
    {
      "epoch": 0.9453293657739658,
      "grad_norm": 0.747528977273796,
      "learning_rate": 7.817839404463623e-08,
      "loss": 0.1216,
      "step": 32404
    },
    {
      "epoch": 0.9453585390046094,
      "grad_norm": 0.7952431788295623,
      "learning_rate": 7.809519933921095e-08,
      "loss": 0.1179,
      "step": 32405
    },
    {
      "epoch": 0.945387712235253,
      "grad_norm": 0.8155826659945792,
      "learning_rate": 7.80120485753777e-08,
      "loss": 0.11,
      "step": 32406
    },
    {
      "epoch": 0.9454168854658965,
      "grad_norm": 0.8149750360445821,
      "learning_rate": 7.792894175387755e-08,
      "loss": 0.096,
      "step": 32407
    },
    {
      "epoch": 0.9454460586965401,
      "grad_norm": 0.5960639511586834,
      "learning_rate": 7.784587887545269e-08,
      "loss": 0.098,
      "step": 32408
    },
    {
      "epoch": 0.9454752319271836,
      "grad_norm": 0.9003230157312117,
      "learning_rate": 7.776285994084476e-08,
      "loss": 0.1141,
      "step": 32409
    },
    {
      "epoch": 0.9455044051578272,
      "grad_norm": 0.6997698470257007,
      "learning_rate": 7.767988495079536e-08,
      "loss": 0.1029,
      "step": 32410
    },
    {
      "epoch": 0.9455335783884707,
      "grad_norm": 0.7786886267937401,
      "learning_rate": 7.759695390604505e-08,
      "loss": 0.0877,
      "step": 32411
    },
    {
      "epoch": 0.9455627516191143,
      "grad_norm": 0.8070390944776786,
      "learning_rate": 7.75140668073332e-08,
      "loss": 0.1096,
      "step": 32412
    },
    {
      "epoch": 0.9455919248497578,
      "grad_norm": 0.6548227331117534,
      "learning_rate": 7.74312236554009e-08,
      "loss": 0.1166,
      "step": 32413
    },
    {
      "epoch": 0.9456210980804014,
      "grad_norm": 0.7864888096709484,
      "learning_rate": 7.734842445098811e-08,
      "loss": 0.1104,
      "step": 32414
    },
    {
      "epoch": 0.945650271311045,
      "grad_norm": 0.7404766707515956,
      "learning_rate": 7.726566919483313e-08,
      "loss": 0.1091,
      "step": 32415
    },
    {
      "epoch": 0.9456794445416885,
      "grad_norm": 1.1420709390800285,
      "learning_rate": 7.718295788767537e-08,
      "loss": 0.1208,
      "step": 32416
    },
    {
      "epoch": 0.9457086177723321,
      "grad_norm": 1.083427221237561,
      "learning_rate": 7.710029053025258e-08,
      "loss": 0.1335,
      "step": 32417
    },
    {
      "epoch": 0.9457377910029756,
      "grad_norm": 0.8615196675754264,
      "learning_rate": 7.701766712330305e-08,
      "loss": 0.0932,
      "step": 32418
    },
    {
      "epoch": 0.9457669642336193,
      "grad_norm": 0.8327881297641448,
      "learning_rate": 7.693508766756508e-08,
      "loss": 0.128,
      "step": 32419
    },
    {
      "epoch": 0.9457961374642628,
      "grad_norm": 0.7653280774890531,
      "learning_rate": 7.68525521637753e-08,
      "loss": 0.113,
      "step": 32420
    },
    {
      "epoch": 0.9458253106949064,
      "grad_norm": 0.8654162343913903,
      "learning_rate": 7.677006061267089e-08,
      "loss": 0.1037,
      "step": 32421
    },
    {
      "epoch": 0.9458544839255499,
      "grad_norm": 0.8712620590015554,
      "learning_rate": 7.668761301498739e-08,
      "loss": 0.0953,
      "step": 32422
    },
    {
      "epoch": 0.9458836571561935,
      "grad_norm": 0.7792027222281702,
      "learning_rate": 7.660520937146199e-08,
      "loss": 0.1309,
      "step": 32423
    },
    {
      "epoch": 0.945912830386837,
      "grad_norm": 0.939320024063705,
      "learning_rate": 7.65228496828302e-08,
      "loss": 0.1182,
      "step": 32424
    },
    {
      "epoch": 0.9459420036174806,
      "grad_norm": 0.7470944219962071,
      "learning_rate": 7.644053394982698e-08,
      "loss": 0.1234,
      "step": 32425
    },
    {
      "epoch": 0.9459711768481242,
      "grad_norm": 0.7567899105879465,
      "learning_rate": 7.635826217318676e-08,
      "loss": 0.1142,
      "step": 32426
    },
    {
      "epoch": 0.9460003500787677,
      "grad_norm": 0.7022270603143397,
      "learning_rate": 7.627603435364562e-08,
      "loss": 0.1059,
      "step": 32427
    },
    {
      "epoch": 0.9460295233094113,
      "grad_norm": 0.8755207850523682,
      "learning_rate": 7.619385049193573e-08,
      "loss": 0.115,
      "step": 32428
    },
    {
      "epoch": 0.9460586965400548,
      "grad_norm": 0.8289761925866534,
      "learning_rate": 7.611171058879208e-08,
      "loss": 0.1208,
      "step": 32429
    },
    {
      "epoch": 0.9460878697706984,
      "grad_norm": 0.7441816679164663,
      "learning_rate": 7.602961464494796e-08,
      "loss": 0.1055,
      "step": 32430
    },
    {
      "epoch": 0.9461170430013419,
      "grad_norm": 0.8571158621900724,
      "learning_rate": 7.594756266113556e-08,
      "loss": 0.1038,
      "step": 32431
    },
    {
      "epoch": 0.9461462162319856,
      "grad_norm": 0.9005891761133941,
      "learning_rate": 7.586555463808765e-08,
      "loss": 0.1166,
      "step": 32432
    },
    {
      "epoch": 0.9461753894626291,
      "grad_norm": 0.7337949228878811,
      "learning_rate": 7.578359057653751e-08,
      "loss": 0.119,
      "step": 32433
    },
    {
      "epoch": 0.9462045626932727,
      "grad_norm": 0.8261839710203674,
      "learning_rate": 7.57016704772151e-08,
      "loss": 0.1355,
      "step": 32434
    },
    {
      "epoch": 0.9462337359239162,
      "grad_norm": 0.7690019357159564,
      "learning_rate": 7.56197943408532e-08,
      "loss": 0.1031,
      "step": 32435
    },
    {
      "epoch": 0.9462629091545598,
      "grad_norm": 0.8164878148238821,
      "learning_rate": 7.553796216818177e-08,
      "loss": 0.1049,
      "step": 32436
    },
    {
      "epoch": 0.9462920823852033,
      "grad_norm": 0.790209351407258,
      "learning_rate": 7.545617395993188e-08,
      "loss": 0.1319,
      "step": 32437
    },
    {
      "epoch": 0.9463212556158469,
      "grad_norm": 0.7399463395526849,
      "learning_rate": 7.537442971683406e-08,
      "loss": 0.113,
      "step": 32438
    },
    {
      "epoch": 0.9463504288464905,
      "grad_norm": 0.8608838647635368,
      "learning_rate": 7.529272943961774e-08,
      "loss": 0.1114,
      "step": 32439
    },
    {
      "epoch": 0.946379602077134,
      "grad_norm": 0.7811986672017879,
      "learning_rate": 7.521107312901177e-08,
      "loss": 0.1439,
      "step": 32440
    },
    {
      "epoch": 0.9464087753077776,
      "grad_norm": 0.8592101554393792,
      "learning_rate": 7.512946078574667e-08,
      "loss": 0.1024,
      "step": 32441
    },
    {
      "epoch": 0.9464379485384211,
      "grad_norm": 0.868918578391751,
      "learning_rate": 7.50478924105491e-08,
      "loss": 0.1281,
      "step": 32442
    },
    {
      "epoch": 0.9464671217690647,
      "grad_norm": 0.9483448154147353,
      "learning_rate": 7.496636800414847e-08,
      "loss": 0.122,
      "step": 32443
    },
    {
      "epoch": 0.9464962949997082,
      "grad_norm": 0.7123911422266416,
      "learning_rate": 7.488488756727252e-08,
      "loss": 0.1023,
      "step": 32444
    },
    {
      "epoch": 0.9465254682303518,
      "grad_norm": 0.9491089059453062,
      "learning_rate": 7.480345110064846e-08,
      "loss": 0.0922,
      "step": 32445
    },
    {
      "epoch": 0.9465546414609954,
      "grad_norm": 0.8656337906944751,
      "learning_rate": 7.472205860500403e-08,
      "loss": 0.1105,
      "step": 32446
    },
    {
      "epoch": 0.946583814691639,
      "grad_norm": 0.9254558721488838,
      "learning_rate": 7.464071008106477e-08,
      "loss": 0.1273,
      "step": 32447
    },
    {
      "epoch": 0.9466129879222825,
      "grad_norm": 1.1120337824344588,
      "learning_rate": 7.455940552955732e-08,
      "loss": 0.0938,
      "step": 32448
    },
    {
      "epoch": 0.9466421611529261,
      "grad_norm": 0.8734464676563862,
      "learning_rate": 7.447814495120775e-08,
      "loss": 0.1188,
      "step": 32449
    },
    {
      "epoch": 0.9466713343835697,
      "grad_norm": 0.932221433076052,
      "learning_rate": 7.439692834674217e-08,
      "loss": 0.0833,
      "step": 32450
    },
    {
      "epoch": 0.9467005076142132,
      "grad_norm": 0.771003383833281,
      "learning_rate": 7.431575571688443e-08,
      "loss": 0.1447,
      "step": 32451
    },
    {
      "epoch": 0.9467296808448568,
      "grad_norm": 0.7799731886129633,
      "learning_rate": 7.42346270623595e-08,
      "loss": 0.138,
      "step": 32452
    },
    {
      "epoch": 0.9467588540755003,
      "grad_norm": 0.8575818353583003,
      "learning_rate": 7.415354238389239e-08,
      "loss": 0.1272,
      "step": 32453
    },
    {
      "epoch": 0.9467880273061439,
      "grad_norm": 0.8350146255677302,
      "learning_rate": 7.407250168220692e-08,
      "loss": 0.1115,
      "step": 32454
    },
    {
      "epoch": 0.9468172005367874,
      "grad_norm": 1.2155046907044247,
      "learning_rate": 7.399150495802532e-08,
      "loss": 0.1331,
      "step": 32455
    },
    {
      "epoch": 0.946846373767431,
      "grad_norm": 0.7861259438910083,
      "learning_rate": 7.391055221207199e-08,
      "loss": 0.1078,
      "step": 32456
    },
    {
      "epoch": 0.9468755469980745,
      "grad_norm": 0.7929531775979544,
      "learning_rate": 7.382964344506971e-08,
      "loss": 0.0894,
      "step": 32457
    },
    {
      "epoch": 0.9469047202287181,
      "grad_norm": 0.7207911773731474,
      "learning_rate": 7.374877865774011e-08,
      "loss": 0.1174,
      "step": 32458
    },
    {
      "epoch": 0.9469338934593617,
      "grad_norm": 0.8473804329915969,
      "learning_rate": 7.366795785080538e-08,
      "loss": 0.1043,
      "step": 32459
    },
    {
      "epoch": 0.9469630666900053,
      "grad_norm": 0.9293679818654809,
      "learning_rate": 7.358718102498718e-08,
      "loss": 0.0955,
      "step": 32460
    },
    {
      "epoch": 0.9469922399206488,
      "grad_norm": 0.9925557625259872,
      "learning_rate": 7.350644818100605e-08,
      "loss": 0.1084,
      "step": 32461
    },
    {
      "epoch": 0.9470214131512924,
      "grad_norm": 0.8357931404349741,
      "learning_rate": 7.342575931958362e-08,
      "loss": 0.1226,
      "step": 32462
    },
    {
      "epoch": 0.947050586381936,
      "grad_norm": 0.8516494057558605,
      "learning_rate": 7.334511444144043e-08,
      "loss": 0.1135,
      "step": 32463
    },
    {
      "epoch": 0.9470797596125795,
      "grad_norm": 0.9690824340711388,
      "learning_rate": 7.326451354729591e-08,
      "loss": 0.1019,
      "step": 32464
    },
    {
      "epoch": 0.9471089328432231,
      "grad_norm": 0.8434389326351772,
      "learning_rate": 7.318395663786892e-08,
      "loss": 0.1279,
      "step": 32465
    },
    {
      "epoch": 0.9471381060738666,
      "grad_norm": 0.7854950540974656,
      "learning_rate": 7.310344371388057e-08,
      "loss": 0.1118,
      "step": 32466
    },
    {
      "epoch": 0.9471672793045102,
      "grad_norm": 0.6897796201625724,
      "learning_rate": 7.302297477604747e-08,
      "loss": 0.0937,
      "step": 32467
    },
    {
      "epoch": 0.9471964525351537,
      "grad_norm": 0.8238705776744283,
      "learning_rate": 7.294254982508963e-08,
      "loss": 0.1019,
      "step": 32468
    },
    {
      "epoch": 0.9472256257657973,
      "grad_norm": 0.680516572844136,
      "learning_rate": 7.286216886172425e-08,
      "loss": 0.1055,
      "step": 32469
    },
    {
      "epoch": 0.9472547989964408,
      "grad_norm": 0.8786545586463291,
      "learning_rate": 7.278183188666965e-08,
      "loss": 0.1434,
      "step": 32470
    },
    {
      "epoch": 0.9472839722270844,
      "grad_norm": 1.0483298869582072,
      "learning_rate": 7.270153890064246e-08,
      "loss": 0.1187,
      "step": 32471
    },
    {
      "epoch": 0.9473131454577279,
      "grad_norm": 0.7640401178064328,
      "learning_rate": 7.262128990435934e-08,
      "loss": 0.12,
      "step": 32472
    },
    {
      "epoch": 0.9473423186883716,
      "grad_norm": 0.7435605455000042,
      "learning_rate": 7.25410848985375e-08,
      "loss": 0.1075,
      "step": 32473
    },
    {
      "epoch": 0.9473714919190152,
      "grad_norm": 0.839756218587077,
      "learning_rate": 7.246092388389247e-08,
      "loss": 0.117,
      "step": 32474
    },
    {
      "epoch": 0.9474006651496587,
      "grad_norm": 0.9486867625872087,
      "learning_rate": 7.23808068611398e-08,
      "loss": 0.1018,
      "step": 32475
    },
    {
      "epoch": 0.9474298383803023,
      "grad_norm": 0.686994518444723,
      "learning_rate": 7.230073383099556e-08,
      "loss": 0.1132,
      "step": 32476
    },
    {
      "epoch": 0.9474590116109458,
      "grad_norm": 1.194465992391673,
      "learning_rate": 7.222070479417365e-08,
      "loss": 0.1322,
      "step": 32477
    },
    {
      "epoch": 0.9474881848415894,
      "grad_norm": 0.9061250797428757,
      "learning_rate": 7.214071975138847e-08,
      "loss": 0.0982,
      "step": 32478
    },
    {
      "epoch": 0.9475173580722329,
      "grad_norm": 0.8336272683284411,
      "learning_rate": 7.206077870335504e-08,
      "loss": 0.1363,
      "step": 32479
    },
    {
      "epoch": 0.9475465313028765,
      "grad_norm": 0.7735124687475878,
      "learning_rate": 7.198088165078664e-08,
      "loss": 0.1225,
      "step": 32480
    },
    {
      "epoch": 0.94757570453352,
      "grad_norm": 1.0237110014291062,
      "learning_rate": 7.190102859439662e-08,
      "loss": 0.1145,
      "step": 32481
    },
    {
      "epoch": 0.9476048777641636,
      "grad_norm": 0.7648542443407397,
      "learning_rate": 7.182121953489718e-08,
      "loss": 0.0958,
      "step": 32482
    },
    {
      "epoch": 0.9476340509948071,
      "grad_norm": 1.0863584681681098,
      "learning_rate": 7.174145447300218e-08,
      "loss": 0.1251,
      "step": 32483
    },
    {
      "epoch": 0.9476632242254507,
      "grad_norm": 0.8585402501507573,
      "learning_rate": 7.166173340942273e-08,
      "loss": 0.1177,
      "step": 32484
    },
    {
      "epoch": 0.9476923974560942,
      "grad_norm": 0.791338627216461,
      "learning_rate": 7.158205634487103e-08,
      "loss": 0.0855,
      "step": 32485
    },
    {
      "epoch": 0.9477215706867379,
      "grad_norm": 0.8626801997444347,
      "learning_rate": 7.150242328005763e-08,
      "loss": 0.0975,
      "step": 32486
    },
    {
      "epoch": 0.9477507439173815,
      "grad_norm": 0.7039490546372619,
      "learning_rate": 7.142283421569474e-08,
      "loss": 0.1164,
      "step": 32487
    },
    {
      "epoch": 0.947779917148025,
      "grad_norm": 0.7765559387927986,
      "learning_rate": 7.134328915249177e-08,
      "loss": 0.1068,
      "step": 32488
    },
    {
      "epoch": 0.9478090903786686,
      "grad_norm": 0.8814504468730606,
      "learning_rate": 7.126378809115931e-08,
      "loss": 0.129,
      "step": 32489
    },
    {
      "epoch": 0.9478382636093121,
      "grad_norm": 0.608087582565708,
      "learning_rate": 7.118433103240729e-08,
      "loss": 0.1025,
      "step": 32490
    },
    {
      "epoch": 0.9478674368399557,
      "grad_norm": 0.7717572647366848,
      "learning_rate": 7.110491797694519e-08,
      "loss": 0.1095,
      "step": 32491
    },
    {
      "epoch": 0.9478966100705992,
      "grad_norm": 0.9897115468888162,
      "learning_rate": 7.10255489254813e-08,
      "loss": 0.1052,
      "step": 32492
    },
    {
      "epoch": 0.9479257833012428,
      "grad_norm": 0.8760283595412774,
      "learning_rate": 7.094622387872508e-08,
      "loss": 0.1139,
      "step": 32493
    },
    {
      "epoch": 0.9479549565318863,
      "grad_norm": 0.6954293766892239,
      "learning_rate": 7.086694283738427e-08,
      "loss": 0.1182,
      "step": 32494
    },
    {
      "epoch": 0.9479841297625299,
      "grad_norm": 0.9198009490309836,
      "learning_rate": 7.078770580216664e-08,
      "loss": 0.1093,
      "step": 32495
    },
    {
      "epoch": 0.9480133029931734,
      "grad_norm": 1.0179819067131206,
      "learning_rate": 7.070851277377944e-08,
      "loss": 0.1204,
      "step": 32496
    },
    {
      "epoch": 0.948042476223817,
      "grad_norm": 0.8157738612589095,
      "learning_rate": 7.062936375293039e-08,
      "loss": 0.1359,
      "step": 32497
    },
    {
      "epoch": 0.9480716494544605,
      "grad_norm": 0.8817424538251957,
      "learning_rate": 7.055025874032562e-08,
      "loss": 0.1249,
      "step": 32498
    },
    {
      "epoch": 0.9481008226851041,
      "grad_norm": 0.8186664338244846,
      "learning_rate": 7.047119773667066e-08,
      "loss": 0.1045,
      "step": 32499
    },
    {
      "epoch": 0.9481299959157478,
      "grad_norm": 0.8767729803975891,
      "learning_rate": 7.039218074267273e-08,
      "loss": 0.1211,
      "step": 32500
    },
    {
      "epoch": 0.9481591691463913,
      "grad_norm": 0.768487745352673,
      "learning_rate": 7.031320775903682e-08,
      "loss": 0.1096,
      "step": 32501
    },
    {
      "epoch": 0.9481883423770349,
      "grad_norm": 0.8039052449936003,
      "learning_rate": 7.023427878646739e-08,
      "loss": 0.104,
      "step": 32502
    },
    {
      "epoch": 0.9482175156076784,
      "grad_norm": 0.8063128935223344,
      "learning_rate": 7.015539382566882e-08,
      "loss": 0.0907,
      "step": 32503
    },
    {
      "epoch": 0.948246688838322,
      "grad_norm": 0.9271385773683889,
      "learning_rate": 7.007655287734727e-08,
      "loss": 0.1003,
      "step": 32504
    },
    {
      "epoch": 0.9482758620689655,
      "grad_norm": 0.9647932622383132,
      "learning_rate": 6.999775594220437e-08,
      "loss": 0.119,
      "step": 32505
    },
    {
      "epoch": 0.9483050352996091,
      "grad_norm": 0.8090997577971532,
      "learning_rate": 6.991900302094567e-08,
      "loss": 0.109,
      "step": 32506
    },
    {
      "epoch": 0.9483342085302526,
      "grad_norm": 0.8575302787877651,
      "learning_rate": 6.984029411427285e-08,
      "loss": 0.1288,
      "step": 32507
    },
    {
      "epoch": 0.9483633817608962,
      "grad_norm": 1.143824229557474,
      "learning_rate": 6.976162922288865e-08,
      "loss": 0.118,
      "step": 32508
    },
    {
      "epoch": 0.9483925549915397,
      "grad_norm": 0.8649502905621248,
      "learning_rate": 6.968300834749531e-08,
      "loss": 0.1038,
      "step": 32509
    },
    {
      "epoch": 0.9484217282221833,
      "grad_norm": 0.9273763814160385,
      "learning_rate": 6.960443148879559e-08,
      "loss": 0.1177,
      "step": 32510
    },
    {
      "epoch": 0.9484509014528268,
      "grad_norm": 0.7928934445398926,
      "learning_rate": 6.952589864749115e-08,
      "loss": 0.1143,
      "step": 32511
    },
    {
      "epoch": 0.9484800746834704,
      "grad_norm": 0.9374873688596449,
      "learning_rate": 6.944740982428144e-08,
      "loss": 0.1088,
      "step": 32512
    },
    {
      "epoch": 0.9485092479141141,
      "grad_norm": 0.8220838038955269,
      "learning_rate": 6.936896501986868e-08,
      "loss": 0.1247,
      "step": 32513
    },
    {
      "epoch": 0.9485384211447576,
      "grad_norm": 0.7494913221019011,
      "learning_rate": 6.929056423495285e-08,
      "loss": 0.1118,
      "step": 32514
    },
    {
      "epoch": 0.9485675943754012,
      "grad_norm": 0.574077662448829,
      "learning_rate": 6.921220747023394e-08,
      "loss": 0.0967,
      "step": 32515
    },
    {
      "epoch": 0.9485967676060447,
      "grad_norm": 0.9191293324336272,
      "learning_rate": 6.913389472641085e-08,
      "loss": 0.1119,
      "step": 32516
    },
    {
      "epoch": 0.9486259408366883,
      "grad_norm": 0.8234192992220227,
      "learning_rate": 6.905562600418359e-08,
      "loss": 0.1148,
      "step": 32517
    },
    {
      "epoch": 0.9486551140673318,
      "grad_norm": 0.8801974156793452,
      "learning_rate": 6.897740130425046e-08,
      "loss": 0.1079,
      "step": 32518
    },
    {
      "epoch": 0.9486842872979754,
      "grad_norm": 0.7527567196852354,
      "learning_rate": 6.88992206273098e-08,
      "loss": 0.1209,
      "step": 32519
    },
    {
      "epoch": 0.9487134605286189,
      "grad_norm": 0.7828147462334349,
      "learning_rate": 6.882108397406051e-08,
      "loss": 0.1172,
      "step": 32520
    },
    {
      "epoch": 0.9487426337592625,
      "grad_norm": 0.7652612727291339,
      "learning_rate": 6.874299134519868e-08,
      "loss": 0.0962,
      "step": 32521
    },
    {
      "epoch": 0.948771806989906,
      "grad_norm": 0.9264724715795395,
      "learning_rate": 6.86649427414221e-08,
      "loss": 0.1053,
      "step": 32522
    },
    {
      "epoch": 0.9488009802205496,
      "grad_norm": 0.7727918440017685,
      "learning_rate": 6.858693816342854e-08,
      "loss": 0.0987,
      "step": 32523
    },
    {
      "epoch": 0.9488301534511931,
      "grad_norm": 0.8662458394639372,
      "learning_rate": 6.8508977611913e-08,
      "loss": 0.1073,
      "step": 32524
    },
    {
      "epoch": 0.9488593266818367,
      "grad_norm": 0.9801777045089599,
      "learning_rate": 6.843106108757214e-08,
      "loss": 0.1183,
      "step": 32525
    },
    {
      "epoch": 0.9488884999124803,
      "grad_norm": 0.8325529827915136,
      "learning_rate": 6.835318859110152e-08,
      "loss": 0.1045,
      "step": 32526
    },
    {
      "epoch": 0.9489176731431239,
      "grad_norm": 0.8014491100772806,
      "learning_rate": 6.827536012319613e-08,
      "loss": 0.1235,
      "step": 32527
    },
    {
      "epoch": 0.9489468463737675,
      "grad_norm": 0.9160593276523509,
      "learning_rate": 6.819757568455155e-08,
      "loss": 0.0946,
      "step": 32528
    },
    {
      "epoch": 0.948976019604411,
      "grad_norm": 0.8850257788412628,
      "learning_rate": 6.811983527586108e-08,
      "loss": 0.1158,
      "step": 32529
    },
    {
      "epoch": 0.9490051928350546,
      "grad_norm": 0.7254581291890501,
      "learning_rate": 6.804213889781974e-08,
      "loss": 0.121,
      "step": 32530
    },
    {
      "epoch": 0.9490343660656981,
      "grad_norm": 0.8242057641046956,
      "learning_rate": 6.796448655112142e-08,
      "loss": 0.1055,
      "step": 32531
    },
    {
      "epoch": 0.9490635392963417,
      "grad_norm": 0.7271872053019198,
      "learning_rate": 6.788687823645723e-08,
      "loss": 0.095,
      "step": 32532
    },
    {
      "epoch": 0.9490927125269852,
      "grad_norm": 0.7964237689345448,
      "learning_rate": 6.780931395452273e-08,
      "loss": 0.1194,
      "step": 32533
    },
    {
      "epoch": 0.9491218857576288,
      "grad_norm": 0.7695145596993271,
      "learning_rate": 6.773179370600958e-08,
      "loss": 0.1105,
      "step": 32534
    },
    {
      "epoch": 0.9491510589882723,
      "grad_norm": 0.8957846772447188,
      "learning_rate": 6.765431749160889e-08,
      "loss": 0.1241,
      "step": 32535
    },
    {
      "epoch": 0.9491802322189159,
      "grad_norm": 0.8944661895065943,
      "learning_rate": 6.75768853120129e-08,
      "loss": 0.1188,
      "step": 32536
    },
    {
      "epoch": 0.9492094054495595,
      "grad_norm": 0.7907215058520678,
      "learning_rate": 6.749949716791382e-08,
      "loss": 0.1327,
      "step": 32537
    },
    {
      "epoch": 0.949238578680203,
      "grad_norm": 0.8652602143387097,
      "learning_rate": 6.742215306000055e-08,
      "loss": 0.0997,
      "step": 32538
    },
    {
      "epoch": 0.9492677519108466,
      "grad_norm": 0.9278490960321242,
      "learning_rate": 6.734485298896531e-08,
      "loss": 0.1197,
      "step": 32539
    },
    {
      "epoch": 0.9492969251414902,
      "grad_norm": 0.8093230324059786,
      "learning_rate": 6.726759695549812e-08,
      "loss": 0.1298,
      "step": 32540
    },
    {
      "epoch": 0.9493260983721338,
      "grad_norm": 0.8335422554079511,
      "learning_rate": 6.71903849602884e-08,
      "loss": 0.1049,
      "step": 32541
    },
    {
      "epoch": 0.9493552716027773,
      "grad_norm": 0.8902666678634433,
      "learning_rate": 6.711321700402451e-08,
      "loss": 0.1042,
      "step": 32542
    },
    {
      "epoch": 0.9493844448334209,
      "grad_norm": 0.7495172320445483,
      "learning_rate": 6.7036093087397e-08,
      "loss": 0.0962,
      "step": 32543
    },
    {
      "epoch": 0.9494136180640644,
      "grad_norm": 0.6637903822240693,
      "learning_rate": 6.695901321109311e-08,
      "loss": 0.1163,
      "step": 32544
    },
    {
      "epoch": 0.949442791294708,
      "grad_norm": 0.8944716687778668,
      "learning_rate": 6.688197737580226e-08,
      "loss": 0.1148,
      "step": 32545
    },
    {
      "epoch": 0.9494719645253515,
      "grad_norm": 0.7032948601866934,
      "learning_rate": 6.68049855822106e-08,
      "loss": 0.0942,
      "step": 32546
    },
    {
      "epoch": 0.9495011377559951,
      "grad_norm": 0.6992534994997684,
      "learning_rate": 6.672803783100701e-08,
      "loss": 0.1009,
      "step": 32547
    },
    {
      "epoch": 0.9495303109866386,
      "grad_norm": 0.7429958521862423,
      "learning_rate": 6.66511341228776e-08,
      "loss": 0.1014,
      "step": 32548
    },
    {
      "epoch": 0.9495594842172822,
      "grad_norm": 0.7998772670532653,
      "learning_rate": 6.657427445850906e-08,
      "loss": 0.1069,
      "step": 32549
    },
    {
      "epoch": 0.9495886574479258,
      "grad_norm": 0.8214812360205656,
      "learning_rate": 6.64974588385875e-08,
      "loss": 0.111,
      "step": 32550
    },
    {
      "epoch": 0.9496178306785693,
      "grad_norm": 0.6227033556614411,
      "learning_rate": 6.642068726379958e-08,
      "loss": 0.1119,
      "step": 32551
    },
    {
      "epoch": 0.9496470039092129,
      "grad_norm": 0.8738627469696575,
      "learning_rate": 6.634395973482976e-08,
      "loss": 0.1482,
      "step": 32552
    },
    {
      "epoch": 0.9496761771398564,
      "grad_norm": 0.8448002130865022,
      "learning_rate": 6.626727625236307e-08,
      "loss": 0.1065,
      "step": 32553
    },
    {
      "epoch": 0.9497053503705001,
      "grad_norm": 0.6625229258472054,
      "learning_rate": 6.619063681708504e-08,
      "loss": 0.1054,
      "step": 32554
    },
    {
      "epoch": 0.9497345236011436,
      "grad_norm": 0.7563557777917117,
      "learning_rate": 6.611404142967847e-08,
      "loss": 0.1074,
      "step": 32555
    },
    {
      "epoch": 0.9497636968317872,
      "grad_norm": 0.9927561808730692,
      "learning_rate": 6.603749009082782e-08,
      "loss": 0.1136,
      "step": 32556
    },
    {
      "epoch": 0.9497928700624307,
      "grad_norm": 0.7893414017567375,
      "learning_rate": 6.596098280121699e-08,
      "loss": 0.1051,
      "step": 32557
    },
    {
      "epoch": 0.9498220432930743,
      "grad_norm": 0.8773759445176594,
      "learning_rate": 6.588451956152875e-08,
      "loss": 0.1216,
      "step": 32558
    },
    {
      "epoch": 0.9498512165237178,
      "grad_norm": 0.8107183958581707,
      "learning_rate": 6.580810037244533e-08,
      "loss": 0.1146,
      "step": 32559
    },
    {
      "epoch": 0.9498803897543614,
      "grad_norm": 0.7921634542797843,
      "learning_rate": 6.573172523464954e-08,
      "loss": 0.0983,
      "step": 32560
    },
    {
      "epoch": 0.949909562985005,
      "grad_norm": 0.7370843643468035,
      "learning_rate": 6.56553941488236e-08,
      "loss": 0.0908,
      "step": 32561
    },
    {
      "epoch": 0.9499387362156485,
      "grad_norm": 0.6442301494912659,
      "learning_rate": 6.557910711564697e-08,
      "loss": 0.0944,
      "step": 32562
    },
    {
      "epoch": 0.9499679094462921,
      "grad_norm": 0.7498354563584554,
      "learning_rate": 6.550286413580298e-08,
      "loss": 0.117,
      "step": 32563
    },
    {
      "epoch": 0.9499970826769356,
      "grad_norm": 0.7483228818550821,
      "learning_rate": 6.542666520997166e-08,
      "loss": 0.0957,
      "step": 32564
    },
    {
      "epoch": 0.9500262559075792,
      "grad_norm": 0.8375602683930373,
      "learning_rate": 6.535051033883245e-08,
      "loss": 0.1179,
      "step": 32565
    },
    {
      "epoch": 0.9500554291382227,
      "grad_norm": 0.7275852938497513,
      "learning_rate": 6.527439952306647e-08,
      "loss": 0.1083,
      "step": 32566
    },
    {
      "epoch": 0.9500846023688664,
      "grad_norm": 0.9878603165436323,
      "learning_rate": 6.519833276335263e-08,
      "loss": 0.1156,
      "step": 32567
    },
    {
      "epoch": 0.9501137755995099,
      "grad_norm": 0.7871433797833636,
      "learning_rate": 6.512231006036984e-08,
      "loss": 0.1064,
      "step": 32568
    },
    {
      "epoch": 0.9501429488301535,
      "grad_norm": 0.6917231232985578,
      "learning_rate": 6.504633141479644e-08,
      "loss": 0.1166,
      "step": 32569
    },
    {
      "epoch": 0.950172122060797,
      "grad_norm": 0.7962815841372336,
      "learning_rate": 6.497039682731243e-08,
      "loss": 0.107,
      "step": 32570
    },
    {
      "epoch": 0.9502012952914406,
      "grad_norm": 0.8131996014082601,
      "learning_rate": 6.489450629859394e-08,
      "loss": 0.1028,
      "step": 32571
    },
    {
      "epoch": 0.9502304685220841,
      "grad_norm": 0.7680218242347662,
      "learning_rate": 6.481865982931934e-08,
      "loss": 0.0984,
      "step": 32572
    },
    {
      "epoch": 0.9502596417527277,
      "grad_norm": 0.7015601473634733,
      "learning_rate": 6.474285742016583e-08,
      "loss": 0.1083,
      "step": 32573
    },
    {
      "epoch": 0.9502888149833713,
      "grad_norm": 0.8884200993336463,
      "learning_rate": 6.466709907180957e-08,
      "loss": 0.1146,
      "step": 32574
    },
    {
      "epoch": 0.9503179882140148,
      "grad_norm": 0.9475049563156215,
      "learning_rate": 6.459138478492721e-08,
      "loss": 0.1122,
      "step": 32575
    },
    {
      "epoch": 0.9503471614446584,
      "grad_norm": 0.7885576938871349,
      "learning_rate": 6.45157145601949e-08,
      "loss": 0.1029,
      "step": 32576
    },
    {
      "epoch": 0.9503763346753019,
      "grad_norm": 0.7762302852183923,
      "learning_rate": 6.444008839828875e-08,
      "loss": 0.1176,
      "step": 32577
    },
    {
      "epoch": 0.9504055079059455,
      "grad_norm": 0.8350150433801837,
      "learning_rate": 6.436450629988267e-08,
      "loss": 0.1091,
      "step": 32578
    },
    {
      "epoch": 0.950434681136589,
      "grad_norm": 0.7493533951830512,
      "learning_rate": 6.428896826565223e-08,
      "loss": 0.1181,
      "step": 32579
    },
    {
      "epoch": 0.9504638543672326,
      "grad_norm": 0.991159279510208,
      "learning_rate": 6.421347429627134e-08,
      "loss": 0.109,
      "step": 32580
    },
    {
      "epoch": 0.9504930275978762,
      "grad_norm": 0.8734027167661529,
      "learning_rate": 6.413802439241445e-08,
      "loss": 0.1119,
      "step": 32581
    },
    {
      "epoch": 0.9505222008285198,
      "grad_norm": 0.9273288398776804,
      "learning_rate": 6.406261855475492e-08,
      "loss": 0.1043,
      "step": 32582
    },
    {
      "epoch": 0.9505513740591633,
      "grad_norm": 0.8732606294444948,
      "learning_rate": 6.398725678396611e-08,
      "loss": 0.1099,
      "step": 32583
    },
    {
      "epoch": 0.9505805472898069,
      "grad_norm": 0.6687579571720464,
      "learning_rate": 6.39119390807208e-08,
      "loss": 0.1276,
      "step": 32584
    },
    {
      "epoch": 0.9506097205204505,
      "grad_norm": 0.8621781554936837,
      "learning_rate": 6.383666544569122e-08,
      "loss": 0.112,
      "step": 32585
    },
    {
      "epoch": 0.950638893751094,
      "grad_norm": 0.7539262707182163,
      "learning_rate": 6.376143587954964e-08,
      "loss": 0.1071,
      "step": 32586
    },
    {
      "epoch": 0.9506680669817376,
      "grad_norm": 0.7066981580124291,
      "learning_rate": 6.368625038296772e-08,
      "loss": 0.1211,
      "step": 32587
    },
    {
      "epoch": 0.9506972402123811,
      "grad_norm": 0.6900478942678211,
      "learning_rate": 6.36111089566166e-08,
      "loss": 0.1042,
      "step": 32588
    },
    {
      "epoch": 0.9507264134430247,
      "grad_norm": 0.7289498956348031,
      "learning_rate": 6.353601160116685e-08,
      "loss": 0.1204,
      "step": 32589
    },
    {
      "epoch": 0.9507555866736682,
      "grad_norm": 0.8394279801179323,
      "learning_rate": 6.34609583172896e-08,
      "loss": 0.1201,
      "step": 32590
    },
    {
      "epoch": 0.9507847599043118,
      "grad_norm": 0.8028538442739547,
      "learning_rate": 6.338594910565376e-08,
      "loss": 0.0991,
      "step": 32591
    },
    {
      "epoch": 0.9508139331349553,
      "grad_norm": 1.0028204581743745,
      "learning_rate": 6.331098396692991e-08,
      "loss": 0.1242,
      "step": 32592
    },
    {
      "epoch": 0.9508431063655989,
      "grad_norm": 0.7777868754168333,
      "learning_rate": 6.323606290178697e-08,
      "loss": 0.1065,
      "step": 32593
    },
    {
      "epoch": 0.9508722795962425,
      "grad_norm": 0.6508464250057778,
      "learning_rate": 6.316118591089493e-08,
      "loss": 0.1047,
      "step": 32594
    },
    {
      "epoch": 0.9509014528268861,
      "grad_norm": 0.9154484747314856,
      "learning_rate": 6.308635299491994e-08,
      "loss": 0.1295,
      "step": 32595
    },
    {
      "epoch": 0.9509306260575296,
      "grad_norm": 0.8207156517485412,
      "learning_rate": 6.301156415453257e-08,
      "loss": 0.1293,
      "step": 32596
    },
    {
      "epoch": 0.9509597992881732,
      "grad_norm": 0.715485299643818,
      "learning_rate": 6.293681939039898e-08,
      "loss": 0.11,
      "step": 32597
    },
    {
      "epoch": 0.9509889725188168,
      "grad_norm": 0.8233722592986634,
      "learning_rate": 6.286211870318693e-08,
      "loss": 0.1142,
      "step": 32598
    },
    {
      "epoch": 0.9510181457494603,
      "grad_norm": 0.8366408034701655,
      "learning_rate": 6.278746209356313e-08,
      "loss": 0.1298,
      "step": 32599
    },
    {
      "epoch": 0.9510473189801039,
      "grad_norm": 0.8882187416155616,
      "learning_rate": 6.271284956219425e-08,
      "loss": 0.1018,
      "step": 32600
    },
    {
      "epoch": 0.9510764922107474,
      "grad_norm": 0.7522134535995358,
      "learning_rate": 6.263828110974645e-08,
      "loss": 0.1109,
      "step": 32601
    },
    {
      "epoch": 0.951105665441391,
      "grad_norm": 0.86579167965794,
      "learning_rate": 6.256375673688586e-08,
      "loss": 0.1142,
      "step": 32602
    },
    {
      "epoch": 0.9511348386720345,
      "grad_norm": 1.0030382824630497,
      "learning_rate": 6.248927644427694e-08,
      "loss": 0.1024,
      "step": 32603
    },
    {
      "epoch": 0.9511640119026781,
      "grad_norm": 0.6475231820740863,
      "learning_rate": 6.241484023258526e-08,
      "loss": 0.1105,
      "step": 32604
    },
    {
      "epoch": 0.9511931851333216,
      "grad_norm": 0.976756384760913,
      "learning_rate": 6.23404481024753e-08,
      "loss": 0.1011,
      "step": 32605
    },
    {
      "epoch": 0.9512223583639652,
      "grad_norm": 0.9550830639701129,
      "learning_rate": 6.226610005461043e-08,
      "loss": 0.1185,
      "step": 32606
    },
    {
      "epoch": 0.9512515315946087,
      "grad_norm": 0.9693759171969529,
      "learning_rate": 6.219179608965564e-08,
      "loss": 0.0972,
      "step": 32607
    },
    {
      "epoch": 0.9512807048252524,
      "grad_norm": 0.8450852425709949,
      "learning_rate": 6.211753620827377e-08,
      "loss": 0.1223,
      "step": 32608
    },
    {
      "epoch": 0.951309878055896,
      "grad_norm": 0.9628134473348807,
      "learning_rate": 6.204332041112759e-08,
      "loss": 0.1071,
      "step": 32609
    },
    {
      "epoch": 0.9513390512865395,
      "grad_norm": 0.7988154187396046,
      "learning_rate": 6.196914869887993e-08,
      "loss": 0.0981,
      "step": 32610
    },
    {
      "epoch": 0.9513682245171831,
      "grad_norm": 0.8331880469684507,
      "learning_rate": 6.189502107219302e-08,
      "loss": 0.1174,
      "step": 32611
    },
    {
      "epoch": 0.9513973977478266,
      "grad_norm": 0.7796078562984378,
      "learning_rate": 6.182093753172858e-08,
      "loss": 0.1269,
      "step": 32612
    },
    {
      "epoch": 0.9514265709784702,
      "grad_norm": 0.8244154041719207,
      "learning_rate": 6.174689807814771e-08,
      "loss": 0.1081,
      "step": 32613
    },
    {
      "epoch": 0.9514557442091137,
      "grad_norm": 0.8878633346593348,
      "learning_rate": 6.167290271211213e-08,
      "loss": 0.1021,
      "step": 32614
    },
    {
      "epoch": 0.9514849174397573,
      "grad_norm": 0.7331652320317426,
      "learning_rate": 6.15989514342813e-08,
      "loss": 0.1258,
      "step": 32615
    },
    {
      "epoch": 0.9515140906704008,
      "grad_norm": 0.8022422341246551,
      "learning_rate": 6.152504424531636e-08,
      "loss": 0.1125,
      "step": 32616
    },
    {
      "epoch": 0.9515432639010444,
      "grad_norm": 0.9996228397094764,
      "learning_rate": 6.145118114587733e-08,
      "loss": 0.1084,
      "step": 32617
    },
    {
      "epoch": 0.9515724371316879,
      "grad_norm": 1.0537827190381488,
      "learning_rate": 6.137736213662316e-08,
      "loss": 0.1023,
      "step": 32618
    },
    {
      "epoch": 0.9516016103623315,
      "grad_norm": 0.7684435751201382,
      "learning_rate": 6.130358721821272e-08,
      "loss": 0.101,
      "step": 32619
    },
    {
      "epoch": 0.951630783592975,
      "grad_norm": 0.8037522029007881,
      "learning_rate": 6.122985639130497e-08,
      "loss": 0.1156,
      "step": 32620
    },
    {
      "epoch": 0.9516599568236187,
      "grad_norm": 0.8974018603279343,
      "learning_rate": 6.115616965655824e-08,
      "loss": 0.1245,
      "step": 32621
    },
    {
      "epoch": 0.9516891300542623,
      "grad_norm": 0.7059823710551251,
      "learning_rate": 6.108252701462925e-08,
      "loss": 0.1014,
      "step": 32622
    },
    {
      "epoch": 0.9517183032849058,
      "grad_norm": 1.0761115786263042,
      "learning_rate": 6.100892846617745e-08,
      "loss": 0.1448,
      "step": 32623
    },
    {
      "epoch": 0.9517474765155494,
      "grad_norm": 1.0273670520567701,
      "learning_rate": 6.093537401185901e-08,
      "loss": 0.1133,
      "step": 32624
    },
    {
      "epoch": 0.9517766497461929,
      "grad_norm": 0.9418891085534057,
      "learning_rate": 6.086186365233005e-08,
      "loss": 0.0893,
      "step": 32625
    },
    {
      "epoch": 0.9518058229768365,
      "grad_norm": 0.6973764481163859,
      "learning_rate": 6.078839738824782e-08,
      "loss": 0.1099,
      "step": 32626
    },
    {
      "epoch": 0.95183499620748,
      "grad_norm": 0.6891166173220694,
      "learning_rate": 6.071497522026737e-08,
      "loss": 0.1131,
      "step": 32627
    },
    {
      "epoch": 0.9518641694381236,
      "grad_norm": 0.9490350493604558,
      "learning_rate": 6.064159714904428e-08,
      "loss": 0.1425,
      "step": 32628
    },
    {
      "epoch": 0.9518933426687671,
      "grad_norm": 0.8951531329085967,
      "learning_rate": 6.056826317523357e-08,
      "loss": 0.1325,
      "step": 32629
    },
    {
      "epoch": 0.9519225158994107,
      "grad_norm": 0.7920240975475742,
      "learning_rate": 6.049497329949139e-08,
      "loss": 0.1008,
      "step": 32630
    },
    {
      "epoch": 0.9519516891300542,
      "grad_norm": 0.8504101902469182,
      "learning_rate": 6.042172752247e-08,
      "loss": 0.1079,
      "step": 32631
    },
    {
      "epoch": 0.9519808623606978,
      "grad_norm": 0.830303144502529,
      "learning_rate": 6.034852584482442e-08,
      "loss": 0.0959,
      "step": 32632
    },
    {
      "epoch": 0.9520100355913413,
      "grad_norm": 0.6676870435088427,
      "learning_rate": 6.027536826720859e-08,
      "loss": 0.1203,
      "step": 32633
    },
    {
      "epoch": 0.9520392088219849,
      "grad_norm": 0.9017591609409177,
      "learning_rate": 6.020225479027419e-08,
      "loss": 0.1242,
      "step": 32634
    },
    {
      "epoch": 0.9520683820526286,
      "grad_norm": 1.08394380267901,
      "learning_rate": 6.012918541467572e-08,
      "loss": 0.1333,
      "step": 32635
    },
    {
      "epoch": 0.9520975552832721,
      "grad_norm": 0.7961625153907718,
      "learning_rate": 6.005616014106375e-08,
      "loss": 0.1073,
      "step": 32636
    },
    {
      "epoch": 0.9521267285139157,
      "grad_norm": 0.6973454160567234,
      "learning_rate": 5.998317897009165e-08,
      "loss": 0.1214,
      "step": 32637
    },
    {
      "epoch": 0.9521559017445592,
      "grad_norm": 0.8946820605255777,
      "learning_rate": 5.991024190241057e-08,
      "loss": 0.1193,
      "step": 32638
    },
    {
      "epoch": 0.9521850749752028,
      "grad_norm": 0.8561616523767539,
      "learning_rate": 5.983734893867166e-08,
      "loss": 0.0908,
      "step": 32639
    },
    {
      "epoch": 0.9522142482058463,
      "grad_norm": 0.8619288451934363,
      "learning_rate": 5.976450007952495e-08,
      "loss": 0.1369,
      "step": 32640
    },
    {
      "epoch": 0.9522434214364899,
      "grad_norm": 1.1506017454184636,
      "learning_rate": 5.969169532562158e-08,
      "loss": 0.1303,
      "step": 32641
    },
    {
      "epoch": 0.9522725946671334,
      "grad_norm": 0.9371048522861081,
      "learning_rate": 5.96189346776116e-08,
      "loss": 0.1144,
      "step": 32642
    },
    {
      "epoch": 0.952301767897777,
      "grad_norm": 0.7567991425407807,
      "learning_rate": 5.954621813614447e-08,
      "loss": 0.1135,
      "step": 32643
    },
    {
      "epoch": 0.9523309411284205,
      "grad_norm": 0.6268691765113534,
      "learning_rate": 5.9473545701869696e-08,
      "loss": 0.1092,
      "step": 32644
    },
    {
      "epoch": 0.9523601143590641,
      "grad_norm": 0.7497910810730019,
      "learning_rate": 5.940091737543507e-08,
      "loss": 0.1186,
      "step": 32645
    },
    {
      "epoch": 0.9523892875897076,
      "grad_norm": 0.8616186187157856,
      "learning_rate": 5.9328333157489535e-08,
      "loss": 0.1029,
      "step": 32646
    },
    {
      "epoch": 0.9524184608203512,
      "grad_norm": 0.9867630838410505,
      "learning_rate": 5.925579304868201e-08,
      "loss": 0.1162,
      "step": 32647
    },
    {
      "epoch": 0.9524476340509949,
      "grad_norm": 0.7842768877345314,
      "learning_rate": 5.9183297049658637e-08,
      "loss": 0.1112,
      "step": 32648
    },
    {
      "epoch": 0.9524768072816384,
      "grad_norm": 0.6791383638777265,
      "learning_rate": 5.9110845161067245e-08,
      "loss": 0.0876,
      "step": 32649
    },
    {
      "epoch": 0.952505980512282,
      "grad_norm": 0.7673675376467145,
      "learning_rate": 5.9038437383555636e-08,
      "loss": 0.091,
      "step": 32650
    },
    {
      "epoch": 0.9525351537429255,
      "grad_norm": 0.8811284227523455,
      "learning_rate": 5.896607371776886e-08,
      "loss": 0.1073,
      "step": 32651
    },
    {
      "epoch": 0.9525643269735691,
      "grad_norm": 0.8780540531852291,
      "learning_rate": 5.88937541643525e-08,
      "loss": 0.1318,
      "step": 32652
    },
    {
      "epoch": 0.9525935002042126,
      "grad_norm": 0.8550725392908367,
      "learning_rate": 5.882147872395438e-08,
      "loss": 0.1066,
      "step": 32653
    },
    {
      "epoch": 0.9526226734348562,
      "grad_norm": 0.9066859606474291,
      "learning_rate": 5.874924739721843e-08,
      "loss": 0.1124,
      "step": 32654
    },
    {
      "epoch": 0.9526518466654997,
      "grad_norm": 0.8223402910741842,
      "learning_rate": 5.8677060184789134e-08,
      "loss": 0.1127,
      "step": 32655
    },
    {
      "epoch": 0.9526810198961433,
      "grad_norm": 0.7837047894990621,
      "learning_rate": 5.860491708731153e-08,
      "loss": 0.104,
      "step": 32656
    },
    {
      "epoch": 0.9527101931267868,
      "grad_norm": 0.8590150222609634,
      "learning_rate": 5.85328181054301e-08,
      "loss": 0.0879,
      "step": 32657
    },
    {
      "epoch": 0.9527393663574304,
      "grad_norm": 0.9602282152252887,
      "learning_rate": 5.8460763239787666e-08,
      "loss": 0.1379,
      "step": 32658
    },
    {
      "epoch": 0.952768539588074,
      "grad_norm": 1.0993961155785237,
      "learning_rate": 5.83887524910276e-08,
      "loss": 0.1038,
      "step": 32659
    },
    {
      "epoch": 0.9527977128187175,
      "grad_norm": 0.811768160733312,
      "learning_rate": 5.8316785859793836e-08,
      "loss": 0.1116,
      "step": 32660
    },
    {
      "epoch": 0.952826886049361,
      "grad_norm": 0.8474323462158622,
      "learning_rate": 5.824486334672752e-08,
      "loss": 0.0862,
      "step": 32661
    },
    {
      "epoch": 0.9528560592800047,
      "grad_norm": 1.2586626991754393,
      "learning_rate": 5.817298495247148e-08,
      "loss": 0.1136,
      "step": 32662
    },
    {
      "epoch": 0.9528852325106483,
      "grad_norm": 0.9847370764031189,
      "learning_rate": 5.8101150677667975e-08,
      "loss": 0.1196,
      "step": 32663
    },
    {
      "epoch": 0.9529144057412918,
      "grad_norm": 0.7908098364741442,
      "learning_rate": 5.802936052295649e-08,
      "loss": 0.1057,
      "step": 32664
    },
    {
      "epoch": 0.9529435789719354,
      "grad_norm": 0.663226266575957,
      "learning_rate": 5.795761448897985e-08,
      "loss": 0.1082,
      "step": 32665
    },
    {
      "epoch": 0.9529727522025789,
      "grad_norm": 0.8961722386481162,
      "learning_rate": 5.78859125763781e-08,
      "loss": 0.0796,
      "step": 32666
    },
    {
      "epoch": 0.9530019254332225,
      "grad_norm": 0.7789305018609528,
      "learning_rate": 5.7814254785790724e-08,
      "loss": 0.1054,
      "step": 32667
    },
    {
      "epoch": 0.953031098663866,
      "grad_norm": 0.8662469204807426,
      "learning_rate": 5.774264111785832e-08,
      "loss": 0.1085,
      "step": 32668
    },
    {
      "epoch": 0.9530602718945096,
      "grad_norm": 0.7783665091937831,
      "learning_rate": 5.767107157321927e-08,
      "loss": 0.1043,
      "step": 32669
    },
    {
      "epoch": 0.9530894451251531,
      "grad_norm": 0.8554149891511343,
      "learning_rate": 5.759954615251307e-08,
      "loss": 0.1212,
      "step": 32670
    },
    {
      "epoch": 0.9531186183557967,
      "grad_norm": 1.116922813535786,
      "learning_rate": 5.752806485637863e-08,
      "loss": 0.1013,
      "step": 32671
    },
    {
      "epoch": 0.9531477915864403,
      "grad_norm": 0.6838342260271995,
      "learning_rate": 5.745662768545324e-08,
      "loss": 0.0908,
      "step": 32672
    },
    {
      "epoch": 0.9531769648170838,
      "grad_norm": 0.6748127117389364,
      "learning_rate": 5.7385234640375817e-08,
      "loss": 0.0936,
      "step": 32673
    },
    {
      "epoch": 0.9532061380477274,
      "grad_norm": 0.8109081455760337,
      "learning_rate": 5.731388572178309e-08,
      "loss": 0.1078,
      "step": 32674
    },
    {
      "epoch": 0.9532353112783709,
      "grad_norm": 0.8996950441897913,
      "learning_rate": 5.724258093031176e-08,
      "loss": 0.1237,
      "step": 32675
    },
    {
      "epoch": 0.9532644845090146,
      "grad_norm": 0.9041151303209126,
      "learning_rate": 5.717132026659855e-08,
      "loss": 0.1279,
      "step": 32676
    },
    {
      "epoch": 0.9532936577396581,
      "grad_norm": 0.9389216356559982,
      "learning_rate": 5.710010373128016e-08,
      "loss": 0.1226,
      "step": 32677
    },
    {
      "epoch": 0.9533228309703017,
      "grad_norm": 2.2230861250735847,
      "learning_rate": 5.702893132499221e-08,
      "loss": 0.1248,
      "step": 32678
    },
    {
      "epoch": 0.9533520042009452,
      "grad_norm": 0.8085138104073984,
      "learning_rate": 5.695780304836973e-08,
      "loss": 0.1027,
      "step": 32679
    },
    {
      "epoch": 0.9533811774315888,
      "grad_norm": 0.8832825819353127,
      "learning_rate": 5.6886718902048334e-08,
      "loss": 0.0885,
      "step": 32680
    },
    {
      "epoch": 0.9534103506622323,
      "grad_norm": 1.217138091986393,
      "learning_rate": 5.6815678886661953e-08,
      "loss": 0.1106,
      "step": 32681
    },
    {
      "epoch": 0.9534395238928759,
      "grad_norm": 1.086105299322099,
      "learning_rate": 5.674468300284508e-08,
      "loss": 0.1116,
      "step": 32682
    },
    {
      "epoch": 0.9534686971235194,
      "grad_norm": 0.8230304666633287,
      "learning_rate": 5.667373125123166e-08,
      "loss": 0.1122,
      "step": 32683
    },
    {
      "epoch": 0.953497870354163,
      "grad_norm": 0.7375332041179844,
      "learning_rate": 5.660282363245562e-08,
      "loss": 0.1066,
      "step": 32684
    },
    {
      "epoch": 0.9535270435848066,
      "grad_norm": 0.811745805945028,
      "learning_rate": 5.653196014714868e-08,
      "loss": 0.1065,
      "step": 32685
    },
    {
      "epoch": 0.9535562168154501,
      "grad_norm": 0.9421793573950739,
      "learning_rate": 5.646114079594478e-08,
      "loss": 0.113,
      "step": 32686
    },
    {
      "epoch": 0.9535853900460937,
      "grad_norm": 0.9071335278536922,
      "learning_rate": 5.6390365579476195e-08,
      "loss": 0.1076,
      "step": 32687
    },
    {
      "epoch": 0.9536145632767372,
      "grad_norm": 0.958546515415864,
      "learning_rate": 5.631963449837352e-08,
      "loss": 0.11,
      "step": 32688
    },
    {
      "epoch": 0.9536437365073809,
      "grad_norm": 0.6393352449910052,
      "learning_rate": 5.624894755326904e-08,
      "loss": 0.0829,
      "step": 32689
    },
    {
      "epoch": 0.9536729097380244,
      "grad_norm": 0.8669815361704764,
      "learning_rate": 5.617830474479391e-08,
      "loss": 0.1175,
      "step": 32690
    },
    {
      "epoch": 0.953702082968668,
      "grad_norm": 0.7695261088984546,
      "learning_rate": 5.6107706073578735e-08,
      "loss": 0.1254,
      "step": 32691
    },
    {
      "epoch": 0.9537312561993115,
      "grad_norm": 0.846695217818394,
      "learning_rate": 5.6037151540253574e-08,
      "loss": 0.103,
      "step": 32692
    },
    {
      "epoch": 0.9537604294299551,
      "grad_norm": 0.72007459617846,
      "learning_rate": 5.596664114544903e-08,
      "loss": 0.079,
      "step": 32693
    },
    {
      "epoch": 0.9537896026605986,
      "grad_norm": 0.7513026332398145,
      "learning_rate": 5.589617488979349e-08,
      "loss": 0.1083,
      "step": 32694
    },
    {
      "epoch": 0.9538187758912422,
      "grad_norm": 0.6366230101102938,
      "learning_rate": 5.5825752773917e-08,
      "loss": 0.0907,
      "step": 32695
    },
    {
      "epoch": 0.9538479491218858,
      "grad_norm": 0.8375176551428908,
      "learning_rate": 5.5755374798447394e-08,
      "loss": 0.101,
      "step": 32696
    },
    {
      "epoch": 0.9538771223525293,
      "grad_norm": 0.8338850696366432,
      "learning_rate": 5.568504096401417e-08,
      "loss": 0.1109,
      "step": 32697
    },
    {
      "epoch": 0.9539062955831729,
      "grad_norm": 0.9244959917031603,
      "learning_rate": 5.56147512712446e-08,
      "loss": 0.1138,
      "step": 32698
    },
    {
      "epoch": 0.9539354688138164,
      "grad_norm": 0.7904081543814052,
      "learning_rate": 5.5544505720765974e-08,
      "loss": 0.1074,
      "step": 32699
    },
    {
      "epoch": 0.95396464204446,
      "grad_norm": 0.71380367927924,
      "learning_rate": 5.547430431320555e-08,
      "loss": 0.1079,
      "step": 32700
    },
    {
      "epoch": 0.9539938152751035,
      "grad_norm": 0.722018876045306,
      "learning_rate": 5.540414704919006e-08,
      "loss": 0.0951,
      "step": 32701
    },
    {
      "epoch": 0.9540229885057471,
      "grad_norm": 0.983592964344662,
      "learning_rate": 5.533403392934622e-08,
      "loss": 0.1095,
      "step": 32702
    },
    {
      "epoch": 0.9540521617363907,
      "grad_norm": 0.7452949862713076,
      "learning_rate": 5.5263964954299644e-08,
      "loss": 0.0971,
      "step": 32703
    },
    {
      "epoch": 0.9540813349670343,
      "grad_norm": 0.9514650723231737,
      "learning_rate": 5.519394012467649e-08,
      "loss": 0.1353,
      "step": 32704
    },
    {
      "epoch": 0.9541105081976778,
      "grad_norm": 0.7965203008504623,
      "learning_rate": 5.5123959441100713e-08,
      "loss": 0.109,
      "step": 32705
    },
    {
      "epoch": 0.9541396814283214,
      "grad_norm": 0.7555129535758409,
      "learning_rate": 5.505402290419792e-08,
      "loss": 0.102,
      "step": 32706
    },
    {
      "epoch": 0.954168854658965,
      "grad_norm": 0.8251394665377272,
      "learning_rate": 5.498413051459261e-08,
      "loss": 0.1277,
      "step": 32707
    },
    {
      "epoch": 0.9541980278896085,
      "grad_norm": 0.7186693398842615,
      "learning_rate": 5.4914282272908737e-08,
      "loss": 0.1214,
      "step": 32708
    },
    {
      "epoch": 0.954227201120252,
      "grad_norm": 0.8534837535293472,
      "learning_rate": 5.484447817976912e-08,
      "loss": 0.1309,
      "step": 32709
    },
    {
      "epoch": 0.9542563743508956,
      "grad_norm": 0.9406564450983651,
      "learning_rate": 5.477471823579772e-08,
      "loss": 0.1216,
      "step": 32710
    },
    {
      "epoch": 0.9542855475815392,
      "grad_norm": 0.6421733732819564,
      "learning_rate": 5.470500244161736e-08,
      "loss": 0.1008,
      "step": 32711
    },
    {
      "epoch": 0.9543147208121827,
      "grad_norm": 0.8149105103919811,
      "learning_rate": 5.4635330797849217e-08,
      "loss": 0.1093,
      "step": 32712
    },
    {
      "epoch": 0.9543438940428263,
      "grad_norm": 0.7854076563447636,
      "learning_rate": 5.456570330511724e-08,
      "loss": 0.1234,
      "step": 32713
    },
    {
      "epoch": 0.9543730672734698,
      "grad_norm": 0.8292307039778413,
      "learning_rate": 5.449611996404203e-08,
      "loss": 0.0911,
      "step": 32714
    },
    {
      "epoch": 0.9544022405041134,
      "grad_norm": 0.847145646906693,
      "learning_rate": 5.442658077524476e-08,
      "loss": 0.104,
      "step": 32715
    },
    {
      "epoch": 0.954431413734757,
      "grad_norm": 0.5969312758250869,
      "learning_rate": 5.435708573934662e-08,
      "loss": 0.085,
      "step": 32716
    },
    {
      "epoch": 0.9544605869654006,
      "grad_norm": 0.8888413792758529,
      "learning_rate": 5.428763485696764e-08,
      "loss": 0.128,
      "step": 32717
    },
    {
      "epoch": 0.9544897601960441,
      "grad_norm": 0.8815023539978275,
      "learning_rate": 5.4218228128727345e-08,
      "loss": 0.1126,
      "step": 32718
    },
    {
      "epoch": 0.9545189334266877,
      "grad_norm": 0.8988664623351925,
      "learning_rate": 5.4148865555246896e-08,
      "loss": 0.1305,
      "step": 32719
    },
    {
      "epoch": 0.9545481066573313,
      "grad_norm": 0.8547614744774444,
      "learning_rate": 5.407954713714414e-08,
      "loss": 0.1212,
      "step": 32720
    },
    {
      "epoch": 0.9545772798879748,
      "grad_norm": 0.9884351548375194,
      "learning_rate": 5.4010272875039125e-08,
      "loss": 0.1045,
      "step": 32721
    },
    {
      "epoch": 0.9546064531186184,
      "grad_norm": 0.7517468882826768,
      "learning_rate": 5.3941042769549146e-08,
      "loss": 0.1197,
      "step": 32722
    },
    {
      "epoch": 0.9546356263492619,
      "grad_norm": 0.7619462510575757,
      "learning_rate": 5.387185682129259e-08,
      "loss": 0.1188,
      "step": 32723
    },
    {
      "epoch": 0.9546647995799055,
      "grad_norm": 0.9094217397506837,
      "learning_rate": 5.380271503088841e-08,
      "loss": 0.1104,
      "step": 32724
    },
    {
      "epoch": 0.954693972810549,
      "grad_norm": 0.8684033543593407,
      "learning_rate": 5.373361739895222e-08,
      "loss": 0.1074,
      "step": 32725
    },
    {
      "epoch": 0.9547231460411926,
      "grad_norm": 0.7737522514610439,
      "learning_rate": 5.366456392610131e-08,
      "loss": 0.1272,
      "step": 32726
    },
    {
      "epoch": 0.9547523192718361,
      "grad_norm": 0.8463556442373639,
      "learning_rate": 5.3595554612952404e-08,
      "loss": 0.1081,
      "step": 32727
    },
    {
      "epoch": 0.9547814925024797,
      "grad_norm": 0.7869651876905523,
      "learning_rate": 5.352658946012224e-08,
      "loss": 0.106,
      "step": 32728
    },
    {
      "epoch": 0.9548106657331232,
      "grad_norm": 1.1556828545657285,
      "learning_rate": 5.345766846822475e-08,
      "loss": 0.1097,
      "step": 32729
    },
    {
      "epoch": 0.9548398389637669,
      "grad_norm": 0.8149670517569663,
      "learning_rate": 5.3388791637877244e-08,
      "loss": 0.1278,
      "step": 32730
    },
    {
      "epoch": 0.9548690121944104,
      "grad_norm": 0.8282747326715098,
      "learning_rate": 5.3319958969693665e-08,
      "loss": 0.1176,
      "step": 32731
    },
    {
      "epoch": 0.954898185425054,
      "grad_norm": 0.853088271351987,
      "learning_rate": 5.3251170464288516e-08,
      "loss": 0.099,
      "step": 32732
    },
    {
      "epoch": 0.9549273586556976,
      "grad_norm": 0.6244398348582837,
      "learning_rate": 5.3182426122275753e-08,
      "loss": 0.1136,
      "step": 32733
    },
    {
      "epoch": 0.9549565318863411,
      "grad_norm": 0.7176252117683201,
      "learning_rate": 5.311372594426989e-08,
      "loss": 0.1149,
      "step": 32734
    },
    {
      "epoch": 0.9549857051169847,
      "grad_norm": 0.7292212616731406,
      "learning_rate": 5.304506993088321e-08,
      "loss": 0.1046,
      "step": 32735
    },
    {
      "epoch": 0.9550148783476282,
      "grad_norm": 0.6841145006758904,
      "learning_rate": 5.2976458082729666e-08,
      "loss": 0.098,
      "step": 32736
    },
    {
      "epoch": 0.9550440515782718,
      "grad_norm": 0.768517442130169,
      "learning_rate": 5.290789040042099e-08,
      "loss": 0.1047,
      "step": 32737
    },
    {
      "epoch": 0.9550732248089153,
      "grad_norm": 0.8851022480303948,
      "learning_rate": 5.283936688457003e-08,
      "loss": 0.1126,
      "step": 32738
    },
    {
      "epoch": 0.9551023980395589,
      "grad_norm": 0.9288174884552797,
      "learning_rate": 5.277088753578796e-08,
      "loss": 0.1247,
      "step": 32739
    },
    {
      "epoch": 0.9551315712702024,
      "grad_norm": 0.9339753629522923,
      "learning_rate": 5.2702452354687075e-08,
      "loss": 0.1158,
      "step": 32740
    },
    {
      "epoch": 0.955160744500846,
      "grad_norm": 0.7427770459169746,
      "learning_rate": 5.2634061341876874e-08,
      "loss": 0.1088,
      "step": 32741
    },
    {
      "epoch": 0.9551899177314895,
      "grad_norm": 0.7796077651791834,
      "learning_rate": 5.256571449796854e-08,
      "loss": 0.1001,
      "step": 32742
    },
    {
      "epoch": 0.9552190909621332,
      "grad_norm": 0.841522036360295,
      "learning_rate": 5.2497411823573264e-08,
      "loss": 0.1188,
      "step": 32743
    },
    {
      "epoch": 0.9552482641927768,
      "grad_norm": 0.6581909732765954,
      "learning_rate": 5.2429153319299987e-08,
      "loss": 0.1126,
      "step": 32744
    },
    {
      "epoch": 0.9552774374234203,
      "grad_norm": 0.8948577551819488,
      "learning_rate": 5.236093898575767e-08,
      "loss": 0.1175,
      "step": 32745
    },
    {
      "epoch": 0.9553066106540639,
      "grad_norm": 0.6663435400942943,
      "learning_rate": 5.229276882355583e-08,
      "loss": 0.0963,
      "step": 32746
    },
    {
      "epoch": 0.9553357838847074,
      "grad_norm": 0.7926324562650706,
      "learning_rate": 5.222464283330342e-08,
      "loss": 0.1044,
      "step": 32747
    },
    {
      "epoch": 0.955364957115351,
      "grad_norm": 0.9649432308644017,
      "learning_rate": 5.215656101560829e-08,
      "loss": 0.1203,
      "step": 32748
    },
    {
      "epoch": 0.9553941303459945,
      "grad_norm": 1.0764631275838574,
      "learning_rate": 5.2088523371077724e-08,
      "loss": 0.1174,
      "step": 32749
    },
    {
      "epoch": 0.9554233035766381,
      "grad_norm": 0.730160856196989,
      "learning_rate": 5.202052990032014e-08,
      "loss": 0.099,
      "step": 32750
    },
    {
      "epoch": 0.9554524768072816,
      "grad_norm": 0.8186530304108599,
      "learning_rate": 5.1952580603941705e-08,
      "loss": 0.118,
      "step": 32751
    },
    {
      "epoch": 0.9554816500379252,
      "grad_norm": 0.7699885973715855,
      "learning_rate": 5.188467548254972e-08,
      "loss": 0.0974,
      "step": 32752
    },
    {
      "epoch": 0.9555108232685687,
      "grad_norm": 0.9613407817142756,
      "learning_rate": 5.1816814536749804e-08,
      "loss": 0.095,
      "step": 32753
    },
    {
      "epoch": 0.9555399964992123,
      "grad_norm": 0.7684494701360869,
      "learning_rate": 5.174899776714814e-08,
      "loss": 0.1038,
      "step": 32754
    },
    {
      "epoch": 0.9555691697298558,
      "grad_norm": 0.715427278530436,
      "learning_rate": 5.1681225174350926e-08,
      "loss": 0.1099,
      "step": 32755
    },
    {
      "epoch": 0.9555983429604994,
      "grad_norm": 0.8722937914171108,
      "learning_rate": 5.1613496758961545e-08,
      "loss": 0.1198,
      "step": 32756
    },
    {
      "epoch": 0.9556275161911431,
      "grad_norm": 0.8399977858132577,
      "learning_rate": 5.154581252158619e-08,
      "loss": 0.1371,
      "step": 32757
    },
    {
      "epoch": 0.9556566894217866,
      "grad_norm": 0.9756169504636132,
      "learning_rate": 5.147817246282882e-08,
      "loss": 0.1103,
      "step": 32758
    },
    {
      "epoch": 0.9556858626524302,
      "grad_norm": 0.9175257322200491,
      "learning_rate": 5.1410576583291736e-08,
      "loss": 0.126,
      "step": 32759
    },
    {
      "epoch": 0.9557150358830737,
      "grad_norm": 1.1429093656478735,
      "learning_rate": 5.134302488358056e-08,
      "loss": 0.1195,
      "step": 32760
    },
    {
      "epoch": 0.9557442091137173,
      "grad_norm": 0.6904741345709744,
      "learning_rate": 5.127551736429759e-08,
      "loss": 0.1156,
      "step": 32761
    },
    {
      "epoch": 0.9557733823443608,
      "grad_norm": 0.6425373426976441,
      "learning_rate": 5.120805402604512e-08,
      "loss": 0.1188,
      "step": 32762
    },
    {
      "epoch": 0.9558025555750044,
      "grad_norm": 0.8153282788925978,
      "learning_rate": 5.114063486942655e-08,
      "loss": 0.1034,
      "step": 32763
    },
    {
      "epoch": 0.9558317288056479,
      "grad_norm": 0.9550712149912414,
      "learning_rate": 5.1073259895042527e-08,
      "loss": 0.0952,
      "step": 32764
    },
    {
      "epoch": 0.9558609020362915,
      "grad_norm": 0.7575655806906811,
      "learning_rate": 5.100592910349478e-08,
      "loss": 0.0979,
      "step": 32765
    },
    {
      "epoch": 0.955890075266935,
      "grad_norm": 0.7274889051662751,
      "learning_rate": 5.0938642495384495e-08,
      "loss": 0.102,
      "step": 32766
    },
    {
      "epoch": 0.9559192484975786,
      "grad_norm": 0.7797081707174183,
      "learning_rate": 5.087140007131286e-08,
      "loss": 0.0989,
      "step": 32767
    },
    {
      "epoch": 0.9559484217282221,
      "grad_norm": 0.7614799380669769,
      "learning_rate": 5.0804201831880505e-08,
      "loss": 0.0928,
      "step": 32768
    },
    {
      "epoch": 0.9559775949588657,
      "grad_norm": 0.8549511031994224,
      "learning_rate": 5.073704777768584e-08,
      "loss": 0.1075,
      "step": 32769
    },
    {
      "epoch": 0.9560067681895094,
      "grad_norm": 0.8778242344808694,
      "learning_rate": 5.0669937909330056e-08,
      "loss": 0.1186,
      "step": 32770
    },
    {
      "epoch": 0.9560359414201529,
      "grad_norm": 0.8661055700702226,
      "learning_rate": 5.0602872227411e-08,
      "loss": 0.1228,
      "step": 32771
    },
    {
      "epoch": 0.9560651146507965,
      "grad_norm": 0.7538873029924936,
      "learning_rate": 5.053585073252765e-08,
      "loss": 0.1002,
      "step": 32772
    },
    {
      "epoch": 0.95609428788144,
      "grad_norm": 0.7249928753886032,
      "learning_rate": 5.046887342527951e-08,
      "loss": 0.1145,
      "step": 32773
    },
    {
      "epoch": 0.9561234611120836,
      "grad_norm": 0.7851065270411969,
      "learning_rate": 5.0401940306263884e-08,
      "loss": 0.0956,
      "step": 32774
    },
    {
      "epoch": 0.9561526343427271,
      "grad_norm": 0.7216456921036118,
      "learning_rate": 5.0335051376077527e-08,
      "loss": 0.1069,
      "step": 32775
    },
    {
      "epoch": 0.9561818075733707,
      "grad_norm": 0.7984319027499761,
      "learning_rate": 5.026820663531828e-08,
      "loss": 0.1144,
      "step": 32776
    },
    {
      "epoch": 0.9562109808040142,
      "grad_norm": 0.7090533146522467,
      "learning_rate": 5.02014060845829e-08,
      "loss": 0.117,
      "step": 32777
    },
    {
      "epoch": 0.9562401540346578,
      "grad_norm": 0.7948516064912404,
      "learning_rate": 5.013464972446813e-08,
      "loss": 0.1167,
      "step": 32778
    },
    {
      "epoch": 0.9562693272653013,
      "grad_norm": 0.9578149005999798,
      "learning_rate": 5.0067937555569603e-08,
      "loss": 0.1289,
      "step": 32779
    },
    {
      "epoch": 0.9562985004959449,
      "grad_norm": 0.7431435145855708,
      "learning_rate": 5.000126957848239e-08,
      "loss": 0.1186,
      "step": 32780
    },
    {
      "epoch": 0.9563276737265884,
      "grad_norm": 0.9385269079835733,
      "learning_rate": 4.9934645793802696e-08,
      "loss": 0.1311,
      "step": 32781
    },
    {
      "epoch": 0.956356846957232,
      "grad_norm": 0.7426492576133975,
      "learning_rate": 4.9868066202124476e-08,
      "loss": 0.1035,
      "step": 32782
    },
    {
      "epoch": 0.9563860201878756,
      "grad_norm": 0.722847872566871,
      "learning_rate": 4.980153080404227e-08,
      "loss": 0.1105,
      "step": 32783
    },
    {
      "epoch": 0.9564151934185192,
      "grad_norm": 0.8650234530628166,
      "learning_rate": 4.973503960015058e-08,
      "loss": 0.1222,
      "step": 32784
    },
    {
      "epoch": 0.9564443666491628,
      "grad_norm": 0.8360374043334173,
      "learning_rate": 4.9668592591042844e-08,
      "loss": 0.1185,
      "step": 32785
    },
    {
      "epoch": 0.9564735398798063,
      "grad_norm": 0.839297362196858,
      "learning_rate": 4.9602189777311906e-08,
      "loss": 0.1155,
      "step": 32786
    },
    {
      "epoch": 0.9565027131104499,
      "grad_norm": 0.6934641601053315,
      "learning_rate": 4.9535831159551186e-08,
      "loss": 0.1112,
      "step": 32787
    },
    {
      "epoch": 0.9565318863410934,
      "grad_norm": 1.53006393883715,
      "learning_rate": 4.9469516738352986e-08,
      "loss": 0.1085,
      "step": 32788
    },
    {
      "epoch": 0.956561059571737,
      "grad_norm": 0.8742586377683601,
      "learning_rate": 4.94032465143085e-08,
      "loss": 0.1067,
      "step": 32789
    },
    {
      "epoch": 0.9565902328023805,
      "grad_norm": 0.8120832384895197,
      "learning_rate": 4.933702048801003e-08,
      "loss": 0.1044,
      "step": 32790
    },
    {
      "epoch": 0.9566194060330241,
      "grad_norm": 0.7112104474890033,
      "learning_rate": 4.927083866004934e-08,
      "loss": 0.1008,
      "step": 32791
    },
    {
      "epoch": 0.9566485792636676,
      "grad_norm": 0.8156216727451591,
      "learning_rate": 4.920470103101649e-08,
      "loss": 0.0775,
      "step": 32792
    },
    {
      "epoch": 0.9566777524943112,
      "grad_norm": 0.9304001213469875,
      "learning_rate": 4.9138607601502684e-08,
      "loss": 0.1237,
      "step": 32793
    },
    {
      "epoch": 0.9567069257249547,
      "grad_norm": 0.7397240385780188,
      "learning_rate": 4.90725583720969e-08,
      "loss": 0.1073,
      "step": 32794
    },
    {
      "epoch": 0.9567360989555983,
      "grad_norm": 0.8207286387182503,
      "learning_rate": 4.9006553343389774e-08,
      "loss": 0.1118,
      "step": 32795
    },
    {
      "epoch": 0.9567652721862419,
      "grad_norm": 0.9117029824346379,
      "learning_rate": 4.894059251596972e-08,
      "loss": 0.1349,
      "step": 32796
    },
    {
      "epoch": 0.9567944454168855,
      "grad_norm": 0.9294226671764082,
      "learning_rate": 4.887467589042683e-08,
      "loss": 0.1069,
      "step": 32797
    },
    {
      "epoch": 0.9568236186475291,
      "grad_norm": 0.9921486183494397,
      "learning_rate": 4.88088034673484e-08,
      "loss": 0.1036,
      "step": 32798
    },
    {
      "epoch": 0.9568527918781726,
      "grad_norm": 0.995969663092959,
      "learning_rate": 4.874297524732341e-08,
      "loss": 0.1221,
      "step": 32799
    },
    {
      "epoch": 0.9568819651088162,
      "grad_norm": 0.7589754390526152,
      "learning_rate": 4.867719123093917e-08,
      "loss": 0.1041,
      "step": 32800
    },
    {
      "epoch": 0.9569111383394597,
      "grad_norm": 0.920959659120988,
      "learning_rate": 4.861145141878243e-08,
      "loss": 0.1026,
      "step": 32801
    },
    {
      "epoch": 0.9569403115701033,
      "grad_norm": 1.0198832879935908,
      "learning_rate": 4.8545755811441054e-08,
      "loss": 0.12,
      "step": 32802
    },
    {
      "epoch": 0.9569694848007468,
      "grad_norm": 0.7420504055522724,
      "learning_rate": 4.8480104409501236e-08,
      "loss": 0.1258,
      "step": 32803
    },
    {
      "epoch": 0.9569986580313904,
      "grad_norm": 0.738726918682796,
      "learning_rate": 4.8414497213549184e-08,
      "loss": 0.1081,
      "step": 32804
    },
    {
      "epoch": 0.957027831262034,
      "grad_norm": 0.6951912015343367,
      "learning_rate": 4.834893422416997e-08,
      "loss": 0.1024,
      "step": 32805
    },
    {
      "epoch": 0.9570570044926775,
      "grad_norm": 0.8459641707126396,
      "learning_rate": 4.828341544194981e-08,
      "loss": 0.1158,
      "step": 32806
    },
    {
      "epoch": 0.957086177723321,
      "grad_norm": 0.7477834215712245,
      "learning_rate": 4.8217940867473225e-08,
      "loss": 0.102,
      "step": 32807
    },
    {
      "epoch": 0.9571153509539646,
      "grad_norm": 0.7300391656927202,
      "learning_rate": 4.8152510501324745e-08,
      "loss": 0.1436,
      "step": 32808
    },
    {
      "epoch": 0.9571445241846082,
      "grad_norm": 1.000883386167395,
      "learning_rate": 4.8087124344088353e-08,
      "loss": 0.1075,
      "step": 32809
    },
    {
      "epoch": 0.9571736974152517,
      "grad_norm": 0.9674040131635351,
      "learning_rate": 4.8021782396348026e-08,
      "loss": 0.1262,
      "step": 32810
    },
    {
      "epoch": 0.9572028706458954,
      "grad_norm": 0.6453790645349463,
      "learning_rate": 4.795648465868719e-08,
      "loss": 0.0904,
      "step": 32811
    },
    {
      "epoch": 0.9572320438765389,
      "grad_norm": 0.7435397773090228,
      "learning_rate": 4.7891231131688695e-08,
      "loss": 0.1081,
      "step": 32812
    },
    {
      "epoch": 0.9572612171071825,
      "grad_norm": 0.7425090442832256,
      "learning_rate": 4.782602181593488e-08,
      "loss": 0.1281,
      "step": 32813
    },
    {
      "epoch": 0.957290390337826,
      "grad_norm": 0.7247203167434828,
      "learning_rate": 4.7760856712008584e-08,
      "loss": 0.1165,
      "step": 32814
    },
    {
      "epoch": 0.9573195635684696,
      "grad_norm": 1.0385829791443835,
      "learning_rate": 4.769573582049103e-08,
      "loss": 0.1239,
      "step": 32815
    },
    {
      "epoch": 0.9573487367991131,
      "grad_norm": 0.8691451414890681,
      "learning_rate": 4.763065914196341e-08,
      "loss": 0.132,
      "step": 32816
    },
    {
      "epoch": 0.9573779100297567,
      "grad_norm": 0.7427551091637435,
      "learning_rate": 4.756562667700748e-08,
      "loss": 0.1042,
      "step": 32817
    },
    {
      "epoch": 0.9574070832604002,
      "grad_norm": 0.9395494879599302,
      "learning_rate": 4.750063842620389e-08,
      "loss": 0.128,
      "step": 32818
    },
    {
      "epoch": 0.9574362564910438,
      "grad_norm": 0.8307875710800233,
      "learning_rate": 4.743569439013107e-08,
      "loss": 0.1175,
      "step": 32819
    },
    {
      "epoch": 0.9574654297216874,
      "grad_norm": 0.788224328384697,
      "learning_rate": 4.737079456937077e-08,
      "loss": 0.1032,
      "step": 32820
    },
    {
      "epoch": 0.9574946029523309,
      "grad_norm": 0.9026167256181064,
      "learning_rate": 4.730593896450197e-08,
      "loss": 0.1181,
      "step": 32821
    },
    {
      "epoch": 0.9575237761829745,
      "grad_norm": 0.7578415303003803,
      "learning_rate": 4.724112757610311e-08,
      "loss": 0.1037,
      "step": 32822
    },
    {
      "epoch": 0.957552949413618,
      "grad_norm": 0.712269092387141,
      "learning_rate": 4.717636040475315e-08,
      "loss": 0.1184,
      "step": 32823
    },
    {
      "epoch": 0.9575821226442617,
      "grad_norm": 0.7880497015147347,
      "learning_rate": 4.711163745103053e-08,
      "loss": 0.1004,
      "step": 32824
    },
    {
      "epoch": 0.9576112958749052,
      "grad_norm": 0.8939136102826749,
      "learning_rate": 4.704695871551257e-08,
      "loss": 0.0993,
      "step": 32825
    },
    {
      "epoch": 0.9576404691055488,
      "grad_norm": 0.5951060923281825,
      "learning_rate": 4.698232419877658e-08,
      "loss": 0.094,
      "step": 32826
    },
    {
      "epoch": 0.9576696423361923,
      "grad_norm": 0.7474564157472834,
      "learning_rate": 4.6917733901400976e-08,
      "loss": 0.1207,
      "step": 32827
    },
    {
      "epoch": 0.9576988155668359,
      "grad_norm": 0.8172784909372025,
      "learning_rate": 4.685318782396087e-08,
      "loss": 0.1042,
      "step": 32828
    },
    {
      "epoch": 0.9577279887974794,
      "grad_norm": 0.8084277300051048,
      "learning_rate": 4.678868596703301e-08,
      "loss": 0.0943,
      "step": 32829
    },
    {
      "epoch": 0.957757162028123,
      "grad_norm": 0.8381053554024123,
      "learning_rate": 4.6724228331194166e-08,
      "loss": 0.1208,
      "step": 32830
    },
    {
      "epoch": 0.9577863352587666,
      "grad_norm": 0.7795954897951408,
      "learning_rate": 4.665981491701776e-08,
      "loss": 0.1105,
      "step": 32831
    },
    {
      "epoch": 0.9578155084894101,
      "grad_norm": 1.1263082644142337,
      "learning_rate": 4.6595445725080566e-08,
      "loss": 0.1193,
      "step": 32832
    },
    {
      "epoch": 0.9578446817200537,
      "grad_norm": 0.7691307823628832,
      "learning_rate": 4.653112075595711e-08,
      "loss": 0.1138,
      "step": 32833
    },
    {
      "epoch": 0.9578738549506972,
      "grad_norm": 0.9599208035477919,
      "learning_rate": 4.6466840010221395e-08,
      "loss": 0.1025,
      "step": 32834
    },
    {
      "epoch": 0.9579030281813408,
      "grad_norm": 0.9485995240003718,
      "learning_rate": 4.640260348844683e-08,
      "loss": 0.0876,
      "step": 32835
    },
    {
      "epoch": 0.9579322014119843,
      "grad_norm": 0.7077820665793446,
      "learning_rate": 4.6338411191207414e-08,
      "loss": 0.1085,
      "step": 32836
    },
    {
      "epoch": 0.9579613746426279,
      "grad_norm": 0.827331988072541,
      "learning_rate": 4.627426311907601e-08,
      "loss": 0.1123,
      "step": 32837
    },
    {
      "epoch": 0.9579905478732715,
      "grad_norm": 0.8156878645710716,
      "learning_rate": 4.621015927262551e-08,
      "loss": 0.1085,
      "step": 32838
    },
    {
      "epoch": 0.9580197211039151,
      "grad_norm": 0.7015158744487017,
      "learning_rate": 4.614609965242822e-08,
      "loss": 0.129,
      "step": 32839
    },
    {
      "epoch": 0.9580488943345586,
      "grad_norm": 0.7925884216061743,
      "learning_rate": 4.608208425905592e-08,
      "loss": 0.1252,
      "step": 32840
    },
    {
      "epoch": 0.9580780675652022,
      "grad_norm": 0.9877298809925094,
      "learning_rate": 4.601811309308035e-08,
      "loss": 0.1214,
      "step": 32841
    },
    {
      "epoch": 0.9581072407958457,
      "grad_norm": 0.81919534179844,
      "learning_rate": 4.595418615507219e-08,
      "loss": 0.1239,
      "step": 32842
    },
    {
      "epoch": 0.9581364140264893,
      "grad_norm": 0.7967418626689056,
      "learning_rate": 4.589030344560208e-08,
      "loss": 0.0949,
      "step": 32843
    },
    {
      "epoch": 0.9581655872571329,
      "grad_norm": 0.8079560103900926,
      "learning_rate": 4.582646496524124e-08,
      "loss": 0.1193,
      "step": 32844
    },
    {
      "epoch": 0.9581947604877764,
      "grad_norm": 0.9559706878467896,
      "learning_rate": 4.5762670714559196e-08,
      "loss": 0.104,
      "step": 32845
    },
    {
      "epoch": 0.95822393371842,
      "grad_norm": 0.684763539678176,
      "learning_rate": 4.569892069412496e-08,
      "loss": 0.0884,
      "step": 32846
    },
    {
      "epoch": 0.9582531069490635,
      "grad_norm": 0.8668853365655103,
      "learning_rate": 4.563521490450862e-08,
      "loss": 0.0919,
      "step": 32847
    },
    {
      "epoch": 0.9582822801797071,
      "grad_norm": 0.9643167870716441,
      "learning_rate": 4.557155334627805e-08,
      "loss": 0.0941,
      "step": 32848
    },
    {
      "epoch": 0.9583114534103506,
      "grad_norm": 0.873714390428912,
      "learning_rate": 4.550793602000114e-08,
      "loss": 0.0918,
      "step": 32849
    },
    {
      "epoch": 0.9583406266409942,
      "grad_norm": 0.893234930076437,
      "learning_rate": 4.544436292624743e-08,
      "loss": 0.1033,
      "step": 32850
    },
    {
      "epoch": 0.9583697998716378,
      "grad_norm": 0.7652129907452041,
      "learning_rate": 4.538083406558425e-08,
      "loss": 0.1277,
      "step": 32851
    },
    {
      "epoch": 0.9583989731022814,
      "grad_norm": 0.7577640621083667,
      "learning_rate": 4.531734943857724e-08,
      "loss": 0.1071,
      "step": 32852
    },
    {
      "epoch": 0.958428146332925,
      "grad_norm": 1.1307995167316394,
      "learning_rate": 4.525390904579485e-08,
      "loss": 0.0948,
      "step": 32853
    },
    {
      "epoch": 0.9584573195635685,
      "grad_norm": 0.7003890147655835,
      "learning_rate": 4.5190512887802186e-08,
      "loss": 0.1044,
      "step": 32854
    },
    {
      "epoch": 0.958486492794212,
      "grad_norm": 0.8369313551339875,
      "learning_rate": 4.512716096516601e-08,
      "loss": 0.1015,
      "step": 32855
    },
    {
      "epoch": 0.9585156660248556,
      "grad_norm": 0.7204840338085293,
      "learning_rate": 4.506385327845197e-08,
      "loss": 0.1002,
      "step": 32856
    },
    {
      "epoch": 0.9585448392554992,
      "grad_norm": 1.4576364723203665,
      "learning_rate": 4.500058982822464e-08,
      "loss": 0.1044,
      "step": 32857
    },
    {
      "epoch": 0.9585740124861427,
      "grad_norm": 0.9758919669163466,
      "learning_rate": 4.493737061504966e-08,
      "loss": 0.099,
      "step": 32858
    },
    {
      "epoch": 0.9586031857167863,
      "grad_norm": 0.8193843089787872,
      "learning_rate": 4.487419563949047e-08,
      "loss": 0.1227,
      "step": 32859
    },
    {
      "epoch": 0.9586323589474298,
      "grad_norm": 0.8757366667764627,
      "learning_rate": 4.4811064902112175e-08,
      "loss": 0.1016,
      "step": 32860
    },
    {
      "epoch": 0.9586615321780734,
      "grad_norm": 0.6686091926794362,
      "learning_rate": 4.474797840347711e-08,
      "loss": 0.0864,
      "step": 32861
    },
    {
      "epoch": 0.9586907054087169,
      "grad_norm": 0.8385389414468926,
      "learning_rate": 4.468493614414926e-08,
      "loss": 0.1282,
      "step": 32862
    },
    {
      "epoch": 0.9587198786393605,
      "grad_norm": 1.046296384758437,
      "learning_rate": 4.462193812469151e-08,
      "loss": 0.1227,
      "step": 32863
    },
    {
      "epoch": 0.958749051870004,
      "grad_norm": 0.9308343716101727,
      "learning_rate": 4.4558984345666745e-08,
      "loss": 0.0847,
      "step": 32864
    },
    {
      "epoch": 0.9587782251006477,
      "grad_norm": 0.6667854690095036,
      "learning_rate": 4.4496074807635626e-08,
      "loss": 0.1114,
      "step": 32865
    },
    {
      "epoch": 0.9588073983312912,
      "grad_norm": 0.7987377356130329,
      "learning_rate": 4.443320951116103e-08,
      "loss": 0.1207,
      "step": 32866
    },
    {
      "epoch": 0.9588365715619348,
      "grad_norm": 1.01976480586201,
      "learning_rate": 4.437038845680308e-08,
      "loss": 0.0965,
      "step": 32867
    },
    {
      "epoch": 0.9588657447925784,
      "grad_norm": 0.8932694195104837,
      "learning_rate": 4.4307611645124096e-08,
      "loss": 0.1133,
      "step": 32868
    },
    {
      "epoch": 0.9588949180232219,
      "grad_norm": 0.7346521941143019,
      "learning_rate": 4.4244879076683065e-08,
      "loss": 0.1083,
      "step": 32869
    },
    {
      "epoch": 0.9589240912538655,
      "grad_norm": 0.7835186886056975,
      "learning_rate": 4.418219075204122e-08,
      "loss": 0.1083,
      "step": 32870
    },
    {
      "epoch": 0.958953264484509,
      "grad_norm": 0.8545462903301045,
      "learning_rate": 4.411954667175811e-08,
      "loss": 0.1427,
      "step": 32871
    },
    {
      "epoch": 0.9589824377151526,
      "grad_norm": 0.7472910583556739,
      "learning_rate": 4.405694683639161e-08,
      "loss": 0.1214,
      "step": 32872
    },
    {
      "epoch": 0.9590116109457961,
      "grad_norm": 0.967913088083774,
      "learning_rate": 4.3994391246501846e-08,
      "loss": 0.1058,
      "step": 32873
    },
    {
      "epoch": 0.9590407841764397,
      "grad_norm": 0.7862975461298798,
      "learning_rate": 4.39318799026478e-08,
      "loss": 0.1173,
      "step": 32874
    },
    {
      "epoch": 0.9590699574070832,
      "grad_norm": 0.9360038111422968,
      "learning_rate": 4.3869412805386256e-08,
      "loss": 0.1114,
      "step": 32875
    },
    {
      "epoch": 0.9590991306377268,
      "grad_norm": 0.7903278081066042,
      "learning_rate": 4.380698995527566e-08,
      "loss": 0.1173,
      "step": 32876
    },
    {
      "epoch": 0.9591283038683703,
      "grad_norm": 0.905513339696466,
      "learning_rate": 4.374461135287278e-08,
      "loss": 0.1239,
      "step": 32877
    },
    {
      "epoch": 0.959157477099014,
      "grad_norm": 0.8878514363182609,
      "learning_rate": 4.3682276998735505e-08,
      "loss": 0.1103,
      "step": 32878
    },
    {
      "epoch": 0.9591866503296576,
      "grad_norm": 0.9682928562932697,
      "learning_rate": 4.361998689341895e-08,
      "loss": 0.1312,
      "step": 32879
    },
    {
      "epoch": 0.9592158235603011,
      "grad_norm": 0.6971320156321881,
      "learning_rate": 4.355774103748045e-08,
      "loss": 0.1155,
      "step": 32880
    },
    {
      "epoch": 0.9592449967909447,
      "grad_norm": 0.93114582243866,
      "learning_rate": 4.3495539431475106e-08,
      "loss": 0.1156,
      "step": 32881
    },
    {
      "epoch": 0.9592741700215882,
      "grad_norm": 1.006106115889474,
      "learning_rate": 4.343338207595804e-08,
      "loss": 0.1183,
      "step": 32882
    },
    {
      "epoch": 0.9593033432522318,
      "grad_norm": 0.7048743503820754,
      "learning_rate": 4.3371268971484915e-08,
      "loss": 0.1177,
      "step": 32883
    },
    {
      "epoch": 0.9593325164828753,
      "grad_norm": 0.9426349108180363,
      "learning_rate": 4.330920011860973e-08,
      "loss": 0.11,
      "step": 32884
    },
    {
      "epoch": 0.9593616897135189,
      "grad_norm": 0.9077580728160185,
      "learning_rate": 4.3247175517887044e-08,
      "loss": 0.1293,
      "step": 32885
    },
    {
      "epoch": 0.9593908629441624,
      "grad_norm": 1.0798663308631007,
      "learning_rate": 4.318519516986974e-08,
      "loss": 0.1283,
      "step": 32886
    },
    {
      "epoch": 0.959420036174806,
      "grad_norm": 0.7957556625772834,
      "learning_rate": 4.312325907511183e-08,
      "loss": 0.1151,
      "step": 32887
    },
    {
      "epoch": 0.9594492094054495,
      "grad_norm": 1.0294127171563132,
      "learning_rate": 4.3061367234166764e-08,
      "loss": 0.1007,
      "step": 32888
    },
    {
      "epoch": 0.9594783826360931,
      "grad_norm": 0.8257449616386338,
      "learning_rate": 4.2999519647585755e-08,
      "loss": 0.0825,
      "step": 32889
    },
    {
      "epoch": 0.9595075558667366,
      "grad_norm": 0.7546505037339957,
      "learning_rate": 4.293771631592225e-08,
      "loss": 0.1035,
      "step": 32890
    },
    {
      "epoch": 0.9595367290973802,
      "grad_norm": 0.7245457104184709,
      "learning_rate": 4.287595723972693e-08,
      "loss": 0.1109,
      "step": 32891
    },
    {
      "epoch": 0.9595659023280239,
      "grad_norm": 0.8459373428090952,
      "learning_rate": 4.281424241955212e-08,
      "loss": 0.0971,
      "step": 32892
    },
    {
      "epoch": 0.9595950755586674,
      "grad_norm": 0.8149609414731143,
      "learning_rate": 4.2752571855948496e-08,
      "loss": 0.1057,
      "step": 32893
    },
    {
      "epoch": 0.959624248789311,
      "grad_norm": 0.7574602051965764,
      "learning_rate": 4.269094554946618e-08,
      "loss": 0.1307,
      "step": 32894
    },
    {
      "epoch": 0.9596534220199545,
      "grad_norm": 0.84237972087483,
      "learning_rate": 4.262936350065583e-08,
      "loss": 0.0923,
      "step": 32895
    },
    {
      "epoch": 0.9596825952505981,
      "grad_norm": 0.8154954755563415,
      "learning_rate": 4.256782571006701e-08,
      "loss": 0.0889,
      "step": 32896
    },
    {
      "epoch": 0.9597117684812416,
      "grad_norm": 0.7796157232550976,
      "learning_rate": 4.2506332178249286e-08,
      "loss": 0.1263,
      "step": 32897
    },
    {
      "epoch": 0.9597409417118852,
      "grad_norm": 0.6344972446368206,
      "learning_rate": 4.244488290575166e-08,
      "loss": 0.0886,
      "step": 32898
    },
    {
      "epoch": 0.9597701149425287,
      "grad_norm": 1.2740887915383508,
      "learning_rate": 4.2383477893122584e-08,
      "loss": 0.093,
      "step": 32899
    },
    {
      "epoch": 0.9597992881731723,
      "grad_norm": 0.8093674068745108,
      "learning_rate": 4.23221171409105e-08,
      "loss": 0.11,
      "step": 32900
    },
    {
      "epoch": 0.9598284614038158,
      "grad_norm": 0.9556726021725885,
      "learning_rate": 4.2260800649662756e-08,
      "loss": 0.1429,
      "step": 32901
    },
    {
      "epoch": 0.9598576346344594,
      "grad_norm": 0.7988374231433764,
      "learning_rate": 4.219952841992725e-08,
      "loss": 0.1224,
      "step": 32902
    },
    {
      "epoch": 0.9598868078651029,
      "grad_norm": 0.9391374602952318,
      "learning_rate": 4.2138300452250756e-08,
      "loss": 0.1175,
      "step": 32903
    },
    {
      "epoch": 0.9599159810957465,
      "grad_norm": 0.7569434105553122,
      "learning_rate": 4.207711674718007e-08,
      "loss": 0.115,
      "step": 32904
    },
    {
      "epoch": 0.9599451543263902,
      "grad_norm": 0.7241375268849807,
      "learning_rate": 4.201597730526141e-08,
      "loss": 0.1069,
      "step": 32905
    },
    {
      "epoch": 0.9599743275570337,
      "grad_norm": 0.7945752981885397,
      "learning_rate": 4.1954882127040466e-08,
      "loss": 0.0892,
      "step": 32906
    },
    {
      "epoch": 0.9600035007876773,
      "grad_norm": 0.8453466298444707,
      "learning_rate": 4.18938312130629e-08,
      "loss": 0.1165,
      "step": 32907
    },
    {
      "epoch": 0.9600326740183208,
      "grad_norm": 0.7038362290638049,
      "learning_rate": 4.183282456387327e-08,
      "loss": 0.1226,
      "step": 32908
    },
    {
      "epoch": 0.9600618472489644,
      "grad_norm": 0.8187939030680478,
      "learning_rate": 4.177186218001617e-08,
      "loss": 0.1085,
      "step": 32909
    },
    {
      "epoch": 0.9600910204796079,
      "grad_norm": 0.9455942275266978,
      "learning_rate": 4.171094406203724e-08,
      "loss": 0.1064,
      "step": 32910
    },
    {
      "epoch": 0.9601201937102515,
      "grad_norm": 0.8317222060420671,
      "learning_rate": 4.165007021047884e-08,
      "loss": 0.0955,
      "step": 32911
    },
    {
      "epoch": 0.960149366940895,
      "grad_norm": 0.7933141621435059,
      "learning_rate": 4.1589240625884986e-08,
      "loss": 0.1097,
      "step": 32912
    },
    {
      "epoch": 0.9601785401715386,
      "grad_norm": 0.8913368630929362,
      "learning_rate": 4.152845530879912e-08,
      "loss": 0.1138,
      "step": 32913
    },
    {
      "epoch": 0.9602077134021821,
      "grad_norm": 0.7858133652973975,
      "learning_rate": 4.1467714259763034e-08,
      "loss": 0.077,
      "step": 32914
    },
    {
      "epoch": 0.9602368866328257,
      "grad_norm": 0.8070321620835452,
      "learning_rate": 4.1407017479319636e-08,
      "loss": 0.1108,
      "step": 32915
    },
    {
      "epoch": 0.9602660598634692,
      "grad_norm": 0.7260421751741842,
      "learning_rate": 4.13463649680107e-08,
      "loss": 0.0944,
      "step": 32916
    },
    {
      "epoch": 0.9602952330941128,
      "grad_norm": 0.8073658116030934,
      "learning_rate": 4.128575672637747e-08,
      "loss": 0.1107,
      "step": 32917
    },
    {
      "epoch": 0.9603244063247564,
      "grad_norm": 0.7818934109657987,
      "learning_rate": 4.122519275496173e-08,
      "loss": 0.1207,
      "step": 32918
    },
    {
      "epoch": 0.9603535795554,
      "grad_norm": 0.6977574966582254,
      "learning_rate": 4.11646730543036e-08,
      "loss": 0.1011,
      "step": 32919
    },
    {
      "epoch": 0.9603827527860436,
      "grad_norm": 0.7836466621465713,
      "learning_rate": 4.110419762494322e-08,
      "loss": 0.1056,
      "step": 32920
    },
    {
      "epoch": 0.9604119260166871,
      "grad_norm": 0.7855120714498354,
      "learning_rate": 4.10437664674207e-08,
      "loss": 0.11,
      "step": 32921
    },
    {
      "epoch": 0.9604410992473307,
      "grad_norm": 0.7335995347301505,
      "learning_rate": 4.098337958227561e-08,
      "loss": 0.1251,
      "step": 32922
    },
    {
      "epoch": 0.9604702724779742,
      "grad_norm": 0.8376774119876726,
      "learning_rate": 4.0923036970047516e-08,
      "loss": 0.1322,
      "step": 32923
    },
    {
      "epoch": 0.9604994457086178,
      "grad_norm": 0.8464667864558919,
      "learning_rate": 4.086273863127488e-08,
      "loss": 0.1078,
      "step": 32924
    },
    {
      "epoch": 0.9605286189392613,
      "grad_norm": 0.7911772461653852,
      "learning_rate": 4.08024845664956e-08,
      "loss": 0.1028,
      "step": 32925
    },
    {
      "epoch": 0.9605577921699049,
      "grad_norm": 0.9011527629033576,
      "learning_rate": 4.074227477624759e-08,
      "loss": 0.1104,
      "step": 32926
    },
    {
      "epoch": 0.9605869654005484,
      "grad_norm": 0.9795115911375706,
      "learning_rate": 4.068210926106875e-08,
      "loss": 0.1424,
      "step": 32927
    },
    {
      "epoch": 0.960616138631192,
      "grad_norm": 0.6949332747096366,
      "learning_rate": 4.062198802149642e-08,
      "loss": 0.1111,
      "step": 32928
    },
    {
      "epoch": 0.9606453118618355,
      "grad_norm": 0.8964322759490889,
      "learning_rate": 4.056191105806684e-08,
      "loss": 0.1154,
      "step": 32929
    },
    {
      "epoch": 0.9606744850924791,
      "grad_norm": 0.8493600943026116,
      "learning_rate": 4.0501878371316806e-08,
      "loss": 0.1171,
      "step": 32930
    },
    {
      "epoch": 0.9607036583231227,
      "grad_norm": 0.7911199460032626,
      "learning_rate": 4.044188996178255e-08,
      "loss": 0.1151,
      "step": 32931
    },
    {
      "epoch": 0.9607328315537662,
      "grad_norm": 0.9322881469754698,
      "learning_rate": 4.0381945829998105e-08,
      "loss": 0.0967,
      "step": 32932
    },
    {
      "epoch": 0.9607620047844099,
      "grad_norm": 0.7760666350108234,
      "learning_rate": 4.0322045976500246e-08,
      "loss": 0.1201,
      "step": 32933
    },
    {
      "epoch": 0.9607911780150534,
      "grad_norm": 0.9476825495695969,
      "learning_rate": 4.0262190401822995e-08,
      "loss": 0.1226,
      "step": 32934
    },
    {
      "epoch": 0.960820351245697,
      "grad_norm": 0.8372849613729361,
      "learning_rate": 4.0202379106501486e-08,
      "loss": 0.1042,
      "step": 32935
    },
    {
      "epoch": 0.9608495244763405,
      "grad_norm": 0.8716121342632084,
      "learning_rate": 4.014261209106862e-08,
      "loss": 0.1012,
      "step": 32936
    },
    {
      "epoch": 0.9608786977069841,
      "grad_norm": 0.8071127617666869,
      "learning_rate": 4.0082889356058416e-08,
      "loss": 0.1087,
      "step": 32937
    },
    {
      "epoch": 0.9609078709376276,
      "grad_norm": 1.0736021576704553,
      "learning_rate": 4.002321090200434e-08,
      "loss": 0.1343,
      "step": 32938
    },
    {
      "epoch": 0.9609370441682712,
      "grad_norm": 1.272409731944864,
      "learning_rate": 3.996357672943874e-08,
      "loss": 0.1102,
      "step": 32939
    },
    {
      "epoch": 0.9609662173989147,
      "grad_norm": 0.8475898315710867,
      "learning_rate": 3.990398683889507e-08,
      "loss": 0.121,
      "step": 32940
    },
    {
      "epoch": 0.9609953906295583,
      "grad_norm": 0.7365683823754445,
      "learning_rate": 3.984444123090403e-08,
      "loss": 0.114,
      "step": 32941
    },
    {
      "epoch": 0.9610245638602019,
      "grad_norm": 0.8605090063585837,
      "learning_rate": 3.978493990599741e-08,
      "loss": 0.1141,
      "step": 32942
    },
    {
      "epoch": 0.9610537370908454,
      "grad_norm": 0.717129922732362,
      "learning_rate": 3.972548286470701e-08,
      "loss": 0.1042,
      "step": 32943
    },
    {
      "epoch": 0.961082910321489,
      "grad_norm": 0.7521501690881692,
      "learning_rate": 3.966607010756351e-08,
      "loss": 0.1269,
      "step": 32944
    },
    {
      "epoch": 0.9611120835521325,
      "grad_norm": 0.6593434180891637,
      "learning_rate": 3.960670163509706e-08,
      "loss": 0.0992,
      "step": 32945
    },
    {
      "epoch": 0.9611412567827762,
      "grad_norm": 0.8931219923621592,
      "learning_rate": 3.954737744783776e-08,
      "loss": 0.1182,
      "step": 32946
    },
    {
      "epoch": 0.9611704300134197,
      "grad_norm": 0.79771928209597,
      "learning_rate": 3.9488097546315774e-08,
      "loss": 0.1016,
      "step": 32947
    },
    {
      "epoch": 0.9611996032440633,
      "grad_norm": 0.8599903754100809,
      "learning_rate": 3.942886193105955e-08,
      "loss": 0.1198,
      "step": 32948
    },
    {
      "epoch": 0.9612287764747068,
      "grad_norm": 0.7881054768386069,
      "learning_rate": 3.936967060259811e-08,
      "loss": 0.1202,
      "step": 32949
    },
    {
      "epoch": 0.9612579497053504,
      "grad_norm": 0.8539346892614871,
      "learning_rate": 3.931052356145992e-08,
      "loss": 0.0911,
      "step": 32950
    },
    {
      "epoch": 0.9612871229359939,
      "grad_norm": 0.7723839977611351,
      "learning_rate": 3.925142080817346e-08,
      "loss": 0.0975,
      "step": 32951
    },
    {
      "epoch": 0.9613162961666375,
      "grad_norm": 1.1751985653196537,
      "learning_rate": 3.9192362343266065e-08,
      "loss": 0.1021,
      "step": 32952
    },
    {
      "epoch": 0.961345469397281,
      "grad_norm": 1.1151891842326118,
      "learning_rate": 3.913334816726511e-08,
      "loss": 0.1307,
      "step": 32953
    },
    {
      "epoch": 0.9613746426279246,
      "grad_norm": 1.0173224641863299,
      "learning_rate": 3.907437828069738e-08,
      "loss": 0.1285,
      "step": 32954
    },
    {
      "epoch": 0.9614038158585682,
      "grad_norm": 1.1297874265647203,
      "learning_rate": 3.901545268408913e-08,
      "loss": 0.1072,
      "step": 32955
    },
    {
      "epoch": 0.9614329890892117,
      "grad_norm": 0.7802241055611593,
      "learning_rate": 3.8956571377966603e-08,
      "loss": 0.1209,
      "step": 32956
    },
    {
      "epoch": 0.9614621623198553,
      "grad_norm": 0.8205962512299297,
      "learning_rate": 3.889773436285604e-08,
      "loss": 0.1092,
      "step": 32957
    },
    {
      "epoch": 0.9614913355504988,
      "grad_norm": 0.8153359881409391,
      "learning_rate": 3.8838941639282036e-08,
      "loss": 0.143,
      "step": 32958
    },
    {
      "epoch": 0.9615205087811424,
      "grad_norm": 0.8393741738916831,
      "learning_rate": 3.8780193207769154e-08,
      "loss": 0.1295,
      "step": 32959
    },
    {
      "epoch": 0.961549682011786,
      "grad_norm": 0.8312326704942081,
      "learning_rate": 3.8721489068842543e-08,
      "loss": 0.1003,
      "step": 32960
    },
    {
      "epoch": 0.9615788552424296,
      "grad_norm": 0.7485737538452321,
      "learning_rate": 3.866282922302622e-08,
      "loss": 0.1245,
      "step": 32961
    },
    {
      "epoch": 0.9616080284730731,
      "grad_norm": 0.6920980023177091,
      "learning_rate": 3.860421367084366e-08,
      "loss": 0.1222,
      "step": 32962
    },
    {
      "epoch": 0.9616372017037167,
      "grad_norm": 0.8393656485189316,
      "learning_rate": 3.8545642412818327e-08,
      "loss": 0.1136,
      "step": 32963
    },
    {
      "epoch": 0.9616663749343602,
      "grad_norm": 0.6426028953976453,
      "learning_rate": 3.84871154494737e-08,
      "loss": 0.0787,
      "step": 32964
    },
    {
      "epoch": 0.9616955481650038,
      "grad_norm": 0.9316557809494685,
      "learning_rate": 3.842863278133102e-08,
      "loss": 0.1054,
      "step": 32965
    },
    {
      "epoch": 0.9617247213956474,
      "grad_norm": 0.8161632030829696,
      "learning_rate": 3.837019440891321e-08,
      "loss": 0.1011,
      "step": 32966
    },
    {
      "epoch": 0.9617538946262909,
      "grad_norm": 0.86914766144398,
      "learning_rate": 3.8311800332742065e-08,
      "loss": 0.1156,
      "step": 32967
    },
    {
      "epoch": 0.9617830678569345,
      "grad_norm": 0.9001803427227271,
      "learning_rate": 3.825345055333829e-08,
      "loss": 0.1143,
      "step": 32968
    },
    {
      "epoch": 0.961812241087578,
      "grad_norm": 0.6701753188413643,
      "learning_rate": 3.819514507122368e-08,
      "loss": 0.1106,
      "step": 32969
    },
    {
      "epoch": 0.9618414143182216,
      "grad_norm": 0.8365762014485906,
      "learning_rate": 3.813688388691783e-08,
      "loss": 0.124,
      "step": 32970
    },
    {
      "epoch": 0.9618705875488651,
      "grad_norm": 1.013591248087509,
      "learning_rate": 3.807866700094198e-08,
      "loss": 0.119,
      "step": 32971
    },
    {
      "epoch": 0.9618997607795087,
      "grad_norm": 0.7875181488612599,
      "learning_rate": 3.8020494413815165e-08,
      "loss": 0.0823,
      "step": 32972
    },
    {
      "epoch": 0.9619289340101523,
      "grad_norm": 0.7823356857316066,
      "learning_rate": 3.796236612605641e-08,
      "loss": 0.1174,
      "step": 32973
    },
    {
      "epoch": 0.9619581072407959,
      "grad_norm": 0.7649462781377928,
      "learning_rate": 3.790428213818531e-08,
      "loss": 0.122,
      "step": 32974
    },
    {
      "epoch": 0.9619872804714394,
      "grad_norm": 0.7209496697471285,
      "learning_rate": 3.784624245072088e-08,
      "loss": 0.0914,
      "step": 32975
    },
    {
      "epoch": 0.962016453702083,
      "grad_norm": 0.8375459597140754,
      "learning_rate": 3.778824706417994e-08,
      "loss": 0.1241,
      "step": 32976
    },
    {
      "epoch": 0.9620456269327265,
      "grad_norm": 0.6895641257485305,
      "learning_rate": 3.7730295979080956e-08,
      "loss": 0.0936,
      "step": 32977
    },
    {
      "epoch": 0.9620748001633701,
      "grad_norm": 0.9472593070816314,
      "learning_rate": 3.767238919594185e-08,
      "loss": 0.0892,
      "step": 32978
    },
    {
      "epoch": 0.9621039733940137,
      "grad_norm": 0.7789037981698359,
      "learning_rate": 3.761452671527832e-08,
      "loss": 0.1096,
      "step": 32979
    },
    {
      "epoch": 0.9621331466246572,
      "grad_norm": 0.7157690630040628,
      "learning_rate": 3.755670853760773e-08,
      "loss": 0.1179,
      "step": 32980
    },
    {
      "epoch": 0.9621623198553008,
      "grad_norm": 0.9952490367997867,
      "learning_rate": 3.7498934663446897e-08,
      "loss": 0.1266,
      "step": 32981
    },
    {
      "epoch": 0.9621914930859443,
      "grad_norm": 0.78846643130223,
      "learning_rate": 3.7441205093310394e-08,
      "loss": 0.0923,
      "step": 32982
    },
    {
      "epoch": 0.9622206663165879,
      "grad_norm": 0.7882133989258244,
      "learning_rate": 3.738351982771449e-08,
      "loss": 0.1077,
      "step": 32983
    },
    {
      "epoch": 0.9622498395472314,
      "grad_norm": 0.8372383457356164,
      "learning_rate": 3.7325878867173757e-08,
      "loss": 0.113,
      "step": 32984
    },
    {
      "epoch": 0.962279012777875,
      "grad_norm": 0.8303243025515532,
      "learning_rate": 3.72682822122028e-08,
      "loss": 0.1028,
      "step": 32985
    },
    {
      "epoch": 0.9623081860085185,
      "grad_norm": 0.7177457152597371,
      "learning_rate": 3.7210729863315645e-08,
      "loss": 0.0972,
      "step": 32986
    },
    {
      "epoch": 0.9623373592391622,
      "grad_norm": 0.6882980232675601,
      "learning_rate": 3.7153221821026875e-08,
      "loss": 0.1213,
      "step": 32987
    },
    {
      "epoch": 0.9623665324698057,
      "grad_norm": 0.8717886244412169,
      "learning_rate": 3.709575808584942e-08,
      "loss": 0.1317,
      "step": 32988
    },
    {
      "epoch": 0.9623957057004493,
      "grad_norm": 0.7009483093016554,
      "learning_rate": 3.703833865829565e-08,
      "loss": 0.105,
      "step": 32989
    },
    {
      "epoch": 0.9624248789310929,
      "grad_norm": 0.7956738157981716,
      "learning_rate": 3.6980963538879585e-08,
      "loss": 0.1406,
      "step": 32990
    },
    {
      "epoch": 0.9624540521617364,
      "grad_norm": 1.178436146550977,
      "learning_rate": 3.69236327281125e-08,
      "loss": 0.1005,
      "step": 32991
    },
    {
      "epoch": 0.96248322539238,
      "grad_norm": 1.0159017263716346,
      "learning_rate": 3.68663462265062e-08,
      "loss": 0.0968,
      "step": 32992
    },
    {
      "epoch": 0.9625123986230235,
      "grad_norm": 0.7600469535711079,
      "learning_rate": 3.680910403457194e-08,
      "loss": 0.1155,
      "step": 32993
    },
    {
      "epoch": 0.9625415718536671,
      "grad_norm": 0.6000076446619093,
      "learning_rate": 3.6751906152822095e-08,
      "loss": 0.1024,
      "step": 32994
    },
    {
      "epoch": 0.9625707450843106,
      "grad_norm": 0.8937829675335617,
      "learning_rate": 3.669475258176625e-08,
      "loss": 0.1128,
      "step": 32995
    },
    {
      "epoch": 0.9625999183149542,
      "grad_norm": 0.8659494459743594,
      "learning_rate": 3.663764332191455e-08,
      "loss": 0.116,
      "step": 32996
    },
    {
      "epoch": 0.9626290915455977,
      "grad_norm": 1.0176679697840387,
      "learning_rate": 3.658057837377716e-08,
      "loss": 0.0958,
      "step": 32997
    },
    {
      "epoch": 0.9626582647762413,
      "grad_norm": 0.77596651910336,
      "learning_rate": 3.6523557737863646e-08,
      "loss": 0.1245,
      "step": 32998
    },
    {
      "epoch": 0.9626874380068848,
      "grad_norm": 0.7578318550528443,
      "learning_rate": 3.646658141468251e-08,
      "loss": 0.1087,
      "step": 32999
    },
    {
      "epoch": 0.9627166112375285,
      "grad_norm": 0.9924055428571892,
      "learning_rate": 3.640964940474334e-08,
      "loss": 0.1082,
      "step": 33000
    },
    {
      "epoch": 0.962745784468172,
      "grad_norm": 0.7900169952493743,
      "learning_rate": 3.635276170855351e-08,
      "loss": 0.1185,
      "step": 33001
    },
    {
      "epoch": 0.9627749576988156,
      "grad_norm": 0.9883778430563414,
      "learning_rate": 3.629591832662149e-08,
      "loss": 0.1256,
      "step": 33002
    },
    {
      "epoch": 0.9628041309294592,
      "grad_norm": 0.8830936674417562,
      "learning_rate": 3.623911925945467e-08,
      "loss": 0.1038,
      "step": 33003
    },
    {
      "epoch": 0.9628333041601027,
      "grad_norm": 0.9489308384871062,
      "learning_rate": 3.618236450755985e-08,
      "loss": 0.1226,
      "step": 33004
    },
    {
      "epoch": 0.9628624773907463,
      "grad_norm": 0.7970359065891596,
      "learning_rate": 3.6125654071444414e-08,
      "loss": 0.1031,
      "step": 33005
    },
    {
      "epoch": 0.9628916506213898,
      "grad_norm": 0.8478636430615173,
      "learning_rate": 3.606898795161351e-08,
      "loss": 0.1215,
      "step": 33006
    },
    {
      "epoch": 0.9629208238520334,
      "grad_norm": 1.1068734345463336,
      "learning_rate": 3.6012366148574505e-08,
      "loss": 0.1089,
      "step": 33007
    },
    {
      "epoch": 0.9629499970826769,
      "grad_norm": 0.8433289126861183,
      "learning_rate": 3.5955788662831445e-08,
      "loss": 0.1157,
      "step": 33008
    },
    {
      "epoch": 0.9629791703133205,
      "grad_norm": 0.743744804801746,
      "learning_rate": 3.589925549489004e-08,
      "loss": 0.0957,
      "step": 33009
    },
    {
      "epoch": 0.963008343543964,
      "grad_norm": 0.9682467808318107,
      "learning_rate": 3.5842766645255436e-08,
      "loss": 0.1065,
      "step": 33010
    },
    {
      "epoch": 0.9630375167746076,
      "grad_norm": 0.8089201034724288,
      "learning_rate": 3.578632211443112e-08,
      "loss": 0.0963,
      "step": 33011
    },
    {
      "epoch": 0.9630666900052511,
      "grad_norm": 0.7571672096064874,
      "learning_rate": 3.5729921902921684e-08,
      "loss": 0.1121,
      "step": 33012
    },
    {
      "epoch": 0.9630958632358947,
      "grad_norm": 0.8249523443104517,
      "learning_rate": 3.567356601123062e-08,
      "loss": 0.1084,
      "step": 33013
    },
    {
      "epoch": 0.9631250364665384,
      "grad_norm": 0.9194602558745402,
      "learning_rate": 3.561725443986086e-08,
      "loss": 0.1028,
      "step": 33014
    },
    {
      "epoch": 0.9631542096971819,
      "grad_norm": 0.7845212325979251,
      "learning_rate": 3.556098718931478e-08,
      "loss": 0.1123,
      "step": 33015
    },
    {
      "epoch": 0.9631833829278255,
      "grad_norm": 0.8637188543436544,
      "learning_rate": 3.55047642600953e-08,
      "loss": 0.1098,
      "step": 33016
    },
    {
      "epoch": 0.963212556158469,
      "grad_norm": 0.7449570557338919,
      "learning_rate": 3.544858565270426e-08,
      "loss": 0.0923,
      "step": 33017
    },
    {
      "epoch": 0.9632417293891126,
      "grad_norm": 0.8004000862337594,
      "learning_rate": 3.5392451367643466e-08,
      "loss": 0.0861,
      "step": 33018
    },
    {
      "epoch": 0.9632709026197561,
      "grad_norm": 0.6309363146460676,
      "learning_rate": 3.5336361405413076e-08,
      "loss": 0.1095,
      "step": 33019
    },
    {
      "epoch": 0.9633000758503997,
      "grad_norm": 0.7282028087219455,
      "learning_rate": 3.5280315766514915e-08,
      "loss": 0.102,
      "step": 33020
    },
    {
      "epoch": 0.9633292490810432,
      "grad_norm": 0.7912197510853144,
      "learning_rate": 3.522431445144858e-08,
      "loss": 0.0866,
      "step": 33021
    },
    {
      "epoch": 0.9633584223116868,
      "grad_norm": 0.9651112176832288,
      "learning_rate": 3.5168357460714785e-08,
      "loss": 0.1145,
      "step": 33022
    },
    {
      "epoch": 0.9633875955423303,
      "grad_norm": 0.8764524239134768,
      "learning_rate": 3.5112444794812016e-08,
      "loss": 0.119,
      "step": 33023
    },
    {
      "epoch": 0.9634167687729739,
      "grad_norm": 0.7404967584345311,
      "learning_rate": 3.5056576454240984e-08,
      "loss": 0.1232,
      "step": 33024
    },
    {
      "epoch": 0.9634459420036174,
      "grad_norm": 0.9301669890657671,
      "learning_rate": 3.5000752439499076e-08,
      "loss": 0.1166,
      "step": 33025
    },
    {
      "epoch": 0.963475115234261,
      "grad_norm": 0.9760219893991673,
      "learning_rate": 3.494497275108533e-08,
      "loss": 0.1369,
      "step": 33026
    },
    {
      "epoch": 0.9635042884649047,
      "grad_norm": 0.8939962196058359,
      "learning_rate": 3.4889237389497673e-08,
      "loss": 0.1372,
      "step": 33027
    },
    {
      "epoch": 0.9635334616955482,
      "grad_norm": 0.8861108558928846,
      "learning_rate": 3.48335463552335e-08,
      "loss": 0.0916,
      "step": 33028
    },
    {
      "epoch": 0.9635626349261918,
      "grad_norm": 0.6805670338444785,
      "learning_rate": 3.477789964879019e-08,
      "loss": 0.0971,
      "step": 33029
    },
    {
      "epoch": 0.9635918081568353,
      "grad_norm": 0.7093956552409721,
      "learning_rate": 3.4722297270664564e-08,
      "loss": 0.1201,
      "step": 33030
    },
    {
      "epoch": 0.9636209813874789,
      "grad_norm": 0.8640660654636393,
      "learning_rate": 3.4666739221352885e-08,
      "loss": 0.1015,
      "step": 33031
    },
    {
      "epoch": 0.9636501546181224,
      "grad_norm": 1.0140748355593368,
      "learning_rate": 3.461122550135143e-08,
      "loss": 0.1179,
      "step": 33032
    },
    {
      "epoch": 0.963679327848766,
      "grad_norm": 0.7772640268005586,
      "learning_rate": 3.4555756111155356e-08,
      "loss": 0.1227,
      "step": 33033
    },
    {
      "epoch": 0.9637085010794095,
      "grad_norm": 1.1213463004520035,
      "learning_rate": 3.4500331051260383e-08,
      "loss": 0.1257,
      "step": 33034
    },
    {
      "epoch": 0.9637376743100531,
      "grad_norm": 0.8425831603907519,
      "learning_rate": 3.4444950322161106e-08,
      "loss": 0.1079,
      "step": 33035
    },
    {
      "epoch": 0.9637668475406966,
      "grad_norm": 0.6934389390212334,
      "learning_rate": 3.438961392435158e-08,
      "loss": 0.114,
      "step": 33036
    },
    {
      "epoch": 0.9637960207713402,
      "grad_norm": 0.8457633294947993,
      "learning_rate": 3.433432185832641e-08,
      "loss": 0.1563,
      "step": 33037
    },
    {
      "epoch": 0.9638251940019837,
      "grad_norm": 0.8949233028699001,
      "learning_rate": 3.4279074124579094e-08,
      "loss": 0.1168,
      "step": 33038
    },
    {
      "epoch": 0.9638543672326273,
      "grad_norm": 0.6767415914019637,
      "learning_rate": 3.422387072360256e-08,
      "loss": 0.095,
      "step": 33039
    },
    {
      "epoch": 0.9638835404632708,
      "grad_norm": 0.8134499340441764,
      "learning_rate": 3.4168711655889756e-08,
      "loss": 0.1106,
      "step": 33040
    },
    {
      "epoch": 0.9639127136939145,
      "grad_norm": 1.791976430899528,
      "learning_rate": 3.411359692193361e-08,
      "loss": 0.0971,
      "step": 33041
    },
    {
      "epoch": 0.9639418869245581,
      "grad_norm": 0.8628128175687187,
      "learning_rate": 3.405852652222596e-08,
      "loss": 0.0912,
      "step": 33042
    },
    {
      "epoch": 0.9639710601552016,
      "grad_norm": 0.7840498889838119,
      "learning_rate": 3.400350045725809e-08,
      "loss": 0.0998,
      "step": 33043
    },
    {
      "epoch": 0.9640002333858452,
      "grad_norm": 0.6967376898445659,
      "learning_rate": 3.3948518727521807e-08,
      "loss": 0.1097,
      "step": 33044
    },
    {
      "epoch": 0.9640294066164887,
      "grad_norm": 0.9112833338701539,
      "learning_rate": 3.3893581333507286e-08,
      "loss": 0.1213,
      "step": 33045
    },
    {
      "epoch": 0.9640585798471323,
      "grad_norm": 0.8504616077584305,
      "learning_rate": 3.383868827570524e-08,
      "loss": 0.1123,
      "step": 33046
    },
    {
      "epoch": 0.9640877530777758,
      "grad_norm": 0.7451934307086762,
      "learning_rate": 3.3783839554605845e-08,
      "loss": 0.0916,
      "step": 33047
    },
    {
      "epoch": 0.9641169263084194,
      "grad_norm": 0.8904984547388519,
      "learning_rate": 3.372903517069925e-08,
      "loss": 0.1235,
      "step": 33048
    },
    {
      "epoch": 0.9641460995390629,
      "grad_norm": 0.8104490859134511,
      "learning_rate": 3.3674275124473966e-08,
      "loss": 0.0947,
      "step": 33049
    },
    {
      "epoch": 0.9641752727697065,
      "grad_norm": 0.8997780029101679,
      "learning_rate": 3.361955941641959e-08,
      "loss": 0.1266,
      "step": 33050
    },
    {
      "epoch": 0.96420444600035,
      "grad_norm": 0.7583106702966642,
      "learning_rate": 3.356488804702407e-08,
      "loss": 0.1317,
      "step": 33051
    },
    {
      "epoch": 0.9642336192309936,
      "grad_norm": 0.9595040396376152,
      "learning_rate": 3.351026101677535e-08,
      "loss": 0.1255,
      "step": 33052
    },
    {
      "epoch": 0.9642627924616372,
      "grad_norm": 0.8097031344318243,
      "learning_rate": 3.345567832616137e-08,
      "loss": 0.092,
      "step": 33053
    },
    {
      "epoch": 0.9642919656922808,
      "grad_norm": 0.8790094470262768,
      "learning_rate": 3.3401139975669515e-08,
      "loss": 0.1251,
      "step": 33054
    },
    {
      "epoch": 0.9643211389229244,
      "grad_norm": 0.8628659152043027,
      "learning_rate": 3.334664596578718e-08,
      "loss": 0.1039,
      "step": 33055
    },
    {
      "epoch": 0.9643503121535679,
      "grad_norm": 0.8293429931343395,
      "learning_rate": 3.329219629699954e-08,
      "loss": 0.1141,
      "step": 33056
    },
    {
      "epoch": 0.9643794853842115,
      "grad_norm": 0.8531929437695732,
      "learning_rate": 3.323779096979396e-08,
      "loss": 0.0961,
      "step": 33057
    },
    {
      "epoch": 0.964408658614855,
      "grad_norm": 0.721512424405809,
      "learning_rate": 3.3183429984655626e-08,
      "loss": 0.098,
      "step": 33058
    },
    {
      "epoch": 0.9644378318454986,
      "grad_norm": 0.7962545737939721,
      "learning_rate": 3.312911334207025e-08,
      "loss": 0.1024,
      "step": 33059
    },
    {
      "epoch": 0.9644670050761421,
      "grad_norm": 0.8252475799718151,
      "learning_rate": 3.307484104252245e-08,
      "loss": 0.1019,
      "step": 33060
    },
    {
      "epoch": 0.9644961783067857,
      "grad_norm": 0.8107301483904512,
      "learning_rate": 3.302061308649629e-08,
      "loss": 0.0977,
      "step": 33061
    },
    {
      "epoch": 0.9645253515374292,
      "grad_norm": 0.7197481795572213,
      "learning_rate": 3.296642947447693e-08,
      "loss": 0.0951,
      "step": 33062
    },
    {
      "epoch": 0.9645545247680728,
      "grad_norm": 0.8894285379799222,
      "learning_rate": 3.2912290206947305e-08,
      "loss": 0.1271,
      "step": 33063
    },
    {
      "epoch": 0.9645836979987163,
      "grad_norm": 0.6619987909443094,
      "learning_rate": 3.2858195284390936e-08,
      "loss": 0.092,
      "step": 33064
    },
    {
      "epoch": 0.9646128712293599,
      "grad_norm": 0.7370591185472479,
      "learning_rate": 3.280414470729076e-08,
      "loss": 0.1097,
      "step": 33065
    },
    {
      "epoch": 0.9646420444600035,
      "grad_norm": 0.7120999999351729,
      "learning_rate": 3.2750138476129736e-08,
      "loss": 0.0959,
      "step": 33066
    },
    {
      "epoch": 0.964671217690647,
      "grad_norm": 1.7231995176486146,
      "learning_rate": 3.269617659138968e-08,
      "loss": 0.1007,
      "step": 33067
    },
    {
      "epoch": 0.9647003909212907,
      "grad_norm": 0.743186413695021,
      "learning_rate": 3.264225905355245e-08,
      "loss": 0.1084,
      "step": 33068
    },
    {
      "epoch": 0.9647295641519342,
      "grad_norm": 0.6858552204728713,
      "learning_rate": 3.258838586309876e-08,
      "loss": 0.1067,
      "step": 33069
    },
    {
      "epoch": 0.9647587373825778,
      "grad_norm": 0.7390737277577913,
      "learning_rate": 3.253455702051045e-08,
      "loss": 0.132,
      "step": 33070
    },
    {
      "epoch": 0.9647879106132213,
      "grad_norm": 0.7102986768287417,
      "learning_rate": 3.248077252626769e-08,
      "loss": 0.1048,
      "step": 33071
    },
    {
      "epoch": 0.9648170838438649,
      "grad_norm": 0.8894319490504129,
      "learning_rate": 3.2427032380851206e-08,
      "loss": 0.1153,
      "step": 33072
    },
    {
      "epoch": 0.9648462570745084,
      "grad_norm": 1.2816692484979821,
      "learning_rate": 3.2373336584740066e-08,
      "loss": 0.1329,
      "step": 33073
    },
    {
      "epoch": 0.964875430305152,
      "grad_norm": 0.8388061203776557,
      "learning_rate": 3.231968513841388e-08,
      "loss": 0.1132,
      "step": 33074
    },
    {
      "epoch": 0.9649046035357955,
      "grad_norm": 0.8260590103042612,
      "learning_rate": 3.2266078042351155e-08,
      "loss": 0.1003,
      "step": 33075
    },
    {
      "epoch": 0.9649337767664391,
      "grad_norm": 0.8251853949977179,
      "learning_rate": 3.221251529703151e-08,
      "loss": 0.1083,
      "step": 33076
    },
    {
      "epoch": 0.9649629499970827,
      "grad_norm": 0.7431120586565373,
      "learning_rate": 3.2158996902932896e-08,
      "loss": 0.1028,
      "step": 33077
    },
    {
      "epoch": 0.9649921232277262,
      "grad_norm": 1.0436005730416644,
      "learning_rate": 3.21055228605327e-08,
      "loss": 0.1054,
      "step": 33078
    },
    {
      "epoch": 0.9650212964583698,
      "grad_norm": 0.8754435615184252,
      "learning_rate": 3.2052093170307774e-08,
      "loss": 0.0967,
      "step": 33079
    },
    {
      "epoch": 0.9650504696890133,
      "grad_norm": 0.946657646123212,
      "learning_rate": 3.199870783273662e-08,
      "loss": 0.1219,
      "step": 33080
    },
    {
      "epoch": 0.965079642919657,
      "grad_norm": 0.5836997331509567,
      "learning_rate": 3.194536684829497e-08,
      "loss": 0.0944,
      "step": 33081
    },
    {
      "epoch": 0.9651088161503005,
      "grad_norm": 0.8370698797524847,
      "learning_rate": 3.189207021745855e-08,
      "loss": 0.1203,
      "step": 33082
    },
    {
      "epoch": 0.9651379893809441,
      "grad_norm": 0.6781248954224448,
      "learning_rate": 3.1838817940704206e-08,
      "loss": 0.1109,
      "step": 33083
    },
    {
      "epoch": 0.9651671626115876,
      "grad_norm": 0.9151357452442114,
      "learning_rate": 3.178561001850655e-08,
      "loss": 0.1234,
      "step": 33084
    },
    {
      "epoch": 0.9651963358422312,
      "grad_norm": 0.9572796163881453,
      "learning_rate": 3.1732446451341326e-08,
      "loss": 0.1005,
      "step": 33085
    },
    {
      "epoch": 0.9652255090728747,
      "grad_norm": 0.8989409858058658,
      "learning_rate": 3.167932723968259e-08,
      "loss": 0.1152,
      "step": 33086
    },
    {
      "epoch": 0.9652546823035183,
      "grad_norm": 0.9120765378663985,
      "learning_rate": 3.162625238400496e-08,
      "loss": 0.1157,
      "step": 33087
    },
    {
      "epoch": 0.9652838555341618,
      "grad_norm": 0.8992382488484729,
      "learning_rate": 3.157322188478196e-08,
      "loss": 0.1051,
      "step": 33088
    },
    {
      "epoch": 0.9653130287648054,
      "grad_norm": 0.6885380623396845,
      "learning_rate": 3.1520235742487084e-08,
      "loss": 0.089,
      "step": 33089
    },
    {
      "epoch": 0.965342201995449,
      "grad_norm": 0.7241347519213581,
      "learning_rate": 3.1467293957593846e-08,
      "loss": 0.1072,
      "step": 33090
    },
    {
      "epoch": 0.9653713752260925,
      "grad_norm": 0.7173859987060868,
      "learning_rate": 3.1414396530574655e-08,
      "loss": 0.1185,
      "step": 33091
    },
    {
      "epoch": 0.9654005484567361,
      "grad_norm": 0.9488102912576474,
      "learning_rate": 3.136154346190079e-08,
      "loss": 0.1123,
      "step": 33092
    },
    {
      "epoch": 0.9654297216873796,
      "grad_norm": 0.7968865901405182,
      "learning_rate": 3.1308734752045767e-08,
      "loss": 0.1124,
      "step": 33093
    },
    {
      "epoch": 0.9654588949180232,
      "grad_norm": 0.8296046017990707,
      "learning_rate": 3.125597040147976e-08,
      "loss": 0.1047,
      "step": 33094
    },
    {
      "epoch": 0.9654880681486668,
      "grad_norm": 1.1175573532422671,
      "learning_rate": 3.1203250410674625e-08,
      "loss": 0.1087,
      "step": 33095
    },
    {
      "epoch": 0.9655172413793104,
      "grad_norm": 0.8937334867429435,
      "learning_rate": 3.115057478010053e-08,
      "loss": 0.0985,
      "step": 33096
    },
    {
      "epoch": 0.9655464146099539,
      "grad_norm": 1.1154369897194318,
      "learning_rate": 3.1097943510227657e-08,
      "loss": 0.1029,
      "step": 33097
    },
    {
      "epoch": 0.9655755878405975,
      "grad_norm": 0.9569703271745534,
      "learning_rate": 3.1045356601526746e-08,
      "loss": 0.1053,
      "step": 33098
    },
    {
      "epoch": 0.965604761071241,
      "grad_norm": 1.0134480377950137,
      "learning_rate": 3.09928140544663e-08,
      "loss": 0.1022,
      "step": 33099
    },
    {
      "epoch": 0.9656339343018846,
      "grad_norm": 0.969656792964338,
      "learning_rate": 3.094031586951596e-08,
      "loss": 0.1002,
      "step": 33100
    },
    {
      "epoch": 0.9656631075325282,
      "grad_norm": 0.7280858817022674,
      "learning_rate": 3.0887862047144227e-08,
      "loss": 0.0964,
      "step": 33101
    },
    {
      "epoch": 0.9656922807631717,
      "grad_norm": 0.80575138456284,
      "learning_rate": 3.083545258781961e-08,
      "loss": 0.1356,
      "step": 33102
    },
    {
      "epoch": 0.9657214539938153,
      "grad_norm": 0.9630461123158345,
      "learning_rate": 3.078308749200953e-08,
      "loss": 0.1038,
      "step": 33103
    },
    {
      "epoch": 0.9657506272244588,
      "grad_norm": 0.7824225056970916,
      "learning_rate": 3.0730766760182494e-08,
      "loss": 0.1202,
      "step": 33104
    },
    {
      "epoch": 0.9657798004551024,
      "grad_norm": 0.7243585512474819,
      "learning_rate": 3.067849039280424e-08,
      "loss": 0.1011,
      "step": 33105
    },
    {
      "epoch": 0.9658089736857459,
      "grad_norm": 0.8562293576233819,
      "learning_rate": 3.0626258390342165e-08,
      "loss": 0.1141,
      "step": 33106
    },
    {
      "epoch": 0.9658381469163895,
      "grad_norm": 0.7856195383148817,
      "learning_rate": 3.057407075326258e-08,
      "loss": 0.107,
      "step": 33107
    },
    {
      "epoch": 0.9658673201470331,
      "grad_norm": 1.3227329182781364,
      "learning_rate": 3.052192748203175e-08,
      "loss": 0.1194,
      "step": 33108
    },
    {
      "epoch": 0.9658964933776767,
      "grad_norm": 0.8942257656405606,
      "learning_rate": 3.046982857711434e-08,
      "loss": 0.1131,
      "step": 33109
    },
    {
      "epoch": 0.9659256666083202,
      "grad_norm": 0.8446760466420564,
      "learning_rate": 3.0417774038976614e-08,
      "loss": 0.1032,
      "step": 33110
    },
    {
      "epoch": 0.9659548398389638,
      "grad_norm": 0.656919495620798,
      "learning_rate": 3.0365763868082096e-08,
      "loss": 0.0995,
      "step": 33111
    },
    {
      "epoch": 0.9659840130696074,
      "grad_norm": 0.6938332147972859,
      "learning_rate": 3.031379806489598e-08,
      "loss": 0.1018,
      "step": 33112
    },
    {
      "epoch": 0.9660131863002509,
      "grad_norm": 0.8633441793085521,
      "learning_rate": 3.026187662988178e-08,
      "loss": 0.1079,
      "step": 33113
    },
    {
      "epoch": 0.9660423595308945,
      "grad_norm": 0.9591116687070425,
      "learning_rate": 3.0209999563503015e-08,
      "loss": 0.1093,
      "step": 33114
    },
    {
      "epoch": 0.966071532761538,
      "grad_norm": 0.7233070936573648,
      "learning_rate": 3.015816686622319e-08,
      "loss": 0.1051,
      "step": 33115
    },
    {
      "epoch": 0.9661007059921816,
      "grad_norm": 0.8609837590828512,
      "learning_rate": 3.010637853850473e-08,
      "loss": 0.1113,
      "step": 33116
    },
    {
      "epoch": 0.9661298792228251,
      "grad_norm": 0.8313443937140413,
      "learning_rate": 3.0054634580810594e-08,
      "loss": 0.0928,
      "step": 33117
    },
    {
      "epoch": 0.9661590524534687,
      "grad_norm": 0.8196854774424199,
      "learning_rate": 3.000293499360207e-08,
      "loss": 0.0932,
      "step": 33118
    },
    {
      "epoch": 0.9661882256841122,
      "grad_norm": 0.7198436893292882,
      "learning_rate": 2.995127977734047e-08,
      "loss": 0.1083,
      "step": 33119
    },
    {
      "epoch": 0.9662173989147558,
      "grad_norm": 0.9496831099723301,
      "learning_rate": 2.9899668932487636e-08,
      "loss": 0.0956,
      "step": 33120
    },
    {
      "epoch": 0.9662465721453993,
      "grad_norm": 0.7647315277194521,
      "learning_rate": 2.984810245950431e-08,
      "loss": 0.0781,
      "step": 33121
    },
    {
      "epoch": 0.966275745376043,
      "grad_norm": 0.8740475248357543,
      "learning_rate": 2.9796580358850134e-08,
      "loss": 0.1247,
      "step": 33122
    },
    {
      "epoch": 0.9663049186066865,
      "grad_norm": 0.8668125591352983,
      "learning_rate": 2.9745102630985844e-08,
      "loss": 0.1056,
      "step": 33123
    },
    {
      "epoch": 0.9663340918373301,
      "grad_norm": 0.8427167068623925,
      "learning_rate": 2.9693669276371073e-08,
      "loss": 0.1337,
      "step": 33124
    },
    {
      "epoch": 0.9663632650679737,
      "grad_norm": 0.9657409626030692,
      "learning_rate": 2.96422802954649e-08,
      "loss": 0.1255,
      "step": 33125
    },
    {
      "epoch": 0.9663924382986172,
      "grad_norm": 0.8345508028686581,
      "learning_rate": 2.9590935688725288e-08,
      "loss": 0.1361,
      "step": 33126
    },
    {
      "epoch": 0.9664216115292608,
      "grad_norm": 1.0172467489185282,
      "learning_rate": 2.9539635456611872e-08,
      "loss": 0.0841,
      "step": 33127
    },
    {
      "epoch": 0.9664507847599043,
      "grad_norm": 0.8745450174531825,
      "learning_rate": 2.9488379599581507e-08,
      "loss": 0.1381,
      "step": 33128
    },
    {
      "epoch": 0.9664799579905479,
      "grad_norm": 1.157553950817821,
      "learning_rate": 2.943716811809272e-08,
      "loss": 0.1123,
      "step": 33129
    },
    {
      "epoch": 0.9665091312211914,
      "grad_norm": 0.8153465544904627,
      "learning_rate": 2.9386001012601805e-08,
      "loss": 0.1208,
      "step": 33130
    },
    {
      "epoch": 0.966538304451835,
      "grad_norm": 0.7524619022845139,
      "learning_rate": 2.9334878283566737e-08,
      "loss": 0.1063,
      "step": 33131
    },
    {
      "epoch": 0.9665674776824785,
      "grad_norm": 0.8994755021243348,
      "learning_rate": 2.9283799931442704e-08,
      "loss": 0.1001,
      "step": 33132
    },
    {
      "epoch": 0.9665966509131221,
      "grad_norm": 0.7204162617885567,
      "learning_rate": 2.923276595668656e-08,
      "loss": 0.1077,
      "step": 33133
    },
    {
      "epoch": 0.9666258241437656,
      "grad_norm": 0.8019148939981262,
      "learning_rate": 2.9181776359754054e-08,
      "loss": 0.1395,
      "step": 33134
    },
    {
      "epoch": 0.9666549973744093,
      "grad_norm": 0.9545709462728342,
      "learning_rate": 2.9130831141099268e-08,
      "loss": 0.0897,
      "step": 33135
    },
    {
      "epoch": 0.9666841706050529,
      "grad_norm": 0.7268565055547692,
      "learning_rate": 2.907993030117795e-08,
      "loss": 0.101,
      "step": 33136
    },
    {
      "epoch": 0.9667133438356964,
      "grad_norm": 0.7361034516151381,
      "learning_rate": 2.9029073840444733e-08,
      "loss": 0.1089,
      "step": 33137
    },
    {
      "epoch": 0.96674251706634,
      "grad_norm": 1.0953714849920393,
      "learning_rate": 2.89782617593537e-08,
      "loss": 0.1053,
      "step": 33138
    },
    {
      "epoch": 0.9667716902969835,
      "grad_norm": 0.7727801956727839,
      "learning_rate": 2.8927494058357265e-08,
      "loss": 0.0999,
      "step": 33139
    },
    {
      "epoch": 0.9668008635276271,
      "grad_norm": 0.9204105396856075,
      "learning_rate": 2.887677073790951e-08,
      "loss": 0.0884,
      "step": 33140
    },
    {
      "epoch": 0.9668300367582706,
      "grad_norm": 0.7203501760453466,
      "learning_rate": 2.882609179846341e-08,
      "loss": 0.0931,
      "step": 33141
    },
    {
      "epoch": 0.9668592099889142,
      "grad_norm": 0.7463886266732962,
      "learning_rate": 2.877545724047137e-08,
      "loss": 0.1241,
      "step": 33142
    },
    {
      "epoch": 0.9668883832195577,
      "grad_norm": 0.693979411774402,
      "learning_rate": 2.8724867064385265e-08,
      "loss": 0.0919,
      "step": 33143
    },
    {
      "epoch": 0.9669175564502013,
      "grad_norm": 0.7106745996989098,
      "learning_rate": 2.8674321270656946e-08,
      "loss": 0.1145,
      "step": 33144
    },
    {
      "epoch": 0.9669467296808448,
      "grad_norm": 0.8100047697571606,
      "learning_rate": 2.8623819859737168e-08,
      "loss": 0.1155,
      "step": 33145
    },
    {
      "epoch": 0.9669759029114884,
      "grad_norm": 0.7790447099378133,
      "learning_rate": 2.8573362832077234e-08,
      "loss": 0.098,
      "step": 33146
    },
    {
      "epoch": 0.9670050761421319,
      "grad_norm": 0.7495023717099109,
      "learning_rate": 2.8522950188127342e-08,
      "loss": 0.1144,
      "step": 33147
    },
    {
      "epoch": 0.9670342493727755,
      "grad_norm": 0.8861864894397857,
      "learning_rate": 2.847258192833824e-08,
      "loss": 0.1241,
      "step": 33148
    },
    {
      "epoch": 0.9670634226034192,
      "grad_norm": 1.1470730068790005,
      "learning_rate": 2.8422258053159014e-08,
      "loss": 0.1134,
      "step": 33149
    },
    {
      "epoch": 0.9670925958340627,
      "grad_norm": 0.7778296006944421,
      "learning_rate": 2.8371978563039304e-08,
      "loss": 0.1124,
      "step": 33150
    },
    {
      "epoch": 0.9671217690647063,
      "grad_norm": 0.648556325252706,
      "learning_rate": 2.8321743458427087e-08,
      "loss": 0.1219,
      "step": 33151
    },
    {
      "epoch": 0.9671509422953498,
      "grad_norm": 0.8489118138471465,
      "learning_rate": 2.8271552739772e-08,
      "loss": 0.1121,
      "step": 33152
    },
    {
      "epoch": 0.9671801155259934,
      "grad_norm": 1.0260411798809466,
      "learning_rate": 2.8221406407521466e-08,
      "loss": 0.1003,
      "step": 33153
    },
    {
      "epoch": 0.9672092887566369,
      "grad_norm": 0.7700507317507708,
      "learning_rate": 2.817130446212346e-08,
      "loss": 0.1063,
      "step": 33154
    },
    {
      "epoch": 0.9672384619872805,
      "grad_norm": 0.8193315575784067,
      "learning_rate": 2.81212469040254e-08,
      "loss": 0.1049,
      "step": 33155
    },
    {
      "epoch": 0.967267635217924,
      "grad_norm": 0.900771176844274,
      "learning_rate": 2.8071233733673597e-08,
      "loss": 0.1234,
      "step": 33156
    },
    {
      "epoch": 0.9672968084485676,
      "grad_norm": 0.9560339972914657,
      "learning_rate": 2.8021264951514916e-08,
      "loss": 0.0932,
      "step": 33157
    },
    {
      "epoch": 0.9673259816792111,
      "grad_norm": 0.6546265651253206,
      "learning_rate": 2.7971340557995665e-08,
      "loss": 0.1132,
      "step": 33158
    },
    {
      "epoch": 0.9673551549098547,
      "grad_norm": 1.0290057482398285,
      "learning_rate": 2.7921460553561042e-08,
      "loss": 0.1238,
      "step": 33159
    },
    {
      "epoch": 0.9673843281404982,
      "grad_norm": 0.9428544993149429,
      "learning_rate": 2.7871624938656805e-08,
      "loss": 0.1157,
      "step": 33160
    },
    {
      "epoch": 0.9674135013711418,
      "grad_norm": 1.0415995121814428,
      "learning_rate": 2.7821833713728152e-08,
      "loss": 0.1169,
      "step": 33161
    },
    {
      "epoch": 0.9674426746017853,
      "grad_norm": 0.6790913256221224,
      "learning_rate": 2.7772086879218617e-08,
      "loss": 0.1206,
      "step": 33162
    },
    {
      "epoch": 0.967471847832429,
      "grad_norm": 0.7560335334944387,
      "learning_rate": 2.77223844355734e-08,
      "loss": 0.0954,
      "step": 33163
    },
    {
      "epoch": 0.9675010210630726,
      "grad_norm": 0.9423620708381659,
      "learning_rate": 2.7672726383235482e-08,
      "loss": 0.105,
      "step": 33164
    },
    {
      "epoch": 0.9675301942937161,
      "grad_norm": 0.7353544534745028,
      "learning_rate": 2.7623112722648394e-08,
      "loss": 0.1118,
      "step": 33165
    },
    {
      "epoch": 0.9675593675243597,
      "grad_norm": 0.8067071208282636,
      "learning_rate": 2.757354345425567e-08,
      "loss": 0.1131,
      "step": 33166
    },
    {
      "epoch": 0.9675885407550032,
      "grad_norm": 0.9932651772818989,
      "learning_rate": 2.7524018578498623e-08,
      "loss": 0.1231,
      "step": 33167
    },
    {
      "epoch": 0.9676177139856468,
      "grad_norm": 0.8114355437385394,
      "learning_rate": 2.7474538095820792e-08,
      "loss": 0.1231,
      "step": 33168
    },
    {
      "epoch": 0.9676468872162903,
      "grad_norm": 0.8220403344879893,
      "learning_rate": 2.742510200666293e-08,
      "loss": 0.1324,
      "step": 33169
    },
    {
      "epoch": 0.9676760604469339,
      "grad_norm": 0.9237094451487354,
      "learning_rate": 2.737571031146691e-08,
      "loss": 0.1029,
      "step": 33170
    },
    {
      "epoch": 0.9677052336775774,
      "grad_norm": 0.8566931516908737,
      "learning_rate": 2.732636301067293e-08,
      "loss": 0.1103,
      "step": 33171
    },
    {
      "epoch": 0.967734406908221,
      "grad_norm": 0.8089451056696318,
      "learning_rate": 2.7277060104722865e-08,
      "loss": 0.1302,
      "step": 33172
    },
    {
      "epoch": 0.9677635801388645,
      "grad_norm": 0.8801491851632947,
      "learning_rate": 2.722780159405525e-08,
      "loss": 0.1215,
      "step": 33173
    },
    {
      "epoch": 0.9677927533695081,
      "grad_norm": 0.894810803757725,
      "learning_rate": 2.7178587479111397e-08,
      "loss": 0.0923,
      "step": 33174
    },
    {
      "epoch": 0.9678219266001516,
      "grad_norm": 0.920052125297922,
      "learning_rate": 2.7129417760329846e-08,
      "loss": 0.1213,
      "step": 33175
    },
    {
      "epoch": 0.9678510998307953,
      "grad_norm": 0.9199774232586273,
      "learning_rate": 2.708029243814969e-08,
      "loss": 0.1189,
      "step": 33176
    },
    {
      "epoch": 0.9678802730614389,
      "grad_norm": 0.776353475684395,
      "learning_rate": 2.703121151300947e-08,
      "loss": 0.1001,
      "step": 33177
    },
    {
      "epoch": 0.9679094462920824,
      "grad_norm": 0.9131735148224904,
      "learning_rate": 2.698217498534772e-08,
      "loss": 0.1248,
      "step": 33178
    },
    {
      "epoch": 0.967938619522726,
      "grad_norm": 0.7870175820259996,
      "learning_rate": 2.693318285560187e-08,
      "loss": 0.1014,
      "step": 33179
    },
    {
      "epoch": 0.9679677927533695,
      "grad_norm": 0.8996599779335045,
      "learning_rate": 2.6884235124209345e-08,
      "loss": 0.1041,
      "step": 33180
    },
    {
      "epoch": 0.9679969659840131,
      "grad_norm": 0.8984752240465178,
      "learning_rate": 2.6835331791607023e-08,
      "loss": 0.134,
      "step": 33181
    },
    {
      "epoch": 0.9680261392146566,
      "grad_norm": 0.7918699501152622,
      "learning_rate": 2.6786472858231772e-08,
      "loss": 0.1004,
      "step": 33182
    },
    {
      "epoch": 0.9680553124453002,
      "grad_norm": 0.7766932154413836,
      "learning_rate": 2.673765832451991e-08,
      "loss": 0.1131,
      "step": 33183
    },
    {
      "epoch": 0.9680844856759437,
      "grad_norm": 1.3130603270873022,
      "learning_rate": 2.6688888190906647e-08,
      "loss": 0.0971,
      "step": 33184
    },
    {
      "epoch": 0.9681136589065873,
      "grad_norm": 0.9150769599954006,
      "learning_rate": 2.66401624578283e-08,
      "loss": 0.1051,
      "step": 33185
    },
    {
      "epoch": 0.9681428321372308,
      "grad_norm": 0.9161269213580724,
      "learning_rate": 2.6591481125718967e-08,
      "loss": 0.1002,
      "step": 33186
    },
    {
      "epoch": 0.9681720053678744,
      "grad_norm": 0.6980169049739354,
      "learning_rate": 2.6542844195013297e-08,
      "loss": 0.1066,
      "step": 33187
    },
    {
      "epoch": 0.968201178598518,
      "grad_norm": 0.7662191903971364,
      "learning_rate": 2.6494251666146497e-08,
      "loss": 0.092,
      "step": 33188
    },
    {
      "epoch": 0.9682303518291615,
      "grad_norm": 0.9375710003997315,
      "learning_rate": 2.644570353955156e-08,
      "loss": 0.1104,
      "step": 33189
    },
    {
      "epoch": 0.9682595250598052,
      "grad_norm": 0.6504484474349596,
      "learning_rate": 2.6397199815662022e-08,
      "loss": 0.1005,
      "step": 33190
    },
    {
      "epoch": 0.9682886982904487,
      "grad_norm": 0.8032264552360474,
      "learning_rate": 2.6348740494910875e-08,
      "loss": 0.1198,
      "step": 33191
    },
    {
      "epoch": 0.9683178715210923,
      "grad_norm": 0.9963163192859755,
      "learning_rate": 2.6300325577731102e-08,
      "loss": 0.11,
      "step": 33192
    },
    {
      "epoch": 0.9683470447517358,
      "grad_norm": 0.9106261616915791,
      "learning_rate": 2.625195506455458e-08,
      "loss": 0.1056,
      "step": 33193
    },
    {
      "epoch": 0.9683762179823794,
      "grad_norm": 2.154062381770336,
      "learning_rate": 2.6203628955813188e-08,
      "loss": 0.1141,
      "step": 33194
    },
    {
      "epoch": 0.9684053912130229,
      "grad_norm": 1.0892300851194652,
      "learning_rate": 2.6155347251938247e-08,
      "loss": 0.1288,
      "step": 33195
    },
    {
      "epoch": 0.9684345644436665,
      "grad_norm": 0.8808649017784892,
      "learning_rate": 2.610710995336163e-08,
      "loss": 0.1041,
      "step": 33196
    },
    {
      "epoch": 0.96846373767431,
      "grad_norm": 0.875432106744955,
      "learning_rate": 2.6058917060513002e-08,
      "loss": 0.0885,
      "step": 33197
    },
    {
      "epoch": 0.9684929109049536,
      "grad_norm": 0.7705466745780121,
      "learning_rate": 2.601076857382312e-08,
      "loss": 0.1057,
      "step": 33198
    },
    {
      "epoch": 0.9685220841355971,
      "grad_norm": 0.7848633989472745,
      "learning_rate": 2.5962664493721646e-08,
      "loss": 0.1077,
      "step": 33199
    },
    {
      "epoch": 0.9685512573662407,
      "grad_norm": 0.9357134369685914,
      "learning_rate": 2.5914604820638233e-08,
      "loss": 0.105,
      "step": 33200
    },
    {
      "epoch": 0.9685804305968843,
      "grad_norm": 1.0718354077973316,
      "learning_rate": 2.5866589555001432e-08,
      "loss": 0.1006,
      "step": 33201
    },
    {
      "epoch": 0.9686096038275278,
      "grad_norm": 0.9688733295168274,
      "learning_rate": 2.5818618697240337e-08,
      "loss": 0.1132,
      "step": 33202
    },
    {
      "epoch": 0.9686387770581715,
      "grad_norm": 0.8703435809535759,
      "learning_rate": 2.5770692247783502e-08,
      "loss": 0.0975,
      "step": 33203
    },
    {
      "epoch": 0.968667950288815,
      "grad_norm": 0.7218303744110426,
      "learning_rate": 2.5722810207058356e-08,
      "loss": 0.0993,
      "step": 33204
    },
    {
      "epoch": 0.9686971235194586,
      "grad_norm": 0.7759747512927662,
      "learning_rate": 2.5674972575492896e-08,
      "loss": 0.1138,
      "step": 33205
    },
    {
      "epoch": 0.9687262967501021,
      "grad_norm": 0.8570026436756201,
      "learning_rate": 2.562717935351289e-08,
      "loss": 0.1186,
      "step": 33206
    },
    {
      "epoch": 0.9687554699807457,
      "grad_norm": 0.7398037588879186,
      "learning_rate": 2.5579430541546324e-08,
      "loss": 0.0901,
      "step": 33207
    },
    {
      "epoch": 0.9687846432113892,
      "grad_norm": 0.7236714997816808,
      "learning_rate": 2.553172614001953e-08,
      "loss": 0.1161,
      "step": 33208
    },
    {
      "epoch": 0.9688138164420328,
      "grad_norm": 1.2607678151739428,
      "learning_rate": 2.5484066149357723e-08,
      "loss": 0.1117,
      "step": 33209
    },
    {
      "epoch": 0.9688429896726763,
      "grad_norm": 0.9101920600804906,
      "learning_rate": 2.543645056998667e-08,
      "loss": 0.0882,
      "step": 33210
    },
    {
      "epoch": 0.9688721629033199,
      "grad_norm": 0.8108032443512756,
      "learning_rate": 2.538887940233159e-08,
      "loss": 0.1143,
      "step": 33211
    },
    {
      "epoch": 0.9689013361339635,
      "grad_norm": 0.7041602985171656,
      "learning_rate": 2.5341352646816585e-08,
      "loss": 0.1145,
      "step": 33212
    },
    {
      "epoch": 0.968930509364607,
      "grad_norm": 0.8401975422025159,
      "learning_rate": 2.5293870303866876e-08,
      "loss": 0.1186,
      "step": 33213
    },
    {
      "epoch": 0.9689596825952506,
      "grad_norm": 0.8464830955458004,
      "learning_rate": 2.5246432373906004e-08,
      "loss": 0.1015,
      "step": 33214
    },
    {
      "epoch": 0.9689888558258941,
      "grad_norm": 0.8387548712600454,
      "learning_rate": 2.5199038857357526e-08,
      "loss": 0.096,
      "step": 33215
    },
    {
      "epoch": 0.9690180290565377,
      "grad_norm": 0.9199625599619493,
      "learning_rate": 2.5151689754643883e-08,
      "loss": 0.1151,
      "step": 33216
    },
    {
      "epoch": 0.9690472022871813,
      "grad_norm": 1.3089082232519005,
      "learning_rate": 2.5104385066188618e-08,
      "loss": 0.1145,
      "step": 33217
    },
    {
      "epoch": 0.9690763755178249,
      "grad_norm": 0.8277829418479505,
      "learning_rate": 2.505712479241418e-08,
      "loss": 0.0957,
      "step": 33218
    },
    {
      "epoch": 0.9691055487484684,
      "grad_norm": 0.6815338576041906,
      "learning_rate": 2.5009908933741335e-08,
      "loss": 0.106,
      "step": 33219
    },
    {
      "epoch": 0.969134721979112,
      "grad_norm": 0.8492507818011145,
      "learning_rate": 2.496273749059308e-08,
      "loss": 0.1094,
      "step": 33220
    },
    {
      "epoch": 0.9691638952097555,
      "grad_norm": 0.775050926146816,
      "learning_rate": 2.4915610463389637e-08,
      "loss": 0.0947,
      "step": 33221
    },
    {
      "epoch": 0.9691930684403991,
      "grad_norm": 0.7439142650091273,
      "learning_rate": 2.486852785255178e-08,
      "loss": 0.1078,
      "step": 33222
    },
    {
      "epoch": 0.9692222416710427,
      "grad_norm": 0.9900561762145669,
      "learning_rate": 2.4821489658500286e-08,
      "loss": 0.126,
      "step": 33223
    },
    {
      "epoch": 0.9692514149016862,
      "grad_norm": 0.7834785723011627,
      "learning_rate": 2.4774495881654813e-08,
      "loss": 0.113,
      "step": 33224
    },
    {
      "epoch": 0.9692805881323298,
      "grad_norm": 0.7419422618761509,
      "learning_rate": 2.472754652243503e-08,
      "loss": 0.0971,
      "step": 33225
    },
    {
      "epoch": 0.9693097613629733,
      "grad_norm": 1.0690866161387549,
      "learning_rate": 2.468064158125949e-08,
      "loss": 0.1132,
      "step": 33226
    },
    {
      "epoch": 0.9693389345936169,
      "grad_norm": 0.9161898550399602,
      "learning_rate": 2.463378105854841e-08,
      "loss": 0.139,
      "step": 33227
    },
    {
      "epoch": 0.9693681078242604,
      "grad_norm": 0.8379852469233645,
      "learning_rate": 2.4586964954718683e-08,
      "loss": 0.1007,
      "step": 33228
    },
    {
      "epoch": 0.969397281054904,
      "grad_norm": 0.7514237099956552,
      "learning_rate": 2.454019327018886e-08,
      "loss": 0.1222,
      "step": 33229
    },
    {
      "epoch": 0.9694264542855476,
      "grad_norm": 0.8241317805620066,
      "learning_rate": 2.449346600537639e-08,
      "loss": 0.1079,
      "step": 33230
    },
    {
      "epoch": 0.9694556275161912,
      "grad_norm": 0.9168777228764998,
      "learning_rate": 2.4446783160698152e-08,
      "loss": 0.1072,
      "step": 33231
    },
    {
      "epoch": 0.9694848007468347,
      "grad_norm": 0.8124714608903882,
      "learning_rate": 2.440014473657215e-08,
      "loss": 0.103,
      "step": 33232
    },
    {
      "epoch": 0.9695139739774783,
      "grad_norm": 0.7571477527597106,
      "learning_rate": 2.4353550733413056e-08,
      "loss": 0.1044,
      "step": 33233
    },
    {
      "epoch": 0.9695431472081218,
      "grad_norm": 0.75046520235371,
      "learning_rate": 2.430700115163831e-08,
      "loss": 0.1065,
      "step": 33234
    },
    {
      "epoch": 0.9695723204387654,
      "grad_norm": 0.8101537463858147,
      "learning_rate": 2.426049599166258e-08,
      "loss": 0.0979,
      "step": 33235
    },
    {
      "epoch": 0.969601493669409,
      "grad_norm": 0.9014119870855315,
      "learning_rate": 2.4214035253901093e-08,
      "loss": 0.1058,
      "step": 33236
    },
    {
      "epoch": 0.9696306669000525,
      "grad_norm": 1.1299225630003138,
      "learning_rate": 2.4167618938769066e-08,
      "loss": 0.1026,
      "step": 33237
    },
    {
      "epoch": 0.9696598401306961,
      "grad_norm": 0.9588030555201384,
      "learning_rate": 2.4121247046681174e-08,
      "loss": 0.1032,
      "step": 33238
    },
    {
      "epoch": 0.9696890133613396,
      "grad_norm": 1.2022350808963467,
      "learning_rate": 2.4074919578050415e-08,
      "loss": 0.0923,
      "step": 33239
    },
    {
      "epoch": 0.9697181865919832,
      "grad_norm": 0.8357029085217434,
      "learning_rate": 2.4028636533290904e-08,
      "loss": 0.1014,
      "step": 33240
    },
    {
      "epoch": 0.9697473598226267,
      "grad_norm": 0.7351472449121997,
      "learning_rate": 2.3982397912816203e-08,
      "loss": 0.1172,
      "step": 33241
    },
    {
      "epoch": 0.9697765330532703,
      "grad_norm": 0.8882296353447906,
      "learning_rate": 2.3936203717038753e-08,
      "loss": 0.1098,
      "step": 33242
    },
    {
      "epoch": 0.9698057062839138,
      "grad_norm": 0.9327990009193649,
      "learning_rate": 2.389005394637045e-08,
      "loss": 0.117,
      "step": 33243
    },
    {
      "epoch": 0.9698348795145575,
      "grad_norm": 0.8334900163090636,
      "learning_rate": 2.38439486012243e-08,
      "loss": 0.1141,
      "step": 33244
    },
    {
      "epoch": 0.969864052745201,
      "grad_norm": 0.8102328846098208,
      "learning_rate": 2.3797887682011632e-08,
      "loss": 0.1138,
      "step": 33245
    },
    {
      "epoch": 0.9698932259758446,
      "grad_norm": 0.8732129555678707,
      "learning_rate": 2.375187118914324e-08,
      "loss": 0.1265,
      "step": 33246
    },
    {
      "epoch": 0.9699223992064882,
      "grad_norm": 0.9265668905180008,
      "learning_rate": 2.3705899123030452e-08,
      "loss": 0.1265,
      "step": 33247
    },
    {
      "epoch": 0.9699515724371317,
      "grad_norm": 0.7005632848767195,
      "learning_rate": 2.36599714840835e-08,
      "loss": 0.1314,
      "step": 33248
    },
    {
      "epoch": 0.9699807456677753,
      "grad_norm": 0.7864713792294779,
      "learning_rate": 2.3614088272712055e-08,
      "loss": 0.1068,
      "step": 33249
    },
    {
      "epoch": 0.9700099188984188,
      "grad_norm": 0.7727446005676165,
      "learning_rate": 2.3568249489325788e-08,
      "loss": 0.1133,
      "step": 33250
    },
    {
      "epoch": 0.9700390921290624,
      "grad_norm": 0.8746891260098911,
      "learning_rate": 2.3522455134334932e-08,
      "loss": 0.1133,
      "step": 33251
    },
    {
      "epoch": 0.9700682653597059,
      "grad_norm": 0.8062800368374295,
      "learning_rate": 2.347670520814749e-08,
      "loss": 0.1235,
      "step": 33252
    },
    {
      "epoch": 0.9700974385903495,
      "grad_norm": 0.9503210236297273,
      "learning_rate": 2.3430999711171466e-08,
      "loss": 0.1366,
      "step": 33253
    },
    {
      "epoch": 0.970126611820993,
      "grad_norm": 0.7298905343904102,
      "learning_rate": 2.338533864381598e-08,
      "loss": 0.1053,
      "step": 33254
    },
    {
      "epoch": 0.9701557850516366,
      "grad_norm": 0.8383404371548997,
      "learning_rate": 2.333972200648793e-08,
      "loss": 0.1275,
      "step": 33255
    },
    {
      "epoch": 0.9701849582822801,
      "grad_norm": 0.8118394073432936,
      "learning_rate": 2.329414979959477e-08,
      "loss": 0.0874,
      "step": 33256
    },
    {
      "epoch": 0.9702141315129238,
      "grad_norm": 0.7989731379292758,
      "learning_rate": 2.3248622023543387e-08,
      "loss": 0.1127,
      "step": 33257
    },
    {
      "epoch": 0.9702433047435673,
      "grad_norm": 0.6262229387101635,
      "learning_rate": 2.320313867874069e-08,
      "loss": 0.1175,
      "step": 33258
    },
    {
      "epoch": 0.9702724779742109,
      "grad_norm": 0.8163687193739113,
      "learning_rate": 2.3157699765591902e-08,
      "loss": 0.1135,
      "step": 33259
    },
    {
      "epoch": 0.9703016512048545,
      "grad_norm": 0.8569892640654317,
      "learning_rate": 2.3112305284503365e-08,
      "loss": 0.1125,
      "step": 33260
    },
    {
      "epoch": 0.970330824435498,
      "grad_norm": 0.7352684456279495,
      "learning_rate": 2.3066955235879763e-08,
      "loss": 0.1159,
      "step": 33261
    },
    {
      "epoch": 0.9703599976661416,
      "grad_norm": 0.6172889263874495,
      "learning_rate": 2.3021649620126873e-08,
      "loss": 0.1172,
      "step": 33262
    },
    {
      "epoch": 0.9703891708967851,
      "grad_norm": 0.8463945571490848,
      "learning_rate": 2.2976388437648267e-08,
      "loss": 0.106,
      "step": 33263
    },
    {
      "epoch": 0.9704183441274287,
      "grad_norm": 1.0325710207608405,
      "learning_rate": 2.2931171688848066e-08,
      "loss": 0.1194,
      "step": 33264
    },
    {
      "epoch": 0.9704475173580722,
      "grad_norm": 0.78629712982603,
      "learning_rate": 2.288599937413094e-08,
      "loss": 0.1122,
      "step": 33265
    },
    {
      "epoch": 0.9704766905887158,
      "grad_norm": 0.7600775276779111,
      "learning_rate": 2.2840871493898798e-08,
      "loss": 0.0942,
      "step": 33266
    },
    {
      "epoch": 0.9705058638193593,
      "grad_norm": 0.9811926471288643,
      "learning_rate": 2.27957880485552e-08,
      "loss": 0.1302,
      "step": 33267
    },
    {
      "epoch": 0.9705350370500029,
      "grad_norm": 0.8820362620981588,
      "learning_rate": 2.2750749038503162e-08,
      "loss": 0.0909,
      "step": 33268
    },
    {
      "epoch": 0.9705642102806464,
      "grad_norm": 0.6327932055874735,
      "learning_rate": 2.2705754464144024e-08,
      "loss": 0.1087,
      "step": 33269
    },
    {
      "epoch": 0.97059338351129,
      "grad_norm": 0.9295755842950882,
      "learning_rate": 2.266080432587969e-08,
      "loss": 0.1172,
      "step": 33270
    },
    {
      "epoch": 0.9706225567419337,
      "grad_norm": 0.9489213979627705,
      "learning_rate": 2.261589862411151e-08,
      "loss": 0.1073,
      "step": 33271
    },
    {
      "epoch": 0.9706517299725772,
      "grad_norm": 1.1446728499654448,
      "learning_rate": 2.2571037359240268e-08,
      "loss": 0.1114,
      "step": 33272
    },
    {
      "epoch": 0.9706809032032208,
      "grad_norm": 0.767914187023319,
      "learning_rate": 2.2526220531666752e-08,
      "loss": 0.1168,
      "step": 33273
    },
    {
      "epoch": 0.9707100764338643,
      "grad_norm": 0.7252827403882242,
      "learning_rate": 2.2481448141791206e-08,
      "loss": 0.1176,
      "step": 33274
    },
    {
      "epoch": 0.9707392496645079,
      "grad_norm": 0.7670138313431533,
      "learning_rate": 2.243672019001275e-08,
      "loss": 0.0991,
      "step": 33275
    },
    {
      "epoch": 0.9707684228951514,
      "grad_norm": 0.8109237412730789,
      "learning_rate": 2.2392036676730512e-08,
      "loss": 0.128,
      "step": 33276
    },
    {
      "epoch": 0.970797596125795,
      "grad_norm": 0.7587996528575016,
      "learning_rate": 2.2347397602344722e-08,
      "loss": 0.1018,
      "step": 33277
    },
    {
      "epoch": 0.9708267693564385,
      "grad_norm": 1.1152898987839726,
      "learning_rate": 2.2302802967252847e-08,
      "loss": 0.101,
      "step": 33278
    },
    {
      "epoch": 0.9708559425870821,
      "grad_norm": 0.7740685771683296,
      "learning_rate": 2.225825277185345e-08,
      "loss": 0.1029,
      "step": 33279
    },
    {
      "epoch": 0.9708851158177256,
      "grad_norm": 0.858881855407697,
      "learning_rate": 2.2213747016543442e-08,
      "loss": 0.1211,
      "step": 33280
    },
    {
      "epoch": 0.9709142890483692,
      "grad_norm": 0.9201220084377129,
      "learning_rate": 2.2169285701721388e-08,
      "loss": 0.1167,
      "step": 33281
    },
    {
      "epoch": 0.9709434622790127,
      "grad_norm": 0.8283169503953051,
      "learning_rate": 2.212486882778364e-08,
      "loss": 0.0877,
      "step": 33282
    },
    {
      "epoch": 0.9709726355096563,
      "grad_norm": 0.7731137437956862,
      "learning_rate": 2.208049639512655e-08,
      "loss": 0.1099,
      "step": 33283
    },
    {
      "epoch": 0.9710018087403,
      "grad_norm": 0.8886735153467984,
      "learning_rate": 2.203616840414646e-08,
      "loss": 0.0937,
      "step": 33284
    },
    {
      "epoch": 0.9710309819709435,
      "grad_norm": 0.772189292030863,
      "learning_rate": 2.1991884855239177e-08,
      "loss": 0.1172,
      "step": 33285
    },
    {
      "epoch": 0.9710601552015871,
      "grad_norm": 0.9297205871305089,
      "learning_rate": 2.1947645748799927e-08,
      "loss": 0.105,
      "step": 33286
    },
    {
      "epoch": 0.9710893284322306,
      "grad_norm": 0.8003621453355055,
      "learning_rate": 2.1903451085223958e-08,
      "loss": 0.1077,
      "step": 33287
    },
    {
      "epoch": 0.9711185016628742,
      "grad_norm": 0.8251416550823791,
      "learning_rate": 2.185930086490595e-08,
      "loss": 0.1399,
      "step": 33288
    },
    {
      "epoch": 0.9711476748935177,
      "grad_norm": 0.7866508578070678,
      "learning_rate": 2.1815195088238926e-08,
      "loss": 0.0948,
      "step": 33289
    },
    {
      "epoch": 0.9711768481241613,
      "grad_norm": 0.7879612377093205,
      "learning_rate": 2.1771133755618124e-08,
      "loss": 0.1088,
      "step": 33290
    },
    {
      "epoch": 0.9712060213548048,
      "grad_norm": 0.7372331894153694,
      "learning_rate": 2.172711686743545e-08,
      "loss": 0.1103,
      "step": 33291
    },
    {
      "epoch": 0.9712351945854484,
      "grad_norm": 0.9221458662624825,
      "learning_rate": 2.1683144424085034e-08,
      "loss": 0.1017,
      "step": 33292
    },
    {
      "epoch": 0.9712643678160919,
      "grad_norm": 0.6565910677863517,
      "learning_rate": 2.1639216425959342e-08,
      "loss": 0.1046,
      "step": 33293
    },
    {
      "epoch": 0.9712935410467355,
      "grad_norm": 0.9345734592058208,
      "learning_rate": 2.159533287345028e-08,
      "loss": 0.1521,
      "step": 33294
    },
    {
      "epoch": 0.971322714277379,
      "grad_norm": 0.8823190509865528,
      "learning_rate": 2.155149376694976e-08,
      "loss": 0.11,
      "step": 33295
    },
    {
      "epoch": 0.9713518875080226,
      "grad_norm": 0.8445446854319447,
      "learning_rate": 2.1507699106848577e-08,
      "loss": 0.1082,
      "step": 33296
    },
    {
      "epoch": 0.9713810607386661,
      "grad_norm": 0.9642143256794932,
      "learning_rate": 2.146394889353809e-08,
      "loss": 0.1057,
      "step": 33297
    },
    {
      "epoch": 0.9714102339693098,
      "grad_norm": 0.8639220123901701,
      "learning_rate": 2.1420243127409644e-08,
      "loss": 0.1068,
      "step": 33298
    },
    {
      "epoch": 0.9714394071999534,
      "grad_norm": 1.0167976612282208,
      "learning_rate": 2.137658180885238e-08,
      "loss": 0.1107,
      "step": 33299
    },
    {
      "epoch": 0.9714685804305969,
      "grad_norm": 0.81110458572188,
      "learning_rate": 2.133296493825654e-08,
      "loss": 0.1066,
      "step": 33300
    },
    {
      "epoch": 0.9714977536612405,
      "grad_norm": 0.6746057619205598,
      "learning_rate": 2.1289392516011253e-08,
      "loss": 0.1113,
      "step": 33301
    },
    {
      "epoch": 0.971526926891884,
      "grad_norm": 0.7692790261086835,
      "learning_rate": 2.1245864542506213e-08,
      "loss": 0.1103,
      "step": 33302
    },
    {
      "epoch": 0.9715561001225276,
      "grad_norm": 0.9241372568505284,
      "learning_rate": 2.1202381018129436e-08,
      "loss": 0.1174,
      "step": 33303
    },
    {
      "epoch": 0.9715852733531711,
      "grad_norm": 0.7847597273931936,
      "learning_rate": 2.1158941943268952e-08,
      "loss": 0.1236,
      "step": 33304
    },
    {
      "epoch": 0.9716144465838147,
      "grad_norm": 1.1458056422264347,
      "learning_rate": 2.1115547318313334e-08,
      "loss": 0.1231,
      "step": 33305
    },
    {
      "epoch": 0.9716436198144582,
      "grad_norm": 1.265289722213336,
      "learning_rate": 2.1072197143649497e-08,
      "loss": 0.1029,
      "step": 33306
    },
    {
      "epoch": 0.9716727930451018,
      "grad_norm": 0.9444687779410674,
      "learning_rate": 2.1028891419664354e-08,
      "loss": 0.1261,
      "step": 33307
    },
    {
      "epoch": 0.9717019662757453,
      "grad_norm": 0.7195124075247614,
      "learning_rate": 2.0985630146744264e-08,
      "loss": 0.1177,
      "step": 33308
    },
    {
      "epoch": 0.9717311395063889,
      "grad_norm": 1.5685025454870865,
      "learning_rate": 2.0942413325276688e-08,
      "loss": 0.0905,
      "step": 33309
    },
    {
      "epoch": 0.9717603127370325,
      "grad_norm": 0.9524786479041064,
      "learning_rate": 2.0899240955646326e-08,
      "loss": 0.1304,
      "step": 33310
    },
    {
      "epoch": 0.9717894859676761,
      "grad_norm": 1.0465798406007878,
      "learning_rate": 2.085611303823898e-08,
      "loss": 0.107,
      "step": 33311
    },
    {
      "epoch": 0.9718186591983197,
      "grad_norm": 0.7076932035780915,
      "learning_rate": 2.0813029573439335e-08,
      "loss": 0.1043,
      "step": 33312
    },
    {
      "epoch": 0.9718478324289632,
      "grad_norm": 0.8522436171572916,
      "learning_rate": 2.0769990561632647e-08,
      "loss": 0.1071,
      "step": 33313
    },
    {
      "epoch": 0.9718770056596068,
      "grad_norm": 0.7615812205683006,
      "learning_rate": 2.0726996003202492e-08,
      "loss": 0.1101,
      "step": 33314
    },
    {
      "epoch": 0.9719061788902503,
      "grad_norm": 0.755199234780472,
      "learning_rate": 2.0684045898533566e-08,
      "loss": 0.1142,
      "step": 33315
    },
    {
      "epoch": 0.9719353521208939,
      "grad_norm": 0.9259430231150687,
      "learning_rate": 2.064114024800834e-08,
      "loss": 0.1327,
      "step": 33316
    },
    {
      "epoch": 0.9719645253515374,
      "grad_norm": 0.952728399417846,
      "learning_rate": 2.059827905201095e-08,
      "loss": 0.1031,
      "step": 33317
    },
    {
      "epoch": 0.971993698582181,
      "grad_norm": 0.7640032847065388,
      "learning_rate": 2.055546231092276e-08,
      "loss": 0.1147,
      "step": 33318
    },
    {
      "epoch": 0.9720228718128245,
      "grad_norm": 0.7113730249546651,
      "learning_rate": 2.0512690025127345e-08,
      "loss": 0.1064,
      "step": 33319
    },
    {
      "epoch": 0.9720520450434681,
      "grad_norm": 0.9526394956738103,
      "learning_rate": 2.0469962195005522e-08,
      "loss": 0.1155,
      "step": 33320
    },
    {
      "epoch": 0.9720812182741116,
      "grad_norm": 0.8208247646065269,
      "learning_rate": 2.04272788209392e-08,
      "loss": 0.0987,
      "step": 33321
    },
    {
      "epoch": 0.9721103915047552,
      "grad_norm": 0.7365215128034867,
      "learning_rate": 2.0384639903309744e-08,
      "loss": 0.1001,
      "step": 33322
    },
    {
      "epoch": 0.9721395647353988,
      "grad_norm": 0.9058537089768122,
      "learning_rate": 2.034204544249685e-08,
      "loss": 0.114,
      "step": 33323
    },
    {
      "epoch": 0.9721687379660423,
      "grad_norm": 0.9173028845224632,
      "learning_rate": 2.0299495438881877e-08,
      "loss": 0.1031,
      "step": 33324
    },
    {
      "epoch": 0.972197911196686,
      "grad_norm": 0.7301709254330327,
      "learning_rate": 2.0256989892844526e-08,
      "loss": 0.0938,
      "step": 33325
    },
    {
      "epoch": 0.9722270844273295,
      "grad_norm": 0.8623916502247145,
      "learning_rate": 2.0214528804763377e-08,
      "loss": 0.1139,
      "step": 33326
    },
    {
      "epoch": 0.9722562576579731,
      "grad_norm": 0.7493150574255667,
      "learning_rate": 2.017211217501869e-08,
      "loss": 0.1281,
      "step": 33327
    },
    {
      "epoch": 0.9722854308886166,
      "grad_norm": 0.8172134180991346,
      "learning_rate": 2.0129740003988485e-08,
      "loss": 0.1104,
      "step": 33328
    },
    {
      "epoch": 0.9723146041192602,
      "grad_norm": 0.8068921454843423,
      "learning_rate": 2.00874122920508e-08,
      "loss": 0.132,
      "step": 33329
    },
    {
      "epoch": 0.9723437773499037,
      "grad_norm": 0.7895308248497875,
      "learning_rate": 2.0045129039584222e-08,
      "loss": 0.1377,
      "step": 33330
    },
    {
      "epoch": 0.9723729505805473,
      "grad_norm": 0.7257825512227422,
      "learning_rate": 2.0002890246965666e-08,
      "loss": 0.1095,
      "step": 33331
    },
    {
      "epoch": 0.9724021238111908,
      "grad_norm": 0.7815689958219003,
      "learning_rate": 1.9960695914572613e-08,
      "loss": 0.1059,
      "step": 33332
    },
    {
      "epoch": 0.9724312970418344,
      "grad_norm": 0.7451748441768528,
      "learning_rate": 1.9918546042781982e-08,
      "loss": 0.1318,
      "step": 33333
    },
    {
      "epoch": 0.972460470272478,
      "grad_norm": 0.8058696504447004,
      "learning_rate": 1.9876440631969585e-08,
      "loss": 0.114,
      "step": 33334
    },
    {
      "epoch": 0.9724896435031215,
      "grad_norm": 0.9150165019876599,
      "learning_rate": 1.983437968251123e-08,
      "loss": 0.1319,
      "step": 33335
    },
    {
      "epoch": 0.9725188167337651,
      "grad_norm": 0.80739249159237,
      "learning_rate": 1.9792363194782726e-08,
      "loss": 0.1131,
      "step": 33336
    },
    {
      "epoch": 0.9725479899644086,
      "grad_norm": 0.7029138450106285,
      "learning_rate": 1.9750391169159332e-08,
      "loss": 0.133,
      "step": 33337
    },
    {
      "epoch": 0.9725771631950523,
      "grad_norm": 0.8346001268840691,
      "learning_rate": 1.9708463606015194e-08,
      "loss": 0.1339,
      "step": 33338
    },
    {
      "epoch": 0.9726063364256958,
      "grad_norm": 0.8896279040035748,
      "learning_rate": 1.9666580505725007e-08,
      "loss": 0.1001,
      "step": 33339
    },
    {
      "epoch": 0.9726355096563394,
      "grad_norm": 1.0616821086827493,
      "learning_rate": 1.9624741868662922e-08,
      "loss": 0.1143,
      "step": 33340
    },
    {
      "epoch": 0.9726646828869829,
      "grad_norm": 0.7409043448761004,
      "learning_rate": 1.9582947695202527e-08,
      "loss": 0.1069,
      "step": 33341
    },
    {
      "epoch": 0.9726938561176265,
      "grad_norm": 0.9229229172873346,
      "learning_rate": 1.9541197985716298e-08,
      "loss": 0.1005,
      "step": 33342
    },
    {
      "epoch": 0.97272302934827,
      "grad_norm": 0.7304547907080917,
      "learning_rate": 1.9499492740577273e-08,
      "loss": 0.1199,
      "step": 33343
    },
    {
      "epoch": 0.9727522025789136,
      "grad_norm": 0.6824912921749677,
      "learning_rate": 1.9457831960157937e-08,
      "loss": 0.1144,
      "step": 33344
    },
    {
      "epoch": 0.9727813758095571,
      "grad_norm": 0.7877393040686903,
      "learning_rate": 1.9416215644830204e-08,
      "loss": 0.1062,
      "step": 33345
    },
    {
      "epoch": 0.9728105490402007,
      "grad_norm": 0.9844808545510941,
      "learning_rate": 1.9374643794964897e-08,
      "loss": 0.0913,
      "step": 33346
    },
    {
      "epoch": 0.9728397222708443,
      "grad_norm": 1.1689726331096526,
      "learning_rate": 1.9333116410934493e-08,
      "loss": 0.0937,
      "step": 33347
    },
    {
      "epoch": 0.9728688955014878,
      "grad_norm": 0.7126234237969035,
      "learning_rate": 1.9291633493109254e-08,
      "loss": 0.1255,
      "step": 33348
    },
    {
      "epoch": 0.9728980687321314,
      "grad_norm": 0.7489160067019679,
      "learning_rate": 1.9250195041858876e-08,
      "loss": 0.1209,
      "step": 33349
    },
    {
      "epoch": 0.9729272419627749,
      "grad_norm": 0.7490003086713569,
      "learning_rate": 1.920880105755363e-08,
      "loss": 0.0771,
      "step": 33350
    },
    {
      "epoch": 0.9729564151934185,
      "grad_norm": 0.6394620877002085,
      "learning_rate": 1.9167451540563765e-08,
      "loss": 0.1051,
      "step": 33351
    },
    {
      "epoch": 0.9729855884240621,
      "grad_norm": 0.9090195318328302,
      "learning_rate": 1.9126146491257324e-08,
      "loss": 0.1023,
      "step": 33352
    },
    {
      "epoch": 0.9730147616547057,
      "grad_norm": 0.8825669052766255,
      "learning_rate": 1.908488591000346e-08,
      "loss": 0.123,
      "step": 33353
    },
    {
      "epoch": 0.9730439348853492,
      "grad_norm": 0.8893309323915279,
      "learning_rate": 1.9043669797171316e-08,
      "loss": 0.1122,
      "step": 33354
    },
    {
      "epoch": 0.9730731081159928,
      "grad_norm": 0.9490527767267487,
      "learning_rate": 1.900249815312838e-08,
      "loss": 0.1222,
      "step": 33355
    },
    {
      "epoch": 0.9731022813466363,
      "grad_norm": 0.709442625281374,
      "learning_rate": 1.8961370978241023e-08,
      "loss": 0.0956,
      "step": 33356
    },
    {
      "epoch": 0.9731314545772799,
      "grad_norm": 0.978385822187881,
      "learning_rate": 1.8920288272878396e-08,
      "loss": 0.1154,
      "step": 33357
    },
    {
      "epoch": 0.9731606278079235,
      "grad_norm": 0.9637221416511035,
      "learning_rate": 1.8879250037406315e-08,
      "loss": 0.1041,
      "step": 33358
    },
    {
      "epoch": 0.973189801038567,
      "grad_norm": 0.8283305516874159,
      "learning_rate": 1.8838256272190602e-08,
      "loss": 0.1059,
      "step": 33359
    },
    {
      "epoch": 0.9732189742692106,
      "grad_norm": 0.9277834903067519,
      "learning_rate": 1.8797306977598184e-08,
      "loss": 0.1024,
      "step": 33360
    },
    {
      "epoch": 0.9732481474998541,
      "grad_norm": 0.8363224529409721,
      "learning_rate": 1.8756402153994324e-08,
      "loss": 0.1092,
      "step": 33361
    },
    {
      "epoch": 0.9732773207304977,
      "grad_norm": 0.7339383701319554,
      "learning_rate": 1.8715541801744286e-08,
      "loss": 0.1196,
      "step": 33362
    },
    {
      "epoch": 0.9733064939611412,
      "grad_norm": 1.1063275108381208,
      "learning_rate": 1.8674725921212776e-08,
      "loss": 0.0935,
      "step": 33363
    },
    {
      "epoch": 0.9733356671917848,
      "grad_norm": 0.756467841423844,
      "learning_rate": 1.8633954512763953e-08,
      "loss": 0.104,
      "step": 33364
    },
    {
      "epoch": 0.9733648404224284,
      "grad_norm": 0.7192393817105305,
      "learning_rate": 1.8593227576761962e-08,
      "loss": 0.1001,
      "step": 33365
    },
    {
      "epoch": 0.973394013653072,
      "grad_norm": 0.7233696841641885,
      "learning_rate": 1.8552545113570963e-08,
      "loss": 0.1177,
      "step": 33366
    },
    {
      "epoch": 0.9734231868837155,
      "grad_norm": 1.0112742820744531,
      "learning_rate": 1.851190712355344e-08,
      "loss": 0.1091,
      "step": 33367
    },
    {
      "epoch": 0.9734523601143591,
      "grad_norm": 0.7479705079772767,
      "learning_rate": 1.8471313607071883e-08,
      "loss": 0.0998,
      "step": 33368
    },
    {
      "epoch": 0.9734815333450026,
      "grad_norm": 0.6921841689447383,
      "learning_rate": 1.843076456448989e-08,
      "loss": 0.1123,
      "step": 33369
    },
    {
      "epoch": 0.9735107065756462,
      "grad_norm": 0.9172583354982481,
      "learning_rate": 1.8390259996168835e-08,
      "loss": 0.1199,
      "step": 33370
    },
    {
      "epoch": 0.9735398798062898,
      "grad_norm": 0.8822899103914085,
      "learning_rate": 1.83497999024701e-08,
      "loss": 0.1334,
      "step": 33371
    },
    {
      "epoch": 0.9735690530369333,
      "grad_norm": 0.7350092952227788,
      "learning_rate": 1.830938428375506e-08,
      "loss": 0.1061,
      "step": 33372
    },
    {
      "epoch": 0.9735982262675769,
      "grad_norm": 0.7330787897650202,
      "learning_rate": 1.8269013140385094e-08,
      "loss": 0.1056,
      "step": 33373
    },
    {
      "epoch": 0.9736273994982204,
      "grad_norm": 0.8272844833046916,
      "learning_rate": 1.822868647271936e-08,
      "loss": 0.1096,
      "step": 33374
    },
    {
      "epoch": 0.973656572728864,
      "grad_norm": 0.6753441382658865,
      "learning_rate": 1.818840428111923e-08,
      "loss": 0.096,
      "step": 33375
    },
    {
      "epoch": 0.9736857459595075,
      "grad_norm": 0.7612429464049904,
      "learning_rate": 1.814816656594387e-08,
      "loss": 0.1336,
      "step": 33376
    },
    {
      "epoch": 0.9737149191901511,
      "grad_norm": 0.9039727898563927,
      "learning_rate": 1.8107973327551876e-08,
      "loss": 0.1213,
      "step": 33377
    },
    {
      "epoch": 0.9737440924207946,
      "grad_norm": 0.7970470410181474,
      "learning_rate": 1.8067824566302962e-08,
      "loss": 0.112,
      "step": 33378
    },
    {
      "epoch": 0.9737732656514383,
      "grad_norm": 0.8800650514267462,
      "learning_rate": 1.802772028255517e-08,
      "loss": 0.0963,
      "step": 33379
    },
    {
      "epoch": 0.9738024388820818,
      "grad_norm": 0.6846404777296988,
      "learning_rate": 1.7987660476666556e-08,
      "loss": 0.1139,
      "step": 33380
    },
    {
      "epoch": 0.9738316121127254,
      "grad_norm": 0.913059160933428,
      "learning_rate": 1.7947645148995162e-08,
      "loss": 0.1212,
      "step": 33381
    },
    {
      "epoch": 0.973860785343369,
      "grad_norm": 0.8062975208045219,
      "learning_rate": 1.790767429989737e-08,
      "loss": 0.1162,
      "step": 33382
    },
    {
      "epoch": 0.9738899585740125,
      "grad_norm": 1.1223707926612059,
      "learning_rate": 1.7867747929730673e-08,
      "loss": 0.1186,
      "step": 33383
    },
    {
      "epoch": 0.9739191318046561,
      "grad_norm": 1.3384563318411626,
      "learning_rate": 1.7827866038852005e-08,
      "loss": 0.1021,
      "step": 33384
    },
    {
      "epoch": 0.9739483050352996,
      "grad_norm": 0.80109189907296,
      "learning_rate": 1.7788028627616083e-08,
      "loss": 0.0966,
      "step": 33385
    },
    {
      "epoch": 0.9739774782659432,
      "grad_norm": 0.9382834093700994,
      "learning_rate": 1.774823569637929e-08,
      "loss": 0.1133,
      "step": 33386
    },
    {
      "epoch": 0.9740066514965867,
      "grad_norm": 0.7803182073018958,
      "learning_rate": 1.7708487245497454e-08,
      "loss": 0.1294,
      "step": 33387
    },
    {
      "epoch": 0.9740358247272303,
      "grad_norm": 0.9460944671462214,
      "learning_rate": 1.7668783275324176e-08,
      "loss": 0.1155,
      "step": 33388
    },
    {
      "epoch": 0.9740649979578738,
      "grad_norm": 1.0073333152762762,
      "learning_rate": 1.7629123786215285e-08,
      "loss": 0.1032,
      "step": 33389
    },
    {
      "epoch": 0.9740941711885174,
      "grad_norm": 0.737617428990919,
      "learning_rate": 1.7589508778523833e-08,
      "loss": 0.1105,
      "step": 33390
    },
    {
      "epoch": 0.9741233444191609,
      "grad_norm": 0.8700243402153485,
      "learning_rate": 1.754993825260398e-08,
      "loss": 0.1224,
      "step": 33391
    },
    {
      "epoch": 0.9741525176498046,
      "grad_norm": 0.7810096236326348,
      "learning_rate": 1.751041220880878e-08,
      "loss": 0.1212,
      "step": 33392
    },
    {
      "epoch": 0.9741816908804481,
      "grad_norm": 0.6825120453776412,
      "learning_rate": 1.7470930647490724e-08,
      "loss": 0.0967,
      "step": 33393
    },
    {
      "epoch": 0.9742108641110917,
      "grad_norm": 0.7636929463072785,
      "learning_rate": 1.7431493569003422e-08,
      "loss": 0.132,
      "step": 33394
    },
    {
      "epoch": 0.9742400373417353,
      "grad_norm": 0.8319015578493912,
      "learning_rate": 1.7392100973698257e-08,
      "loss": 0.1286,
      "step": 33395
    },
    {
      "epoch": 0.9742692105723788,
      "grad_norm": 0.693362482239197,
      "learning_rate": 1.7352752861927168e-08,
      "loss": 0.0962,
      "step": 33396
    },
    {
      "epoch": 0.9742983838030224,
      "grad_norm": 0.9325020114660988,
      "learning_rate": 1.7313449234040992e-08,
      "loss": 0.1056,
      "step": 33397
    },
    {
      "epoch": 0.9743275570336659,
      "grad_norm": 0.8365567021857802,
      "learning_rate": 1.7274190090390553e-08,
      "loss": 0.1164,
      "step": 33398
    },
    {
      "epoch": 0.9743567302643095,
      "grad_norm": 0.972605134269126,
      "learning_rate": 1.723497543132724e-08,
      "loss": 0.1283,
      "step": 33399
    },
    {
      "epoch": 0.974385903494953,
      "grad_norm": 0.8577117535910984,
      "learning_rate": 1.719580525719966e-08,
      "loss": 0.103,
      "step": 33400
    },
    {
      "epoch": 0.9744150767255966,
      "grad_norm": 0.8280299127440968,
      "learning_rate": 1.7156679568359203e-08,
      "loss": 0.1125,
      "step": 33401
    },
    {
      "epoch": 0.9744442499562401,
      "grad_norm": 0.8463966314981628,
      "learning_rate": 1.7117598365154477e-08,
      "loss": 0.1154,
      "step": 33402
    },
    {
      "epoch": 0.9744734231868837,
      "grad_norm": 0.7877558319809553,
      "learning_rate": 1.707856164793409e-08,
      "loss": 0.0971,
      "step": 33403
    },
    {
      "epoch": 0.9745025964175272,
      "grad_norm": 0.9374016346771774,
      "learning_rate": 1.7039569417046655e-08,
      "loss": 0.0881,
      "step": 33404
    },
    {
      "epoch": 0.9745317696481708,
      "grad_norm": 0.9121423303443583,
      "learning_rate": 1.7000621672840777e-08,
      "loss": 0.1252,
      "step": 33405
    },
    {
      "epoch": 0.9745609428788145,
      "grad_norm": 0.714116311834555,
      "learning_rate": 1.696171841566341e-08,
      "loss": 0.1106,
      "step": 33406
    },
    {
      "epoch": 0.974590116109458,
      "grad_norm": 0.9038189175558847,
      "learning_rate": 1.69228596458626e-08,
      "loss": 0.1176,
      "step": 33407
    },
    {
      "epoch": 0.9746192893401016,
      "grad_norm": 0.837749300781571,
      "learning_rate": 1.688404536378474e-08,
      "loss": 0.1421,
      "step": 33408
    },
    {
      "epoch": 0.9746484625707451,
      "grad_norm": 0.8193194685692583,
      "learning_rate": 1.6845275569776774e-08,
      "loss": 0.1134,
      "step": 33409
    },
    {
      "epoch": 0.9746776358013887,
      "grad_norm": 0.7724165656177046,
      "learning_rate": 1.680655026418454e-08,
      "loss": 0.094,
      "step": 33410
    },
    {
      "epoch": 0.9747068090320322,
      "grad_norm": 0.8670529420332095,
      "learning_rate": 1.676786944735387e-08,
      "loss": 0.09,
      "step": 33411
    },
    {
      "epoch": 0.9747359822626758,
      "grad_norm": 0.6433729522077717,
      "learning_rate": 1.672923311963004e-08,
      "loss": 0.0993,
      "step": 33412
    },
    {
      "epoch": 0.9747651554933193,
      "grad_norm": 1.0027729146225761,
      "learning_rate": 1.6690641281357778e-08,
      "loss": 0.0999,
      "step": 33413
    },
    {
      "epoch": 0.9747943287239629,
      "grad_norm": 0.6992562887457178,
      "learning_rate": 1.6652093932881807e-08,
      "loss": 0.092,
      "step": 33414
    },
    {
      "epoch": 0.9748235019546064,
      "grad_norm": 0.7068301941366186,
      "learning_rate": 1.6613591074546855e-08,
      "loss": 0.1141,
      "step": 33415
    },
    {
      "epoch": 0.97485267518525,
      "grad_norm": 0.814202526508201,
      "learning_rate": 1.6575132706695417e-08,
      "loss": 0.1043,
      "step": 33416
    },
    {
      "epoch": 0.9748818484158935,
      "grad_norm": 0.8187714646032954,
      "learning_rate": 1.6536718829672227e-08,
      "loss": 0.1209,
      "step": 33417
    },
    {
      "epoch": 0.9749110216465371,
      "grad_norm": 0.8708815506151023,
      "learning_rate": 1.6498349443819227e-08,
      "loss": 0.1084,
      "step": 33418
    },
    {
      "epoch": 0.9749401948771806,
      "grad_norm": 0.673224660071052,
      "learning_rate": 1.6460024549479482e-08,
      "loss": 0.1007,
      "step": 33419
    },
    {
      "epoch": 0.9749693681078243,
      "grad_norm": 1.0546626415822506,
      "learning_rate": 1.6421744146994932e-08,
      "loss": 0.1018,
      "step": 33420
    },
    {
      "epoch": 0.9749985413384679,
      "grad_norm": 1.025942351102925,
      "learning_rate": 1.638350823670698e-08,
      "loss": 0.1079,
      "step": 33421
    },
    {
      "epoch": 0.9750277145691114,
      "grad_norm": 0.8179767362394554,
      "learning_rate": 1.6345316818958123e-08,
      "loss": 0.1257,
      "step": 33422
    },
    {
      "epoch": 0.975056887799755,
      "grad_norm": 0.808942523053795,
      "learning_rate": 1.630716989408754e-08,
      "loss": 0.1147,
      "step": 33423
    },
    {
      "epoch": 0.9750860610303985,
      "grad_norm": 1.6266496243270054,
      "learning_rate": 1.6269067462437727e-08,
      "loss": 0.1198,
      "step": 33424
    },
    {
      "epoch": 0.9751152342610421,
      "grad_norm": 0.8222191101815929,
      "learning_rate": 1.6231009524347862e-08,
      "loss": 0.101,
      "step": 33425
    },
    {
      "epoch": 0.9751444074916856,
      "grad_norm": 0.6514665271591409,
      "learning_rate": 1.6192996080157676e-08,
      "loss": 0.0949,
      "step": 33426
    },
    {
      "epoch": 0.9751735807223292,
      "grad_norm": 0.7969046839209064,
      "learning_rate": 1.615502713020689e-08,
      "loss": 0.1409,
      "step": 33427
    },
    {
      "epoch": 0.9752027539529727,
      "grad_norm": 0.8554952062082433,
      "learning_rate": 1.6117102674833575e-08,
      "loss": 0.1023,
      "step": 33428
    },
    {
      "epoch": 0.9752319271836163,
      "grad_norm": 0.7427680985706927,
      "learning_rate": 1.6079222714378008e-08,
      "loss": 0.0852,
      "step": 33429
    },
    {
      "epoch": 0.9752611004142598,
      "grad_norm": 0.7194565309418961,
      "learning_rate": 1.6041387249176588e-08,
      "loss": 0.1011,
      "step": 33430
    },
    {
      "epoch": 0.9752902736449034,
      "grad_norm": 0.9604794209898954,
      "learning_rate": 1.600359627956849e-08,
      "loss": 0.1043,
      "step": 33431
    },
    {
      "epoch": 0.975319446875547,
      "grad_norm": 0.821924743818213,
      "learning_rate": 1.596584980589011e-08,
      "loss": 0.0975,
      "step": 33432
    },
    {
      "epoch": 0.9753486201061906,
      "grad_norm": 0.7024338531347216,
      "learning_rate": 1.5928147828478958e-08,
      "loss": 0.1077,
      "step": 33433
    },
    {
      "epoch": 0.9753777933368342,
      "grad_norm": 0.9182789520795457,
      "learning_rate": 1.589049034767143e-08,
      "loss": 0.1035,
      "step": 33434
    },
    {
      "epoch": 0.9754069665674777,
      "grad_norm": 0.6687812974110965,
      "learning_rate": 1.585287736380392e-08,
      "loss": 0.0841,
      "step": 33435
    },
    {
      "epoch": 0.9754361397981213,
      "grad_norm": 1.0874316139265678,
      "learning_rate": 1.581530887721172e-08,
      "loss": 0.1128,
      "step": 33436
    },
    {
      "epoch": 0.9754653130287648,
      "grad_norm": 0.7284841937281055,
      "learning_rate": 1.5777784888231228e-08,
      "loss": 0.1025,
      "step": 33437
    },
    {
      "epoch": 0.9754944862594084,
      "grad_norm": 0.879325279853439,
      "learning_rate": 1.574030539719662e-08,
      "loss": 0.1226,
      "step": 33438
    },
    {
      "epoch": 0.9755236594900519,
      "grad_norm": 1.013744102609131,
      "learning_rate": 1.570287040444263e-08,
      "loss": 0.11,
      "step": 33439
    },
    {
      "epoch": 0.9755528327206955,
      "grad_norm": 0.7306770282912424,
      "learning_rate": 1.566547991030343e-08,
      "loss": 0.1121,
      "step": 33440
    },
    {
      "epoch": 0.975582005951339,
      "grad_norm": 0.7091404223497363,
      "learning_rate": 1.5628133915113196e-08,
      "loss": 0.1157,
      "step": 33441
    },
    {
      "epoch": 0.9756111791819826,
      "grad_norm": 0.9053919337519989,
      "learning_rate": 1.5590832419205003e-08,
      "loss": 0.1161,
      "step": 33442
    },
    {
      "epoch": 0.9756403524126261,
      "grad_norm": 0.7123748698206179,
      "learning_rate": 1.5553575422911915e-08,
      "loss": 0.1046,
      "step": 33443
    },
    {
      "epoch": 0.9756695256432697,
      "grad_norm": 0.9529840701004273,
      "learning_rate": 1.5516362926566996e-08,
      "loss": 0.1205,
      "step": 33444
    },
    {
      "epoch": 0.9756986988739133,
      "grad_norm": 0.8206795705164966,
      "learning_rate": 1.5479194930502206e-08,
      "loss": 0.1173,
      "step": 33445
    },
    {
      "epoch": 0.9757278721045568,
      "grad_norm": 1.0333538600532497,
      "learning_rate": 1.544207143504839e-08,
      "loss": 0.1128,
      "step": 33446
    },
    {
      "epoch": 0.9757570453352005,
      "grad_norm": 0.7544708164282096,
      "learning_rate": 1.5404992440538612e-08,
      "loss": 0.1337,
      "step": 33447
    },
    {
      "epoch": 0.975786218565844,
      "grad_norm": 0.6724146045830542,
      "learning_rate": 1.5367957947302615e-08,
      "loss": 0.1074,
      "step": 33448
    },
    {
      "epoch": 0.9758153917964876,
      "grad_norm": 0.7946364233903862,
      "learning_rate": 1.5330967955671794e-08,
      "loss": 0.1009,
      "step": 33449
    },
    {
      "epoch": 0.9758445650271311,
      "grad_norm": 0.7718172549434619,
      "learning_rate": 1.5294022465976444e-08,
      "loss": 0.0961,
      "step": 33450
    },
    {
      "epoch": 0.9758737382577747,
      "grad_norm": 0.9088213407755129,
      "learning_rate": 1.5257121478545744e-08,
      "loss": 0.1179,
      "step": 33451
    },
    {
      "epoch": 0.9759029114884182,
      "grad_norm": 0.9327672303193023,
      "learning_rate": 1.5220264993709988e-08,
      "loss": 0.1245,
      "step": 33452
    },
    {
      "epoch": 0.9759320847190618,
      "grad_norm": 0.7753202811566637,
      "learning_rate": 1.5183453011797243e-08,
      "loss": 0.1115,
      "step": 33453
    },
    {
      "epoch": 0.9759612579497053,
      "grad_norm": 0.8000881894874269,
      "learning_rate": 1.5146685533136697e-08,
      "loss": 0.0896,
      "step": 33454
    },
    {
      "epoch": 0.9759904311803489,
      "grad_norm": 0.787279925433227,
      "learning_rate": 1.510996255805697e-08,
      "loss": 0.1077,
      "step": 33455
    },
    {
      "epoch": 0.9760196044109924,
      "grad_norm": 0.9641529645428879,
      "learning_rate": 1.507328408688502e-08,
      "loss": 0.132,
      "step": 33456
    },
    {
      "epoch": 0.976048777641636,
      "grad_norm": 0.9703981020772765,
      "learning_rate": 1.5036650119948926e-08,
      "loss": 0.0928,
      "step": 33457
    },
    {
      "epoch": 0.9760779508722796,
      "grad_norm": 1.1180763624483268,
      "learning_rate": 1.5000060657575643e-08,
      "loss": 0.1095,
      "step": 33458
    },
    {
      "epoch": 0.9761071241029231,
      "grad_norm": 1.32960764059299,
      "learning_rate": 1.4963515700092135e-08,
      "loss": 0.1101,
      "step": 33459
    },
    {
      "epoch": 0.9761362973335668,
      "grad_norm": 0.8027003055346538,
      "learning_rate": 1.4927015247823695e-08,
      "loss": 0.1139,
      "step": 33460
    },
    {
      "epoch": 0.9761654705642103,
      "grad_norm": 0.7467701103652643,
      "learning_rate": 1.4890559301097284e-08,
      "loss": 0.1049,
      "step": 33461
    },
    {
      "epoch": 0.9761946437948539,
      "grad_norm": 0.8576695245988681,
      "learning_rate": 1.48541478602382e-08,
      "loss": 0.1041,
      "step": 33462
    },
    {
      "epoch": 0.9762238170254974,
      "grad_norm": 0.9751636245609486,
      "learning_rate": 1.4817780925570625e-08,
      "loss": 0.1221,
      "step": 33463
    },
    {
      "epoch": 0.976252990256141,
      "grad_norm": 0.8419763689914282,
      "learning_rate": 1.4781458497419854e-08,
      "loss": 0.0979,
      "step": 33464
    },
    {
      "epoch": 0.9762821634867845,
      "grad_norm": 0.7866540633974234,
      "learning_rate": 1.4745180576110629e-08,
      "loss": 0.1111,
      "step": 33465
    },
    {
      "epoch": 0.9763113367174281,
      "grad_norm": 0.8147538835025226,
      "learning_rate": 1.4708947161966025e-08,
      "loss": 0.1052,
      "step": 33466
    },
    {
      "epoch": 0.9763405099480716,
      "grad_norm": 0.7236913976966386,
      "learning_rate": 1.467275825530967e-08,
      "loss": 0.1063,
      "step": 33467
    },
    {
      "epoch": 0.9763696831787152,
      "grad_norm": 0.871967743547231,
      "learning_rate": 1.4636613856465198e-08,
      "loss": 0.125,
      "step": 33468
    },
    {
      "epoch": 0.9763988564093588,
      "grad_norm": 0.8103982140571668,
      "learning_rate": 1.4600513965755125e-08,
      "loss": 0.1162,
      "step": 33469
    },
    {
      "epoch": 0.9764280296400023,
      "grad_norm": 1.1658768722585042,
      "learning_rate": 1.4564458583500861e-08,
      "loss": 0.1191,
      "step": 33470
    },
    {
      "epoch": 0.9764572028706459,
      "grad_norm": 0.7372703729649,
      "learning_rate": 1.4528447710025484e-08,
      "loss": 0.0881,
      "step": 33471
    },
    {
      "epoch": 0.9764863761012894,
      "grad_norm": 0.6513245861024871,
      "learning_rate": 1.4492481345649844e-08,
      "loss": 0.0973,
      "step": 33472
    },
    {
      "epoch": 0.976515549331933,
      "grad_norm": 0.7532321738316657,
      "learning_rate": 1.4456559490695355e-08,
      "loss": 0.1037,
      "step": 33473
    },
    {
      "epoch": 0.9765447225625766,
      "grad_norm": 0.8958885167130327,
      "learning_rate": 1.4420682145482313e-08,
      "loss": 0.1079,
      "step": 33474
    },
    {
      "epoch": 0.9765738957932202,
      "grad_norm": 0.9528026867579396,
      "learning_rate": 1.4384849310331573e-08,
      "loss": 0.093,
      "step": 33475
    },
    {
      "epoch": 0.9766030690238637,
      "grad_norm": 0.9398564865817778,
      "learning_rate": 1.4349060985562325e-08,
      "loss": 0.1488,
      "step": 33476
    },
    {
      "epoch": 0.9766322422545073,
      "grad_norm": 0.7662224821385598,
      "learning_rate": 1.4313317171494867e-08,
      "loss": 0.1098,
      "step": 33477
    },
    {
      "epoch": 0.9766614154851508,
      "grad_norm": 1.0432365376197361,
      "learning_rate": 1.4277617868447835e-08,
      "loss": 0.1247,
      "step": 33478
    },
    {
      "epoch": 0.9766905887157944,
      "grad_norm": 0.9898892779238785,
      "learning_rate": 1.4241963076739862e-08,
      "loss": 0.1062,
      "step": 33479
    },
    {
      "epoch": 0.976719761946438,
      "grad_norm": 0.8288831787117793,
      "learning_rate": 1.4206352796689582e-08,
      "loss": 0.1068,
      "step": 33480
    },
    {
      "epoch": 0.9767489351770815,
      "grad_norm": 1.1654789166534627,
      "learning_rate": 1.4170787028615074e-08,
      "loss": 0.1016,
      "step": 33481
    },
    {
      "epoch": 0.976778108407725,
      "grad_norm": 0.6830434175297825,
      "learning_rate": 1.4135265772833307e-08,
      "loss": 0.0871,
      "step": 33482
    },
    {
      "epoch": 0.9768072816383686,
      "grad_norm": 0.8740675074834651,
      "learning_rate": 1.4099789029661249e-08,
      "loss": 0.1071,
      "step": 33483
    },
    {
      "epoch": 0.9768364548690122,
      "grad_norm": 1.1663422147800555,
      "learning_rate": 1.4064356799416423e-08,
      "loss": 0.1158,
      "step": 33484
    },
    {
      "epoch": 0.9768656280996557,
      "grad_norm": 1.02950746241133,
      "learning_rate": 1.4028969082415245e-08,
      "loss": 0.1167,
      "step": 33485
    },
    {
      "epoch": 0.9768948013302993,
      "grad_norm": 0.8121815245135743,
      "learning_rate": 1.3993625878972461e-08,
      "loss": 0.1089,
      "step": 33486
    },
    {
      "epoch": 0.9769239745609429,
      "grad_norm": 0.7529322717191119,
      "learning_rate": 1.3958327189404486e-08,
      "loss": 0.1149,
      "step": 33487
    },
    {
      "epoch": 0.9769531477915865,
      "grad_norm": 0.8016289552099051,
      "learning_rate": 1.3923073014026623e-08,
      "loss": 0.0936,
      "step": 33488
    },
    {
      "epoch": 0.97698232102223,
      "grad_norm": 0.7566834874084734,
      "learning_rate": 1.3887863353153064e-08,
      "loss": 0.1036,
      "step": 33489
    },
    {
      "epoch": 0.9770114942528736,
      "grad_norm": 0.8066875277382531,
      "learning_rate": 1.3852698207098004e-08,
      "loss": 0.1286,
      "step": 33490
    },
    {
      "epoch": 0.9770406674835171,
      "grad_norm": 1.3215638404967414,
      "learning_rate": 1.3817577576176744e-08,
      "loss": 0.127,
      "step": 33491
    },
    {
      "epoch": 0.9770698407141607,
      "grad_norm": 0.8640221730882329,
      "learning_rate": 1.3782501460701258e-08,
      "loss": 0.1046,
      "step": 33492
    },
    {
      "epoch": 0.9770990139448043,
      "grad_norm": 0.8799891919501298,
      "learning_rate": 1.3747469860985186e-08,
      "loss": 0.1058,
      "step": 33493
    },
    {
      "epoch": 0.9771281871754478,
      "grad_norm": 0.9599150347920247,
      "learning_rate": 1.3712482777341052e-08,
      "loss": 0.0938,
      "step": 33494
    },
    {
      "epoch": 0.9771573604060914,
      "grad_norm": 0.7217733960288283,
      "learning_rate": 1.3677540210082495e-08,
      "loss": 0.0943,
      "step": 33495
    },
    {
      "epoch": 0.9771865336367349,
      "grad_norm": 0.7299157592610854,
      "learning_rate": 1.3642642159519826e-08,
      "loss": 0.1087,
      "step": 33496
    },
    {
      "epoch": 0.9772157068673785,
      "grad_norm": 0.9035806109289037,
      "learning_rate": 1.3607788625965567e-08,
      "loss": 0.1467,
      "step": 33497
    },
    {
      "epoch": 0.977244880098022,
      "grad_norm": 0.8251648123877693,
      "learning_rate": 1.3572979609730586e-08,
      "loss": 0.1114,
      "step": 33498
    },
    {
      "epoch": 0.9772740533286656,
      "grad_norm": 0.8127618790375482,
      "learning_rate": 1.353821511112574e-08,
      "loss": 0.1133,
      "step": 33499
    },
    {
      "epoch": 0.9773032265593091,
      "grad_norm": 1.048735789101215,
      "learning_rate": 1.3503495130460786e-08,
      "loss": 0.1259,
      "step": 33500
    },
    {
      "epoch": 0.9773323997899528,
      "grad_norm": 0.7650104707120872,
      "learning_rate": 1.346881966804714e-08,
      "loss": 0.1322,
      "step": 33501
    },
    {
      "epoch": 0.9773615730205963,
      "grad_norm": 0.9096094626126157,
      "learning_rate": 1.3434188724192888e-08,
      "loss": 0.1215,
      "step": 33502
    },
    {
      "epoch": 0.9773907462512399,
      "grad_norm": 0.7911768998071496,
      "learning_rate": 1.3399602299208337e-08,
      "loss": 0.1101,
      "step": 33503
    },
    {
      "epoch": 0.9774199194818834,
      "grad_norm": 0.8342474729210573,
      "learning_rate": 1.3365060393401574e-08,
      "loss": 0.1393,
      "step": 33504
    },
    {
      "epoch": 0.977449092712527,
      "grad_norm": 0.829972991771532,
      "learning_rate": 1.3330563007080688e-08,
      "loss": 0.1439,
      "step": 33505
    },
    {
      "epoch": 0.9774782659431706,
      "grad_norm": 0.8317439227451584,
      "learning_rate": 1.3296110140554319e-08,
      "loss": 0.1196,
      "step": 33506
    },
    {
      "epoch": 0.9775074391738141,
      "grad_norm": 0.8171054407467253,
      "learning_rate": 1.326170179413e-08,
      "loss": 0.1124,
      "step": 33507
    },
    {
      "epoch": 0.9775366124044577,
      "grad_norm": 0.704629330813467,
      "learning_rate": 1.3227337968114705e-08,
      "loss": 0.1217,
      "step": 33508
    },
    {
      "epoch": 0.9775657856351012,
      "grad_norm": 0.6362952901423882,
      "learning_rate": 1.3193018662815416e-08,
      "loss": 0.1042,
      "step": 33509
    },
    {
      "epoch": 0.9775949588657448,
      "grad_norm": 0.7817879002668029,
      "learning_rate": 1.315874387853855e-08,
      "loss": 0.1108,
      "step": 33510
    },
    {
      "epoch": 0.9776241320963883,
      "grad_norm": 0.7951073224680534,
      "learning_rate": 1.3124513615589419e-08,
      "loss": 0.1262,
      "step": 33511
    },
    {
      "epoch": 0.9776533053270319,
      "grad_norm": 0.678908976733393,
      "learning_rate": 1.3090327874274445e-08,
      "loss": 0.1213,
      "step": 33512
    },
    {
      "epoch": 0.9776824785576754,
      "grad_norm": 0.8913169990854443,
      "learning_rate": 1.305618665489894e-08,
      "loss": 0.1038,
      "step": 33513
    },
    {
      "epoch": 0.9777116517883191,
      "grad_norm": 0.6844177475755414,
      "learning_rate": 1.3022089957766548e-08,
      "loss": 0.0742,
      "step": 33514
    },
    {
      "epoch": 0.9777408250189626,
      "grad_norm": 0.6221148187316252,
      "learning_rate": 1.2988037783183138e-08,
      "loss": 0.0928,
      "step": 33515
    },
    {
      "epoch": 0.9777699982496062,
      "grad_norm": 1.4901746606833184,
      "learning_rate": 1.29540301314518e-08,
      "loss": 0.1121,
      "step": 33516
    },
    {
      "epoch": 0.9777991714802498,
      "grad_norm": 0.7422810677962711,
      "learning_rate": 1.292006700287618e-08,
      "loss": 0.1065,
      "step": 33517
    },
    {
      "epoch": 0.9778283447108933,
      "grad_norm": 0.9316007606678465,
      "learning_rate": 1.2886148397759923e-08,
      "loss": 0.148,
      "step": 33518
    },
    {
      "epoch": 0.9778575179415369,
      "grad_norm": 0.9082058317249178,
      "learning_rate": 1.2852274316405567e-08,
      "loss": 0.1225,
      "step": 33519
    },
    {
      "epoch": 0.9778866911721804,
      "grad_norm": 1.0126244087613379,
      "learning_rate": 1.2818444759115644e-08,
      "loss": 0.1195,
      "step": 33520
    },
    {
      "epoch": 0.977915864402824,
      "grad_norm": 1.0742154430928772,
      "learning_rate": 1.2784659726192139e-08,
      "loss": 0.137,
      "step": 33521
    },
    {
      "epoch": 0.9779450376334675,
      "grad_norm": 0.779363725290733,
      "learning_rate": 1.2750919217936475e-08,
      "loss": 0.1077,
      "step": 33522
    },
    {
      "epoch": 0.9779742108641111,
      "grad_norm": 0.7613057254401159,
      "learning_rate": 1.2717223234650079e-08,
      "loss": 0.1029,
      "step": 33523
    },
    {
      "epoch": 0.9780033840947546,
      "grad_norm": 1.022905821591553,
      "learning_rate": 1.2683571776633819e-08,
      "loss": 0.1275,
      "step": 33524
    },
    {
      "epoch": 0.9780325573253982,
      "grad_norm": 0.8794318217864383,
      "learning_rate": 1.2649964844188013e-08,
      "loss": 0.1042,
      "step": 33525
    },
    {
      "epoch": 0.9780617305560417,
      "grad_norm": 0.9135103344287407,
      "learning_rate": 1.2616402437612418e-08,
      "loss": 0.1179,
      "step": 33526
    },
    {
      "epoch": 0.9780909037866853,
      "grad_norm": 0.8849909720850804,
      "learning_rate": 1.2582884557207908e-08,
      "loss": 0.1077,
      "step": 33527
    },
    {
      "epoch": 0.978120077017329,
      "grad_norm": 0.9713646329248453,
      "learning_rate": 1.2549411203272021e-08,
      "loss": 0.1012,
      "step": 33528
    },
    {
      "epoch": 0.9781492502479725,
      "grad_norm": 0.7887688045716686,
      "learning_rate": 1.2515982376104518e-08,
      "loss": 0.104,
      "step": 33529
    },
    {
      "epoch": 0.978178423478616,
      "grad_norm": 0.8497511414826573,
      "learning_rate": 1.2482598076003493e-08,
      "loss": 0.1076,
      "step": 33530
    },
    {
      "epoch": 0.9782075967092596,
      "grad_norm": 0.8172917817280038,
      "learning_rate": 1.2449258303267597e-08,
      "loss": 0.1212,
      "step": 33531
    },
    {
      "epoch": 0.9782367699399032,
      "grad_norm": 0.8647268169479864,
      "learning_rate": 1.241596305819437e-08,
      "loss": 0.1096,
      "step": 33532
    },
    {
      "epoch": 0.9782659431705467,
      "grad_norm": 0.7647816226522861,
      "learning_rate": 1.238271234108024e-08,
      "loss": 0.1131,
      "step": 33533
    },
    {
      "epoch": 0.9782951164011903,
      "grad_norm": 0.6886995127166381,
      "learning_rate": 1.2349506152223301e-08,
      "loss": 0.1095,
      "step": 33534
    },
    {
      "epoch": 0.9783242896318338,
      "grad_norm": 0.7503952331544292,
      "learning_rate": 1.231634449191832e-08,
      "loss": 0.1005,
      "step": 33535
    },
    {
      "epoch": 0.9783534628624774,
      "grad_norm": 0.6880498841861453,
      "learning_rate": 1.2283227360462834e-08,
      "loss": 0.1056,
      "step": 33536
    },
    {
      "epoch": 0.9783826360931209,
      "grad_norm": 0.708908472219334,
      "learning_rate": 1.2250154758152167e-08,
      "loss": 0.1143,
      "step": 33537
    },
    {
      "epoch": 0.9784118093237645,
      "grad_norm": 0.6950050722908366,
      "learning_rate": 1.2217126685281633e-08,
      "loss": 0.0734,
      "step": 33538
    },
    {
      "epoch": 0.978440982554408,
      "grad_norm": 1.1183797592938378,
      "learning_rate": 1.2184143142145444e-08,
      "loss": 0.1104,
      "step": 33539
    },
    {
      "epoch": 0.9784701557850516,
      "grad_norm": 0.7917304513124189,
      "learning_rate": 1.2151204129038918e-08,
      "loss": 0.1236,
      "step": 33540
    },
    {
      "epoch": 0.9784993290156953,
      "grad_norm": 0.8255660088355159,
      "learning_rate": 1.211830964625571e-08,
      "loss": 0.0944,
      "step": 33541
    },
    {
      "epoch": 0.9785285022463388,
      "grad_norm": 1.0263963579505295,
      "learning_rate": 1.2085459694089475e-08,
      "loss": 0.1461,
      "step": 33542
    },
    {
      "epoch": 0.9785576754769824,
      "grad_norm": 0.8897985538898494,
      "learning_rate": 1.2052654272833309e-08,
      "loss": 0.1063,
      "step": 33543
    },
    {
      "epoch": 0.9785868487076259,
      "grad_norm": 0.6096755265309961,
      "learning_rate": 1.2019893382780312e-08,
      "loss": 0.1214,
      "step": 33544
    },
    {
      "epoch": 0.9786160219382695,
      "grad_norm": 0.8703492366971958,
      "learning_rate": 1.1987177024223028e-08,
      "loss": 0.1238,
      "step": 33545
    },
    {
      "epoch": 0.978645195168913,
      "grad_norm": 0.8568893824806576,
      "learning_rate": 1.1954505197454002e-08,
      "loss": 0.1124,
      "step": 33546
    },
    {
      "epoch": 0.9786743683995566,
      "grad_norm": 0.8984923640720707,
      "learning_rate": 1.1921877902763557e-08,
      "loss": 0.0964,
      "step": 33547
    },
    {
      "epoch": 0.9787035416302001,
      "grad_norm": 0.9315082956857679,
      "learning_rate": 1.188929514044479e-08,
      "loss": 0.1107,
      "step": 33548
    },
    {
      "epoch": 0.9787327148608437,
      "grad_norm": 0.7133434124220804,
      "learning_rate": 1.1856756910786915e-08,
      "loss": 0.1127,
      "step": 33549
    },
    {
      "epoch": 0.9787618880914872,
      "grad_norm": 0.8420504959062934,
      "learning_rate": 1.1824263214081367e-08,
      "loss": 0.0958,
      "step": 33550
    },
    {
      "epoch": 0.9787910613221308,
      "grad_norm": 0.8654764531361201,
      "learning_rate": 1.179181405061791e-08,
      "loss": 0.0959,
      "step": 33551
    },
    {
      "epoch": 0.9788202345527743,
      "grad_norm": 0.880735452510767,
      "learning_rate": 1.1759409420686873e-08,
      "loss": 0.1416,
      "step": 33552
    },
    {
      "epoch": 0.9788494077834179,
      "grad_norm": 0.8464581829556374,
      "learning_rate": 1.1727049324576355e-08,
      "loss": 0.0964,
      "step": 33553
    },
    {
      "epoch": 0.9788785810140614,
      "grad_norm": 0.6674807253957455,
      "learning_rate": 1.1694733762576127e-08,
      "loss": 0.0991,
      "step": 33554
    },
    {
      "epoch": 0.9789077542447051,
      "grad_norm": 1.1879563273991856,
      "learning_rate": 1.1662462734974845e-08,
      "loss": 0.1231,
      "step": 33555
    },
    {
      "epoch": 0.9789369274753487,
      "grad_norm": 0.94763739100455,
      "learning_rate": 1.1630236242060056e-08,
      "loss": 0.0921,
      "step": 33556
    },
    {
      "epoch": 0.9789661007059922,
      "grad_norm": 0.7393453897394201,
      "learning_rate": 1.1598054284119864e-08,
      "loss": 0.0784,
      "step": 33557
    },
    {
      "epoch": 0.9789952739366358,
      "grad_norm": 0.6927939059784717,
      "learning_rate": 1.1565916861441263e-08,
      "loss": 0.0906,
      "step": 33558
    },
    {
      "epoch": 0.9790244471672793,
      "grad_norm": 0.8144790970997765,
      "learning_rate": 1.1533823974311242e-08,
      "loss": 0.1138,
      "step": 33559
    },
    {
      "epoch": 0.9790536203979229,
      "grad_norm": 0.7101226923340795,
      "learning_rate": 1.1501775623016243e-08,
      "loss": 0.1004,
      "step": 33560
    },
    {
      "epoch": 0.9790827936285664,
      "grad_norm": 0.8468378824637969,
      "learning_rate": 1.14697718078427e-08,
      "loss": 0.1119,
      "step": 33561
    },
    {
      "epoch": 0.97911196685921,
      "grad_norm": 0.7868780522659902,
      "learning_rate": 1.1437812529076498e-08,
      "loss": 0.1165,
      "step": 33562
    },
    {
      "epoch": 0.9791411400898535,
      "grad_norm": 0.832739648487524,
      "learning_rate": 1.1405897787002407e-08,
      "loss": 0.0972,
      "step": 33563
    },
    {
      "epoch": 0.9791703133204971,
      "grad_norm": 0.7707445998333764,
      "learning_rate": 1.1374027581905201e-08,
      "loss": 0.0913,
      "step": 33564
    },
    {
      "epoch": 0.9791994865511406,
      "grad_norm": 0.752586600078335,
      "learning_rate": 1.1342201914070206e-08,
      "loss": 0.1283,
      "step": 33565
    },
    {
      "epoch": 0.9792286597817842,
      "grad_norm": 0.8288984065373154,
      "learning_rate": 1.1310420783781084e-08,
      "loss": 0.1002,
      "step": 33566
    },
    {
      "epoch": 0.9792578330124277,
      "grad_norm": 0.882658771823189,
      "learning_rate": 1.1278684191321499e-08,
      "loss": 0.1135,
      "step": 33567
    },
    {
      "epoch": 0.9792870062430714,
      "grad_norm": 0.809954938094631,
      "learning_rate": 1.124699213697511e-08,
      "loss": 0.1148,
      "step": 33568
    },
    {
      "epoch": 0.979316179473715,
      "grad_norm": 0.8110699918505555,
      "learning_rate": 1.1215344621025026e-08,
      "loss": 0.1225,
      "step": 33569
    },
    {
      "epoch": 0.9793453527043585,
      "grad_norm": 0.8564527368341427,
      "learning_rate": 1.1183741643752688e-08,
      "loss": 0.1148,
      "step": 33570
    },
    {
      "epoch": 0.9793745259350021,
      "grad_norm": 0.8592063659683723,
      "learning_rate": 1.1152183205441202e-08,
      "loss": 0.109,
      "step": 33571
    },
    {
      "epoch": 0.9794036991656456,
      "grad_norm": 0.8518428727333377,
      "learning_rate": 1.1120669306372568e-08,
      "loss": 0.1218,
      "step": 33572
    },
    {
      "epoch": 0.9794328723962892,
      "grad_norm": 0.8120187384522541,
      "learning_rate": 1.1089199946827111e-08,
      "loss": 0.1312,
      "step": 33573
    },
    {
      "epoch": 0.9794620456269327,
      "grad_norm": 1.0401449897679538,
      "learning_rate": 1.1057775127086279e-08,
      "loss": 0.1517,
      "step": 33574
    },
    {
      "epoch": 0.9794912188575763,
      "grad_norm": 0.8482366633895007,
      "learning_rate": 1.1026394847430954e-08,
      "loss": 0.1105,
      "step": 33575
    },
    {
      "epoch": 0.9795203920882198,
      "grad_norm": 0.6194503260232692,
      "learning_rate": 1.0995059108140916e-08,
      "loss": 0.0999,
      "step": 33576
    },
    {
      "epoch": 0.9795495653188634,
      "grad_norm": 0.6525009094528953,
      "learning_rate": 1.0963767909495938e-08,
      "loss": 0.1031,
      "step": 33577
    },
    {
      "epoch": 0.9795787385495069,
      "grad_norm": 0.8035432789246304,
      "learning_rate": 1.0932521251775796e-08,
      "loss": 0.1117,
      "step": 33578
    },
    {
      "epoch": 0.9796079117801505,
      "grad_norm": 0.6810473434341285,
      "learning_rate": 1.0901319135259158e-08,
      "loss": 0.0903,
      "step": 33579
    },
    {
      "epoch": 0.979637085010794,
      "grad_norm": 0.8300760745461686,
      "learning_rate": 1.0870161560224134e-08,
      "loss": 0.141,
      "step": 33580
    },
    {
      "epoch": 0.9796662582414376,
      "grad_norm": 0.8687429024153636,
      "learning_rate": 1.0839048526949391e-08,
      "loss": 0.1,
      "step": 33581
    },
    {
      "epoch": 0.9796954314720813,
      "grad_norm": 0.8460977724789748,
      "learning_rate": 1.080798003571304e-08,
      "loss": 0.1358,
      "step": 33582
    },
    {
      "epoch": 0.9797246047027248,
      "grad_norm": 0.716330625236146,
      "learning_rate": 1.0776956086790968e-08,
      "loss": 0.1036,
      "step": 33583
    },
    {
      "epoch": 0.9797537779333684,
      "grad_norm": 0.73467268286258,
      "learning_rate": 1.07459766804624e-08,
      "loss": 0.122,
      "step": 33584
    },
    {
      "epoch": 0.9797829511640119,
      "grad_norm": 0.7435576255141277,
      "learning_rate": 1.0715041817002114e-08,
      "loss": 0.1009,
      "step": 33585
    },
    {
      "epoch": 0.9798121243946555,
      "grad_norm": 0.9354998523865375,
      "learning_rate": 1.0684151496687112e-08,
      "loss": 0.0872,
      "step": 33586
    },
    {
      "epoch": 0.979841297625299,
      "grad_norm": 0.6837934822505343,
      "learning_rate": 1.0653305719792727e-08,
      "loss": 0.0985,
      "step": 33587
    },
    {
      "epoch": 0.9798704708559426,
      "grad_norm": 1.123557878098781,
      "learning_rate": 1.0622504486594853e-08,
      "loss": 0.1109,
      "step": 33588
    },
    {
      "epoch": 0.9798996440865861,
      "grad_norm": 0.8260486070102699,
      "learning_rate": 1.0591747797367713e-08,
      "loss": 0.0882,
      "step": 33589
    },
    {
      "epoch": 0.9799288173172297,
      "grad_norm": 0.8483487365515615,
      "learning_rate": 1.0561035652386643e-08,
      "loss": 0.1096,
      "step": 33590
    },
    {
      "epoch": 0.9799579905478732,
      "grad_norm": 0.899948870894597,
      "learning_rate": 1.0530368051925865e-08,
      "loss": 0.1261,
      "step": 33591
    },
    {
      "epoch": 0.9799871637785168,
      "grad_norm": 0.9432100765013671,
      "learning_rate": 1.0499744996259054e-08,
      "loss": 0.1259,
      "step": 33592
    },
    {
      "epoch": 0.9800163370091604,
      "grad_norm": 0.7118238030219393,
      "learning_rate": 1.0469166485658766e-08,
      "loss": 0.0994,
      "step": 33593
    },
    {
      "epoch": 0.9800455102398039,
      "grad_norm": 0.5866476698926588,
      "learning_rate": 1.0438632520399227e-08,
      "loss": 0.0928,
      "step": 33594
    },
    {
      "epoch": 0.9800746834704476,
      "grad_norm": 0.8024200703226215,
      "learning_rate": 1.0408143100751888e-08,
      "loss": 0.1121,
      "step": 33595
    },
    {
      "epoch": 0.9801038567010911,
      "grad_norm": 0.7835659963475179,
      "learning_rate": 1.0377698226989863e-08,
      "loss": 0.0924,
      "step": 33596
    },
    {
      "epoch": 0.9801330299317347,
      "grad_norm": 0.7460840593951773,
      "learning_rate": 1.0347297899384601e-08,
      "loss": 0.1092,
      "step": 33597
    },
    {
      "epoch": 0.9801622031623782,
      "grad_norm": 0.6389914910112341,
      "learning_rate": 1.0316942118207551e-08,
      "loss": 0.1233,
      "step": 33598
    },
    {
      "epoch": 0.9801913763930218,
      "grad_norm": 0.8388707244957739,
      "learning_rate": 1.0286630883729608e-08,
      "loss": 0.1164,
      "step": 33599
    },
    {
      "epoch": 0.9802205496236653,
      "grad_norm": 0.9032565473013633,
      "learning_rate": 1.0256364196221669e-08,
      "loss": 0.1012,
      "step": 33600
    },
    {
      "epoch": 0.9802497228543089,
      "grad_norm": 0.9149871122702345,
      "learning_rate": 1.0226142055953515e-08,
      "loss": 0.1217,
      "step": 33601
    },
    {
      "epoch": 0.9802788960849524,
      "grad_norm": 0.9215684318390998,
      "learning_rate": 1.0195964463195485e-08,
      "loss": 0.1058,
      "step": 33602
    },
    {
      "epoch": 0.980308069315596,
      "grad_norm": 0.9950604692617956,
      "learning_rate": 1.0165831418216255e-08,
      "loss": 0.1126,
      "step": 33603
    },
    {
      "epoch": 0.9803372425462396,
      "grad_norm": 0.7541798563495795,
      "learning_rate": 1.0135742921286163e-08,
      "loss": 0.0971,
      "step": 33604
    },
    {
      "epoch": 0.9803664157768831,
      "grad_norm": 0.7461091518709655,
      "learning_rate": 1.0105698972672217e-08,
      "loss": 0.1101,
      "step": 33605
    },
    {
      "epoch": 0.9803955890075267,
      "grad_norm": 1.0912559824009103,
      "learning_rate": 1.0075699572643649e-08,
      "loss": 0.1261,
      "step": 33606
    },
    {
      "epoch": 0.9804247622381702,
      "grad_norm": 0.9400767961900052,
      "learning_rate": 1.0045744721468021e-08,
      "loss": 0.1261,
      "step": 33607
    },
    {
      "epoch": 0.9804539354688138,
      "grad_norm": 0.9017659561990901,
      "learning_rate": 1.0015834419412895e-08,
      "loss": 0.1264,
      "step": 33608
    },
    {
      "epoch": 0.9804831086994574,
      "grad_norm": 0.8615105950532248,
      "learning_rate": 9.985968666745282e-09,
      "loss": 0.106,
      "step": 33609
    },
    {
      "epoch": 0.980512281930101,
      "grad_norm": 0.825579417128215,
      "learning_rate": 9.95614746373108e-09,
      "loss": 0.1219,
      "step": 33610
    },
    {
      "epoch": 0.9805414551607445,
      "grad_norm": 0.6876971184436184,
      "learning_rate": 9.926370810637853e-09,
      "loss": 0.1029,
      "step": 33611
    },
    {
      "epoch": 0.9805706283913881,
      "grad_norm": 0.6744817637104469,
      "learning_rate": 9.896638707730944e-09,
      "loss": 0.1196,
      "step": 33612
    },
    {
      "epoch": 0.9805998016220316,
      "grad_norm": 0.86975152342235,
      "learning_rate": 9.866951155274585e-09,
      "loss": 0.1182,
      "step": 33613
    },
    {
      "epoch": 0.9806289748526752,
      "grad_norm": 0.7316605252320606,
      "learning_rate": 9.837308153535786e-09,
      "loss": 0.0906,
      "step": 33614
    },
    {
      "epoch": 0.9806581480833187,
      "grad_norm": 0.8640335590223941,
      "learning_rate": 9.807709702778223e-09,
      "loss": 0.1253,
      "step": 33615
    },
    {
      "epoch": 0.9806873213139623,
      "grad_norm": 0.7699819317838843,
      "learning_rate": 9.778155803265577e-09,
      "loss": 0.1125,
      "step": 33616
    },
    {
      "epoch": 0.9807164945446059,
      "grad_norm": 0.9002660430621313,
      "learning_rate": 9.748646455262633e-09,
      "loss": 0.1143,
      "step": 33617
    },
    {
      "epoch": 0.9807456677752494,
      "grad_norm": 0.688563331347183,
      "learning_rate": 9.719181659032518e-09,
      "loss": 0.0892,
      "step": 33618
    },
    {
      "epoch": 0.980774841005893,
      "grad_norm": 0.7607676172778293,
      "learning_rate": 9.68976141483835e-09,
      "loss": 0.1141,
      "step": 33619
    },
    {
      "epoch": 0.9808040142365365,
      "grad_norm": 0.6857942810864647,
      "learning_rate": 9.6603857229427e-09,
      "loss": 0.0977,
      "step": 33620
    },
    {
      "epoch": 0.9808331874671801,
      "grad_norm": 0.8653116959258328,
      "learning_rate": 9.63105458360758e-09,
      "loss": 0.0938,
      "step": 33621
    },
    {
      "epoch": 0.9808623606978237,
      "grad_norm": 0.8703808503252962,
      "learning_rate": 9.601767997095556e-09,
      "loss": 0.1275,
      "step": 33622
    },
    {
      "epoch": 0.9808915339284673,
      "grad_norm": 0.953380987348052,
      "learning_rate": 9.572525963666979e-09,
      "loss": 0.0995,
      "step": 33623
    },
    {
      "epoch": 0.9809207071591108,
      "grad_norm": 1.0503153488917936,
      "learning_rate": 9.543328483584412e-09,
      "loss": 0.1027,
      "step": 33624
    },
    {
      "epoch": 0.9809498803897544,
      "grad_norm": 0.9197137964180604,
      "learning_rate": 9.514175557107097e-09,
      "loss": 0.1037,
      "step": 33625
    },
    {
      "epoch": 0.980979053620398,
      "grad_norm": 0.879340509246777,
      "learning_rate": 9.485067184495932e-09,
      "loss": 0.1313,
      "step": 33626
    },
    {
      "epoch": 0.9810082268510415,
      "grad_norm": 0.7984548676562049,
      "learning_rate": 9.456003366010713e-09,
      "loss": 0.145,
      "step": 33627
    },
    {
      "epoch": 0.981037400081685,
      "grad_norm": 1.1241025417926034,
      "learning_rate": 9.42698410191123e-09,
      "loss": 0.1002,
      "step": 33628
    },
    {
      "epoch": 0.9810665733123286,
      "grad_norm": 0.7751571980232825,
      "learning_rate": 9.398009392456165e-09,
      "loss": 0.1082,
      "step": 33629
    },
    {
      "epoch": 0.9810957465429722,
      "grad_norm": 0.6539449698680845,
      "learning_rate": 9.3690792379042e-09,
      "loss": 0.1209,
      "step": 33630
    },
    {
      "epoch": 0.9811249197736157,
      "grad_norm": 0.9025010932371627,
      "learning_rate": 9.340193638514017e-09,
      "loss": 0.0988,
      "step": 33631
    },
    {
      "epoch": 0.9811540930042593,
      "grad_norm": 0.9088939761302108,
      "learning_rate": 9.311352594543188e-09,
      "loss": 0.0958,
      "step": 33632
    },
    {
      "epoch": 0.9811832662349028,
      "grad_norm": 0.6746669390137113,
      "learning_rate": 9.28255610624873e-09,
      "loss": 0.1222,
      "step": 33633
    },
    {
      "epoch": 0.9812124394655464,
      "grad_norm": 0.8845037677253808,
      "learning_rate": 9.25380417388877e-09,
      "loss": 0.1036,
      "step": 33634
    },
    {
      "epoch": 0.9812416126961899,
      "grad_norm": 0.8244579628519634,
      "learning_rate": 9.225096797719213e-09,
      "loss": 0.1208,
      "step": 33635
    },
    {
      "epoch": 0.9812707859268336,
      "grad_norm": 0.7776124324380745,
      "learning_rate": 9.196433977996522e-09,
      "loss": 0.1214,
      "step": 33636
    },
    {
      "epoch": 0.9812999591574771,
      "grad_norm": 0.7720265074637314,
      "learning_rate": 9.167815714977158e-09,
      "loss": 0.1226,
      "step": 33637
    },
    {
      "epoch": 0.9813291323881207,
      "grad_norm": 0.7452347521734213,
      "learning_rate": 9.13924200891536e-09,
      "loss": 0.1007,
      "step": 33638
    },
    {
      "epoch": 0.9813583056187642,
      "grad_norm": 0.8160326354493608,
      "learning_rate": 9.110712860067594e-09,
      "loss": 0.0961,
      "step": 33639
    },
    {
      "epoch": 0.9813874788494078,
      "grad_norm": 0.9735477117013972,
      "learning_rate": 9.082228268688099e-09,
      "loss": 0.1054,
      "step": 33640
    },
    {
      "epoch": 0.9814166520800514,
      "grad_norm": 0.753904475086462,
      "learning_rate": 9.053788235030558e-09,
      "loss": 0.1179,
      "step": 33641
    },
    {
      "epoch": 0.9814458253106949,
      "grad_norm": 1.0664785226371116,
      "learning_rate": 9.025392759349771e-09,
      "loss": 0.1252,
      "step": 33642
    },
    {
      "epoch": 0.9814749985413385,
      "grad_norm": 0.7380798782229535,
      "learning_rate": 8.997041841898312e-09,
      "loss": 0.0998,
      "step": 33643
    },
    {
      "epoch": 0.981504171771982,
      "grad_norm": 0.8774971908772576,
      "learning_rate": 8.968735482929868e-09,
      "loss": 0.1071,
      "step": 33644
    },
    {
      "epoch": 0.9815333450026256,
      "grad_norm": 1.0827863457925666,
      "learning_rate": 8.94047368269757e-09,
      "loss": 0.1222,
      "step": 33645
    },
    {
      "epoch": 0.9815625182332691,
      "grad_norm": 0.8178162774695727,
      "learning_rate": 8.912256441452882e-09,
      "loss": 0.0873,
      "step": 33646
    },
    {
      "epoch": 0.9815916914639127,
      "grad_norm": 0.7479260808972571,
      "learning_rate": 8.884083759448381e-09,
      "loss": 0.1038,
      "step": 33647
    },
    {
      "epoch": 0.9816208646945562,
      "grad_norm": 0.9094641469251314,
      "learning_rate": 8.855955636935531e-09,
      "loss": 0.1121,
      "step": 33648
    },
    {
      "epoch": 0.9816500379251999,
      "grad_norm": 0.7544709780358172,
      "learning_rate": 8.82787207416469e-09,
      "loss": 0.12,
      "step": 33649
    },
    {
      "epoch": 0.9816792111558434,
      "grad_norm": 0.7819759218229222,
      "learning_rate": 8.79983307138732e-09,
      "loss": 0.1003,
      "step": 33650
    },
    {
      "epoch": 0.981708384386487,
      "grad_norm": 0.870250345927785,
      "learning_rate": 8.771838628853225e-09,
      "loss": 0.1235,
      "step": 33651
    },
    {
      "epoch": 0.9817375576171306,
      "grad_norm": 0.9524485265012032,
      "learning_rate": 8.743888746813312e-09,
      "loss": 0.1528,
      "step": 33652
    },
    {
      "epoch": 0.9817667308477741,
      "grad_norm": 0.8862812860992284,
      "learning_rate": 8.715983425515718e-09,
      "loss": 0.1059,
      "step": 33653
    },
    {
      "epoch": 0.9817959040784177,
      "grad_norm": 1.0019586128183071,
      "learning_rate": 8.688122665210796e-09,
      "loss": 0.1227,
      "step": 33654
    },
    {
      "epoch": 0.9818250773090612,
      "grad_norm": 1.1203320311115514,
      "learning_rate": 8.660306466146683e-09,
      "loss": 0.0952,
      "step": 33655
    },
    {
      "epoch": 0.9818542505397048,
      "grad_norm": 0.7728779293846545,
      "learning_rate": 8.632534828571516e-09,
      "loss": 0.1072,
      "step": 33656
    },
    {
      "epoch": 0.9818834237703483,
      "grad_norm": 0.7066081730542775,
      "learning_rate": 8.60480775273398e-09,
      "loss": 0.1179,
      "step": 33657
    },
    {
      "epoch": 0.9819125970009919,
      "grad_norm": 0.9057985351791848,
      "learning_rate": 8.577125238881102e-09,
      "loss": 0.1114,
      "step": 33658
    },
    {
      "epoch": 0.9819417702316354,
      "grad_norm": 0.725792279645597,
      "learning_rate": 8.549487287259906e-09,
      "loss": 0.1236,
      "step": 33659
    },
    {
      "epoch": 0.981970943462279,
      "grad_norm": 0.6701349775250318,
      "learning_rate": 8.521893898117417e-09,
      "loss": 0.1038,
      "step": 33660
    },
    {
      "epoch": 0.9820001166929225,
      "grad_norm": 0.7656834595744108,
      "learning_rate": 8.494345071700105e-09,
      "loss": 0.1128,
      "step": 33661
    },
    {
      "epoch": 0.9820292899235661,
      "grad_norm": 0.8793608677890365,
      "learning_rate": 8.46684080825333e-09,
      "loss": 0.1264,
      "step": 33662
    },
    {
      "epoch": 0.9820584631542097,
      "grad_norm": 0.6874765366075883,
      "learning_rate": 8.439381108023559e-09,
      "loss": 0.1028,
      "step": 33663
    },
    {
      "epoch": 0.9820876363848533,
      "grad_norm": 0.802076161968697,
      "learning_rate": 8.411965971255042e-09,
      "loss": 0.1213,
      "step": 33664
    },
    {
      "epoch": 0.9821168096154969,
      "grad_norm": 2.7792084946682474,
      "learning_rate": 8.38459539819314e-09,
      "loss": 0.0998,
      "step": 33665
    },
    {
      "epoch": 0.9821459828461404,
      "grad_norm": 0.8362544481670676,
      "learning_rate": 8.357269389081547e-09,
      "loss": 0.1176,
      "step": 33666
    },
    {
      "epoch": 0.982175156076784,
      "grad_norm": 0.7512691058425112,
      "learning_rate": 8.329987944165064e-09,
      "loss": 0.1282,
      "step": 33667
    },
    {
      "epoch": 0.9822043293074275,
      "grad_norm": 0.845468959005257,
      "learning_rate": 8.302751063686276e-09,
      "loss": 0.1196,
      "step": 33668
    },
    {
      "epoch": 0.9822335025380711,
      "grad_norm": 0.7454117878596589,
      "learning_rate": 8.275558747889434e-09,
      "loss": 0.1028,
      "step": 33669
    },
    {
      "epoch": 0.9822626757687146,
      "grad_norm": 0.815619565420258,
      "learning_rate": 8.248410997016565e-09,
      "loss": 0.104,
      "step": 33670
    },
    {
      "epoch": 0.9822918489993582,
      "grad_norm": 0.9247442920539567,
      "learning_rate": 8.221307811310808e-09,
      "loss": 0.1186,
      "step": 33671
    },
    {
      "epoch": 0.9823210222300017,
      "grad_norm": 0.8804617502901677,
      "learning_rate": 8.194249191013082e-09,
      "loss": 0.1227,
      "step": 33672
    },
    {
      "epoch": 0.9823501954606453,
      "grad_norm": 0.8159412005318231,
      "learning_rate": 8.167235136365414e-09,
      "loss": 0.122,
      "step": 33673
    },
    {
      "epoch": 0.9823793686912888,
      "grad_norm": 1.1741006117092003,
      "learning_rate": 8.140265647608725e-09,
      "loss": 0.1087,
      "step": 33674
    },
    {
      "epoch": 0.9824085419219324,
      "grad_norm": 0.7397438447371917,
      "learning_rate": 8.113340724985042e-09,
      "loss": 0.0956,
      "step": 33675
    },
    {
      "epoch": 0.9824377151525759,
      "grad_norm": 0.8174985130404561,
      "learning_rate": 8.086460368733062e-09,
      "loss": 0.1241,
      "step": 33676
    },
    {
      "epoch": 0.9824668883832196,
      "grad_norm": 0.9351532761366477,
      "learning_rate": 8.059624579093705e-09,
      "loss": 0.0985,
      "step": 33677
    },
    {
      "epoch": 0.9824960616138632,
      "grad_norm": 0.8264677583332484,
      "learning_rate": 8.032833356306224e-09,
      "loss": 0.1091,
      "step": 33678
    },
    {
      "epoch": 0.9825252348445067,
      "grad_norm": 0.8207624477175735,
      "learning_rate": 8.006086700609872e-09,
      "loss": 0.1065,
      "step": 33679
    },
    {
      "epoch": 0.9825544080751503,
      "grad_norm": 0.7723821360481832,
      "learning_rate": 7.979384612243901e-09,
      "loss": 0.1062,
      "step": 33680
    },
    {
      "epoch": 0.9825835813057938,
      "grad_norm": 0.8756408712289196,
      "learning_rate": 7.9527270914459e-09,
      "loss": 0.0945,
      "step": 33681
    },
    {
      "epoch": 0.9826127545364374,
      "grad_norm": 1.0934535005137842,
      "learning_rate": 7.926114138454566e-09,
      "loss": 0.1074,
      "step": 33682
    },
    {
      "epoch": 0.9826419277670809,
      "grad_norm": 0.70889898196579,
      "learning_rate": 7.899545753506933e-09,
      "loss": 0.1137,
      "step": 33683
    },
    {
      "epoch": 0.9826711009977245,
      "grad_norm": 0.7487615325736632,
      "learning_rate": 7.873021936840585e-09,
      "loss": 0.0959,
      "step": 33684
    },
    {
      "epoch": 0.982700274228368,
      "grad_norm": 0.6615837681059235,
      "learning_rate": 7.846542688692005e-09,
      "loss": 0.099,
      "step": 33685
    },
    {
      "epoch": 0.9827294474590116,
      "grad_norm": 0.763631584549047,
      "learning_rate": 7.820108009297667e-09,
      "loss": 0.1192,
      "step": 33686
    },
    {
      "epoch": 0.9827586206896551,
      "grad_norm": 0.8010618778523452,
      "learning_rate": 7.79371789889405e-09,
      "loss": 0.1097,
      "step": 33687
    },
    {
      "epoch": 0.9827877939202987,
      "grad_norm": 0.9549698425565081,
      "learning_rate": 7.767372357715964e-09,
      "loss": 0.1429,
      "step": 33688
    },
    {
      "epoch": 0.9828169671509422,
      "grad_norm": 0.7716348496530502,
      "learning_rate": 7.741071385999332e-09,
      "loss": 0.0939,
      "step": 33689
    },
    {
      "epoch": 0.9828461403815859,
      "grad_norm": 1.1080247987326997,
      "learning_rate": 7.714814983978414e-09,
      "loss": 0.1007,
      "step": 33690
    },
    {
      "epoch": 0.9828753136122295,
      "grad_norm": 0.8263190894010536,
      "learning_rate": 7.688603151888019e-09,
      "loss": 0.0956,
      "step": 33691
    },
    {
      "epoch": 0.982904486842873,
      "grad_norm": 0.9552939698584892,
      "learning_rate": 7.662435889962406e-09,
      "loss": 0.1035,
      "step": 33692
    },
    {
      "epoch": 0.9829336600735166,
      "grad_norm": 0.8852238599522226,
      "learning_rate": 7.636313198434164e-09,
      "loss": 0.1089,
      "step": 33693
    },
    {
      "epoch": 0.9829628333041601,
      "grad_norm": 0.7441534776528622,
      "learning_rate": 7.610235077537554e-09,
      "loss": 0.1068,
      "step": 33694
    },
    {
      "epoch": 0.9829920065348037,
      "grad_norm": 0.8998278979857637,
      "learning_rate": 7.584201527505163e-09,
      "loss": 0.1273,
      "step": 33695
    },
    {
      "epoch": 0.9830211797654472,
      "grad_norm": 0.9221953547531013,
      "learning_rate": 7.558212548568478e-09,
      "loss": 0.1212,
      "step": 33696
    },
    {
      "epoch": 0.9830503529960908,
      "grad_norm": 0.9391561379043512,
      "learning_rate": 7.532268140961197e-09,
      "loss": 0.0911,
      "step": 33697
    },
    {
      "epoch": 0.9830795262267343,
      "grad_norm": 0.7395787939323956,
      "learning_rate": 7.506368304913136e-09,
      "loss": 0.1265,
      "step": 33698
    },
    {
      "epoch": 0.9831086994573779,
      "grad_norm": 0.7795398518213585,
      "learning_rate": 7.48051304065689e-09,
      "loss": 0.1114,
      "step": 33699
    },
    {
      "epoch": 0.9831378726880214,
      "grad_norm": 0.7745135194126502,
      "learning_rate": 7.454702348422826e-09,
      "loss": 0.1247,
      "step": 33700
    },
    {
      "epoch": 0.983167045918665,
      "grad_norm": 0.8725922866159622,
      "learning_rate": 7.428936228441319e-09,
      "loss": 0.1271,
      "step": 33701
    },
    {
      "epoch": 0.9831962191493085,
      "grad_norm": 0.7601226929595027,
      "learning_rate": 7.403214680942739e-09,
      "loss": 0.0777,
      "step": 33702
    },
    {
      "epoch": 0.9832253923799521,
      "grad_norm": 0.9176273069054299,
      "learning_rate": 7.377537706155791e-09,
      "loss": 0.1291,
      "step": 33703
    },
    {
      "epoch": 0.9832545656105958,
      "grad_norm": 0.7472549936756512,
      "learning_rate": 7.351905304310847e-09,
      "loss": 0.0944,
      "step": 33704
    },
    {
      "epoch": 0.9832837388412393,
      "grad_norm": 0.9094716647789562,
      "learning_rate": 7.326317475636058e-09,
      "loss": 0.1141,
      "step": 33705
    },
    {
      "epoch": 0.9833129120718829,
      "grad_norm": 0.956582694642723,
      "learning_rate": 7.30077422036013e-09,
      "loss": 0.1215,
      "step": 33706
    },
    {
      "epoch": 0.9833420853025264,
      "grad_norm": 0.7160709952785085,
      "learning_rate": 7.275275538711213e-09,
      "loss": 0.099,
      "step": 33707
    },
    {
      "epoch": 0.98337125853317,
      "grad_norm": 0.7956117148602124,
      "learning_rate": 7.249821430916348e-09,
      "loss": 0.1043,
      "step": 33708
    },
    {
      "epoch": 0.9834004317638135,
      "grad_norm": 1.0359450303438382,
      "learning_rate": 7.224411897203687e-09,
      "loss": 0.1045,
      "step": 33709
    },
    {
      "epoch": 0.9834296049944571,
      "grad_norm": 0.842684944587351,
      "learning_rate": 7.199046937799159e-09,
      "loss": 0.093,
      "step": 33710
    },
    {
      "epoch": 0.9834587782251006,
      "grad_norm": 0.8383679980030517,
      "learning_rate": 7.173726552929805e-09,
      "loss": 0.1224,
      "step": 33711
    },
    {
      "epoch": 0.9834879514557442,
      "grad_norm": 0.7657670068051833,
      "learning_rate": 7.148450742821556e-09,
      "loss": 0.1023,
      "step": 33712
    },
    {
      "epoch": 0.9835171246863877,
      "grad_norm": 0.8992060937948977,
      "learning_rate": 7.123219507700341e-09,
      "loss": 0.1017,
      "step": 33713
    },
    {
      "epoch": 0.9835462979170313,
      "grad_norm": 0.8077556941158505,
      "learning_rate": 7.098032847790426e-09,
      "loss": 0.1071,
      "step": 33714
    },
    {
      "epoch": 0.9835754711476749,
      "grad_norm": 0.7106926564942275,
      "learning_rate": 7.072890763317742e-09,
      "loss": 0.1024,
      "step": 33715
    },
    {
      "epoch": 0.9836046443783184,
      "grad_norm": 0.8228047962547985,
      "learning_rate": 7.047793254506552e-09,
      "loss": 0.1239,
      "step": 33716
    },
    {
      "epoch": 0.9836338176089621,
      "grad_norm": 1.2068928857190815,
      "learning_rate": 7.0227403215805675e-09,
      "loss": 0.1149,
      "step": 33717
    },
    {
      "epoch": 0.9836629908396056,
      "grad_norm": 0.6898694648620306,
      "learning_rate": 6.997731964764054e-09,
      "loss": 0.093,
      "step": 33718
    },
    {
      "epoch": 0.9836921640702492,
      "grad_norm": 0.6336542462070724,
      "learning_rate": 6.97276818427961e-09,
      "loss": 0.104,
      "step": 33719
    },
    {
      "epoch": 0.9837213373008927,
      "grad_norm": 0.7946128929906376,
      "learning_rate": 6.947848980349836e-09,
      "loss": 0.1118,
      "step": 33720
    },
    {
      "epoch": 0.9837505105315363,
      "grad_norm": 0.8331790764768273,
      "learning_rate": 6.922974353198441e-09,
      "loss": 0.1047,
      "step": 33721
    },
    {
      "epoch": 0.9837796837621798,
      "grad_norm": 0.9639134480924492,
      "learning_rate": 6.898144303046361e-09,
      "loss": 0.1134,
      "step": 33722
    },
    {
      "epoch": 0.9838088569928234,
      "grad_norm": 0.9261810124901682,
      "learning_rate": 6.873358830116194e-09,
      "loss": 0.1219,
      "step": 33723
    },
    {
      "epoch": 0.9838380302234669,
      "grad_norm": 0.9425190641302295,
      "learning_rate": 6.848617934628321e-09,
      "loss": 0.1167,
      "step": 33724
    },
    {
      "epoch": 0.9838672034541105,
      "grad_norm": 1.023442998022823,
      "learning_rate": 6.82392161680423e-09,
      "loss": 0.1337,
      "step": 33725
    },
    {
      "epoch": 0.983896376684754,
      "grad_norm": 0.793750959786204,
      "learning_rate": 6.799269876863745e-09,
      "loss": 0.1188,
      "step": 33726
    },
    {
      "epoch": 0.9839255499153976,
      "grad_norm": 0.9351271772309842,
      "learning_rate": 6.7746627150278024e-09,
      "loss": 0.116,
      "step": 33727
    },
    {
      "epoch": 0.9839547231460412,
      "grad_norm": 0.7967768431673874,
      "learning_rate": 6.750100131515669e-09,
      "loss": 0.1201,
      "step": 33728
    },
    {
      "epoch": 0.9839838963766847,
      "grad_norm": 0.8303397987610552,
      "learning_rate": 6.725582126546615e-09,
      "loss": 0.1087,
      "step": 33729
    },
    {
      "epoch": 0.9840130696073283,
      "grad_norm": 0.7244863809147774,
      "learning_rate": 6.701108700339354e-09,
      "loss": 0.1062,
      "step": 33730
    },
    {
      "epoch": 0.9840422428379719,
      "grad_norm": 0.8722186341634288,
      "learning_rate": 6.6766798531126e-09,
      "loss": 0.1254,
      "step": 33731
    },
    {
      "epoch": 0.9840714160686155,
      "grad_norm": 0.7624681886954229,
      "learning_rate": 6.652295585085066e-09,
      "loss": 0.1193,
      "step": 33732
    },
    {
      "epoch": 0.984100589299259,
      "grad_norm": 0.8032247254944517,
      "learning_rate": 6.627955896473248e-09,
      "loss": 0.1081,
      "step": 33733
    },
    {
      "epoch": 0.9841297625299026,
      "grad_norm": 0.9768125206862693,
      "learning_rate": 6.603660787495303e-09,
      "loss": 0.1346,
      "step": 33734
    },
    {
      "epoch": 0.9841589357605461,
      "grad_norm": 0.8058024424483172,
      "learning_rate": 6.579410258367724e-09,
      "loss": 0.0867,
      "step": 33735
    },
    {
      "epoch": 0.9841881089911897,
      "grad_norm": 0.7482135268712199,
      "learning_rate": 6.5552043093070065e-09,
      "loss": 0.0987,
      "step": 33736
    },
    {
      "epoch": 0.9842172822218332,
      "grad_norm": 0.9304102671489258,
      "learning_rate": 6.531042940529642e-09,
      "loss": 0.1318,
      "step": 33737
    },
    {
      "epoch": 0.9842464554524768,
      "grad_norm": 0.7830598836781537,
      "learning_rate": 6.5069261522510145e-09,
      "loss": 0.1001,
      "step": 33738
    },
    {
      "epoch": 0.9842756286831204,
      "grad_norm": 0.7684164285739562,
      "learning_rate": 6.482853944686507e-09,
      "loss": 0.0917,
      "step": 33739
    },
    {
      "epoch": 0.9843048019137639,
      "grad_norm": 0.7536747962239854,
      "learning_rate": 6.458826318050948e-09,
      "loss": 0.1106,
      "step": 33740
    },
    {
      "epoch": 0.9843339751444075,
      "grad_norm": 0.7836223399991963,
      "learning_rate": 6.434843272558611e-09,
      "loss": 0.1087,
      "step": 33741
    },
    {
      "epoch": 0.984363148375051,
      "grad_norm": 0.8938947438845976,
      "learning_rate": 6.410904808424878e-09,
      "loss": 0.123,
      "step": 33742
    },
    {
      "epoch": 0.9843923216056946,
      "grad_norm": 0.7465379618144085,
      "learning_rate": 6.387010925861803e-09,
      "loss": 0.0952,
      "step": 33743
    },
    {
      "epoch": 0.9844214948363382,
      "grad_norm": 0.7405949789405286,
      "learning_rate": 6.363161625083103e-09,
      "loss": 0.1137,
      "step": 33744
    },
    {
      "epoch": 0.9844506680669818,
      "grad_norm": 0.8374986638280276,
      "learning_rate": 6.339356906303051e-09,
      "loss": 0.1294,
      "step": 33745
    },
    {
      "epoch": 0.9844798412976253,
      "grad_norm": 0.9245977154510832,
      "learning_rate": 6.315596769732035e-09,
      "loss": 0.1078,
      "step": 33746
    },
    {
      "epoch": 0.9845090145282689,
      "grad_norm": 0.78849944130388,
      "learning_rate": 6.291881215584328e-09,
      "loss": 0.1185,
      "step": 33747
    },
    {
      "epoch": 0.9845381877589124,
      "grad_norm": 0.8158094863158458,
      "learning_rate": 6.268210244069761e-09,
      "loss": 0.1102,
      "step": 33748
    },
    {
      "epoch": 0.984567360989556,
      "grad_norm": 0.8965222529472734,
      "learning_rate": 6.244583855400943e-09,
      "loss": 0.1218,
      "step": 33749
    },
    {
      "epoch": 0.9845965342201995,
      "grad_norm": 0.7050811801846438,
      "learning_rate": 6.2210020497882605e-09,
      "loss": 0.0927,
      "step": 33750
    },
    {
      "epoch": 0.9846257074508431,
      "grad_norm": 0.8155386673770202,
      "learning_rate": 6.197464827442657e-09,
      "loss": 0.1248,
      "step": 33751
    },
    {
      "epoch": 0.9846548806814867,
      "grad_norm": 0.8699865058644309,
      "learning_rate": 6.173972188573407e-09,
      "loss": 0.1034,
      "step": 33752
    },
    {
      "epoch": 0.9846840539121302,
      "grad_norm": 0.7230642282293623,
      "learning_rate": 6.1505241333909e-09,
      "loss": 0.0969,
      "step": 33753
    },
    {
      "epoch": 0.9847132271427738,
      "grad_norm": 0.7383807593153091,
      "learning_rate": 6.127120662104968e-09,
      "loss": 0.1072,
      "step": 33754
    },
    {
      "epoch": 0.9847424003734173,
      "grad_norm": 0.7064889880173199,
      "learning_rate": 6.103761774923778e-09,
      "loss": 0.1239,
      "step": 33755
    },
    {
      "epoch": 0.9847715736040609,
      "grad_norm": 0.7074108063353488,
      "learning_rate": 6.080447472055495e-09,
      "loss": 0.1309,
      "step": 33756
    },
    {
      "epoch": 0.9848007468347044,
      "grad_norm": 0.9665701456235283,
      "learning_rate": 6.057177753709398e-09,
      "loss": 0.1446,
      "step": 33757
    },
    {
      "epoch": 0.9848299200653481,
      "grad_norm": 0.7167502735280957,
      "learning_rate": 6.033952620092542e-09,
      "loss": 0.1068,
      "step": 33758
    },
    {
      "epoch": 0.9848590932959916,
      "grad_norm": 1.018252063516159,
      "learning_rate": 6.010772071412541e-09,
      "loss": 0.1252,
      "step": 33759
    },
    {
      "epoch": 0.9848882665266352,
      "grad_norm": 0.8126773964569709,
      "learning_rate": 5.987636107875894e-09,
      "loss": 0.0858,
      "step": 33760
    },
    {
      "epoch": 0.9849174397572787,
      "grad_norm": 0.7460523752464587,
      "learning_rate": 5.964544729689658e-09,
      "loss": 0.1034,
      "step": 33761
    },
    {
      "epoch": 0.9849466129879223,
      "grad_norm": 0.7929453814306174,
      "learning_rate": 5.941497937059227e-09,
      "loss": 0.1156,
      "step": 33762
    },
    {
      "epoch": 0.9849757862185659,
      "grad_norm": 0.8474348784984916,
      "learning_rate": 5.918495730191654e-09,
      "loss": 0.1034,
      "step": 33763
    },
    {
      "epoch": 0.9850049594492094,
      "grad_norm": 0.845453547210441,
      "learning_rate": 5.895538109291221e-09,
      "loss": 0.1046,
      "step": 33764
    },
    {
      "epoch": 0.985034132679853,
      "grad_norm": 0.8228342380688106,
      "learning_rate": 5.8726250745633205e-09,
      "loss": 0.1169,
      "step": 33765
    },
    {
      "epoch": 0.9850633059104965,
      "grad_norm": 0.8642893571375703,
      "learning_rate": 5.849756626212788e-09,
      "loss": 0.1035,
      "step": 33766
    },
    {
      "epoch": 0.9850924791411401,
      "grad_norm": 0.873641406793772,
      "learning_rate": 5.826932764442794e-09,
      "loss": 0.1319,
      "step": 33767
    },
    {
      "epoch": 0.9851216523717836,
      "grad_norm": 0.7918134576248088,
      "learning_rate": 5.804153489458175e-09,
      "loss": 0.0969,
      "step": 33768
    },
    {
      "epoch": 0.9851508256024272,
      "grad_norm": 0.7837560319809252,
      "learning_rate": 5.781418801461547e-09,
      "loss": 0.1002,
      "step": 33769
    },
    {
      "epoch": 0.9851799988330707,
      "grad_norm": 0.8508208071336113,
      "learning_rate": 5.758728700656635e-09,
      "loss": 0.1048,
      "step": 33770
    },
    {
      "epoch": 0.9852091720637144,
      "grad_norm": 0.8732390898780681,
      "learning_rate": 5.736083187244945e-09,
      "loss": 0.111,
      "step": 33771
    },
    {
      "epoch": 0.9852383452943579,
      "grad_norm": 0.726612294000801,
      "learning_rate": 5.713482261429648e-09,
      "loss": 0.1099,
      "step": 33772
    },
    {
      "epoch": 0.9852675185250015,
      "grad_norm": 0.6977549401593138,
      "learning_rate": 5.690925923412249e-09,
      "loss": 0.0882,
      "step": 33773
    },
    {
      "epoch": 0.985296691755645,
      "grad_norm": 1.0404520066601548,
      "learning_rate": 5.6684141733936996e-09,
      "loss": 0.1109,
      "step": 33774
    },
    {
      "epoch": 0.9853258649862886,
      "grad_norm": 0.8411315454281804,
      "learning_rate": 5.645947011576059e-09,
      "loss": 0.1122,
      "step": 33775
    },
    {
      "epoch": 0.9853550382169322,
      "grad_norm": 0.7566714896192657,
      "learning_rate": 5.623524438158612e-09,
      "loss": 0.1029,
      "step": 33776
    },
    {
      "epoch": 0.9853842114475757,
      "grad_norm": 0.7386793368375577,
      "learning_rate": 5.601146453341755e-09,
      "loss": 0.1045,
      "step": 33777
    },
    {
      "epoch": 0.9854133846782193,
      "grad_norm": 0.8420695324047094,
      "learning_rate": 5.578813057325883e-09,
      "loss": 0.1153,
      "step": 33778
    },
    {
      "epoch": 0.9854425579088628,
      "grad_norm": 0.8975325100795434,
      "learning_rate": 5.55652425031028e-09,
      "loss": 0.1079,
      "step": 33779
    },
    {
      "epoch": 0.9854717311395064,
      "grad_norm": 0.7724228338656928,
      "learning_rate": 5.534280032493678e-09,
      "loss": 0.0951,
      "step": 33780
    },
    {
      "epoch": 0.9855009043701499,
      "grad_norm": 0.9173868650710375,
      "learning_rate": 5.512080404074804e-09,
      "loss": 0.1233,
      "step": 33781
    },
    {
      "epoch": 0.9855300776007935,
      "grad_norm": 0.9017049037987892,
      "learning_rate": 5.489925365251836e-09,
      "loss": 0.1235,
      "step": 33782
    },
    {
      "epoch": 0.985559250831437,
      "grad_norm": 0.6661587110680808,
      "learning_rate": 5.467814916222392e-09,
      "loss": 0.1097,
      "step": 33783
    },
    {
      "epoch": 0.9855884240620806,
      "grad_norm": 0.6911047352843254,
      "learning_rate": 5.445749057184091e-09,
      "loss": 0.099,
      "step": 33784
    },
    {
      "epoch": 0.9856175972927242,
      "grad_norm": 0.8754468376270351,
      "learning_rate": 5.423727788333444e-09,
      "loss": 0.1084,
      "step": 33785
    },
    {
      "epoch": 0.9856467705233678,
      "grad_norm": 0.8638236782689099,
      "learning_rate": 5.40175110986807e-09,
      "loss": 0.1234,
      "step": 33786
    },
    {
      "epoch": 0.9856759437540114,
      "grad_norm": 0.8495487146954932,
      "learning_rate": 5.379819021982813e-09,
      "loss": 0.1126,
      "step": 33787
    },
    {
      "epoch": 0.9857051169846549,
      "grad_norm": 0.774478508037016,
      "learning_rate": 5.3579315248747376e-09,
      "loss": 0.1149,
      "step": 33788
    },
    {
      "epoch": 0.9857342902152985,
      "grad_norm": 0.8827195113424046,
      "learning_rate": 5.336088618738688e-09,
      "loss": 0.1014,
      "step": 33789
    },
    {
      "epoch": 0.985763463445942,
      "grad_norm": 0.6734589417483602,
      "learning_rate": 5.314290303770065e-09,
      "loss": 0.1233,
      "step": 33790
    },
    {
      "epoch": 0.9857926366765856,
      "grad_norm": 0.7914457591354623,
      "learning_rate": 5.292536580162599e-09,
      "loss": 0.0978,
      "step": 33791
    },
    {
      "epoch": 0.9858218099072291,
      "grad_norm": 0.7155567007460567,
      "learning_rate": 5.270827448111137e-09,
      "loss": 0.0885,
      "step": 33792
    },
    {
      "epoch": 0.9858509831378727,
      "grad_norm": 0.809195209325187,
      "learning_rate": 5.249162907809413e-09,
      "loss": 0.1077,
      "step": 33793
    },
    {
      "epoch": 0.9858801563685162,
      "grad_norm": 0.7076078580788532,
      "learning_rate": 5.227542959450604e-09,
      "loss": 0.1372,
      "step": 33794
    },
    {
      "epoch": 0.9859093295991598,
      "grad_norm": 0.9310975834524328,
      "learning_rate": 5.2059676032284454e-09,
      "loss": 0.1441,
      "step": 33795
    },
    {
      "epoch": 0.9859385028298033,
      "grad_norm": 0.9107781975811486,
      "learning_rate": 5.1844368393350054e-09,
      "loss": 0.1368,
      "step": 33796
    },
    {
      "epoch": 0.9859676760604469,
      "grad_norm": 0.8790947077131462,
      "learning_rate": 5.162950667962352e-09,
      "loss": 0.1132,
      "step": 33797
    },
    {
      "epoch": 0.9859968492910905,
      "grad_norm": 0.8364424603862561,
      "learning_rate": 5.141509089301999e-09,
      "loss": 0.1085,
      "step": 33798
    },
    {
      "epoch": 0.9860260225217341,
      "grad_norm": 1.0138809314595514,
      "learning_rate": 5.120112103546571e-09,
      "loss": 0.1121,
      "step": 33799
    },
    {
      "epoch": 0.9860551957523777,
      "grad_norm": 0.9221749366125264,
      "learning_rate": 5.09875971088647e-09,
      "loss": 0.1234,
      "step": 33800
    },
    {
      "epoch": 0.9860843689830212,
      "grad_norm": 0.6832511862452569,
      "learning_rate": 5.077451911512099e-09,
      "loss": 0.1254,
      "step": 33801
    },
    {
      "epoch": 0.9861135422136648,
      "grad_norm": 0.8844021458838913,
      "learning_rate": 5.056188705613863e-09,
      "loss": 0.1065,
      "step": 33802
    },
    {
      "epoch": 0.9861427154443083,
      "grad_norm": 0.8742626585704845,
      "learning_rate": 5.0349700933810534e-09,
      "loss": 0.0935,
      "step": 33803
    },
    {
      "epoch": 0.9861718886749519,
      "grad_norm": 0.6718860110054,
      "learning_rate": 5.013796075004074e-09,
      "loss": 0.1122,
      "step": 33804
    },
    {
      "epoch": 0.9862010619055954,
      "grad_norm": 0.7714586732886123,
      "learning_rate": 4.9926666506716624e-09,
      "loss": 0.1059,
      "step": 33805
    },
    {
      "epoch": 0.986230235136239,
      "grad_norm": 0.83949546271214,
      "learning_rate": 4.971581820572002e-09,
      "loss": 0.105,
      "step": 33806
    },
    {
      "epoch": 0.9862594083668825,
      "grad_norm": 0.8939256990744707,
      "learning_rate": 4.950541584893831e-09,
      "loss": 0.0902,
      "step": 33807
    },
    {
      "epoch": 0.9862885815975261,
      "grad_norm": 0.7161276479660431,
      "learning_rate": 4.929545943825331e-09,
      "loss": 0.1202,
      "step": 33808
    },
    {
      "epoch": 0.9863177548281696,
      "grad_norm": 0.9380570904591177,
      "learning_rate": 4.9085948975524654e-09,
      "loss": 0.1125,
      "step": 33809
    },
    {
      "epoch": 0.9863469280588132,
      "grad_norm": 0.8168481495001629,
      "learning_rate": 4.887688446263971e-09,
      "loss": 0.1038,
      "step": 33810
    },
    {
      "epoch": 0.9863761012894567,
      "grad_norm": 0.860385922459585,
      "learning_rate": 4.866826590145257e-09,
      "loss": 0.0977,
      "step": 33811
    },
    {
      "epoch": 0.9864052745201004,
      "grad_norm": 1.0703660474635481,
      "learning_rate": 4.846009329383394e-09,
      "loss": 0.1178,
      "step": 33812
    },
    {
      "epoch": 0.986434447750744,
      "grad_norm": 0.907132889092109,
      "learning_rate": 4.825236664163791e-09,
      "loss": 0.1323,
      "step": 33813
    },
    {
      "epoch": 0.9864636209813875,
      "grad_norm": 1.175990114333469,
      "learning_rate": 4.804508594671853e-09,
      "loss": 0.0998,
      "step": 33814
    },
    {
      "epoch": 0.9864927942120311,
      "grad_norm": 0.9312263874009149,
      "learning_rate": 4.783825121093544e-09,
      "loss": 0.1324,
      "step": 33815
    },
    {
      "epoch": 0.9865219674426746,
      "grad_norm": 0.8647671220568324,
      "learning_rate": 4.7631862436120506e-09,
      "loss": 0.1164,
      "step": 33816
    },
    {
      "epoch": 0.9865511406733182,
      "grad_norm": 0.933902115857926,
      "learning_rate": 4.7425919624122244e-09,
      "loss": 0.1037,
      "step": 33817
    },
    {
      "epoch": 0.9865803139039617,
      "grad_norm": 0.8014744857424474,
      "learning_rate": 4.722042277678918e-09,
      "loss": 0.1086,
      "step": 33818
    },
    {
      "epoch": 0.9866094871346053,
      "grad_norm": 0.7676682128542434,
      "learning_rate": 4.701537189594207e-09,
      "loss": 0.1056,
      "step": 33819
    },
    {
      "epoch": 0.9866386603652488,
      "grad_norm": 0.9070406343868757,
      "learning_rate": 4.681076698341836e-09,
      "loss": 0.1154,
      "step": 33820
    },
    {
      "epoch": 0.9866678335958924,
      "grad_norm": 0.8643679289367764,
      "learning_rate": 4.6606608041038785e-09,
      "loss": 0.1106,
      "step": 33821
    },
    {
      "epoch": 0.9866970068265359,
      "grad_norm": 0.8080790414757442,
      "learning_rate": 4.640289507063522e-09,
      "loss": 0.1027,
      "step": 33822
    },
    {
      "epoch": 0.9867261800571795,
      "grad_norm": 0.8425880321963215,
      "learning_rate": 4.6199628074022895e-09,
      "loss": 0.1249,
      "step": 33823
    },
    {
      "epoch": 0.986755353287823,
      "grad_norm": 1.1170594980059778,
      "learning_rate": 4.599680705301146e-09,
      "loss": 0.1184,
      "step": 33824
    },
    {
      "epoch": 0.9867845265184667,
      "grad_norm": 0.780067863775467,
      "learning_rate": 4.5794432009416134e-09,
      "loss": 0.0808,
      "step": 33825
    },
    {
      "epoch": 0.9868136997491103,
      "grad_norm": 0.6142058142011302,
      "learning_rate": 4.559250294504658e-09,
      "loss": 0.1092,
      "step": 33826
    },
    {
      "epoch": 0.9868428729797538,
      "grad_norm": 0.841668590756161,
      "learning_rate": 4.539101986170136e-09,
      "loss": 0.1227,
      "step": 33827
    },
    {
      "epoch": 0.9868720462103974,
      "grad_norm": 1.0237218215407604,
      "learning_rate": 4.518998276117903e-09,
      "loss": 0.1022,
      "step": 33828
    },
    {
      "epoch": 0.9869012194410409,
      "grad_norm": 0.7902743064688862,
      "learning_rate": 4.498939164527261e-09,
      "loss": 0.11,
      "step": 33829
    },
    {
      "epoch": 0.9869303926716845,
      "grad_norm": 0.7704266705221741,
      "learning_rate": 4.4789246515780645e-09,
      "loss": 0.097,
      "step": 33830
    },
    {
      "epoch": 0.986959565902328,
      "grad_norm": 1.0580313645697441,
      "learning_rate": 4.458954737447951e-09,
      "loss": 0.1272,
      "step": 33831
    },
    {
      "epoch": 0.9869887391329716,
      "grad_norm": 0.7736702002583923,
      "learning_rate": 4.4390294223162215e-09,
      "loss": 0.1096,
      "step": 33832
    },
    {
      "epoch": 0.9870179123636151,
      "grad_norm": 0.8535642495482743,
      "learning_rate": 4.419148706359955e-09,
      "loss": 0.1002,
      "step": 33833
    },
    {
      "epoch": 0.9870470855942587,
      "grad_norm": 0.8200675228157727,
      "learning_rate": 4.399312589757343e-09,
      "loss": 0.1245,
      "step": 33834
    },
    {
      "epoch": 0.9870762588249022,
      "grad_norm": 0.7445801999459258,
      "learning_rate": 4.379521072684911e-09,
      "loss": 0.0944,
      "step": 33835
    },
    {
      "epoch": 0.9871054320555458,
      "grad_norm": 0.953599559050052,
      "learning_rate": 4.3597741553191856e-09,
      "loss": 0.1251,
      "step": 33836
    },
    {
      "epoch": 0.9871346052861893,
      "grad_norm": 0.7392225373935208,
      "learning_rate": 4.3400718378372455e-09,
      "loss": 0.1225,
      "step": 33837
    },
    {
      "epoch": 0.9871637785168329,
      "grad_norm": 1.169378435526558,
      "learning_rate": 4.320414120415062e-09,
      "loss": 0.1198,
      "step": 33838
    },
    {
      "epoch": 0.9871929517474766,
      "grad_norm": 0.9150936407751873,
      "learning_rate": 4.30080100322694e-09,
      "loss": 0.0919,
      "step": 33839
    },
    {
      "epoch": 0.9872221249781201,
      "grad_norm": 0.6768329207289796,
      "learning_rate": 4.281232486448849e-09,
      "loss": 0.1148,
      "step": 33840
    },
    {
      "epoch": 0.9872512982087637,
      "grad_norm": 0.8517050775068293,
      "learning_rate": 4.2617085702556515e-09,
      "loss": 0.1131,
      "step": 33841
    },
    {
      "epoch": 0.9872804714394072,
      "grad_norm": 0.842801122120863,
      "learning_rate": 4.242229254821095e-09,
      "loss": 0.1756,
      "step": 33842
    },
    {
      "epoch": 0.9873096446700508,
      "grad_norm": 0.7570353160226188,
      "learning_rate": 4.222794540318931e-09,
      "loss": 0.1001,
      "step": 33843
    },
    {
      "epoch": 0.9873388179006943,
      "grad_norm": 0.8437804409050115,
      "learning_rate": 4.203404426924018e-09,
      "loss": 0.1161,
      "step": 33844
    },
    {
      "epoch": 0.9873679911313379,
      "grad_norm": 0.9523134795250229,
      "learning_rate": 4.184058914807887e-09,
      "loss": 0.108,
      "step": 33845
    },
    {
      "epoch": 0.9873971643619814,
      "grad_norm": 0.9045730184233152,
      "learning_rate": 4.164758004143732e-09,
      "loss": 0.1057,
      "step": 33846
    },
    {
      "epoch": 0.987426337592625,
      "grad_norm": 0.7367454289036565,
      "learning_rate": 4.145501695104193e-09,
      "loss": 0.1353,
      "step": 33847
    },
    {
      "epoch": 0.9874555108232685,
      "grad_norm": 2.4213463654490845,
      "learning_rate": 4.1262899878613535e-09,
      "loss": 0.1337,
      "step": 33848
    },
    {
      "epoch": 0.9874846840539121,
      "grad_norm": 0.7595899548234707,
      "learning_rate": 4.10712288258619e-09,
      "loss": 0.1229,
      "step": 33849
    },
    {
      "epoch": 0.9875138572845557,
      "grad_norm": 0.7828957073264476,
      "learning_rate": 4.088000379449675e-09,
      "loss": 0.1158,
      "step": 33850
    },
    {
      "epoch": 0.9875430305151992,
      "grad_norm": 0.9058873105421239,
      "learning_rate": 4.0689224786233385e-09,
      "loss": 0.1097,
      "step": 33851
    },
    {
      "epoch": 0.9875722037458429,
      "grad_norm": 0.9725555610164482,
      "learning_rate": 4.04988918027649e-09,
      "loss": 0.129,
      "step": 33852
    },
    {
      "epoch": 0.9876013769764864,
      "grad_norm": 0.8101288708307562,
      "learning_rate": 4.030900484580102e-09,
      "loss": 0.1048,
      "step": 33853
    },
    {
      "epoch": 0.98763055020713,
      "grad_norm": 0.9430845170879232,
      "learning_rate": 4.011956391702932e-09,
      "loss": 0.1083,
      "step": 33854
    },
    {
      "epoch": 0.9876597234377735,
      "grad_norm": 0.7060511064029318,
      "learning_rate": 3.9930569018148406e-09,
      "loss": 0.117,
      "step": 33855
    },
    {
      "epoch": 0.9876888966684171,
      "grad_norm": 0.9708052015115435,
      "learning_rate": 3.974202015083473e-09,
      "loss": 0.1143,
      "step": 33856
    },
    {
      "epoch": 0.9877180698990606,
      "grad_norm": 0.8593164344162948,
      "learning_rate": 3.955391731678138e-09,
      "loss": 0.1169,
      "step": 33857
    },
    {
      "epoch": 0.9877472431297042,
      "grad_norm": 0.6501196245436692,
      "learning_rate": 3.936626051766479e-09,
      "loss": 0.0969,
      "step": 33858
    },
    {
      "epoch": 0.9877764163603477,
      "grad_norm": 0.875014284433436,
      "learning_rate": 3.917904975515585e-09,
      "loss": 0.1012,
      "step": 33859
    },
    {
      "epoch": 0.9878055895909913,
      "grad_norm": 0.7746246360939394,
      "learning_rate": 3.8992285030930995e-09,
      "loss": 0.1375,
      "step": 33860
    },
    {
      "epoch": 0.9878347628216348,
      "grad_norm": 0.7526757611316315,
      "learning_rate": 3.880596634666112e-09,
      "loss": 0.1088,
      "step": 33861
    },
    {
      "epoch": 0.9878639360522784,
      "grad_norm": 0.6386235396886358,
      "learning_rate": 3.862009370400044e-09,
      "loss": 0.1131,
      "step": 33862
    },
    {
      "epoch": 0.987893109282922,
      "grad_norm": 0.7178575161832981,
      "learning_rate": 3.84346671046143e-09,
      "loss": 0.101,
      "step": 33863
    },
    {
      "epoch": 0.9879222825135655,
      "grad_norm": 0.8271433401077359,
      "learning_rate": 3.824968655015138e-09,
      "loss": 0.0895,
      "step": 33864
    },
    {
      "epoch": 0.9879514557442091,
      "grad_norm": 1.0395034701488943,
      "learning_rate": 3.806515204227701e-09,
      "loss": 0.1209,
      "step": 33865
    },
    {
      "epoch": 0.9879806289748527,
      "grad_norm": 0.8263482701477021,
      "learning_rate": 3.788106358262322e-09,
      "loss": 0.1164,
      "step": 33866
    },
    {
      "epoch": 0.9880098022054963,
      "grad_norm": 0.8350036899593295,
      "learning_rate": 3.769742117284425e-09,
      "loss": 0.0985,
      "step": 33867
    },
    {
      "epoch": 0.9880389754361398,
      "grad_norm": 0.8196861699741413,
      "learning_rate": 3.751422481457212e-09,
      "loss": 0.0894,
      "step": 33868
    },
    {
      "epoch": 0.9880681486667834,
      "grad_norm": 0.7286077158393757,
      "learning_rate": 3.733147450944996e-09,
      "loss": 0.1221,
      "step": 33869
    },
    {
      "epoch": 0.9880973218974269,
      "grad_norm": 1.1734134072376297,
      "learning_rate": 3.714917025910425e-09,
      "loss": 0.1434,
      "step": 33870
    },
    {
      "epoch": 0.9881264951280705,
      "grad_norm": 0.7874856809417258,
      "learning_rate": 3.6967312065161466e-09,
      "loss": 0.0946,
      "step": 33871
    },
    {
      "epoch": 0.988155668358714,
      "grad_norm": 0.8778410972133301,
      "learning_rate": 3.678589992925363e-09,
      "loss": 0.1083,
      "step": 33872
    },
    {
      "epoch": 0.9881848415893576,
      "grad_norm": 1.598198545475215,
      "learning_rate": 3.6604933852985023e-09,
      "loss": 0.1026,
      "step": 33873
    },
    {
      "epoch": 0.9882140148200012,
      "grad_norm": 0.8277714856641315,
      "learning_rate": 3.642441383798767e-09,
      "loss": 0.0936,
      "step": 33874
    },
    {
      "epoch": 0.9882431880506447,
      "grad_norm": 0.833821378908683,
      "learning_rate": 3.6244339885865843e-09,
      "loss": 0.0986,
      "step": 33875
    },
    {
      "epoch": 0.9882723612812883,
      "grad_norm": 0.6860716202291299,
      "learning_rate": 3.606471199822381e-09,
      "loss": 0.1097,
      "step": 33876
    },
    {
      "epoch": 0.9883015345119318,
      "grad_norm": 0.9047231834060151,
      "learning_rate": 3.588553017666585e-09,
      "loss": 0.1197,
      "step": 33877
    },
    {
      "epoch": 0.9883307077425754,
      "grad_norm": 0.8772978706130129,
      "learning_rate": 3.5706794422801783e-09,
      "loss": 0.1137,
      "step": 33878
    },
    {
      "epoch": 0.988359880973219,
      "grad_norm": 1.0249440848752416,
      "learning_rate": 3.5528504738213676e-09,
      "loss": 0.1081,
      "step": 33879
    },
    {
      "epoch": 0.9883890542038626,
      "grad_norm": 0.7789866297536088,
      "learning_rate": 3.535066112450025e-09,
      "loss": 0.0979,
      "step": 33880
    },
    {
      "epoch": 0.9884182274345061,
      "grad_norm": 0.7906564491597248,
      "learning_rate": 3.5173263583254678e-09,
      "loss": 0.0884,
      "step": 33881
    },
    {
      "epoch": 0.9884474006651497,
      "grad_norm": 1.0393986718624706,
      "learning_rate": 3.4996312116047925e-09,
      "loss": 0.1049,
      "step": 33882
    },
    {
      "epoch": 0.9884765738957932,
      "grad_norm": 0.7313767163172219,
      "learning_rate": 3.481980672446761e-09,
      "loss": 0.0933,
      "step": 33883
    },
    {
      "epoch": 0.9885057471264368,
      "grad_norm": 0.85539088527848,
      "learning_rate": 3.4643747410090244e-09,
      "loss": 0.1167,
      "step": 33884
    },
    {
      "epoch": 0.9885349203570803,
      "grad_norm": 0.8256152225779988,
      "learning_rate": 3.44681341744868e-09,
      "loss": 0.1062,
      "step": 33885
    },
    {
      "epoch": 0.9885640935877239,
      "grad_norm": 0.7836846465773687,
      "learning_rate": 3.429296701922269e-09,
      "loss": 0.1139,
      "step": 33886
    },
    {
      "epoch": 0.9885932668183675,
      "grad_norm": 0.8079916059808888,
      "learning_rate": 3.4118245945863326e-09,
      "loss": 0.1251,
      "step": 33887
    },
    {
      "epoch": 0.988622440049011,
      "grad_norm": 1.049243222210774,
      "learning_rate": 3.3943970955968573e-09,
      "loss": 0.1053,
      "step": 33888
    },
    {
      "epoch": 0.9886516132796546,
      "grad_norm": 0.8836078354958317,
      "learning_rate": 3.377014205109275e-09,
      "loss": 0.1203,
      "step": 33889
    },
    {
      "epoch": 0.9886807865102981,
      "grad_norm": 0.6035320234324897,
      "learning_rate": 3.3596759232790156e-09,
      "loss": 0.1092,
      "step": 33890
    },
    {
      "epoch": 0.9887099597409417,
      "grad_norm": 0.9012561199472223,
      "learning_rate": 3.342382250260956e-09,
      "loss": 0.1408,
      "step": 33891
    },
    {
      "epoch": 0.9887391329715852,
      "grad_norm": 0.8166263530626607,
      "learning_rate": 3.325133186209417e-09,
      "loss": 0.1165,
      "step": 33892
    },
    {
      "epoch": 0.9887683062022289,
      "grad_norm": 0.8651909650123857,
      "learning_rate": 3.30792873127761e-09,
      "loss": 0.1001,
      "step": 33893
    },
    {
      "epoch": 0.9887974794328724,
      "grad_norm": 0.7425522189351392,
      "learning_rate": 3.29076888562041e-09,
      "loss": 0.1255,
      "step": 33894
    },
    {
      "epoch": 0.988826652663516,
      "grad_norm": 1.1253179179711035,
      "learning_rate": 3.2736536493904734e-09,
      "loss": 0.1039,
      "step": 33895
    },
    {
      "epoch": 0.9888558258941595,
      "grad_norm": 0.7628373725503663,
      "learning_rate": 3.256583022739901e-09,
      "loss": 0.1015,
      "step": 33896
    },
    {
      "epoch": 0.9888849991248031,
      "grad_norm": 0.8887192641238454,
      "learning_rate": 3.239557005822458e-09,
      "loss": 0.1296,
      "step": 33897
    },
    {
      "epoch": 0.9889141723554467,
      "grad_norm": 0.801320497822789,
      "learning_rate": 3.222575598789135e-09,
      "loss": 0.1055,
      "step": 33898
    },
    {
      "epoch": 0.9889433455860902,
      "grad_norm": 0.7180598202244861,
      "learning_rate": 3.2056388017914773e-09,
      "loss": 0.1054,
      "step": 33899
    },
    {
      "epoch": 0.9889725188167338,
      "grad_norm": 0.7580971729199544,
      "learning_rate": 3.188746614981586e-09,
      "loss": 0.1161,
      "step": 33900
    },
    {
      "epoch": 0.9890016920473773,
      "grad_norm": 0.8562341518330403,
      "learning_rate": 3.1718990385093408e-09,
      "loss": 0.115,
      "step": 33901
    },
    {
      "epoch": 0.9890308652780209,
      "grad_norm": 0.7894581821337003,
      "learning_rate": 3.155096072525732e-09,
      "loss": 0.1185,
      "step": 33902
    },
    {
      "epoch": 0.9890600385086644,
      "grad_norm": 0.8305931487248671,
      "learning_rate": 3.1383377171806396e-09,
      "loss": 0.1109,
      "step": 33903
    },
    {
      "epoch": 0.989089211739308,
      "grad_norm": 0.710003354133609,
      "learning_rate": 3.1216239726233888e-09,
      "loss": 0.0931,
      "step": 33904
    },
    {
      "epoch": 0.9891183849699515,
      "grad_norm": 0.945893025959921,
      "learning_rate": 3.1049548390038596e-09,
      "loss": 0.1062,
      "step": 33905
    },
    {
      "epoch": 0.9891475582005951,
      "grad_norm": 0.8633522359130263,
      "learning_rate": 3.0883303164702673e-09,
      "loss": 0.1167,
      "step": 33906
    },
    {
      "epoch": 0.9891767314312387,
      "grad_norm": 0.8842800265805906,
      "learning_rate": 3.071750405170826e-09,
      "loss": 0.0941,
      "step": 33907
    },
    {
      "epoch": 0.9892059046618823,
      "grad_norm": 0.8391003658343408,
      "learning_rate": 3.0552151052543057e-09,
      "loss": 0.0986,
      "step": 33908
    },
    {
      "epoch": 0.9892350778925258,
      "grad_norm": 0.840982645146856,
      "learning_rate": 3.038724416867811e-09,
      "loss": 0.1361,
      "step": 33909
    },
    {
      "epoch": 0.9892642511231694,
      "grad_norm": 0.7738698120393673,
      "learning_rate": 3.0222783401590016e-09,
      "loss": 0.092,
      "step": 33910
    },
    {
      "epoch": 0.989293424353813,
      "grad_norm": 0.734905388385287,
      "learning_rate": 3.0058768752738723e-09,
      "loss": 0.1124,
      "step": 33911
    },
    {
      "epoch": 0.9893225975844565,
      "grad_norm": 0.8107093289877106,
      "learning_rate": 2.989520022360082e-09,
      "loss": 0.1281,
      "step": 33912
    },
    {
      "epoch": 0.9893517708151001,
      "grad_norm": 0.8404966074340152,
      "learning_rate": 2.9732077815625148e-09,
      "loss": 0.1082,
      "step": 33913
    },
    {
      "epoch": 0.9893809440457436,
      "grad_norm": 0.8900145930179061,
      "learning_rate": 2.956940153027166e-09,
      "loss": 0.1088,
      "step": 33914
    },
    {
      "epoch": 0.9894101172763872,
      "grad_norm": 0.839804287940214,
      "learning_rate": 2.9407171368994738e-09,
      "loss": 0.1132,
      "step": 33915
    },
    {
      "epoch": 0.9894392905070307,
      "grad_norm": 0.7373948432771645,
      "learning_rate": 2.9245387333243225e-09,
      "loss": 0.1297,
      "step": 33916
    },
    {
      "epoch": 0.9894684637376743,
      "grad_norm": 0.6906506282981626,
      "learning_rate": 2.9084049424460414e-09,
      "loss": 0.127,
      "step": 33917
    },
    {
      "epoch": 0.9894976369683178,
      "grad_norm": 0.8405123828936328,
      "learning_rate": 2.8923157644084044e-09,
      "loss": 0.1146,
      "step": 33918
    },
    {
      "epoch": 0.9895268101989614,
      "grad_norm": 0.8178881314785621,
      "learning_rate": 2.876271199355185e-09,
      "loss": 0.1149,
      "step": 33919
    },
    {
      "epoch": 0.989555983429605,
      "grad_norm": 0.7996511022800938,
      "learning_rate": 2.8602712474301575e-09,
      "loss": 0.1123,
      "step": 33920
    },
    {
      "epoch": 0.9895851566602486,
      "grad_norm": 0.7828003158065426,
      "learning_rate": 2.8443159087754304e-09,
      "loss": 0.1056,
      "step": 33921
    },
    {
      "epoch": 0.9896143298908922,
      "grad_norm": 0.6241510006751408,
      "learning_rate": 2.828405183533667e-09,
      "loss": 0.1072,
      "step": 33922
    },
    {
      "epoch": 0.9896435031215357,
      "grad_norm": 0.9955227614630167,
      "learning_rate": 2.8125390718469757e-09,
      "loss": 0.1123,
      "step": 33923
    },
    {
      "epoch": 0.9896726763521793,
      "grad_norm": 0.7808665715637741,
      "learning_rate": 2.7967175738569107e-09,
      "loss": 0.0969,
      "step": 33924
    },
    {
      "epoch": 0.9897018495828228,
      "grad_norm": 0.7306766518997789,
      "learning_rate": 2.780940689705025e-09,
      "loss": 0.0956,
      "step": 33925
    },
    {
      "epoch": 0.9897310228134664,
      "grad_norm": 0.6744496999579083,
      "learning_rate": 2.765208419531762e-09,
      "loss": 0.1179,
      "step": 33926
    },
    {
      "epoch": 0.9897601960441099,
      "grad_norm": 0.7961666872297498,
      "learning_rate": 2.7495207634781194e-09,
      "loss": 0.1129,
      "step": 33927
    },
    {
      "epoch": 0.9897893692747535,
      "grad_norm": 0.8423488570580006,
      "learning_rate": 2.733877721683986e-09,
      "loss": 0.1114,
      "step": 33928
    },
    {
      "epoch": 0.989818542505397,
      "grad_norm": 0.7405981694754284,
      "learning_rate": 2.7182792942881396e-09,
      "loss": 0.0894,
      "step": 33929
    },
    {
      "epoch": 0.9898477157360406,
      "grad_norm": 0.8079617588290383,
      "learning_rate": 2.7027254814310232e-09,
      "loss": 0.1029,
      "step": 33930
    },
    {
      "epoch": 0.9898768889666841,
      "grad_norm": 0.6998068980562837,
      "learning_rate": 2.6872162832508596e-09,
      "loss": 0.1023,
      "step": 33931
    },
    {
      "epoch": 0.9899060621973277,
      "grad_norm": 1.3290014117581306,
      "learning_rate": 2.671751699886427e-09,
      "loss": 0.1042,
      "step": 33932
    },
    {
      "epoch": 0.9899352354279712,
      "grad_norm": 0.8834074317425398,
      "learning_rate": 2.656331731475392e-09,
      "loss": 0.1083,
      "step": 33933
    },
    {
      "epoch": 0.9899644086586149,
      "grad_norm": 0.8090366186211602,
      "learning_rate": 2.640956378155979e-09,
      "loss": 0.1065,
      "step": 33934
    },
    {
      "epoch": 0.9899935818892585,
      "grad_norm": 0.8713800432799568,
      "learning_rate": 2.625625640064744e-09,
      "loss": 0.1172,
      "step": 33935
    },
    {
      "epoch": 0.990022755119902,
      "grad_norm": 0.9535086899752458,
      "learning_rate": 2.610339517339355e-09,
      "loss": 0.1052,
      "step": 33936
    },
    {
      "epoch": 0.9900519283505456,
      "grad_norm": 0.8555863263579646,
      "learning_rate": 2.595098010115815e-09,
      "loss": 0.1121,
      "step": 33937
    },
    {
      "epoch": 0.9900811015811891,
      "grad_norm": 0.9805459250371799,
      "learning_rate": 2.579901118530126e-09,
      "loss": 0.1012,
      "step": 33938
    },
    {
      "epoch": 0.9901102748118327,
      "grad_norm": 0.7345744517421733,
      "learning_rate": 2.5647488427182897e-09,
      "loss": 0.0979,
      "step": 33939
    },
    {
      "epoch": 0.9901394480424762,
      "grad_norm": 0.8835896243778248,
      "learning_rate": 2.549641182815199e-09,
      "loss": 0.1323,
      "step": 33940
    },
    {
      "epoch": 0.9901686212731198,
      "grad_norm": 0.7344805468122377,
      "learning_rate": 2.5345781389557454e-09,
      "loss": 0.1114,
      "step": 33941
    },
    {
      "epoch": 0.9901977945037633,
      "grad_norm": 0.809929318413112,
      "learning_rate": 2.5195597112748215e-09,
      "loss": 0.1137,
      "step": 33942
    },
    {
      "epoch": 0.9902269677344069,
      "grad_norm": 0.8039203377031532,
      "learning_rate": 2.5045858999062087e-09,
      "loss": 0.0856,
      "step": 33943
    },
    {
      "epoch": 0.9902561409650504,
      "grad_norm": 0.779359803495864,
      "learning_rate": 2.4896567049836896e-09,
      "loss": 0.1144,
      "step": 33944
    },
    {
      "epoch": 0.990285314195694,
      "grad_norm": 0.790561357947606,
      "learning_rate": 2.4747721266404902e-09,
      "loss": 0.1029,
      "step": 33945
    },
    {
      "epoch": 0.9903144874263375,
      "grad_norm": 0.908733578638023,
      "learning_rate": 2.4599321650098375e-09,
      "loss": 0.106,
      "step": 33946
    },
    {
      "epoch": 0.9903436606569812,
      "grad_norm": 0.765747348911198,
      "learning_rate": 2.445136820223293e-09,
      "loss": 0.0964,
      "step": 33947
    },
    {
      "epoch": 0.9903728338876248,
      "grad_norm": 0.7649930331908215,
      "learning_rate": 2.4303860924140833e-09,
      "loss": 0.1098,
      "step": 33948
    },
    {
      "epoch": 0.9904020071182683,
      "grad_norm": 0.9362867907583359,
      "learning_rate": 2.4156799817132147e-09,
      "loss": 0.1468,
      "step": 33949
    },
    {
      "epoch": 0.9904311803489119,
      "grad_norm": 0.8898718990061929,
      "learning_rate": 2.401018488251694e-09,
      "loss": 0.1186,
      "step": 33950
    },
    {
      "epoch": 0.9904603535795554,
      "grad_norm": 0.8409265066223599,
      "learning_rate": 2.3864016121616375e-09,
      "loss": 0.1167,
      "step": 33951
    },
    {
      "epoch": 0.990489526810199,
      "grad_norm": 0.7395345854650033,
      "learning_rate": 2.3718293535723857e-09,
      "loss": 0.1166,
      "step": 33952
    },
    {
      "epoch": 0.9905187000408425,
      "grad_norm": 1.015248079779123,
      "learning_rate": 2.3573017126143904e-09,
      "loss": 0.1145,
      "step": 33953
    },
    {
      "epoch": 0.9905478732714861,
      "grad_norm": 0.8702693898561817,
      "learning_rate": 2.3428186894169925e-09,
      "loss": 0.1175,
      "step": 33954
    },
    {
      "epoch": 0.9905770465021296,
      "grad_norm": 0.6597727036415594,
      "learning_rate": 2.328380284110643e-09,
      "loss": 0.1121,
      "step": 33955
    },
    {
      "epoch": 0.9906062197327732,
      "grad_norm": 0.7357749183320261,
      "learning_rate": 2.3139864968230175e-09,
      "loss": 0.1052,
      "step": 33956
    },
    {
      "epoch": 0.9906353929634167,
      "grad_norm": 0.7268518988741933,
      "learning_rate": 2.299637327682902e-09,
      "loss": 0.1021,
      "step": 33957
    },
    {
      "epoch": 0.9906645661940603,
      "grad_norm": 0.6513487882001722,
      "learning_rate": 2.2853327768190823e-09,
      "loss": 0.11,
      "step": 33958
    },
    {
      "epoch": 0.9906937394247038,
      "grad_norm": 0.7685793890117875,
      "learning_rate": 2.2710728443586793e-09,
      "loss": 0.1185,
      "step": 33959
    },
    {
      "epoch": 0.9907229126553474,
      "grad_norm": 0.8430703910225354,
      "learning_rate": 2.2568575304288133e-09,
      "loss": 0.1157,
      "step": 33960
    },
    {
      "epoch": 0.9907520858859911,
      "grad_norm": 0.7342130147692949,
      "learning_rate": 2.2426868351566046e-09,
      "loss": 0.1079,
      "step": 33961
    },
    {
      "epoch": 0.9907812591166346,
      "grad_norm": 0.9779710870850913,
      "learning_rate": 2.2285607586686186e-09,
      "loss": 0.1297,
      "step": 33962
    },
    {
      "epoch": 0.9908104323472782,
      "grad_norm": 1.01625764127674,
      "learning_rate": 2.214479301091421e-09,
      "loss": 0.0999,
      "step": 33963
    },
    {
      "epoch": 0.9908396055779217,
      "grad_norm": 0.7672220115317753,
      "learning_rate": 2.200442462549912e-09,
      "loss": 0.0967,
      "step": 33964
    },
    {
      "epoch": 0.9908687788085653,
      "grad_norm": 0.7909276021297114,
      "learning_rate": 2.1864502431701017e-09,
      "loss": 0.1148,
      "step": 33965
    },
    {
      "epoch": 0.9908979520392088,
      "grad_norm": 0.7674713643682542,
      "learning_rate": 2.172502643076335e-09,
      "loss": 0.1228,
      "step": 33966
    },
    {
      "epoch": 0.9909271252698524,
      "grad_norm": 1.086493672651407,
      "learning_rate": 2.158599662392957e-09,
      "loss": 0.1008,
      "step": 33967
    },
    {
      "epoch": 0.9909562985004959,
      "grad_norm": 0.9305529523729485,
      "learning_rate": 2.144741301245423e-09,
      "loss": 0.1152,
      "step": 33968
    },
    {
      "epoch": 0.9909854717311395,
      "grad_norm": 0.691799122538135,
      "learning_rate": 2.1309275597558577e-09,
      "loss": 0.1137,
      "step": 33969
    },
    {
      "epoch": 0.991014644961783,
      "grad_norm": 0.8863295115076664,
      "learning_rate": 2.1171584380486055e-09,
      "loss": 0.1317,
      "step": 33970
    },
    {
      "epoch": 0.9910438181924266,
      "grad_norm": 0.9360840751832611,
      "learning_rate": 2.1034339362463464e-09,
      "loss": 0.0951,
      "step": 33971
    },
    {
      "epoch": 0.9910729914230701,
      "grad_norm": 1.0275039878745016,
      "learning_rate": 2.0897540544712046e-09,
      "loss": 0.1065,
      "step": 33972
    },
    {
      "epoch": 0.9911021646537137,
      "grad_norm": 0.7275166231646761,
      "learning_rate": 2.0761187928458606e-09,
      "loss": 0.1172,
      "step": 33973
    },
    {
      "epoch": 0.9911313378843574,
      "grad_norm": 0.8463512795866125,
      "learning_rate": 2.062528151491883e-09,
      "loss": 0.1153,
      "step": 33974
    },
    {
      "epoch": 0.9911605111150009,
      "grad_norm": 0.8650386158175781,
      "learning_rate": 2.048982130530286e-09,
      "loss": 0.1101,
      "step": 33975
    },
    {
      "epoch": 0.9911896843456445,
      "grad_norm": 0.6942209457183107,
      "learning_rate": 2.0354807300826397e-09,
      "loss": 0.1337,
      "step": 33976
    },
    {
      "epoch": 0.991218857576288,
      "grad_norm": 0.8645080700651425,
      "learning_rate": 2.0220239502688478e-09,
      "loss": 0.1056,
      "step": 33977
    },
    {
      "epoch": 0.9912480308069316,
      "grad_norm": 0.8021416694734592,
      "learning_rate": 2.0086117912093696e-09,
      "loss": 0.1084,
      "step": 33978
    },
    {
      "epoch": 0.9912772040375751,
      "grad_norm": 0.8155716191224088,
      "learning_rate": 1.995244253024109e-09,
      "loss": 0.1047,
      "step": 33979
    },
    {
      "epoch": 0.9913063772682187,
      "grad_norm": 1.0162152574593812,
      "learning_rate": 1.98192133583186e-09,
      "loss": 0.1124,
      "step": 33980
    },
    {
      "epoch": 0.9913355504988622,
      "grad_norm": 0.899792569027654,
      "learning_rate": 1.9686430397519718e-09,
      "loss": 0.099,
      "step": 33981
    },
    {
      "epoch": 0.9913647237295058,
      "grad_norm": 1.0551539411187696,
      "learning_rate": 1.955409364902683e-09,
      "loss": 0.1034,
      "step": 33982
    },
    {
      "epoch": 0.9913938969601493,
      "grad_norm": 0.6042602662003457,
      "learning_rate": 1.942220311402787e-09,
      "loss": 0.1029,
      "step": 33983
    },
    {
      "epoch": 0.9914230701907929,
      "grad_norm": 0.9069850981424438,
      "learning_rate": 1.929075879369413e-09,
      "loss": 0.1101,
      "step": 33984
    },
    {
      "epoch": 0.9914522434214365,
      "grad_norm": 0.9710260042606573,
      "learning_rate": 1.9159760689202447e-09,
      "loss": 0.0983,
      "step": 33985
    },
    {
      "epoch": 0.99148141665208,
      "grad_norm": 1.0138261162568778,
      "learning_rate": 1.9029208801718547e-09,
      "loss": 0.1077,
      "step": 33986
    },
    {
      "epoch": 0.9915105898827236,
      "grad_norm": 0.8687092665657794,
      "learning_rate": 1.8899103132413722e-09,
      "loss": 0.0993,
      "step": 33987
    },
    {
      "epoch": 0.9915397631133672,
      "grad_norm": 0.8323700945122122,
      "learning_rate": 1.87694436824426e-09,
      "loss": 0.1134,
      "step": 33988
    },
    {
      "epoch": 0.9915689363440108,
      "grad_norm": 1.1966702473090336,
      "learning_rate": 1.864023045297092e-09,
      "loss": 0.1147,
      "step": 33989
    },
    {
      "epoch": 0.9915981095746543,
      "grad_norm": 0.6946174160030281,
      "learning_rate": 1.851146344514776e-09,
      "loss": 0.1128,
      "step": 33990
    },
    {
      "epoch": 0.9916272828052979,
      "grad_norm": 0.6748738127061272,
      "learning_rate": 1.8383142660116647e-09,
      "loss": 0.1011,
      "step": 33991
    },
    {
      "epoch": 0.9916564560359414,
      "grad_norm": 0.7796551964750823,
      "learning_rate": 1.825526809903222e-09,
      "loss": 0.1007,
      "step": 33992
    },
    {
      "epoch": 0.991685629266585,
      "grad_norm": 0.7387923943880071,
      "learning_rate": 1.8127839763038003e-09,
      "loss": 0.0923,
      "step": 33993
    },
    {
      "epoch": 0.9917148024972285,
      "grad_norm": 0.7548773703453022,
      "learning_rate": 1.8000857653260872e-09,
      "loss": 0.1067,
      "step": 33994
    },
    {
      "epoch": 0.9917439757278721,
      "grad_norm": 0.8038998741498162,
      "learning_rate": 1.787432177083881e-09,
      "loss": 0.1052,
      "step": 33995
    },
    {
      "epoch": 0.9917731489585156,
      "grad_norm": 0.7773612754787679,
      "learning_rate": 1.7748232116909792e-09,
      "loss": 0.105,
      "step": 33996
    },
    {
      "epoch": 0.9918023221891592,
      "grad_norm": 1.0876727764687901,
      "learning_rate": 1.7622588692589593e-09,
      "loss": 0.1251,
      "step": 33997
    },
    {
      "epoch": 0.9918314954198028,
      "grad_norm": 0.9704825951939312,
      "learning_rate": 1.749739149900509e-09,
      "loss": 0.109,
      "step": 33998
    },
    {
      "epoch": 0.9918606686504463,
      "grad_norm": 0.7351005748774584,
      "learning_rate": 1.7372640537266506e-09,
      "loss": 0.1007,
      "step": 33999
    },
    {
      "epoch": 0.9918898418810899,
      "grad_norm": 0.7330137275697113,
      "learning_rate": 1.7248335808500715e-09,
      "loss": 0.0972,
      "step": 34000
    },
    {
      "epoch": 0.9919190151117335,
      "grad_norm": 1.1005420867252866,
      "learning_rate": 1.7124477313801292e-09,
      "loss": 0.1002,
      "step": 34001
    },
    {
      "epoch": 0.9919481883423771,
      "grad_norm": 0.7729468141397632,
      "learning_rate": 1.7001065054289557e-09,
      "loss": 0.1141,
      "step": 34002
    },
    {
      "epoch": 0.9919773615730206,
      "grad_norm": 0.7921442542462045,
      "learning_rate": 1.687809903105908e-09,
      "loss": 0.1244,
      "step": 34003
    },
    {
      "epoch": 0.9920065348036642,
      "grad_norm": 0.8050298833471278,
      "learning_rate": 1.6755579245208986e-09,
      "loss": 0.1251,
      "step": 34004
    },
    {
      "epoch": 0.9920357080343077,
      "grad_norm": 0.7913452179414457,
      "learning_rate": 1.6633505697832842e-09,
      "loss": 0.1231,
      "step": 34005
    },
    {
      "epoch": 0.9920648812649513,
      "grad_norm": 0.8524388524735569,
      "learning_rate": 1.6511878390018664e-09,
      "loss": 0.1131,
      "step": 34006
    },
    {
      "epoch": 0.9920940544955948,
      "grad_norm": 0.7207385234256999,
      "learning_rate": 1.6390697322854476e-09,
      "loss": 0.1034,
      "step": 34007
    },
    {
      "epoch": 0.9921232277262384,
      "grad_norm": 0.892106125992658,
      "learning_rate": 1.6269962497422742e-09,
      "loss": 0.1023,
      "step": 34008
    },
    {
      "epoch": 0.992152400956882,
      "grad_norm": 0.8920521212411752,
      "learning_rate": 1.6149673914800379e-09,
      "loss": 0.1108,
      "step": 34009
    },
    {
      "epoch": 0.9921815741875255,
      "grad_norm": 0.8646521052733187,
      "learning_rate": 1.6029831576064303e-09,
      "loss": 0.1082,
      "step": 34010
    },
    {
      "epoch": 0.9922107474181691,
      "grad_norm": 0.9169541937022215,
      "learning_rate": 1.591043548228033e-09,
      "loss": 0.1195,
      "step": 34011
    },
    {
      "epoch": 0.9922399206488126,
      "grad_norm": 0.7139755905377889,
      "learning_rate": 1.5791485634514269e-09,
      "loss": 0.0878,
      "step": 34012
    },
    {
      "epoch": 0.9922690938794562,
      "grad_norm": 0.9725231084367686,
      "learning_rate": 1.5672982033831941e-09,
      "loss": 0.1181,
      "step": 34013
    },
    {
      "epoch": 0.9922982671100997,
      "grad_norm": 0.7994798428439593,
      "learning_rate": 1.5554924681288052e-09,
      "loss": 0.1043,
      "step": 34014
    },
    {
      "epoch": 0.9923274403407434,
      "grad_norm": 0.7636802346871505,
      "learning_rate": 1.543731357793732e-09,
      "loss": 0.1302,
      "step": 34015
    },
    {
      "epoch": 0.9923566135713869,
      "grad_norm": 0.7368600712341854,
      "learning_rate": 1.532014872483445e-09,
      "loss": 0.1101,
      "step": 34016
    },
    {
      "epoch": 0.9923857868020305,
      "grad_norm": 0.7183991162784704,
      "learning_rate": 1.5203430123011953e-09,
      "loss": 0.0941,
      "step": 34017
    },
    {
      "epoch": 0.992414960032674,
      "grad_norm": 0.8763857078528436,
      "learning_rate": 1.5087157773530092e-09,
      "loss": 0.1088,
      "step": 34018
    },
    {
      "epoch": 0.9924441332633176,
      "grad_norm": 0.7429516001128845,
      "learning_rate": 1.4971331677410273e-09,
      "loss": 0.1064,
      "step": 34019
    },
    {
      "epoch": 0.9924733064939611,
      "grad_norm": 0.7613951574229304,
      "learning_rate": 1.4855951835696102e-09,
      "loss": 0.1174,
      "step": 34020
    },
    {
      "epoch": 0.9925024797246047,
      "grad_norm": 0.7740688048253014,
      "learning_rate": 1.4741018249420091e-09,
      "loss": 0.098,
      "step": 34021
    },
    {
      "epoch": 0.9925316529552483,
      "grad_norm": 0.8195838171484119,
      "learning_rate": 1.4626530919598093e-09,
      "loss": 0.1332,
      "step": 34022
    },
    {
      "epoch": 0.9925608261858918,
      "grad_norm": 1.2659801867341571,
      "learning_rate": 1.4512489847262612e-09,
      "loss": 0.085,
      "step": 34023
    },
    {
      "epoch": 0.9925899994165354,
      "grad_norm": 0.885384170892487,
      "learning_rate": 1.4398895033423954e-09,
      "loss": 0.1159,
      "step": 34024
    },
    {
      "epoch": 0.9926191726471789,
      "grad_norm": 0.9042474170601904,
      "learning_rate": 1.4285746479097973e-09,
      "loss": 0.1043,
      "step": 34025
    },
    {
      "epoch": 0.9926483458778225,
      "grad_norm": 0.7205575069692667,
      "learning_rate": 1.4173044185300522e-09,
      "loss": 0.1019,
      "step": 34026
    },
    {
      "epoch": 0.992677519108466,
      "grad_norm": 0.7391088876462139,
      "learning_rate": 1.4060788153030802e-09,
      "loss": 0.0908,
      "step": 34027
    },
    {
      "epoch": 0.9927066923391097,
      "grad_norm": 0.8612764153386636,
      "learning_rate": 1.3948978383293565e-09,
      "loss": 0.121,
      "step": 34028
    },
    {
      "epoch": 0.9927358655697532,
      "grad_norm": 0.6940616724774372,
      "learning_rate": 1.3837614877088013e-09,
      "loss": 0.1133,
      "step": 34029
    },
    {
      "epoch": 0.9927650388003968,
      "grad_norm": 0.8593610006395457,
      "learning_rate": 1.3726697635407792e-09,
      "loss": 0.1256,
      "step": 34030
    },
    {
      "epoch": 0.9927942120310403,
      "grad_norm": 0.7378808583037828,
      "learning_rate": 1.3616226659246557e-09,
      "loss": 0.1249,
      "step": 34031
    },
    {
      "epoch": 0.9928233852616839,
      "grad_norm": 1.4935313770643186,
      "learning_rate": 1.35062019495813e-09,
      "loss": 0.1145,
      "step": 34032
    },
    {
      "epoch": 0.9928525584923275,
      "grad_norm": 0.7204585551341403,
      "learning_rate": 1.339662350740012e-09,
      "loss": 0.0982,
      "step": 34033
    },
    {
      "epoch": 0.992881731722971,
      "grad_norm": 0.6170457621250698,
      "learning_rate": 1.3287491333685564e-09,
      "loss": 0.1038,
      "step": 34034
    },
    {
      "epoch": 0.9929109049536146,
      "grad_norm": 0.7836008503839021,
      "learning_rate": 1.317880542940353e-09,
      "loss": 0.1004,
      "step": 34035
    },
    {
      "epoch": 0.9929400781842581,
      "grad_norm": 0.7978269990252792,
      "learning_rate": 1.3070565795531009e-09,
      "loss": 0.128,
      "step": 34036
    },
    {
      "epoch": 0.9929692514149017,
      "grad_norm": 1.0070737496571687,
      "learning_rate": 1.2962772433028347e-09,
      "loss": 0.0966,
      "step": 34037
    },
    {
      "epoch": 0.9929984246455452,
      "grad_norm": 1.0961206417910445,
      "learning_rate": 1.2855425342861439e-09,
      "loss": 0.1446,
      "step": 34038
    },
    {
      "epoch": 0.9930275978761888,
      "grad_norm": 0.8363924212235179,
      "learning_rate": 1.2748524525990624e-09,
      "loss": 0.1109,
      "step": 34039
    },
    {
      "epoch": 0.9930567711068323,
      "grad_norm": 0.6959908934163784,
      "learning_rate": 1.2642069983370698e-09,
      "loss": 0.1214,
      "step": 34040
    },
    {
      "epoch": 0.9930859443374759,
      "grad_norm": 0.66674851157858,
      "learning_rate": 1.2536061715945346e-09,
      "loss": 0.0952,
      "step": 34041
    },
    {
      "epoch": 0.9931151175681195,
      "grad_norm": 0.9546778122469106,
      "learning_rate": 1.2430499724663813e-09,
      "loss": 0.1113,
      "step": 34042
    },
    {
      "epoch": 0.9931442907987631,
      "grad_norm": 0.6442948551810961,
      "learning_rate": 1.232538401047534e-09,
      "loss": 0.1114,
      "step": 34043
    },
    {
      "epoch": 0.9931734640294066,
      "grad_norm": 0.6836414226523234,
      "learning_rate": 1.2220714574306957e-09,
      "loss": 0.0965,
      "step": 34044
    },
    {
      "epoch": 0.9932026372600502,
      "grad_norm": 0.779165312058414,
      "learning_rate": 1.211649141710236e-09,
      "loss": 0.1001,
      "step": 34045
    },
    {
      "epoch": 0.9932318104906938,
      "grad_norm": 0.8468336953767481,
      "learning_rate": 1.2012714539788585e-09,
      "loss": 0.1185,
      "step": 34046
    },
    {
      "epoch": 0.9932609837213373,
      "grad_norm": 0.6729363274783582,
      "learning_rate": 1.190938394328711e-09,
      "loss": 0.1008,
      "step": 34047
    },
    {
      "epoch": 0.9932901569519809,
      "grad_norm": 0.7754525527136992,
      "learning_rate": 1.1806499628530531e-09,
      "loss": 0.1276,
      "step": 34048
    },
    {
      "epoch": 0.9933193301826244,
      "grad_norm": 0.8506307230635818,
      "learning_rate": 1.1704061596434779e-09,
      "loss": 0.1208,
      "step": 34049
    },
    {
      "epoch": 0.993348503413268,
      "grad_norm": 1.023780179462189,
      "learning_rate": 1.1602069847904685e-09,
      "loss": 0.1013,
      "step": 34050
    },
    {
      "epoch": 0.9933776766439115,
      "grad_norm": 0.6617346585707174,
      "learning_rate": 1.1500524383861734e-09,
      "loss": 0.1192,
      "step": 34051
    },
    {
      "epoch": 0.9934068498745551,
      "grad_norm": 0.9134584862081634,
      "learning_rate": 1.1399425205210758e-09,
      "loss": 0.1371,
      "step": 34052
    },
    {
      "epoch": 0.9934360231051986,
      "grad_norm": 0.7653441795668655,
      "learning_rate": 1.1298772312851036e-09,
      "loss": 0.1011,
      "step": 34053
    },
    {
      "epoch": 0.9934651963358422,
      "grad_norm": 0.717620650852394,
      "learning_rate": 1.1198565707681852e-09,
      "loss": 0.1062,
      "step": 34054
    },
    {
      "epoch": 0.9934943695664858,
      "grad_norm": 1.0198982643740935,
      "learning_rate": 1.1098805390602486e-09,
      "loss": 0.1016,
      "step": 34055
    },
    {
      "epoch": 0.9935235427971294,
      "grad_norm": 0.8518773308694678,
      "learning_rate": 1.0999491362495563e-09,
      "loss": 0.1226,
      "step": 34056
    },
    {
      "epoch": 0.993552716027773,
      "grad_norm": 0.7623908621128579,
      "learning_rate": 1.0900623624254814e-09,
      "loss": 0.0971,
      "step": 34057
    },
    {
      "epoch": 0.9935818892584165,
      "grad_norm": 0.7211693048706922,
      "learning_rate": 1.0802202176757314e-09,
      "loss": 0.1168,
      "step": 34058
    },
    {
      "epoch": 0.9936110624890601,
      "grad_norm": 0.8603851488590624,
      "learning_rate": 1.0704227020885694e-09,
      "loss": 0.1154,
      "step": 34059
    },
    {
      "epoch": 0.9936402357197036,
      "grad_norm": 1.0251546069850566,
      "learning_rate": 1.0606698157511475e-09,
      "loss": 0.1058,
      "step": 34060
    },
    {
      "epoch": 0.9936694089503472,
      "grad_norm": 0.7998863832192737,
      "learning_rate": 1.0509615587506183e-09,
      "loss": 0.1007,
      "step": 34061
    },
    {
      "epoch": 0.9936985821809907,
      "grad_norm": 0.8498521076196066,
      "learning_rate": 1.0412979311741345e-09,
      "loss": 0.127,
      "step": 34062
    },
    {
      "epoch": 0.9937277554116343,
      "grad_norm": 0.8093687737428069,
      "learning_rate": 1.031678933107183e-09,
      "loss": 0.1018,
      "step": 34063
    },
    {
      "epoch": 0.9937569286422778,
      "grad_norm": 0.7804226128275563,
      "learning_rate": 1.0221045646363615e-09,
      "loss": 0.1114,
      "step": 34064
    },
    {
      "epoch": 0.9937861018729214,
      "grad_norm": 0.9194000153692374,
      "learning_rate": 1.0125748258471569e-09,
      "loss": 0.1374,
      "step": 34065
    },
    {
      "epoch": 0.9938152751035649,
      "grad_norm": 0.7797226722297699,
      "learning_rate": 1.0030897168239462e-09,
      "loss": 0.1355,
      "step": 34066
    },
    {
      "epoch": 0.9938444483342085,
      "grad_norm": 1.0513752125003564,
      "learning_rate": 9.936492376516616e-10,
      "loss": 0.1329,
      "step": 34067
    },
    {
      "epoch": 0.993873621564852,
      "grad_norm": 0.6836143583423224,
      "learning_rate": 9.842533884146798e-10,
      "loss": 0.0866,
      "step": 34068
    },
    {
      "epoch": 0.9939027947954957,
      "grad_norm": 0.8211784816659222,
      "learning_rate": 9.749021691973781e-10,
      "loss": 0.1356,
      "step": 34069
    },
    {
      "epoch": 0.9939319680261393,
      "grad_norm": 1.1690471816891186,
      "learning_rate": 9.655955800824679e-10,
      "loss": 0.1149,
      "step": 34070
    },
    {
      "epoch": 0.9939611412567828,
      "grad_norm": 0.8040608420154217,
      "learning_rate": 9.563336211532159e-10,
      "loss": 0.1108,
      "step": 34071
    },
    {
      "epoch": 0.9939903144874264,
      "grad_norm": 0.9887116452727273,
      "learning_rate": 9.471162924928888e-10,
      "loss": 0.1148,
      "step": 34072
    },
    {
      "epoch": 0.9940194877180699,
      "grad_norm": 0.9079191114011983,
      "learning_rate": 9.379435941830884e-10,
      "loss": 0.1247,
      "step": 34073
    },
    {
      "epoch": 0.9940486609487135,
      "grad_norm": 0.8907520479518357,
      "learning_rate": 9.288155263059706e-10,
      "loss": 0.1216,
      "step": 34074
    },
    {
      "epoch": 0.994077834179357,
      "grad_norm": 0.9711221010827501,
      "learning_rate": 9.19732088942582e-10,
      "loss": 0.1117,
      "step": 34075
    },
    {
      "epoch": 0.9941070074100006,
      "grad_norm": 0.8999582269382925,
      "learning_rate": 9.106932821750791e-10,
      "loss": 0.1195,
      "step": 34076
    },
    {
      "epoch": 0.9941361806406441,
      "grad_norm": 0.6759357107902308,
      "learning_rate": 9.01699106083398e-10,
      "loss": 0.101,
      "step": 34077
    },
    {
      "epoch": 0.9941653538712877,
      "grad_norm": 0.743747522764742,
      "learning_rate": 8.927495607480296e-10,
      "loss": 0.0999,
      "step": 34078
    },
    {
      "epoch": 0.9941945271019312,
      "grad_norm": 0.8807387907516453,
      "learning_rate": 8.838446462483552e-10,
      "loss": 0.1026,
      "step": 34079
    },
    {
      "epoch": 0.9942237003325748,
      "grad_norm": 0.7764928226275828,
      "learning_rate": 8.749843626648657e-10,
      "loss": 0.1084,
      "step": 34080
    },
    {
      "epoch": 0.9942528735632183,
      "grad_norm": 0.9501748476200179,
      "learning_rate": 8.661687100758321e-10,
      "loss": 0.1233,
      "step": 34081
    },
    {
      "epoch": 0.994282046793862,
      "grad_norm": 0.8364287403975764,
      "learning_rate": 8.573976885600799e-10,
      "loss": 0.1073,
      "step": 34082
    },
    {
      "epoch": 0.9943112200245056,
      "grad_norm": 0.9066436897953483,
      "learning_rate": 8.486712981964352e-10,
      "loss": 0.0953,
      "step": 34083
    },
    {
      "epoch": 0.9943403932551491,
      "grad_norm": 1.050949421943745,
      "learning_rate": 8.399895390626134e-10,
      "loss": 0.119,
      "step": 34084
    },
    {
      "epoch": 0.9943695664857927,
      "grad_norm": 0.9541080393983301,
      "learning_rate": 8.3135241123522e-10,
      "loss": 0.1053,
      "step": 34085
    },
    {
      "epoch": 0.9943987397164362,
      "grad_norm": 0.9015100872206822,
      "learning_rate": 8.22759914793081e-10,
      "loss": 0.1043,
      "step": 34086
    },
    {
      "epoch": 0.9944279129470798,
      "grad_norm": 0.8138820065716954,
      "learning_rate": 8.142120498111361e-10,
      "loss": 0.1118,
      "step": 34087
    },
    {
      "epoch": 0.9944570861777233,
      "grad_norm": 0.9281246681070685,
      "learning_rate": 8.057088163671011e-10,
      "loss": 0.1044,
      "step": 34088
    },
    {
      "epoch": 0.9944862594083669,
      "grad_norm": 0.7806980332141217,
      "learning_rate": 7.972502145359163e-10,
      "loss": 0.0911,
      "step": 34089
    },
    {
      "epoch": 0.9945154326390104,
      "grad_norm": 0.7832774255404693,
      "learning_rate": 7.888362443936315e-10,
      "loss": 0.1124,
      "step": 34090
    },
    {
      "epoch": 0.994544605869654,
      "grad_norm": 0.7879711075617483,
      "learning_rate": 7.80466906015187e-10,
      "loss": 0.116,
      "step": 34091
    },
    {
      "epoch": 0.9945737791002975,
      "grad_norm": 0.7983604595183628,
      "learning_rate": 7.721421994749678e-10,
      "loss": 0.1008,
      "step": 34092
    },
    {
      "epoch": 0.9946029523309411,
      "grad_norm": 0.8004284092173197,
      "learning_rate": 7.638621248479139e-10,
      "loss": 0.1329,
      "step": 34093
    },
    {
      "epoch": 0.9946321255615846,
      "grad_norm": 0.6984064590860364,
      "learning_rate": 7.556266822078551e-10,
      "loss": 0.1055,
      "step": 34094
    },
    {
      "epoch": 0.9946612987922282,
      "grad_norm": 0.8736392918962326,
      "learning_rate": 7.47435871628066e-10,
      "loss": 0.1161,
      "step": 34095
    },
    {
      "epoch": 0.9946904720228719,
      "grad_norm": 1.0975838347093683,
      "learning_rate": 7.392896931818217e-10,
      "loss": 0.0972,
      "step": 34096
    },
    {
      "epoch": 0.9947196452535154,
      "grad_norm": 0.7537191566906429,
      "learning_rate": 7.311881469418414e-10,
      "loss": 0.1029,
      "step": 34097
    },
    {
      "epoch": 0.994748818484159,
      "grad_norm": 0.7944820515650836,
      "learning_rate": 7.231312329802897e-10,
      "loss": 0.1108,
      "step": 34098
    },
    {
      "epoch": 0.9947779917148025,
      "grad_norm": 0.7906896917838068,
      "learning_rate": 7.151189513687762e-10,
      "loss": 0.1271,
      "step": 34099
    },
    {
      "epoch": 0.9948071649454461,
      "grad_norm": 0.9187474403789473,
      "learning_rate": 7.071513021800202e-10,
      "loss": 0.105,
      "step": 34100
    },
    {
      "epoch": 0.9948363381760896,
      "grad_norm": 0.6981101245865255,
      "learning_rate": 6.99228285483966e-10,
      "loss": 0.1082,
      "step": 34101
    },
    {
      "epoch": 0.9948655114067332,
      "grad_norm": 0.8331510702176763,
      "learning_rate": 6.913499013516678e-10,
      "loss": 0.1208,
      "step": 34102
    },
    {
      "epoch": 0.9948946846373767,
      "grad_norm": 0.7078335438354388,
      "learning_rate": 6.835161498536246e-10,
      "loss": 0.1047,
      "step": 34103
    },
    {
      "epoch": 0.9949238578680203,
      "grad_norm": 0.6408704157548614,
      "learning_rate": 6.757270310597808e-10,
      "loss": 0.1158,
      "step": 34104
    },
    {
      "epoch": 0.9949530310986638,
      "grad_norm": 0.7426096241429958,
      "learning_rate": 6.679825450395249e-10,
      "loss": 0.1349,
      "step": 34105
    },
    {
      "epoch": 0.9949822043293074,
      "grad_norm": 1.0062563407269012,
      "learning_rate": 6.602826918622463e-10,
      "loss": 0.103,
      "step": 34106
    },
    {
      "epoch": 0.995011377559951,
      "grad_norm": 0.7157635929457596,
      "learning_rate": 6.526274715967784e-10,
      "loss": 0.0945,
      "step": 34107
    },
    {
      "epoch": 0.9950405507905945,
      "grad_norm": 0.8810473775563002,
      "learning_rate": 6.450168843108451e-10,
      "loss": 0.1082,
      "step": 34108
    },
    {
      "epoch": 0.9950697240212382,
      "grad_norm": 0.9297848423141039,
      "learning_rate": 6.37450930072725e-10,
      "loss": 0.1081,
      "step": 34109
    },
    {
      "epoch": 0.9950988972518817,
      "grad_norm": 1.0768216423786734,
      "learning_rate": 6.299296089501417e-10,
      "loss": 0.1393,
      "step": 34110
    },
    {
      "epoch": 0.9951280704825253,
      "grad_norm": 0.9025208740283891,
      "learning_rate": 6.224529210097086e-10,
      "loss": 0.1133,
      "step": 34111
    },
    {
      "epoch": 0.9951572437131688,
      "grad_norm": 0.7784503035521988,
      "learning_rate": 6.150208663191492e-10,
      "loss": 0.1065,
      "step": 34112
    },
    {
      "epoch": 0.9951864169438124,
      "grad_norm": 0.885480820943917,
      "learning_rate": 6.076334449439669e-10,
      "loss": 0.1164,
      "step": 34113
    },
    {
      "epoch": 0.9952155901744559,
      "grad_norm": 0.7198937868159827,
      "learning_rate": 6.002906569502199e-10,
      "loss": 0.0943,
      "step": 34114
    },
    {
      "epoch": 0.9952447634050995,
      "grad_norm": 0.7557393367661378,
      "learning_rate": 5.929925024039663e-10,
      "loss": 0.1015,
      "step": 34115
    },
    {
      "epoch": 0.995273936635743,
      "grad_norm": 0.8592429764339267,
      "learning_rate": 5.85738981369599e-10,
      "loss": 0.1027,
      "step": 34116
    },
    {
      "epoch": 0.9953031098663866,
      "grad_norm": 1.0326153929875528,
      "learning_rate": 5.785300939126215e-10,
      "loss": 0.1145,
      "step": 34117
    },
    {
      "epoch": 0.9953322830970301,
      "grad_norm": 1.131867533188281,
      "learning_rate": 5.713658400968714e-10,
      "loss": 0.1282,
      "step": 34118
    },
    {
      "epoch": 0.9953614563276737,
      "grad_norm": 0.8826275390906089,
      "learning_rate": 5.642462199867415e-10,
      "loss": 0.1201,
      "step": 34119
    },
    {
      "epoch": 0.9953906295583173,
      "grad_norm": 0.7075084976064901,
      "learning_rate": 5.571712336455149e-10,
      "loss": 0.0991,
      "step": 34120
    },
    {
      "epoch": 0.9954198027889608,
      "grad_norm": 1.005407846859457,
      "learning_rate": 5.501408811364739e-10,
      "loss": 0.09,
      "step": 34121
    },
    {
      "epoch": 0.9954489760196044,
      "grad_norm": 1.0504259355479635,
      "learning_rate": 5.431551625223463e-10,
      "loss": 0.1022,
      "step": 34122
    },
    {
      "epoch": 0.995478149250248,
      "grad_norm": 0.8581664648419247,
      "learning_rate": 5.362140778647495e-10,
      "loss": 0.0963,
      "step": 34123
    },
    {
      "epoch": 0.9955073224808916,
      "grad_norm": 0.7794675314709526,
      "learning_rate": 5.293176272269662e-10,
      "loss": 0.1077,
      "step": 34124
    },
    {
      "epoch": 0.9955364957115351,
      "grad_norm": 0.8426456159811151,
      "learning_rate": 5.224658106700586e-10,
      "loss": 0.1015,
      "step": 34125
    },
    {
      "epoch": 0.9955656689421787,
      "grad_norm": 0.6585376206474072,
      "learning_rate": 5.15658628255089e-10,
      "loss": 0.1106,
      "step": 34126
    },
    {
      "epoch": 0.9955948421728222,
      "grad_norm": 0.7160620826893653,
      "learning_rate": 5.088960800425646e-10,
      "loss": 0.1218,
      "step": 34127
    },
    {
      "epoch": 0.9956240154034658,
      "grad_norm": 0.9661865435285912,
      "learning_rate": 5.021781660935477e-10,
      "loss": 0.1331,
      "step": 34128
    },
    {
      "epoch": 0.9956531886341093,
      "grad_norm": 0.7367700762748168,
      "learning_rate": 4.95504886467435e-10,
      "loss": 0.1049,
      "step": 34129
    },
    {
      "epoch": 0.9956823618647529,
      "grad_norm": 0.8153560725549933,
      "learning_rate": 4.888762412236236e-10,
      "loss": 0.1138,
      "step": 34130
    },
    {
      "epoch": 0.9957115350953964,
      "grad_norm": 0.810661963401519,
      "learning_rate": 4.822922304220656e-10,
      "loss": 0.1305,
      "step": 34131
    },
    {
      "epoch": 0.99574070832604,
      "grad_norm": 0.8503076546543817,
      "learning_rate": 4.757528541210476e-10,
      "loss": 0.1035,
      "step": 34132
    },
    {
      "epoch": 0.9957698815566836,
      "grad_norm": 0.898054712318861,
      "learning_rate": 4.692581123788564e-10,
      "loss": 0.1124,
      "step": 34133
    },
    {
      "epoch": 0.9957990547873271,
      "grad_norm": 0.8063388173715647,
      "learning_rate": 4.628080052537787e-10,
      "loss": 0.1094,
      "step": 34134
    },
    {
      "epoch": 0.9958282280179707,
      "grad_norm": 0.928304486485139,
      "learning_rate": 4.5640253280299084e-10,
      "loss": 0.1102,
      "step": 34135
    },
    {
      "epoch": 0.9958574012486143,
      "grad_norm": 0.7833583767426001,
      "learning_rate": 4.500416950842246e-10,
      "loss": 0.1039,
      "step": 34136
    },
    {
      "epoch": 0.9958865744792579,
      "grad_norm": 0.7515909937925331,
      "learning_rate": 4.437254921541012e-10,
      "loss": 0.1062,
      "step": 34137
    },
    {
      "epoch": 0.9959157477099014,
      "grad_norm": 0.7038696971438514,
      "learning_rate": 4.3745392406868705e-10,
      "loss": 0.0882,
      "step": 34138
    },
    {
      "epoch": 0.995944920940545,
      "grad_norm": 0.8055358729293154,
      "learning_rate": 4.312269908840483e-10,
      "loss": 0.0885,
      "step": 34139
    },
    {
      "epoch": 0.9959740941711885,
      "grad_norm": 0.6796352115610131,
      "learning_rate": 4.2504469265625124e-10,
      "loss": 0.0983,
      "step": 34140
    },
    {
      "epoch": 0.9960032674018321,
      "grad_norm": 0.7239870930231954,
      "learning_rate": 4.1890702944025195e-10,
      "loss": 0.1019,
      "step": 34141
    },
    {
      "epoch": 0.9960324406324756,
      "grad_norm": 0.9938002737382183,
      "learning_rate": 4.1281400129045136e-10,
      "loss": 0.1002,
      "step": 34142
    },
    {
      "epoch": 0.9960616138631192,
      "grad_norm": 0.7459366688459611,
      "learning_rate": 4.0676560826180544e-10,
      "loss": 0.0918,
      "step": 34143
    },
    {
      "epoch": 0.9960907870937628,
      "grad_norm": 0.7463550455355336,
      "learning_rate": 4.0076185040760497e-10,
      "loss": 0.1104,
      "step": 34144
    },
    {
      "epoch": 0.9961199603244063,
      "grad_norm": 0.7699426460408898,
      "learning_rate": 3.948027277822508e-10,
      "loss": 0.1001,
      "step": 34145
    },
    {
      "epoch": 0.9961491335550499,
      "grad_norm": 0.7900420373373584,
      "learning_rate": 3.888882404384786e-10,
      "loss": 0.1078,
      "step": 34146
    },
    {
      "epoch": 0.9961783067856934,
      "grad_norm": 1.1382369025979637,
      "learning_rate": 3.8301838842957905e-10,
      "loss": 0.1013,
      "step": 34147
    },
    {
      "epoch": 0.996207480016337,
      "grad_norm": 1.3269699975868081,
      "learning_rate": 3.771931718071775e-10,
      "loss": 0.1096,
      "step": 34148
    },
    {
      "epoch": 0.9962366532469805,
      "grad_norm": 0.7587070204404274,
      "learning_rate": 3.714125906234545e-10,
      "loss": 0.1235,
      "step": 34149
    },
    {
      "epoch": 0.9962658264776242,
      "grad_norm": 0.8446558768476956,
      "learning_rate": 3.656766449305904e-10,
      "loss": 0.1095,
      "step": 34150
    },
    {
      "epoch": 0.9962949997082677,
      "grad_norm": 0.8949934761445224,
      "learning_rate": 3.599853347796556e-10,
      "loss": 0.1127,
      "step": 34151
    },
    {
      "epoch": 0.9963241729389113,
      "grad_norm": 1.0200940014131596,
      "learning_rate": 3.5433866022116516e-10,
      "loss": 0.1294,
      "step": 34152
    },
    {
      "epoch": 0.9963533461695548,
      "grad_norm": 0.90758597346904,
      "learning_rate": 3.4873662130563425e-10,
      "loss": 0.1054,
      "step": 34153
    },
    {
      "epoch": 0.9963825194001984,
      "grad_norm": 0.7143673488871289,
      "learning_rate": 3.4317921808302293e-10,
      "loss": 0.1097,
      "step": 34154
    },
    {
      "epoch": 0.996411692630842,
      "grad_norm": 0.6712702705515735,
      "learning_rate": 3.3766645060273605e-10,
      "loss": 0.1004,
      "step": 34155
    },
    {
      "epoch": 0.9964408658614855,
      "grad_norm": 0.9627409957114645,
      "learning_rate": 3.3219831891417863e-10,
      "loss": 0.1132,
      "step": 34156
    },
    {
      "epoch": 0.9964700390921291,
      "grad_norm": 0.8457140099250648,
      "learning_rate": 3.2677482306675554e-10,
      "loss": 0.1004,
      "step": 34157
    },
    {
      "epoch": 0.9964992123227726,
      "grad_norm": 0.6630682088470647,
      "learning_rate": 3.213959631082064e-10,
      "loss": 0.1104,
      "step": 34158
    },
    {
      "epoch": 0.9965283855534162,
      "grad_norm": 1.7996736791987658,
      "learning_rate": 3.160617390862708e-10,
      "loss": 0.1126,
      "step": 34159
    },
    {
      "epoch": 0.9965575587840597,
      "grad_norm": 0.9877985269285867,
      "learning_rate": 3.107721510497985e-10,
      "loss": 0.092,
      "step": 34160
    },
    {
      "epoch": 0.9965867320147033,
      "grad_norm": 0.6839949995421452,
      "learning_rate": 3.055271990448638e-10,
      "loss": 0.0887,
      "step": 34161
    },
    {
      "epoch": 0.9966159052453468,
      "grad_norm": 0.7442010323329299,
      "learning_rate": 3.003268831180961e-10,
      "loss": 0.1097,
      "step": 34162
    },
    {
      "epoch": 0.9966450784759904,
      "grad_norm": 0.8848179072753259,
      "learning_rate": 2.951712033172349e-10,
      "loss": 0.1165,
      "step": 34163
    },
    {
      "epoch": 0.996674251706634,
      "grad_norm": 0.7790254380643167,
      "learning_rate": 2.900601596872443e-10,
      "loss": 0.0826,
      "step": 34164
    },
    {
      "epoch": 0.9967034249372776,
      "grad_norm": 0.7864299898033266,
      "learning_rate": 2.8499375227419854e-10,
      "loss": 0.1104,
      "step": 34165
    },
    {
      "epoch": 0.9967325981679211,
      "grad_norm": 0.8027462154254484,
      "learning_rate": 2.7997198112306167e-10,
      "loss": 0.1231,
      "step": 34166
    },
    {
      "epoch": 0.9967617713985647,
      "grad_norm": 1.0899454076932567,
      "learning_rate": 2.749948462787977e-10,
      "loss": 0.1233,
      "step": 34167
    },
    {
      "epoch": 0.9967909446292083,
      "grad_norm": 0.8119848674925277,
      "learning_rate": 2.700623477858155e-10,
      "loss": 0.1195,
      "step": 34168
    },
    {
      "epoch": 0.9968201178598518,
      "grad_norm": 0.8075655322922792,
      "learning_rate": 2.651744856885241e-10,
      "loss": 0.1332,
      "step": 34169
    },
    {
      "epoch": 0.9968492910904954,
      "grad_norm": 0.6774971443211152,
      "learning_rate": 2.6033126003022213e-10,
      "loss": 0.1109,
      "step": 34170
    },
    {
      "epoch": 0.9968784643211389,
      "grad_norm": 0.8483895412969615,
      "learning_rate": 2.555326708536532e-10,
      "loss": 0.1066,
      "step": 34171
    },
    {
      "epoch": 0.9969076375517825,
      "grad_norm": 0.8093775376395698,
      "learning_rate": 2.5077871820267107e-10,
      "loss": 0.1264,
      "step": 34172
    },
    {
      "epoch": 0.996936810782426,
      "grad_norm": 0.8841579981638696,
      "learning_rate": 2.460694021189092e-10,
      "loss": 0.1116,
      "step": 34173
    },
    {
      "epoch": 0.9969659840130696,
      "grad_norm": 0.8846754073052753,
      "learning_rate": 2.414047226445559e-10,
      "loss": 0.1044,
      "step": 34174
    },
    {
      "epoch": 0.9969951572437131,
      "grad_norm": 0.9412410205833899,
      "learning_rate": 2.3678467982179986e-10,
      "loss": 0.1066,
      "step": 34175
    },
    {
      "epoch": 0.9970243304743567,
      "grad_norm": 0.8271434007851702,
      "learning_rate": 2.3220927369116408e-10,
      "loss": 0.1208,
      "step": 34176
    },
    {
      "epoch": 0.9970535037050003,
      "grad_norm": 0.8512201912170679,
      "learning_rate": 2.2767850429372684e-10,
      "loss": 0.1079,
      "step": 34177
    },
    {
      "epoch": 0.9970826769356439,
      "grad_norm": 0.7671519287134442,
      "learning_rate": 2.231923716705664e-10,
      "loss": 0.1049,
      "step": 34178
    },
    {
      "epoch": 0.9971118501662875,
      "grad_norm": 0.8886894100532023,
      "learning_rate": 2.1875087586054056e-10,
      "loss": 0.1123,
      "step": 34179
    },
    {
      "epoch": 0.997141023396931,
      "grad_norm": 0.8438997153914551,
      "learning_rate": 2.1435401690472756e-10,
      "loss": 0.1188,
      "step": 34180
    },
    {
      "epoch": 0.9971701966275746,
      "grad_norm": 0.8500831871218869,
      "learning_rate": 2.1000179484087501e-10,
      "loss": 0.1054,
      "step": 34181
    },
    {
      "epoch": 0.9971993698582181,
      "grad_norm": 0.7365222705634256,
      "learning_rate": 2.0569420970895092e-10,
      "loss": 0.0879,
      "step": 34182
    },
    {
      "epoch": 0.9972285430888617,
      "grad_norm": 0.9923526276086613,
      "learning_rate": 2.01431261547258e-10,
      "loss": 0.1208,
      "step": 34183
    },
    {
      "epoch": 0.9972577163195052,
      "grad_norm": 0.8316420117386609,
      "learning_rate": 1.9721295039298872e-10,
      "loss": 0.1179,
      "step": 34184
    },
    {
      "epoch": 0.9972868895501488,
      "grad_norm": 1.4848637927713826,
      "learning_rate": 1.9303927628500085e-10,
      "loss": 0.1157,
      "step": 34185
    },
    {
      "epoch": 0.9973160627807923,
      "grad_norm": 0.7938500757021467,
      "learning_rate": 1.889102392599318e-10,
      "loss": 0.109,
      "step": 34186
    },
    {
      "epoch": 0.9973452360114359,
      "grad_norm": 0.8120227142644452,
      "learning_rate": 1.848258393544189e-10,
      "loss": 0.1175,
      "step": 34187
    },
    {
      "epoch": 0.9973744092420794,
      "grad_norm": 0.8087073132513186,
      "learning_rate": 1.8078607660565463e-10,
      "loss": 0.1075,
      "step": 34188
    },
    {
      "epoch": 0.997403582472723,
      "grad_norm": 0.7444642727653622,
      "learning_rate": 1.767909510491661e-10,
      "loss": 0.1328,
      "step": 34189
    },
    {
      "epoch": 0.9974327557033665,
      "grad_norm": 0.8060351901233608,
      "learning_rate": 1.728404627204805e-10,
      "loss": 0.1091,
      "step": 34190
    },
    {
      "epoch": 0.9974619289340102,
      "grad_norm": 0.7364525489511471,
      "learning_rate": 1.6893461165512494e-10,
      "loss": 0.1106,
      "step": 34191
    },
    {
      "epoch": 0.9974911021646538,
      "grad_norm": 0.9649021370790292,
      "learning_rate": 1.6507339788807141e-10,
      "loss": 0.1192,
      "step": 34192
    },
    {
      "epoch": 0.9975202753952973,
      "grad_norm": 0.8226085288924465,
      "learning_rate": 1.6125682145373688e-10,
      "loss": 0.1243,
      "step": 34193
    },
    {
      "epoch": 0.9975494486259409,
      "grad_norm": 0.7496164783485849,
      "learning_rate": 1.5748488238653824e-10,
      "loss": 0.0993,
      "step": 34194
    },
    {
      "epoch": 0.9975786218565844,
      "grad_norm": 0.9370912822704288,
      "learning_rate": 1.5375758071922707e-10,
      "loss": 0.1268,
      "step": 34195
    },
    {
      "epoch": 0.997607795087228,
      "grad_norm": 0.7862567085951591,
      "learning_rate": 1.500749164856652e-10,
      "loss": 0.1092,
      "step": 34196
    },
    {
      "epoch": 0.9976369683178715,
      "grad_norm": 0.7339864850693282,
      "learning_rate": 1.4643688971860416e-10,
      "loss": 0.0988,
      "step": 34197
    },
    {
      "epoch": 0.9976661415485151,
      "grad_norm": 0.750307678578148,
      "learning_rate": 1.4284350045079555e-10,
      "loss": 0.104,
      "step": 34198
    },
    {
      "epoch": 0.9976953147791586,
      "grad_norm": 0.7161423311215511,
      "learning_rate": 1.3929474871388072e-10,
      "loss": 0.0992,
      "step": 34199
    },
    {
      "epoch": 0.9977244880098022,
      "grad_norm": 0.9210874526488155,
      "learning_rate": 1.3579063454005614e-10,
      "loss": 0.1004,
      "step": 34200
    },
    {
      "epoch": 0.9977536612404457,
      "grad_norm": 0.7399212394152471,
      "learning_rate": 1.3233115796040807e-10,
      "loss": 0.116,
      "step": 34201
    },
    {
      "epoch": 0.9977828344710893,
      "grad_norm": 0.8879265071603267,
      "learning_rate": 1.2891631900546764e-10,
      "loss": 0.1083,
      "step": 34202
    },
    {
      "epoch": 0.9978120077017328,
      "grad_norm": 0.8688691488447081,
      "learning_rate": 1.2554611770632107e-10,
      "loss": 0.0913,
      "step": 34203
    },
    {
      "epoch": 0.9978411809323765,
      "grad_norm": 0.8755796021913493,
      "learning_rate": 1.2222055409238932e-10,
      "loss": 0.1039,
      "step": 34204
    },
    {
      "epoch": 0.9978703541630201,
      "grad_norm": 0.7932573367705784,
      "learning_rate": 1.1893962819364836e-10,
      "loss": 0.1133,
      "step": 34205
    },
    {
      "epoch": 0.9978995273936636,
      "grad_norm": 0.8643948103149971,
      "learning_rate": 1.1570334003951911e-10,
      "loss": 0.132,
      "step": 34206
    },
    {
      "epoch": 0.9979287006243072,
      "grad_norm": 0.8684557441525104,
      "learning_rate": 1.1251168965886738e-10,
      "loss": 0.1012,
      "step": 34207
    },
    {
      "epoch": 0.9979578738549507,
      "grad_norm": 0.7384427021177166,
      "learning_rate": 1.0936467708055898e-10,
      "loss": 0.1112,
      "step": 34208
    },
    {
      "epoch": 0.9979870470855943,
      "grad_norm": 0.7491017461603411,
      "learning_rate": 1.0626230233179436e-10,
      "loss": 0.1113,
      "step": 34209
    },
    {
      "epoch": 0.9980162203162378,
      "grad_norm": 0.8501798348215374,
      "learning_rate": 1.032045654408842e-10,
      "loss": 0.112,
      "step": 34210
    },
    {
      "epoch": 0.9980453935468814,
      "grad_norm": 0.708422280305766,
      "learning_rate": 1.0019146643502898e-10,
      "loss": 0.1288,
      "step": 34211
    },
    {
      "epoch": 0.9980745667775249,
      "grad_norm": 0.892736371624153,
      "learning_rate": 9.722300534087403e-11,
      "loss": 0.11,
      "step": 34212
    },
    {
      "epoch": 0.9981037400081685,
      "grad_norm": 0.7949983304967112,
      "learning_rate": 9.429918218561984e-11,
      "loss": 0.1268,
      "step": 34213
    },
    {
      "epoch": 0.998132913238812,
      "grad_norm": 0.9191063415013071,
      "learning_rate": 9.141999699424641e-11,
      "loss": 0.1047,
      "step": 34214
    },
    {
      "epoch": 0.9981620864694556,
      "grad_norm": 0.7401892187233303,
      "learning_rate": 8.858544979339912e-11,
      "loss": 0.116,
      "step": 34215
    },
    {
      "epoch": 0.9981912597000991,
      "grad_norm": 0.7011227202244126,
      "learning_rate": 8.579554060805795e-11,
      "loss": 0.1206,
      "step": 34216
    },
    {
      "epoch": 0.9982204329307427,
      "grad_norm": 0.8806883928171629,
      "learning_rate": 8.305026946320294e-11,
      "loss": 0.1183,
      "step": 34217
    },
    {
      "epoch": 0.9982496061613864,
      "grad_norm": 0.9655641910202492,
      "learning_rate": 8.034963638325898e-11,
      "loss": 0.1123,
      "step": 34218
    },
    {
      "epoch": 0.9982787793920299,
      "grad_norm": 0.5808085748149094,
      "learning_rate": 7.769364139265101e-11,
      "loss": 0.096,
      "step": 34219
    },
    {
      "epoch": 0.9983079526226735,
      "grad_norm": 0.8818674122350973,
      "learning_rate": 7.508228451469369e-11,
      "loss": 0.1306,
      "step": 34220
    },
    {
      "epoch": 0.998337125853317,
      "grad_norm": 1.0098508528559604,
      "learning_rate": 7.251556577270169e-11,
      "loss": 0.1023,
      "step": 34221
    },
    {
      "epoch": 0.9983662990839606,
      "grad_norm": 0.8681199510423564,
      "learning_rate": 6.999348518943461e-11,
      "loss": 0.117,
      "step": 34222
    },
    {
      "epoch": 0.9983954723146041,
      "grad_norm": 0.7221477738461264,
      "learning_rate": 6.751604278820711e-11,
      "loss": 0.1011,
      "step": 34223
    },
    {
      "epoch": 0.9984246455452477,
      "grad_norm": 0.7846265418677336,
      "learning_rate": 6.508323859011345e-11,
      "loss": 0.1131,
      "step": 34224
    },
    {
      "epoch": 0.9984538187758912,
      "grad_norm": 0.8736913819740687,
      "learning_rate": 6.269507261791318e-11,
      "loss": 0.0924,
      "step": 34225
    },
    {
      "epoch": 0.9984829920065348,
      "grad_norm": 0.7938951168846574,
      "learning_rate": 6.035154489214546e-11,
      "loss": 0.1098,
      "step": 34226
    },
    {
      "epoch": 0.9985121652371783,
      "grad_norm": 0.7860922547822048,
      "learning_rate": 5.805265543390448e-11,
      "loss": 0.1054,
      "step": 34227
    },
    {
      "epoch": 0.9985413384678219,
      "grad_norm": 0.9894812049323295,
      "learning_rate": 5.57984042637294e-11,
      "loss": 0.1172,
      "step": 34228
    },
    {
      "epoch": 0.9985705116984654,
      "grad_norm": 0.9294474570565269,
      "learning_rate": 5.3588791402159335e-11,
      "loss": 0.1061,
      "step": 34229
    },
    {
      "epoch": 0.998599684929109,
      "grad_norm": 0.9494862180189568,
      "learning_rate": 5.142381686806808e-11,
      "loss": 0.1169,
      "step": 34230
    },
    {
      "epoch": 0.9986288581597527,
      "grad_norm": 0.7637484304705885,
      "learning_rate": 4.930348068143964e-11,
      "loss": 0.1089,
      "step": 34231
    },
    {
      "epoch": 0.9986580313903962,
      "grad_norm": 0.8862950308683506,
      "learning_rate": 4.722778286114782e-11,
      "loss": 0.1041,
      "step": 34232
    },
    {
      "epoch": 0.9986872046210398,
      "grad_norm": 0.7745062735066747,
      "learning_rate": 4.519672342551129e-11,
      "loss": 0.099,
      "step": 34233
    },
    {
      "epoch": 0.9987163778516833,
      "grad_norm": 0.7984060139393339,
      "learning_rate": 4.321030239340385e-11,
      "loss": 0.111,
      "step": 34234
    },
    {
      "epoch": 0.9987455510823269,
      "grad_norm": 0.85411201766429,
      "learning_rate": 4.1268519780923724e-11,
      "loss": 0.1055,
      "step": 34235
    },
    {
      "epoch": 0.9987747243129704,
      "grad_norm": 0.7978049144042352,
      "learning_rate": 3.9371375606944706e-11,
      "loss": 0.0955,
      "step": 34236
    },
    {
      "epoch": 0.998803897543614,
      "grad_norm": 0.854740245108905,
      "learning_rate": 3.751886988812015e-11,
      "loss": 0.1158,
      "step": 34237
    },
    {
      "epoch": 0.9988330707742575,
      "grad_norm": 0.6876753949975288,
      "learning_rate": 3.571100264054827e-11,
      "loss": 0.0801,
      "step": 34238
    },
    {
      "epoch": 0.9988622440049011,
      "grad_norm": 0.8745558354823828,
      "learning_rate": 3.3947773880327326e-11,
      "loss": 0.109,
      "step": 34239
    },
    {
      "epoch": 0.9988914172355446,
      "grad_norm": 0.8988813743641949,
      "learning_rate": 3.222918362355554e-11,
      "loss": 0.1088,
      "step": 34240
    },
    {
      "epoch": 0.9989205904661882,
      "grad_norm": 0.7468545358649729,
      "learning_rate": 3.055523188522091e-11,
      "loss": 0.1209,
      "step": 34241
    },
    {
      "epoch": 0.9989497636968317,
      "grad_norm": 0.9124898635304158,
      "learning_rate": 2.8925918680866582e-11,
      "loss": 0.1367,
      "step": 34242
    },
    {
      "epoch": 0.9989789369274753,
      "grad_norm": 0.8657235373221649,
      "learning_rate": 2.7341244024370328e-11,
      "loss": 0.1143,
      "step": 34243
    },
    {
      "epoch": 0.9990081101581189,
      "grad_norm": 0.6457685587430032,
      "learning_rate": 2.5801207930720163e-11,
      "loss": 0.0945,
      "step": 34244
    },
    {
      "epoch": 0.9990372833887625,
      "grad_norm": 0.9287976316252068,
      "learning_rate": 2.430581041268365e-11,
      "loss": 0.0939,
      "step": 34245
    },
    {
      "epoch": 0.9990664566194061,
      "grad_norm": 0.7213050276263969,
      "learning_rate": 2.2855051484138578e-11,
      "loss": 0.0892,
      "step": 34246
    },
    {
      "epoch": 0.9990956298500496,
      "grad_norm": 0.8124233881580213,
      "learning_rate": 2.1448931157852515e-11,
      "loss": 0.1103,
      "step": 34247
    },
    {
      "epoch": 0.9991248030806932,
      "grad_norm": 0.7611849642952453,
      "learning_rate": 2.0087449446593022e-11,
      "loss": 0.1106,
      "step": 34248
    },
    {
      "epoch": 0.9991539763113367,
      "grad_norm": 0.8055971271842521,
      "learning_rate": 1.877060636201744e-11,
      "loss": 0.0997,
      "step": 34249
    },
    {
      "epoch": 0.9991831495419803,
      "grad_norm": 0.8440487769792574,
      "learning_rate": 1.7498401916893338e-11,
      "loss": 0.1072,
      "step": 34250
    },
    {
      "epoch": 0.9992123227726238,
      "grad_norm": 0.8052304979021084,
      "learning_rate": 1.627083612176783e-11,
      "loss": 0.1079,
      "step": 34251
    },
    {
      "epoch": 0.9992414960032674,
      "grad_norm": 0.8352534142633674,
      "learning_rate": 1.508790898774315e-11,
      "loss": 0.1297,
      "step": 34252
    },
    {
      "epoch": 0.999270669233911,
      "grad_norm": 0.9629333553281815,
      "learning_rate": 1.3949620525366414e-11,
      "loss": 0.1061,
      "step": 34253
    },
    {
      "epoch": 0.9992998424645545,
      "grad_norm": 0.794338304710376,
      "learning_rate": 1.2855970744629632e-11,
      "loss": 0.1075,
      "step": 34254
    },
    {
      "epoch": 0.999329015695198,
      "grad_norm": 0.6942865165536916,
      "learning_rate": 1.1806959655524807e-11,
      "loss": 0.1117,
      "step": 34255
    },
    {
      "epoch": 0.9993581889258416,
      "grad_norm": 0.7866379169387789,
      "learning_rate": 1.0802587268043951e-11,
      "loss": 0.1234,
      "step": 34256
    },
    {
      "epoch": 0.9993873621564852,
      "grad_norm": 1.0778192537085203,
      "learning_rate": 9.842853589958623e-12,
      "loss": 0.1085,
      "step": 34257
    },
    {
      "epoch": 0.9994165353871288,
      "grad_norm": 0.8202622444032077,
      "learning_rate": 8.927758630705719e-12,
      "loss": 0.113,
      "step": 34258
    },
    {
      "epoch": 0.9994457086177724,
      "grad_norm": 0.8612818136871452,
      "learning_rate": 8.057302398056799e-12,
      "loss": 0.138,
      "step": 34259
    },
    {
      "epoch": 0.9994748818484159,
      "grad_norm": 0.7891601589283768,
      "learning_rate": 7.2314848997834255e-12,
      "loss": 0.1096,
      "step": 34260
    },
    {
      "epoch": 0.9995040550790595,
      "grad_norm": 0.9345282474650779,
      "learning_rate": 6.450306143102047e-12,
      "loss": 0.1197,
      "step": 34261
    },
    {
      "epoch": 0.999533228309703,
      "grad_norm": 0.7278489069436884,
      "learning_rate": 5.713766135784227e-12,
      "loss": 0.117,
      "step": 34262
    },
    {
      "epoch": 0.9995624015403466,
      "grad_norm": 0.8756420881385565,
      "learning_rate": 5.021864883381078e-12,
      "loss": 0.1264,
      "step": 34263
    },
    {
      "epoch": 0.9995915747709901,
      "grad_norm": 0.8340731296432814,
      "learning_rate": 4.374602393109051e-12,
      "loss": 0.0869,
      "step": 34264
    },
    {
      "epoch": 0.9996207480016337,
      "grad_norm": 0.8608276081109758,
      "learning_rate": 3.771978669409038e-12,
      "loss": 0.1258,
      "step": 34265
    },
    {
      "epoch": 0.9996499212322772,
      "grad_norm": 0.8681531460238366,
      "learning_rate": 3.2139937189423765e-12,
      "loss": 0.1148,
      "step": 34266
    },
    {
      "epoch": 0.9996790944629208,
      "grad_norm": 0.8631633579570969,
      "learning_rate": 2.7006475461499593e-12,
      "loss": 0.1019,
      "step": 34267
    },
    {
      "epoch": 0.9997082676935644,
      "grad_norm": 0.9156021269009654,
      "learning_rate": 2.23194015602779e-12,
      "loss": 0.0848,
      "step": 34268
    },
    {
      "epoch": 0.9997374409242079,
      "grad_norm": 0.7409696551956314,
      "learning_rate": 1.8078715519065371e-12,
      "loss": 0.1001,
      "step": 34269
    },
    {
      "epoch": 0.9997666141548515,
      "grad_norm": 0.9556775396839313,
      "learning_rate": 1.4284417382270933e-12,
      "loss": 0.1064,
      "step": 34270
    },
    {
      "epoch": 0.999795787385495,
      "grad_norm": 0.9079732754228492,
      "learning_rate": 1.0936507177650158e-12,
      "loss": 0.1223,
      "step": 34271
    },
    {
      "epoch": 0.9998249606161387,
      "grad_norm": 0.8537987482525036,
      "learning_rate": 8.034984944060853e-13,
      "loss": 0.1044,
      "step": 34272
    },
    {
      "epoch": 0.9998541338467822,
      "grad_norm": 0.8281308361387377,
      "learning_rate": 5.579850698156363e-13,
      "loss": 0.1137,
      "step": 34273
    },
    {
      "epoch": 0.9998833070774258,
      "grad_norm": 0.7654669263751744,
      "learning_rate": 3.571104473243381e-13,
      "loss": 0.1015,
      "step": 34274
    },
    {
      "epoch": 0.9999124803080693,
      "grad_norm": 0.8681306608926576,
      "learning_rate": 2.008746274873019e-13,
      "loss": 0.0917,
      "step": 34275
    },
    {
      "epoch": 0.9999416535387129,
      "grad_norm": 0.7804431762812664,
      "learning_rate": 8.927761252497391e-14,
      "loss": 0.1167,
      "step": 34276
    },
    {
      "epoch": 0.9999708267693564,
      "grad_norm": 1.0700611561542164,
      "learning_rate": 2.23194029924656e-14,
      "loss": 0.1194,
      "step": 34277
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.7014689983147184,
      "learning_rate": 0.0,
      "loss": 0.1066,
      "step": 34278
    },
    {
      "epoch": 1.0,
      "step": 34278,
      "total_flos": 3.423170722804531e+16,
      "train_loss": 0.15108584606978395,
      "train_runtime": 135140.0077,
      "train_samples_per_second": 32.466,
      "train_steps_per_second": 0.254
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 34278,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 2000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.423170722804531e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}